<a href="https://colab.research.google.com/github/bishalmaji/CourseMaster/blob/master/index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import librosa
import numpy as np  # alias was missing (SyntaxError); `np` is used by the aggregation cell below

# Define the root directory.
root_dir = '/content'  # This is the root directory in Google Colab.

# Initialize lists to store extracted features and category labels.
# Each list gets one entry per audio file, in the same order.
chroma_list = []
spectral_centroid_list = []
spectral_contrast_list = []
mfcc_list = []
zero_crossing_rate_list = []
cry_category_list = []

# Define a mapping from folder names to category labels.
category_mapping = {
    'hungry': 1,
    'burping': 2,
    'belly_pain': 3,
    'discomfort': 4,
    'tired': 5
}

# Iterate through each category folder (expected at <root_dir>/audios/<category_name>).
for category_name, category_label in category_mapping.items():
    category_dir = os.path.join(root_dir, 'audios', category_name)

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore every seeded train/test split below) reproducible.
    for audio_file in sorted(os.listdir(category_dir)):
        audio_path = os.path.join(category_dir, audio_file)

        # Skip anything that is not a regular file (e.g. nested folders),
        # which librosa.load cannot open.
        if not os.path.isfile(audio_path):
            continue

        # Extract audio features: MFCC, chroma, spectral centroid,
        # spectral contrast and zero-crossing rate.
        audio_data, sample_rate = librosa.load(audio_path)
        mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sample_rate)
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sample_rate)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y=audio_data)

        # Append the extracted features and category label to their respective lists.
        chroma_list.append(chroma)
        spectral_centroid_list.append(spectral_centroid)
        spectral_contrast_list.append(spectral_contrast)
        mfcc_list.append(mfcc)
        zero_crossing_rate_list.append(zero_crossing_rate)
        cry_category_list.append(category_label)

# Create a Pandas DataFrame with separate columns for each feature.
# Every feature cell holds the whole per-file feature array as one object.
data = pd.DataFrame({
    'Chroma': chroma_list,
    'SpectralCentroid': spectral_centroid_list,
    'SpectralContrast': spectral_contrast_list,
    'MFCC': mfcc_list,
    'ZeroCrossingRate': zero_crossing_rate_list,
    'CryCategory': cry_category_list
})

# Save the dataset to a CSV file.
# NOTE(review): the feature cells are arrays, so they are written as their
# string form; large arrays are presumably abbreviated with "..." by numpy,
# which would make this file a lossy snapshot - confirm before relying on it.
data.to_csv('/content/baby_cry_dataset.csv', index=False)


In [None]:
# Flatten nested lists in the DataFrame
def _flatten(nested):
    """Concatenate the rows of a nested sequence into a single flat list."""
    flat = []
    for row in nested:
        flat.extend(row)
    return flat


# Replace every feature cell (a sequence of rows) with one flat list of values.
for feature_column in ['Chroma', 'SpectralCentroid', 'SpectralContrast', 'MFCC', 'ZeroCrossingRate']:
    data[feature_column] = data[feature_column].apply(_flatten)

# Save the dataset to a CSV file.
data.to_csv('/content/baby_cry_dataset.csv', index=False)

**Aggregating with scaled data**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Work on a copy so the flattened `data` frame stays available unchanged.
scaled_data = data.copy()

# Collapse each per-file feature sequence to a single number (its mean).
# This must happen BEFORE scaling: the sequence columns hold Python lists
# (object dtype), so a numeric-dtype selection would skip all of them and
# no scaling would take place.
sequence_columns = ['MFCC', 'Chroma', 'SpectralCentroid', 'SpectralContrast', 'ZeroCrossingRate']
for column in sequence_columns:
    scaled_data[f'{column}_Mean'] = scaled_data[column].apply(np.mean)

# Drop the original sequence columns
scaled_data = scaled_data.drop(columns=sequence_columns)

# Apply Min-Max scaling to the aggregated feature columns. The target
# column 'CryCategory' is left untouched. Each column is scaled independently.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# splits made further down - consider fitting on the training rows only.
mean_columns = [f'{column}_Mean' for column in sequence_columns]
scaler = MinMaxScaler()
scaled_data[mean_columns] = scaler.fit_transform(scaled_data[mean_columns])

# Persist the aggregated, scaled table.
scaled_data.to_csv('/content/scaled_baby_cry_dataset.csv', index=False)



**Feature Selection**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Target is the cry category; every remaining column is a candidate feature.
y = scaled_data['CryCategory']
X = scaled_data.drop(columns=['CryCategory'])

# Fit a seeded random forest and read off its per-feature importances.
clf = RandomForestClassifier(random_state=42).fit(X, y)
feature_importances = clf.feature_importances_

# Pair each feature name with its importance, most important first.
feature_importance_df = (
    pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

# Keep (at most) the ten highest-ranked feature names.
selected_features = feature_importance_df.head(10)['Feature'].tolist()



In [None]:
# Show the selected feature names, ordered from most to least important.
print(selected_features)

['SpectralCentroid_Mean', 'ZeroCrossingRate_Mean', 'MFCC_Mean', 'Chroma_Mean', 'SpectralContrast_Mean']


**Building the model**

---

**1. Decision Tree**

In [None]:
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Split the data into features (X) and the target (y)
selected_f = ['SpectralContrast_Mean', 'SpectralCentroid_Mean','Chroma_Mean']
X = scaled_data[selected_f]  # Use the selected features
y = scaled_data['CryCategory']  # Your target variable

# Split the data into a training set (80%) and a testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree classifier.
# random_state pins the tree's internal randomness so the reported
# accuracy is the same on every run, like the other seeded models here.
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Make predictions on the test data
y_pred = decision_tree.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

**2. Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Feature subset and target used by the forest.
selected_f = ['ZeroCrossingRate_Mean', 'SpectralCentroid_Mean','SpectralContrast_Mean']
y = scaled_data['CryCategory']
X = scaled_data[selected_f]

# Seeded 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the seeded forest and fit it on the training portion in one step.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out portion.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


**3. SVM**

In [None]:
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Feature subset for the SVM. X is built from `selected_f`, the list
# defined right here, rather than from `selected_features` left behind
# by the feature-selection cell.
selected_f = ['SpectralCentroid_Mean']
X = scaled_data[selected_f]  # Use the selected features
y = scaled_data['CryCategory']  # Your target variable

# Split the data into a training set (80%) and a testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm_classifier = SVC(random_state=42)

# Train the model on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy and show the raw predictions, which makes it easy to
# spot a model that predicts only a single class.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(f'Predicted class: {y_pred}')




Accuracy: 0.80
Predicted class: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


**Combined model**

In [None]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Build the feature matrix and the split explicitly instead of reusing
# whatever X_train / X_test the previously executed cell left in memory.
X = scaled_data[selected_features]  # features ranked by the feature-selection step
y = scaled_data['CryCategory']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create base models (both seeded so the result is repeatable)
decision_tree = DecisionTreeClassifier(random_state=42)
random_forest = RandomForestClassifier(random_state=42)

# Create a voting classifier; 'soft' voting averages the predicted
# class probabilities of the base models.
ensemble_model = VotingClassifier(estimators=[('dt', decision_tree), ('rf', random_forest)], voting='soft')

# Train the ensemble model
ensemble_model.fit(X_train, y_train)

# Make predictions
y_pred = ensemble_model.predict(X_test)

# Evaluate the ensemble model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 0.71
