<a href="https://colab.research.google.com/github/anbimasud/Exploratory-Data-Analysis-EDA-of-Diwali-Sales-Data/blob/main/bangla_voice_emmotion_rg_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
#!unzip /content/drive/MyDrive/datasets/bng_voic_emm_thesis.zip -d /content/datasets


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/datasets/bng_voic_emm_thesis/surprise/M_06_SHUKANTO_S_8_SURPRISE_2.wav  
  inflating: /content/datasets/__MACOSX/bng_voic_emm_thesis/surprise/._M_06_SHUKANTO_S_8_SURPRISE_2.wav  
  inflating: /content/datasets/bng_voic_emm_thesis/surprise/03-01-04-02-03-02-04.wav  
  inflating: /content/datasets/__MACOSX/bng_voic_emm_thesis/surprise/._03-01-04-02-03-02-04.wav  
  inflating: /content/datasets/bng_voic_emm_thesis/surprise/03-01-04-02-02-03-33.wav  
  inflating: /content/datasets/__MACOSX/bng_voic_emm_thesis/surprise/._03-01-04-02-02-03-33.wav  
  inflating: /content/datasets/bng_voic_emm_thesis/surprise/03-01-04-02-03-02-10.wav  
  inflating: /content/datasets/__MACOSX/bng_voic_emm_thesis/surprise/._03-01-04-02-03-02-10.wav  
  inflating: /content/datasets/bng_voic_emm_thesis/surprise/03-01-04-02-02-03-27.wav  
  inflating: /content/datasets/__MACOSX/bng_voic_emm_thesis/surprise/._03-01-04-02-02-03-27.

In [10]:
DATA_DIR = "/content/datasets/bng_voic_emm_thesis"
EMOTIONS = ["happy", "sad", "angry", "surprise"]

def load_audio_files(data_dir, emotions):
    records = []
    for emo in emotions:
        folder = os.path.join(data_dir, emo)
        for fname in os.listdir(folder):
            if fname.endswith(".wav"):
                path = os.path.join(folder, fname)
                records.append((path, emo))
    return pd.DataFrame(records, columns=["path", "emotion"])

df = load_audio_files(DATA_DIR, EMOTIONS)
print("Total files:", len(df))
df.head()

Total files: 6352


Unnamed: 0,path,emotion
0,/content/datasets/bng_voic_emm_thesis/happy/03...,happy
1,/content/datasets/bng_voic_emm_thesis/happy/M_...,happy
2,/content/datasets/bng_voic_emm_thesis/happy/F_...,happy
3,/content/datasets/bng_voic_emm_thesis/happy/M_...,happy
4,/content/datasets/bng_voic_emm_thesis/happy/ss...,happy


In [11]:
def extract_features(file_path, sr=10000, n_mfcc=40):
    y, _ = librosa.load(file_path, sr=sr)

    y = np.append(y[0], y[1:] - 0.97 * y[:-1])

    zcr = np.mean(librosa.feature.zero_crossing_rate(y=y))

    rms = np.mean(librosa.feature.rms(y=y))

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta_mean = np.mean(mfcc_delta, axis=1)
    chom_st = librosa.feature.chroma_stft(y=y, sr=sr)
    chom_st_mean = np.mean(chom_st, axis=1)
    spec_cen = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_cen_mean = np.mean(spec_cen, axis=1)
    spec_con = librosa.feature.spectral_contrast(y=y, sr=sr, fmin=50.0, n_bands=4)
    spec_con_mean = np.mean(spec_con, axis=1)
    spec_ban = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spec_ban_mean = np.mean(spec_ban, axis=1)


    feature_vector = np.concatenate([
        [zcr, rms],
        mfcc_mean,
        mfcc_delta_mean,
        chom_st_mean,
        spec_cen_mean,
        spec_con_mean,
        spec_ban_mean,

    ])
    return feature_vector

In [12]:
f0 = extract_features(df.loc[0, "path"], sr=16000)

print("Feature vector length:", len(f0))


Feature vector length: 101


In [13]:
features = []
for idx, row in df.iterrows():
    feat = extract_features(row["path"])
    features.append(feat)
features = np.vstack(features)
print("Features array shape:", features.shape)

Features array shape: (6352, 101)


In [14]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
le = LabelEncoder()
y = le.fit_transform(df["emotion"])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.3, stratify=y, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [16]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

xgb_clf = xgb.XGBClassifier(eval_metric='mlogloss')

param_grid = {
    "learning_rate": [0.05, 0.1, 0.2],
    "max_depth": [3, 4, 5, 6],
    "n_estimators": [100, 200, 300],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0]
}

grid = GridSearchCV(
    estimator=xgb_clf,
    param_grid=param_grid,
    cv=3,
    scoring="accuracy",
    n_jobs=2
)

grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)

best_model = grid.best_estimator_


Best params: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 300, 'subsample': 0.8}


In [17]:
y_pred = best_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Classification Report:
              precision    recall  f1-score   support

       angry       0.84      0.78      0.80       491
       happy       0.74      0.72      0.73       492
         sad       0.78      0.87      0.82       492
    surprise       0.73      0.73      0.73       431

    accuracy                           0.77      1906
   macro avg       0.77      0.77      0.77      1906
weighted avg       0.77      0.77      0.77      1906



In [18]:
importances = best_model.feature_importances_
indices = np.argsort(importances)[::-1]
def ablation_study(model, X_tr, y_tr, X_te, y_te, feature_importances, top_k_list):
    results = {}
    sorted_idx = np.argsort(feature_importances)[::-1]
    for k in top_k_list:
        sel = sorted_idx[:k]
        model.fit(X_tr[:, sel], y_tr)
        score = model.score(X_te[:, sel], y_te)
        results[k] = score
    return results

top_results = ablation_study(best_model, X_train, y_train, X_test, y_test, importances, top_k_list=[5, 10, 20, len(importances)])
print("Ablation study results:", top_results)

Ablation study results: {5: 0.5671563483735572, 10: 0.6998950682056663, 20: 0.7675760755508919, 101: 0.7707240293809025}


In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

# ধরে নিচ্ছি তুমি আগেই features এবং df বানিয়ে ফেলেছো
# features -> তোমার X, df["emotion"] -> তোমার labels

# 🎯 Label encoding
le = LabelEncoder()
y = le.fit_transform(df["emotion"])

# 🔪 Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.3, stratify=y, random_state=42
)

# ⚖️ Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 🔹 1️⃣ Support Vector Machine
svm_clf = SVC(kernel='rbf', C=1, gamma='scale')
svm_clf.fit(X_train, y_train)
svm_pred = svm_clf.predict(X_test)

# 🔹 2️⃣ Random Forest
rf_clf = RandomForestClassifier(n_estimators=200, random_state=42)
rf_clf.fit(X_train, y_train)
rf_pred = rf_clf.predict(X_test)

# 🔹 3️⃣ XGBoost
xgb_clf = xgb.XGBClassifier(eval_metric='mlogloss', learning_rate=0.1, max_depth=3, n_estimators=100)
xgb_clf.fit(X_train, y_train)
xgb_pred = xgb_clf.predict(X_test)

# 🔹 4️⃣ MLP (Neural Network)
mlp_clf = MLPClassifier(hidden_layer_sizes=(128, 64), activation='relu', solver='adam', max_iter=300, random_state=42)
mlp_clf.fit(X_train, y_train)
mlp_pred = mlp_clf.predict(X_test)

# 📊 Model Evaluation
models = {
    "SVM": (svm_pred, svm_clf),
    "Random Forest": (rf_pred, rf_clf),
    "XGBoost": (xgb_pred, xgb_clf),
    "MLP": (mlp_pred, mlp_clf)
}

print("🔰 Model Comparison Results:\n")
for name, (pred, model) in models.items():
    acc = accuracy_score(y_test, pred)
    print(f"{name} Accuracy: {acc:.3f}")
    print(classification_report(y_test, pred, target_names=le.classes_))
    print("-" * 60)


🔰 Model Comparison Results:

SVM Accuracy: 0.699
              precision    recall  f1-score   support

       angry       0.76      0.67      0.71       491
       happy       0.69      0.61      0.64       492
         sad       0.68      0.83      0.75       492
    surprise       0.69      0.68      0.69       431

    accuracy                           0.70      1906
   macro avg       0.70      0.70      0.70      1906
weighted avg       0.70      0.70      0.70      1906

------------------------------------------------------------
Random Forest Accuracy: 0.735
              precision    recall  f1-score   support

       angry       0.81      0.72      0.76       491
       happy       0.73      0.67      0.70       492
         sad       0.67      0.87      0.76       492
    surprise       0.78      0.68      0.72       431

    accuracy                           0.74      1906
   macro avg       0.74      0.73      0.73      1906
weighted avg       0.74      0.74      0.73  