In [None]:
import os
import numpy as np
import librosa
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
#import tensorflow-addons as tfa

# ==== Configuration ====
# Root folders scanned for audio; the name of each immediate subfolder is used
# as the class label for the .wav files it contains.
DATA_DIRS = ["Audio_Song_Actors_01-24", "Audio_Speech_Actors_01-24"]
# Sampling rate (Hz) passed to librosa.load, so every clip is resampled to it.
SAMPLE_RATE = 22050
# Number of MFCC coefficients requested per frame.
N_MFCC = 40
# Number of feature frames kept per clip: longer clips are truncated and
# shorter ones are zero-padded to this length.
# NOTE(review): presumably ~5 s of audio at librosa's default hop length — confirm.
MAX_LEN = 216  # longer padding for full clips

# ==== Feature Extraction ====
def extract_features(file_path):
    """Turn one audio file into a fixed-length frame-by-feature matrix.

    The clip is loaded at SAMPLE_RATE, then MFCC, chroma, zero-crossing-rate
    and RMS features are computed, transposed so that frames run along the
    first axis, cut or zero-padded to exactly MAX_LEN frames, and joined
    column-wise in that order.

    Args:
        file_path: Path of the audio file to load with librosa.

    Returns:
        A 2-D numpy array with MAX_LEN rows (frames) and one column per
        feature value.
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE)

    def fit_length(frames):
        # Truncate long clips, right-pad short ones with zeros along the frame axis.
        n_frames = frames.shape[0]
        if n_frames >= MAX_LEN:
            return frames[:MAX_LEN]
        return np.pad(frames, ((0, MAX_LEN - n_frames), (0, 0)), mode='constant')

    # Column order of the final matrix: MFCC, chroma, ZCR, RMS.
    per_frame_features = [
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=N_MFCC).T,
        librosa.feature.chroma_stft(y=signal, sr=rate).T,
        librosa.feature.zero_crossing_rate(signal).T,
        librosa.feature.rms(y=signal).T,
    ]
    return np.concatenate([fit_length(block) for block in per_frame_features], axis=1)


X, y = [], []

# ==== Dataset Preparation ====
# Every immediate subfolder of a data directory is treated as one class: its
# name becomes the label of each .wav file found directly inside it.
# NOTE(review): if these folders keep a per-actor layout (e.g. "Actor_01"),
# the labels collected here are actor names rather than emotions — confirm the
# directory structure before trusting the class names.
#
# os.listdir returns entries in arbitrary order, so both listings are sorted;
# otherwise the seeded sampling/splitting further down would pick different
# rows on different machines.
for data_dir in DATA_DIRS:
    for emotion_label in sorted(os.listdir(data_dir)):
        subfolder_path = os.path.join(data_dir, emotion_label)
        if not os.path.isdir(subfolder_path):
            continue
        for file in sorted(os.listdir(subfolder_path)):
            if not file.endswith(".wav"):
                continue
            path = os.path.join(subfolder_path, file)
            try:
                features = extract_features(path)
            except Exception as e:
                # Best-effort loading: report the unreadable clip and move on.
                print(f"Error processing {file}: {e}")
                continue
            # Append feature and label together so X and y can never drift apart.
            X.append(features)
            y.append(emotion_label)

# Stack the per-clip matrices into one (n_clips, frames, features) array.
X = np.array(X)

# ==== Balance Dataset ====
# Downsample every class to the size of the rarest class (no replacement).
df = pd.DataFrame({'x': X.tolist(), 'y': y})
min_count = df['y'].value_counts().min()
# GroupBy.sample draws min_count rows from each class and keeps the 'y' column.
# The previous groupby(...).apply(lambda g: g.sample(...)) form is deprecated
# in recent pandas for operating on the grouping column, and it loses 'y'
# once that column is excluded from apply.
balanced_df = df.groupby('y').sample(n=min_count, random_state=42).reset_index(drop=True)

X_balanced = np.stack(balanced_df['x'].values)
y_balanced = balanced_df['y'].values

# Map string labels to integer ids, then to one-hot rows for the softmax head.
le = LabelEncoder()
y_encoded = le.fit_transform(y_balanced)
y_cat = to_categorical(y_encoded)
# le.classes_ can be an object-dtype array, which np.save pickles and a plain
# np.load then refuses to read; store it as fixed-width unicode instead.
np.save("classes.npy", le.classes_.astype(str))

# ==== Train-Validation Split ====
# 80/20 split, stratified so each class keeps the same share in both parts.
X_train, X_val, y_train, y_val = train_test_split(X_balanced, y_cat, test_size=0.2, stratify=y_cat, random_state=42)

# ==== Model Definition ====
# 1-D CNN over the frame axis: three Conv1D -> BatchNorm -> MaxPool -> Dropout
# stages, followed by a two-layer dense head and a softmax over the classes.
model = Sequential()

# (filters, kernel width) for each convolutional stage, in order.
conv_stages = [(64, 5), (128, 3), (256, 3)]
for stage_no, (n_filters, kernel_width) in enumerate(conv_stages):
    # Only the very first layer declares the (frames, features) input shape.
    first_layer_kwargs = {'input_shape': (X_train.shape[1], X_train.shape[2])} if stage_no == 0 else {}
    model.add(Conv1D(n_filters, kernel_width, activation='relu', padding='same', **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(2))
    model.add(Dropout(0.3))

model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
# One output unit per one-hot column of the targets.
model.add(Dense(y_cat.shape[1], activation='softmax'))

optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

model.summary()

# ==== Callbacks ====
# Stop once validation loss has not improved for 8 epochs and roll back to the
# best weights; halve the learning rate after 4 epochs without improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.5, verbose=1)
callbacks = [early_stopping, lr_on_plateau]

# ==== Training ====
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
)

# ==== Save Model ====
# Written in the HDF5 format implied by the .h5 extension.
model.save("trained_model.h5")

# ==== Evaluation ====


In [None]:
from sklearn.metrics import accuracy_score

# Derive the label vectors in this cell: they were previously only defined by
# a later cell, so a fresh "Restart & Run All" failed here with a NameError.
y_true_labels = np.argmax(y_val, axis=1)
y_pred_labels = np.argmax(model.predict(X_val), axis=1)

# List only the classes whose validation accuracy is below 75%.
for i, emotion in enumerate(le.classes_):
    # Boolean mask selecting the validation samples whose true class is i.
    idx = y_true_labels == i
    acc = accuracy_score(y_true_labels[idx], y_pred_labels[idx])
    if acc < 0.75:
        print(f"❌ {emotion}: {acc:.2%} (needs improvement)")


In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Collapse one-hot targets and softmax outputs to integer class ids.
y_true = np.argmax(y_val, axis=1)
val_probabilities = model.predict(X_val)
y_pred = np.argmax(val_probabilities, axis=1)

# Accuracy restricted to the samples of each true class.
print("\n🎯 Emotion-wise Accuracy:")
for class_id, emotion in enumerate(le.classes_):
    in_class = y_true == class_id
    class_acc = accuracy_score(y_true[in_class], y_pred[in_class])
    print(f"{emotion}: {class_acc:.2%} accuracy")

# Aggregate scores over the whole validation set.
overall_acc = accuracy_score(y_true, y_pred)
macro_f1 = f1_score(y_true, y_pred, average='macro')

print(f"\n✅ Overall Accuracy: {overall_acc:.2%}")
print(f"✅ Macro F1 Score: {macro_f1:.2%}")


In [None]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Integer class ids for the validation set: ground truth and model output.
y_true_labels = np.argmax(y_val, axis=1)
y_pred_labels = np.argmax(model.predict(X_val), axis=1)

# Per-class accuracy, one line per emotion in encoder order.
print("\n🎯 Emotion-wise Accuracy:")
for i in range(len(le.classes_)):
    emotion = le.classes_[i]
    idx = y_true_labels == i
    acc = accuracy_score(y_true_labels[idx], y_pred_labels[idx])
    print(f"{emotion}: {acc:.2%} accuracy")

# Summary scores across all validation samples.
macro_f1 = f1_score(y_true_labels, y_pred_labels, average='macro')
overall_acc = accuracy_score(y_true_labels, y_pred_labels)

print(f"\n✅ Overall Accuracy: {overall_acc:.2%}")
print(f"✅ Macro F1 Score: {macro_f1:.2%}")


In [None]:
# Reload the model written by the training cell and evaluate it on the
# validation split. load_model is reached through the already-imported `tf`
# module because the bare name is not imported until a later cell.
model = tf.keras.models.load_model("trained_model.h5")
# Call summary() — the bare attribute `model.summary` printed nothing.
model.summary()

y_pred = model.predict(X_val)
# Convert one-hot targets / softmax outputs to integer class ids.
y_val_labels = np.argmax(y_val, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

print("\n✅ Classification Report:")
print(classification_report(y_val_labels, y_pred_labels, target_names=le.classes_))

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_val_labels, y_pred_labels)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.show()


In [None]:
import os
import numpy as np
import librosa
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.utils import resample, compute_class_weight
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# ==== Configuration ====
# NOTE(review): these constants repeat the configuration of the first cell;
# keep the two in sync or drop one copy.
# Root folders scanned for audio; each immediate subfolder name is the label.
DATA_DIRS = ["Audio_Song_Actors_01-24", "Audio_Speech_Actors_01-24"]
# Sampling rate (Hz) passed to librosa.load.
SAMPLE_RATE = 22050
# Number of MFCC coefficients requested per frame.
N_MFCC = 40
# Number of feature frames kept per clip (longer clips are cut, shorter ones zero-padded).
MAX_LEN = 216

# ==== Feature Extraction ====
def extract_features(file_path):
    """Build the fixed-size feature matrix for a single audio file.

    Loads the clip at SAMPLE_RATE and computes four frame-level features
    (MFCC, chroma, zero-crossing rate, RMS). Each one is transposed to
    frames-first layout, forced to MAX_LEN frames, and the four are
    concatenated along the feature axis in that order.

    Args:
        file_path: Location of the audio file handed to librosa.load.

    Returns:
        2-D numpy array with MAX_LEN rows; columns hold the concatenated
        feature values.
    """
    def pad(x):
        # Positive when the clip is shorter than MAX_LEN frames.
        n_missing = MAX_LEN - x.shape[0]
        if n_missing <= 0:
            return x[:MAX_LEN]
        return np.pad(x, ((0, n_missing), (0, 0)), mode='constant')

    audio, rate = librosa.load(file_path, sr=SAMPLE_RATE)

    mfcc = pad(librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=N_MFCC).T)
    chroma = pad(librosa.feature.chroma_stft(y=audio, sr=rate).T)
    zcr = pad(librosa.feature.zero_crossing_rate(audio).T)
    rms_feature = pad(librosa.feature.rms(y=audio).T)

    stacked = np.concatenate([mfcc, chroma, zcr, rms_feature], axis=1)
    return stacked

X, y = [], []

# ==== Dataset Preparation ====
# The name of each immediate subfolder of a data directory is used as the
# label for the .wav files it contains.
# NOTE(review): with a per-actor folder layout (e.g. "Actor_01") these labels
# would be actor names, not emotions — confirm the directory structure.
#
# Listings are sorted because os.listdir makes no ordering guarantee; without
# this the seeded resampling and split below are not reproducible across
# machines.
for data_dir in DATA_DIRS:
    for emotion_label in sorted(os.listdir(data_dir)):
        subfolder_path = os.path.join(data_dir, emotion_label)
        if not os.path.isdir(subfolder_path):
            continue
        for file in sorted(os.listdir(subfolder_path)):
            if not file.endswith(".wav"):
                continue
            path = os.path.join(subfolder_path, file)
            try:
                features = extract_features(path)
            except Exception as e:
                # Best-effort loading: report the failing clip and skip it.
                print(f"Error processing {file}: {e}")
                continue
            # Feature and label are appended together so X and y stay aligned.
            X.append(features)
            y.append(emotion_label)

# Stack the per-clip matrices into one (n_clips, frames, features) array.
X = np.array(X)

# ==== Encode Labels ====
# Fit the encoder on the raw labels so class ids do not depend on resampling.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)
# Stored as unicode so a plain np.load can read the file back without pickle.
np.save("classes.npy", le.classes_.astype(str))

# ==== Train-Validation Split ====
# Split BEFORE upsampling. Sampling with replacement first and splitting
# afterwards puts copies of the same clip in both train and validation, which
# leaks training data into validation and inflates every reported metric.
all_idx = np.arange(len(y_encoded))
train_idx, val_idx = train_test_split(all_idx, test_size=0.2, stratify=y_encoded, random_state=42)
X_val, y_val = X[val_idx], y_cat[val_idx]

# ==== Upsample to Balance Classes (training split only) ====
# Work on row indices rather than on the feature arrays themselves; this
# avoids converting X into nested Python lists.
df = pd.DataFrame({'idx': train_idx, 'y': y_encoded[train_idx]})
grouped = df.groupby('y')
max_count = grouped.size().max()
# Draw max_count rows per class with replacement so every class matches the largest.
upsampled_df = grouped.sample(n=max_count, replace=True, random_state=42).reset_index(drop=True)
balanced_idx = upsampled_df['idx'].to_numpy()

X_balanced = X[balanced_idx]
y_balanced = np.asarray(y)[balanced_idx]

X_train, y_train = X_balanced, y_cat[balanced_idx]

# ==== Compute Class Weights ====
# Integer class id of every training sample, recovered from the one-hot rows.
y_int = y_train.argmax(axis=1)
balanced_weights = compute_class_weight('balanced', classes=np.unique(y_int), y=y_int)
# Keras expects a {class index: weight} mapping.
class_weights = {position: weight for position, weight in enumerate(balanced_weights)}

# ==== Model Definition ====
# Same 1-D CNN as before, assembled layer by layer: three convolutional stages
# (Conv1D -> BatchNorm -> MaxPool -> Dropout), then a dense classifier head.
model = Sequential()

# Stage 1 — also declares the (frames, features) input shape.
model.add(Conv1D(64, 5, activation='relu', padding='same', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Dropout(0.3))

# Stage 2
model.add(Conv1D(128, 3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Dropout(0.3))

# Stage 3
model.add(Conv1D(256, 3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Dropout(0.3))

# Classifier head; the final layer has one unit per one-hot column.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(y_cat.shape[1], activation='softmax'))

optimizer = Adam(learning_rate=0.001)
model.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=optimizer,
)
model.summary()

# ==== Callbacks ====
# Early stopping (patience 8, best weights restored) plus learning-rate
# halving after 4 stagnant epochs; both watch the validation loss.
callbacks = []
callbacks.append(EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True))
callbacks.append(ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.5, verbose=1))

# ==== Training ====
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
    class_weight=class_weights,
)
history = model.fit(X_train, y_train, **fit_options)

# ==== Save Model ====
model.save("trained_model.keras")

# ==== Evaluation ====
# Score the in-memory model on the validation split.
y_pred = model.predict(X_val)
y_pred_labels = y_pred.argmax(axis=1)
y_val_labels = y_val.argmax(axis=1)

report_text = classification_report(y_val_labels, y_pred_labels, target_names=le.classes_)
print("\n✅ Classification Report:")
print(report_text)

# Emotion-wise accuracy
# A class is marked as failing when its accuracy is under 75%.
print("\n🎯 Emotion-wise Accuracy:")
for class_id, emotion in enumerate(le.classes_):
    in_class = y_val_labels == class_id
    acc = accuracy_score(y_val_labels[in_class], y_pred_labels[in_class])
    status = "❌" if acc < 0.75 else "✅"
    print(f"{status} {emotion}: {acc:.2%} accuracy")

# Overall metrics
macro_f1 = f1_score(y_val_labels, y_pred_labels, average='macro')
overall_acc = accuracy_score(y_val_labels, y_pred_labels)
print(f"\n✅ Overall Accuracy: {overall_acc:.2%}")
print(f"✅ Macro F1 Score: {macro_f1:.2%}")

# Confusion matrix
# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_val_labels, y_pred_labels)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=le.classes_, yticklabels=le.classes_, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.tight_layout()
plt.show()

# ==== Optional: Predict New File ====
def predict_emotion(filepath, model_path="trained_model.keras", classes_path="classes.npy"):
    """Predict the class label of a single audio file with the saved model.

    Args:
        filepath: Path of the audio file to classify.
        model_path: Saved Keras model to load. Defaults to the file written by
            the training step of this cell ("trained_model.keras"); the
            previous hard-coded "trained_model.h5" pointed at a different,
            possibly stale or missing, model file.
        classes_path: .npy file holding the label names in encoder order.

    Returns:
        The label name whose predicted probability is highest.
    """
    classifier = load_model(model_path)
    # The label array may have been saved with object dtype, which np.load
    # rejects unless pickling is allowed.
    # NOTE: allow_pickle executes pickled data — only use files this notebook wrote.
    classes = np.load(classes_path, allow_pickle=True)
    # Add a leading batch axis: the model expects (batch, frames, features).
    features = np.expand_dims(extract_features(filepath), axis=0)
    pred = classifier.predict(features)
    return classes[np.argmax(pred)]


In [None]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Predicted and true integer class ids for the validation split.
val_scores = model.predict(X_val)
y_pred_labels = val_scores.argmax(axis=1)
y_true_labels = y_val.argmax(axis=1)

# Accuracy computed separately over the samples of each true class.
print("\n🎯 Emotion-wise Accuracy:")
for i, emotion in enumerate(le.classes_):
    idx = y_true_labels == i
    truth_subset, pred_subset = y_true_labels[idx], y_pred_labels[idx]
    acc = accuracy_score(truth_subset, pred_subset)
    print(f"{emotion}: {acc:.2%} accuracy")

# Headline numbers for the whole validation set.
overall_acc = accuracy_score(y_true_labels, y_pred_labels)
macro_f1 = f1_score(y_true_labels, y_pred_labels, average='macro')

print(f"\n✅ Overall Accuracy: {overall_acc:.2%}")
print(f"✅ Macro F1 Score: {macro_f1:.2%}")
