In [None]:
#%pip install datasets
#%pip install pydub
import os
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
import tensorflow as tf

import librosa
import librosa.display
import IPython.display as ipd

from glob import glob
from IPython.display import Audio
from itertools import cycle
from tensorflow import keras

import warnings
warnings.filterwarnings('ignore')

# Plot styling. The colour list is read from rcParams right after
# sns.set_theme and *before* the ggplot style sheet is applied, so
# color_pal / color_cycle keep the cycle that was active at that point.
sns.set_theme(palette=None, style="white")
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = cycle(list(color_pal))  # endless iterator over the same colours
plt.style.use('ggplot')

import os
# Print the full path of every file found below the Kaggle input directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(root_dir, file_name) for file_name in file_names]
    for full_path in full_paths:
        print(full_path)



In [None]:
# Root directory of each speech-emotion corpus. The loader cells use these
# both with os.listdir(...) and as a plain string prefix (f"{ROOT}{name}"),
# so the trailing slash is required.
# NOTE(review): these are absolute paths at the filesystem root — presumably
# placeholders; adjust them to wherever the corpora actually live.
TESS = '/TESS Toronto emotional speech set data/'
RAVDESS = '/RAVDESS Emotional speech audio/'
CREMA = '/CREMA-D/'
SAVEE = '/SAVEE/'
JL = '/JL corpus/'
EMOV = '/EMOV/'
ESD = '/ESD/'
ESD_F = '/ESD-F/'
ASVP_ESD = '/ASVP-ESD/'  # not referenced by any cell visible in this notebook
DESD = '/DESD-E/'  # not referenced by any cell visible in this notebook

In [None]:
# TESS: scan TESS/<directory>/<file>. The emotion token is the third
# '_'-separated field of the file name (extension stripped).
# Listings are sorted so the seeded down-sampling below picks the same rows
# on every run.
tess_dir_list = sorted(os.listdir(TESS))
path_list = []
gender_list = []
emotion_list = []

# File-name token -> canonical emotion label; tokens not listed are skipped.
emotion_dic = {
    'happy'   : 'happy',
    'neutral' : 'neutral',
    'sad'     : 'sad',
    'Sad'     : 'sad',
    'angry'   : 'angry',
    'fear'    : 'fear',
    'disgust'  : 'disgust',
}

for directory in tess_dir_list:
    audio_files = sorted(os.listdir(os.path.join(TESS, directory)))
    for audio_file in audio_files:
        fields = audio_file.split('.')[0].split('_')
        if len(fields) < 3:
            # Name does not follow "<a>_<b>_<emotion>": skip it instead of
            # aborting the whole scan with an IndexError.
            continue
        key = fields[2]
        if key in emotion_dic:
            path_list.append(f"{TESS}{directory}/{audio_file}")
            gender_list.append('female') # female only dataset
            emotion_list.append(emotion_dic[key])

tess_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

# Cap the listed emotions at MAX_PER_EMOTION clips each. Only the surplus is
# dropped (dropping a fixed 200 rows is a true cap only when a class has
# exactly 400 clips), and the sample is seeded for reproducibility.
MAX_PER_EMOTION = 200
emotions_to_limit = ['angry', 'happy', 'neutral', 'sad']

for emotion in emotions_to_limit:
    emotion_rows = tess_df[(tess_df['sex'] == 'female') & (tess_df['emotion'] == emotion)]
    surplus = len(emotion_rows) - MAX_PER_EMOTION
    if surplus > 0:
        indices_to_remove = emotion_rows.sample(surplus, random_state=0).index
        tess_df = tess_df.drop(indices_to_remove)

# Only the last expression of a cell is displayed, so show the class balance.
tess_df.emotion.value_counts()

In [None]:
# RAVDESS: scan RAVDESS/<directory>/<file>. File names are '-'-separated;
# field 2 is the emotion code and field 6 is parsed as an integer whose
# parity decides the sex label (even -> female, odd -> male).
ravdess_dir_lis = os.listdir(RAVDESS)
path_list, gender_list, emotion_list = [], [], []

# Emotion code -> canonical label; codes not listed are skipped.
emotion_dic = {
    '03' : 'happy',
    '01' : 'neutral',
    '04' : 'sad',
    '05' : 'angry',
    '06' : 'fear',
    '07' : 'disgust',
}

for actor_dir in ravdess_dir_lis:
    for file_name in os.listdir(os.path.join(RAVDESS, actor_dir)):
        name_fields = file_name.split('.')[0].split('-')
        emotion_code = name_fields[2]
        if emotion_code not in emotion_dic:
            continue
        actor_number = int(name_fields[6])
        path_list.append(f"{RAVDESS}{actor_dir}/{file_name}")
        gender_list.append('male' if actor_number % 2 else 'female')
        emotion_list.append(emotion_dic[emotion_code])

ravdess_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

ravdess_df.head()


In [None]:
# CREMA-D: flat directory. File names are '_'-separated; field 0 is the
# speaker id and field 2 the emotion code.
crema_dir_list = os.listdir(CREMA)
path_list, gender_list, emotion_list = [], [], []

# Emotion code -> canonical label; codes not listed are skipped.
emotion_dic = {
    'HAP' : 'happy',
    'NEU' : 'neutral',
    'SAD' : 'sad',
    'ANG' : 'angry',
    'FEA' : 'fear',
    'DIS' : 'disgust',
}

# Speaker ids labelled 'female'; every other id is labelled 'male'.
female_id_list = [
    '1002', '1003', '1004', '1006', '1007', '1008', '1009', '1010', '1012', '1013', '1018',
    '1020', '1021', '1024', '1025', '1028', '1029', '1030', '1037', '1043', '1046', '1047',
    '1049', '1052', '1053', '1054', '1055', '1056', '1058', '1060', '1061', '1063', '1072',
    '1073', '1074', '1075', '1076', '1078', '1079', '1082', '1084', '1089', '1091',
]

for file_name in crema_dir_list:
    name_fields = file_name.split('_')
    speaker_id, emotion_code = name_fields[0], name_fields[2]
    if emotion_code not in emotion_dic:
        continue
    path_list.append(f"{CREMA}{file_name}")
    gender_list.append('female' if speaker_id in female_id_list else 'male')
    emotion_list.append(emotion_dic[emotion_code])

crema_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

crema_df.head()


In [None]:
# SAVEE: flat directory. The emotion key is the second '_'-separated field of
# the file name with its last six characters removed.
savee_dir_list = os.listdir(SAVEE)
path_list, gender_list, emotion_list = [], [], []

# Emotion key -> canonical label; keys not listed are skipped.
emotion_dic = {
    'h'  : 'happy',
    'n'  : 'neutral',
    'sa' : 'sad',
    'a'  : 'angry',
    'f'  : 'fear',
    'd'  : 'disgust'
}

for file_name in savee_dir_list:
    emotion_key = file_name.split('_')[1][:-6]
    if emotion_key not in emotion_dic:
        continue
    path_list.append(f"{SAVEE}{file_name}")
    gender_list.append('male') # male only dataset
    emotion_list.append(emotion_dic[emotion_key])

savee_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

savee_df.head()


In [None]:
# SAVEE has already been scanned into `savee_df` by the preceding cell.
# Repeating the directory listing and frame construction here would only redo
# identical work, so this cell just previews the existing frame.
savee_df.head()

In [None]:
# JL corpus: flat directory. File names are '_'-separated; field 0 is a
# speaker tag and field 1 the emotion. The tag counts as female when it
# equals 'female' after its last character is dropped.
JL_dir_list = os.listdir(JL)
path_list, gender_list, emotion_list = [], [], []

# Emotion token -> canonical label ('excited' is folded into 'happy').
emotion_dic = {
    'happy'   : 'happy',
    'sad'     : 'sad',
    'angry'   : 'angry',
    'neutral' : 'neutral',
    'excited' : 'happy',
}

for file_name in JL_dir_list:
    name_fields = file_name.split('_')
    emotion_key = name_fields[1]
    speaker_tag = name_fields[0]
    if emotion_key not in emotion_dic:
        continue
    path_list.append(f"{JL}{file_name}")
    gender_list.append('female' if speaker_tag[:-1] == 'female' else 'male')
    emotion_list.append(emotion_dic[emotion_key])

JL_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

JL_df.head()

In [None]:
# EMOV: flat directory. File names are '_'-separated; field 0 is the sex tag
# (anything other than 'female' is labelled male) and field 1 the emotion.
emov_dir_list = os.listdir(EMOV)
path_list, gender_list, emotion_list = [], [], []

# Only these two emotions are taken from this corpus.
emotion_dic = {
    'disgust'   : 'disgust',
    'anger'     : 'angry'
}

for file_name in emov_dir_list:
    name_fields = file_name.split('_')
    emotion_key = name_fields[1]
    sex_tag = name_fields[0]
    if emotion_key not in emotion_dic:
        continue
    path_list.append(f"{EMOV}{file_name}")
    gender_list.append('female' if sex_tag == 'female' else 'male')
    emotion_list.append(emotion_dic[emotion_key])

EMOV_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

EMOV_df.head()

In [None]:
# ESD: flat directory. File names are '_'-separated; field 0 is the sex tag
# (anything other than 'female' is labelled male) and field 1 the emotion.
ESD_dir_list = os.listdir(ESD)
path_list, gender_list, emotion_list = [], [], []

# Emotion token -> canonical label ('anger' and 'angry' both map to 'angry').
emotion_dic = {
    'happy'   : 'happy',
    'angry'   : 'angry',
    'anger'   : 'angry',
    'neutral' : 'neutral',
    'sad'     : 'sad'
}

for file_name in ESD_dir_list:
    name_fields = file_name.split('_')
    emotion_key = name_fields[1]
    sex_tag = name_fields[0]
    if emotion_key not in emotion_dic:
        continue
    path_list.append(f"{ESD}{file_name}")
    gender_list.append('female' if sex_tag == 'female' else 'male')
    emotion_list.append(emotion_dic[emotion_key])

ESD_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

ESD_df.head()


In [None]:
# ESD-F: flat directory, parsed like the ESD cell — field 0 of the
# '_'-separated file name is the sex tag, field 1 the emotion token.
ESD_F_dir_list = os.listdir(ESD_F)
path_list, gender_list, emotion_list = [], [], []

# Emotion token -> canonical label ('anger' and 'angry' both map to 'angry').
emotion_dic = {
    'happy'   : 'happy',
    'angry'   : 'angry',
    'anger'   : 'angry',
    'neutral' : 'neutral',
    'sad'     : 'sad'
}

for file_name in ESD_F_dir_list:
    name_fields = file_name.split('_')
    emotion_key = name_fields[1]
    sex_tag = name_fields[0]
    if emotion_key not in emotion_dic:
        continue
    path_list.append(f"{ESD_F}{file_name}")
    gender_list.append('female' if sex_tag == 'female' else 'male')
    emotion_list.append(emotion_dic[emotion_key])

ESD_F_df = pd.DataFrame({
    'path': path_list,
    'sex': gender_list,
    'emotion': emotion_list,
})

ESD_F_df.head()

In [None]:
# Combine all datasets
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each per-corpus index restarts at 0, so index labels repeat and label-based
# selection on `df` becomes ambiguous.
df = pd.concat([
    ravdess_df,
    tess_df,
    crema_df,
    savee_df,
    JL_df,
    EMOV_df,
    ESD_df,
    ESD_F_df
], axis=0, ignore_index=True)
df.head()

In [None]:
# Preprocessing

def plot_distribution(df):
    """Show a grouped bar chart of clip counts per emotion, one bar per sex.

    `df` must provide the columns 'emotion', 'sex' and 'path'; the bar height
    is the 'path' count of each (emotion, sex) group.
    """
    group_counts = df.groupby(['emotion', 'sex']).count()
    emotion_by_sex = group_counts.pivot_table(values='path', index='emotion', columns='sex')

    axes = emotion_by_sex.plot(kind='bar', figsize=(6, 6), color=['pink', 'blue'])
    axes.set_title('Emotion and Gender Distribution')
    axes.set_xlabel('Emotion')
    axes.set_ylabel('Count')
    plt.show()

plot_distribution(df)

In [None]:
# Number of rows per value of the 'sex' column in the combined frame.
df['sex'].value_counts()

In [None]:
# The 'sex' column is not used after this point, so drop it.
# Re-assigning (instead of inplace=True) together with errors='ignore' keeps
# the cell idempotent: running it a second time no longer raises KeyError.
df = df.drop(columns='sex', errors='ignore')
df.head()

In [None]:
from pydub import AudioSegment, effects

# Integer class id of every emotion label; ids follow the listing order.
emotion_dic = {
    label: class_id
    for class_id, label in enumerate(
        ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust']
    )
}


def encode(label):
    """Return the integer class id of `label`, or None for an unknown label."""
    if label in emotion_dic:
        return emotion_dic[label]
    return None


## Feature Extraction

In [None]:
import numpy as np
import librosa
import random
from tqdm import tqdm

# NOISE
def noise(data):
    """Return `data` plus Gaussian noise scaled relative to the signal maximum.

    The noise amplitude is 0.035 * U * max(data), with U drawn from
    np.random.uniform(); one normal sample is drawn per element along axis 0.
    The input array is not modified.
    """
    random_scale = 0.035 * np.random.uniform()
    amplitude = random_scale * np.amax(data)
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian

# STRETCH
def stretch(data, rate=0.9):
    """Return `data` time-stretched by `rate` using librosa.effects.time_stretch."""
    stretched = librosa.effects.time_stretch(data, rate=rate)
    return stretched

# SHIFT
def shift(data):
    """Return `data` rolled circularly (np.roll) by a random integer offset.

    The offset is int(u * 1000) with u drawn uniformly from [-2, 2), i.e. up
    to about 2000 positions in either direction.
    """
    random_factor = np.random.uniform(low=-2, high=2)
    offset = int(random_factor * 1000)
    return np.roll(data, offset)



# Combined Augmentation Function
def augment_audio(data, sr):
    """Apply a random, non-empty selection of noise / stretch / shift to `data`.

    Between one and three of the augmentations are chosen without repetition
    by random.sample and applied one after another in the sampled order.
    `sr` is accepted but not used by the body.
    """
    candidates = [noise, stretch, shift]
    how_many = random.randint(1, 3)
    chosen = random.sample(candidates, how_many)

    augmented = data
    for transform in chosen:
        augmented = transform(augmented)
    return augmented

# Example of using the augment_audio function
def preprocess_audio(path):
    """Load a clip, trim it, augment it randomly and force it to 4 * sr samples.

    The file is loaded with sr=16000, trimmed with librosa.effects.trim
    (top_db=25), passed through augment_audio, then cut to, or padded at the
    end (np.pad, 'constant' mode) up to, 4 * sr samples.

    Returns:
        (samples, sr)
    """
    clip_seconds = 4
    samples, sr = librosa.load(path, sr=16000)
    trimmed, _ = librosa.effects.trim(samples, top_db=25, frame_length=256, hop_length=64)
    samples = augment_audio(trimmed, sr)

    target_len = clip_seconds * sr
    if len(samples) / sr > clip_seconds:
        samples = samples[:target_len]
    else:
        missing = target_len - len(samples)
        samples = np.pad(samples, (0, missing), 'constant')

    return samples, sr


def preprocess_audio_aug(path):
    """Augmented preprocessing: load, trim, augment_audio, then fix the length.

    The clip is loaded with sr=16000 and trimmed (top_db=25) before
    augment_audio is applied; the result is truncated to 4 * sr samples or
    padded at the end with np.pad ('constant' mode) up to that length.

    Returns:
        (samples, sr)
    """
    waveform, sr = librosa.load(path, sr=16000)
    waveform = librosa.effects.trim(waveform, top_db=25, frame_length=256, hop_length=64)[0]
    waveform = augment_audio(waveform, sr)

    n_target = 4 * sr
    n_have = len(waveform)
    if n_have / sr > 4:
        return waveform[:n_target], sr
    return np.pad(waveform, (0, n_target - n_have), 'constant'), sr

# normal
def preprocess_audio_n(path):
    """Plain preprocessing without augmentation: load, trim, fix the length.

    The clip is loaded with sr=16000, trimmed with librosa.effects.trim
    (top_db=25) and then truncated to 4 * sr samples or padded at the end
    with np.pad ('constant' mode) up to that length.

    Returns:
        (samples, sr)
    """
    loaded, sr = librosa.load(path, sr=16000)
    signal, _ = librosa.effects.trim(loaded, top_db=25, frame_length=256, hop_length=64)

    wanted = 4 * sr
    too_long = (len(signal) / sr) > 4
    if too_long:
        signal = signal[:wanted]
    else:
        signal = np.pad(signal, (0, wanted - len(signal)), 'constant')

    return signal, sr

In [None]:
zcr_list = []
rms_list = []
mfccs_list = []
emotion_list = []

FRAME_LENGTH = 400
HOP_LENGTH = 160
sr = 16000


def _append_features(audio, label):
    """Compute ZCR, RMS and 20 MFCCs for `audio` and append them with `label`.

    All three features are computed before anything is appended, so an
    exception raised by librosa leaves the four lists at equal length.
    """
    zcr = librosa.feature.zero_crossing_rate(audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
    rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20, hop_length=HOP_LENGTH)

    zcr_list.append(zcr)
    rms_list.append(rms)
    mfccs_list.append(mfccs)
    emotion_list.append(label)


# 20 mfcc features + 1 zcr + 1 rms
for row in tqdm(df.itertuples(index=False), total=len(df)):
    try:
        label = encode(row.emotion)

        # 1) normal preprocessing (trimmed, fixed length, no augmentation)
        audio, _ = preprocess_audio_n(row.path)
        _append_features(audio, label)

        # 2) one randomly augmented version of the same clip
        audio, _ = preprocess_audio_aug(row.path)
        _append_features(audio, label)

        # 3) oversampling: an extra augmented version for "fear" with
        #    probability 0.5 and for "disgust" with probability 0.3
        extra_copy = (
            (row.emotion == "fear" and np.random.rand() <= 0.5)
            or (row.emotion == "disgust" and np.random.rand() <= 0.3)
        )
        if extra_copy:
            audio, _ = preprocess_audio_aug(row.path)
            _append_features(audio, label)
    except Exception as exc:
        # Best effort: report the clip and the reason, then carry on with the
        # next one. `Exception` (not a bare except) lets KeyboardInterrupt
        # stop the loop.
        print(f"Failed for path: {row.path} ({type(exc).__name__}: {exc})")

In [None]:
# Check shape: display the shape of one extracted MFCC matrix
# (index 7 appears to be an arbitrary sample).
mfccs_list[7].shape

## Train test split

In [None]:
# Combine the different features: the per-clip matrices are concatenated
# along axis 1 in the order zcr, rms, mfccs and stored as float32.
feature_groups = (zcr_list, rms_list, mfccs_list)
X = np.concatenate(feature_groups, axis=1).astype('float32')

# Labels as an int8 column vector (one row per feature sample).
y = np.asarray(emotion_list)[:, np.newaxis].astype('int8')

In [None]:
# save features: write the feature tensor and the label vector as .npy files
for target_file, array in (('/X_Extract.npy', X), ('/Y_Extract.npy', y)):
    np.save(target_file, array)

In [None]:
# load features
# The file names must be the ones the "save features" cell writes
# ('/X_Extract.npy' and '/Y_Extract.npy'); the former '..._Extracted.npy'
# names are never produced by this notebook, so loading them failed on a
# fresh run.
X = np.load('/X_Extract.npy')
y = np.load('/Y_Extract.npy')

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras import layers, optimizers, callbacks

In [None]:
# 92% of the rows go to training; the remaining 8% are split 70/30 into
# validation and test. Both splits use a fixed random_state.
# NOTE(review): X holds the clean and the augmented version(s) of each clip as
# separate rows, so this row-level split can put variants of one recording
# into both train and val/test — consider splitting by clip before augmenting.
X_train, X_to_split, y_train, y_to_split = train_test_split(X, y, test_size=0.08, random_state=1)
X_val, X_test, y_val, y_test = train_test_split(X_to_split, y_to_split, test_size=0.3, random_state=1)

# One-hot targets for categorical_crossentropy. y_train_class is what every
# model.fit(...) call below trains on, so it has to be defined here as well.
y_train_class = to_categorical(y_train, 6)
y_val_class = to_categorical(y_val, 6)
y_test_class = to_categorical(y_test, 6)

X_train.shape

## Metric setup


In [None]:
def summarize_history_accuracy(history):
    """Plot train vs. validation curves: accuracy on the left, loss on the right.

    `history.history` must contain the keys 'categorical_accuracy',
    'val_categorical_accuracy', 'loss' and 'val_loss'.
    """
    # (train key, validation key, panel title, y-axis label), left to right
    panels = (
        ('categorical_accuracy', 'val_categorical_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 6))
    for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()
    plt.show()

from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(xv, yv, MODEL):
    """Draw the confusion matrix of `MODEL` on (`xv`, `yv`) as a heat map.

    Args:
        xv: inputs passed to MODEL.predict.
        yv: one-hot targets; the true class is the argmax along axis 1.
        MODEL: object with a predict() method returning per-class scores.
    """
    labels = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust']
    y_pred = np.argmax(MODEL.predict(xv), axis=1)
    y_true = np.argmax(yv, axis=1)
    # Fixed label range so the matrix is 6x6 even if a class never occurs.
    cm = confusion_matrix(y_true, y_pred, labels=range(6))

    sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", xticklabels=labels, yticklabels=labels)
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.title('Confusion matrix')
    plt.show()


# Module level on purpose: model_info() below uses these names. When the
# import was indented inside plot_confusion_matrix the names were local to
# that function and model_info() failed with NameError.
from tensorflow.keras.metrics import Precision, Recall, F1Score


def model_info(model, test_x, test_y):
    """Print loss, accuracy, precision, recall and per-class F1 for `model`.

    Args:
        model: compiled Keras model whose evaluate() returns (loss, accuracy).
        test_x: evaluation inputs.
        test_y: one-hot evaluation targets with six classes.
    """
    # Imported here so the function does not rely on an import made in some
    # other scope.
    from tensorflow.keras.metrics import Precision, Recall, F1Score

    # The data passed in is the test split, so label the numbers accordingly.
    loss, accuracy = model.evaluate(test_x, test_y)
    print("Test Loss:", loss)
    print("Test Accuracy:", accuracy)

    # Create metric objects
    precision = Precision(name='precision')
    recall = Recall(name='recall')
    f1_score = F1Score(name='f1_score')

    # Get model predictions
    y_pred = model.predict(test_x)

    # Update metrics with true labels and predictions
    precision.update_state(test_y, y_pred)
    recall.update_state(test_y, y_pred)
    f1_score.update_state(test_y, y_pred)

    # Print calculated metrics
    print("Precision:", precision.result().numpy())
    print("Recall:", recall.result().numpy())

    # The F1 result is indexed per class, in the same order as emotion_labels.
    f1_scores = f1_score.result().numpy()
    emotion_labels = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust']

    for i, emotion in enumerate(emotion_labels):
        print(f"F1-Score ({emotion}):", f1_scores[i])

In [None]:
# Callbacks
# Halve the learning rate (never below 1e-6) when the validation categorical
# accuracy has not improved for 10 epochs.
rlrop = callbacks.ReduceLROnPlateau(
    monitor='val_categorical_accuracy',
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=1,
)

# Stop training once the validation loss has not improved for 10 epochs.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
)

## Audio test setup

In [None]:
path = '/male1_angry_1.wav'


def preprocess_audio(path):
    """Load one clip, trim it and force it to exactly 4 * sr samples.

    No augmentation is applied: the file is loaded with sr=16000, trimmed
    with librosa.effects.trim (top_db=25) and then truncated, or padded at
    the end with np.pad ('constant' mode), to 4 * sr samples.

    Returns:
        (samples, sr)
    """
    clip, sr = librosa.load(path, sr=16000)
    clip = librosa.effects.trim(clip, top_db=25, frame_length=256, hop_length=64)[0]

    full_length = 4 * sr
    if len(clip) / sr > 4:
        clip = clip[:full_length]
    else:
        pad_right = full_length - len(clip)
        clip = np.pad(clip, (0, pad_right), 'constant')

    return clip, sr



# Features for the single demo clip at `path`, built the same way as the
# training features (1 zcr row + 1 rms row + 20 mfcc rows) so that `pda`
# matches the input shape the models are trained on.
# The clip is kept in its own variables: the label array `y` and the
# extracted feature lists are deliberately left untouched.
FRAME_LENGTH = 400
HOP_LENGTH = 160
sr = 16000
N_MFCC = 20  # must equal the n_mfcc used when the training features were extracted

test_audio, _ = preprocess_audio(path)
zcr = librosa.feature.zero_crossing_rate(test_audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
rms = librosa.feature.rms(y=test_audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
mfccs = librosa.feature.mfcc(y=test_audio, sr=sr, n_mfcc=N_MFCC, hop_length=HOP_LENGTH)

# Wrapping each matrix in a list adds a leading batch axis of size 1; the
# three groups are then joined along axis 1, exactly like X.
pda = np.concatenate(([zcr], [rms], [mfccs]), axis=1)
pda = pda.astype('float32')

# Model training 

## LSTM

In [None]:
# Two stacked LSTM layers with dropout, followed by a 6-way softmax.
MODEL = Sequential()
MODEL.add(layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1:])))
MODEL.add(layers.Dropout(0.5))  # Adding dropout for regularization
MODEL.add(layers.LSTM(64))
MODEL.add(layers.Dropout(0.5))  # Adding dropout for regularization
MODEL.add(layers.Dense(6, activation='softmax'))

# Compile the model with Adam optimizer
MODEL.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
MODEL.summary()

# Fit the model with the callbacks
history = MODEL.fit(X_train, y_train_class,
                    epochs=200,
                    batch_size=20,  # small mini-batches
                    validation_data=(X_val, y_val_class),
                    callbacks=[rlrop, early_stopping])

In [None]:
# Evaluate the first LSTM: training curves, confusion matrix on the test
# split, then the metrics printed by model_info.
summarize_history_accuracy(history)
plot_confusion_matrix(X_test, y_test_class, MODEL)
model_info(MODEL, X_test, y_test_class)
# Persist the trained model (the target path ends in .h5).
MODEL.save('/model_LSTM_1_.h5')

In [None]:
# Class scores of the first LSTM for the demo clip, then the best label.
emotion_labels = [
    'neutral', 'happy', 'sad',
    'angry', 'fear', 'disgust',
]
pr = MODEL.predict(pda)
print(pr)
best_class = np.argmax(pr)
predicted_emotion = emotion_labels[best_class]
print(predicted_emotion)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Deeper LSTM: four stacked LSTM layers and two dense layers, each followed
# by dropout, ending in a 6-way softmax.
model_lstm = Sequential()
model_lstm.add(layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1:])))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization
model_lstm.add(layers.LSTM(64, return_sequences=True))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization
model_lstm.add(layers.LSTM(64, return_sequences=True))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization
model_lstm.add(layers.LSTM(64))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization

model_lstm.add(layers.Dense(128, activation='relu'))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization
model_lstm.add(layers.Dense(64, activation='relu'))
model_lstm.add(layers.Dropout(0.5))  # Adding dropout for regularization
model_lstm.add(layers.Dense(6, activation='softmax'))

opt = optimizers.Adam(clipvalue=0.5)  # clipvalue to prevent exploding gradients
model_lstm.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only adds a stray "None" line).
model_lstm.summary()

history = model_lstm.fit(X_train, y_train_class,
                         epochs=200,
                         validation_data=(X_val, y_val_class),
                         batch_size=64,
                         callbacks=[rlrop, early_stopping])

In [None]:
# Evaluate the deeper LSTM: training curves, confusion matrix on the test
# split, then the metrics printed by model_info.
summarize_history_accuracy(history)
plot_confusion_matrix(X_test, y_test_class, model_lstm)
model_info(model_lstm, X_test, y_test_class)
# Save the model evaluated in this cell. Saving `MODEL` here would write the
# first LSTM a second time under the LSTM_2 file name.
model_lstm.save('/model_LSTM_2_.h5')

# Class scores for the demo clip, then the most likely emotion.
pr = model_lstm.predict(pda)
print(pr)
emotion_labels = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust']
predicted_emotion = emotion_labels[np.argmax(pr)]
print(predicted_emotion)

## GRU

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import GRU, Dropout, Dense

# Four stacked GRU layers of decreasing width (64 -> 32 -> 16 -> 8), one
# dense layer, and a 6-way softmax output.
model_GRU = keras.Sequential([
  keras.layers.Input(shape=X_train.shape[1:]),
  keras.layers.GRU(64, return_sequences=True, dropout=0.5),  # First GRU layer
  keras.layers.BatchNormalization(),  # Normalize activations before next layer

  keras.layers.GRU(32, return_sequences=True, dropout=0.5),  # Second GRU with fewer units

  # Additional GRU layers (optional)
  keras.layers.GRU(16, return_sequences=True, dropout=0.3),  # Third GRU with even fewer units
  keras.layers.GRU(8, dropout=0.3),  # Fourth GRU layer

  # Additional normal layers (optional)
  keras.layers.Dense(64, activation='relu'),  # Dense layer with ReLU activation
  keras.layers.Dropout(0.2),  # Dropout for regularization

  keras.layers.Dense(6, activation="softmax")  # Output layer
])

model_GRU.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only adds a stray "None" line).
model_GRU.summary()

# Fit the model with the callbacks
history_GRU = model_GRU.fit(X_train, y_train_class,
                    epochs=200,
                    batch_size=20,  # small mini-batches
                    validation_data=(X_val, y_val_class),
                    callbacks=[rlrop, early_stopping])

In [None]:
# Evaluate the GRU model: training curves, confusion matrix on the test
# split, then the metrics printed by model_info.
summarize_history_accuracy(history_GRU)
plot_confusion_matrix(X_test, y_test_class, model_GRU)
model_info(model_GRU, X_test, y_test_class)

# Class scores for the demo clip, then the most likely emotion.
emotion_labels = [
    'neutral', 'happy', 'sad',
    'angry', 'fear', 'disgust',
]
pr = model_GRU.predict(pda)
print(pr)
best_class = np.argmax(pr)
predicted_emotion = emotion_labels[best_class]
print(predicted_emotion)

## Bi-LSTM

In [None]:
# Three bidirectional LSTM layers (64 -> 32 -> 16 units per direction), each
# followed by dropout, then one dense layer and a 6-way softmax output.
model_lstm_bi = Sequential([
  layers.Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(X_train.shape[1:])),
  layers.Dropout(0.5),
  layers.Bidirectional(layers.LSTM(32, return_sequences=True)),
  layers.Dropout(0.5),
  layers.Bidirectional(layers.LSTM(16)),
  layers.Dropout(0.5),
  layers.Dense(128, activation='relu'),  # Dense layer with ReLU activation
  layers.Dropout(0.2),
  layers.Dense(6, activation='softmax')  # Output layer
])


# Compile the model with Adam optimizer and gradient clipping
opt = optimizers.Adam(clipvalue=0.5)  # clipvalue to prevent exploding gradients
model_lstm_bi.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])


# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only adds a stray "None" line).
model_lstm_bi.summary()

# Fit the model with the callbacks
history1 = model_lstm_bi.fit(X_train, y_train_class,
                    epochs=200,
                    batch_size=32,  # Adjusted batch size
                    validation_data=(X_val, y_val_class),
                    callbacks=[rlrop, early_stopping])


In [None]:
# Evaluate the bidirectional LSTM: training curves, confusion matrix on the
# test split, then the metrics printed by model_info.
summarize_history_accuracy(history1)
plot_confusion_matrix(X_test, y_test_class, model_lstm_bi)
model_info(model_lstm_bi, X_test, y_test_class)

# Class scores for the demo clip, then the most likely emotion.
emotion_labels = [
    'neutral', 'happy', 'sad',
    'angry', 'fear', 'disgust',
]
pr = model_lstm_bi.predict(pda)
print(pr)
best_class = np.argmax(pr)
predicted_emotion = emotion_labels[best_class]
print(predicted_emotion)

## CNN

In [None]:
import tensorflow.keras.layers as L

# 1-D CNN: five Conv1D -> BatchNormalization -> MaxPool1D stages
# (512, 512, 256, 256, 128 filters), then a dense head and a 6-way softmax.
model_CNN = Sequential([
    L.Conv1D(512,kernel_size=5, strides=1,padding='same', activation='relu',input_shape=(X_train.shape[1:])),
    L.BatchNormalization(),
    L.MaxPool1D(pool_size=5,strides=2,padding='same'),

    L.Conv1D(512,kernel_size=5,strides=1,padding='same',activation='relu'),
    L.BatchNormalization(),
    L.MaxPool1D(pool_size=5,strides=2,padding='same'),
    L.Dropout(0.2),  # Add dropout layer after the second max pooling layer

    L.Conv1D(256,kernel_size=5,strides=1,padding='same',activation='relu'),
    L.BatchNormalization(),
    L.MaxPool1D(pool_size=5,strides=2,padding='same'),

    L.Conv1D(256,kernel_size=3,strides=1,padding='same',activation='relu'),
    L.BatchNormalization(),
    L.MaxPool1D(pool_size=5,strides=2,padding='same'),
    L.Dropout(0.2),  # Add dropout layer after the fourth max pooling layer

    L.Conv1D(128,kernel_size=3,strides=1,padding='same',activation='relu'),
    L.BatchNormalization(),
    L.MaxPool1D(pool_size=3,strides=2,padding='same'),
    L.Dropout(0.2),  # Add dropout layer after the fifth max pooling layer

    L.Flatten(),
    L.Dense(512,activation='relu'),
    L.BatchNormalization(),
    L.Dense(6,activation='softmax')
])

opt = optimizers.Adam(clipvalue=0.5)  # clipvalue to prevent exploding gradients
model_CNN.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only adds a stray "None" line).
model_CNN.summary()

history_CNN = model_CNN.fit(X_train, y_train_class,
                            epochs=200,
                            validation_data=(X_val, y_val_class),
                            batch_size=64,
                            callbacks=[rlrop, early_stopping])


In [None]:
# Evaluate the CNN: training curves, confusion matrix on the test split,
# then the metrics printed by model_info.
summarize_history_accuracy(history_CNN)
plot_confusion_matrix(X_test, y_test_class, model_CNN)
model_info(model_CNN, X_test, y_test_class)

# Class scores for the demo clip, then the most likely emotion.
emotion_labels = [
    'neutral', 'happy', 'sad',
    'angry', 'fear', 'disgust',
]
pr = model_CNN.predict(pda)
print(pr)
best_class = np.argmax(pr)
predicted_emotion = emotion_labels[best_class]
print(predicted_emotion)

## C-LSTM

In [None]:
import tensorflow.keras.layers as L

# C-LSTM: three Conv1D -> MaxPooling1D -> BatchNormalization -> Dropout
# stages (1024, 512, 256 filters) feeding three stacked LSTM layers, then a
# dense head (128 -> 64 -> 32) and a 6-way softmax output.
modelCLSTM = Sequential([
    L.Conv1D(1024, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(X_train.shape[1:])),
    L.MaxPooling1D(pool_size=2, strides = 2, padding = 'same'),
    L.BatchNormalization(),
    L.Dropout(0.3),

    L.Conv1D(512, kernel_size=5, strides=1, padding='same', activation='relu'),
    L.MaxPooling1D(pool_size=2, strides = 2, padding = 'same'),
    L.BatchNormalization(),
    L.Dropout(0.3),

    L.Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'),
    L.MaxPooling1D(pool_size=2, strides = 2, padding = 'same'),
    L.BatchNormalization(),
    L.Dropout(0.3),

    L.LSTM(128, return_sequences=True),
    L.Dropout(0.3),

    L.LSTM(128, return_sequences=True),
    L.Dropout(0.3),
    L.LSTM(128),
    L.Dropout(0.3),

    L.Dense(128, activation='relu'),
    L.Dense(64, activation='relu'),
    L.Dense(32, activation='relu'),

    L.Dense(6, activation='softmax')
])


opt = optimizers.Adam(clipvalue=0.5)  # clipvalue to prevent exploding gradients
modelCLSTM.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only adds a stray "None" line).
modelCLSTM.summary()

# Validate on the validation split with one-hot targets, like every other
# model in this notebook. The former pair (X_to_split, y_to_splitt) named an
# undefined variable; the intended y_to_split holds integer labels (not
# one-hot) and its rows include the test split.
history_CLSTM = modelCLSTM.fit(X_train, y_train_class,
                               epochs=200,
                               validation_data=(X_val, y_val_class),
                               batch_size=64,
                               callbacks=[rlrop, early_stopping])

In [None]:
# Evaluate the C-LSTM: training curves, confusion matrix on the test split,
# then the metrics printed by model_info.
summarize_history_accuracy(history_CLSTM)
plot_confusion_matrix(X_test, y_test_class, modelCLSTM)
model_info(modelCLSTM, X_test, y_test_class)

# Class scores for the demo clip, then the most likely emotion.
emotion_labels = [
    'neutral', 'happy', 'sad',
    'angry', 'fear', 'disgust',
]
pr = modelCLSTM.predict(pda)
print(pr)
best_class = np.argmax(pr)
predicted_emotion = emotion_labels[best_class]
print(predicted_emotion)