In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset and saved artifacts are reachable.
drive.mount('/content/drive', force_remount=True)  # force_remount=True is passed so a stale mount is not reused

In [None]:
def collect_audio_files(root, max_files=2800, extensions=('.wav',)):
    """Index the audio clips under `root` and derive each clip's emotion label.

    The label is the text after the last underscore of the file name, up to
    the first dot, lower-cased (e.g. ``OAF_back_angry.wav`` -> ``angry``).

    Args:
        root: Directory to walk recursively.
        max_files: Stop walking once at least this many clips were collected.
            The check runs after each directory, so a directory is never
            indexed partially. ``None`` disables the cap.
        extensions: Lower-case file suffixes that count as audio clips.
            Anything else (e.g. ``.DS_Store``, notes, cached files) is skipped
            instead of being indexed and later failing in ``librosa.load``.

    Returns:
        ``(paths, labels)``: two lists of equal length.
    """
    found_paths = []
    found_labels = []
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            if not filename.lower().endswith(tuple(extensions)):
                continue
            found_paths.append(os.path.join(dirname, filename))
            label = filename.split('_')[-1].split('.')[0]
            found_labels.append(label.lower())
        # `>=` rather than `==`: an exact-match test never fires if the count
        # steps over the cap, which silently indexes the whole tree.
        if max_files is not None and len(found_paths) >= max_files:
            break
    return found_paths, found_labels


dataset_path = '/content/drive/MyDrive/Datasets/TESS'
paths, labels = collect_audio_files(dataset_path)
if paths:
    print('Dataset is Loaded')
else:
    # os.walk yields nothing for a missing directory; say so instead of
    # reporting success and failing later with an obscure indexing error.
    print(f'No audio files found under {dataset_path}')

In [None]:
# Assemble the clip index: one row per audio file, holding its path and emotion label.
df = pd.DataFrame({'speech': paths, 'label': labels})
df.head()

In [None]:
# Show the extracted emotion label of every indexed clip.
df['label']

In [None]:
# Count clips per emotion label to check class balance.
df['label'].value_counts()

In [None]:
# Bar chart of the number of clips per emotion label.
sns.countplot(data=df, x='label')

In [None]:
def waveplot(data, sr, emotion):
    """Draw the time-domain waveform of an audio signal and show it.

    Args:
        data: Audio samples, passed straight to ``librosa.display.waveshow``.
        sr: Sampling rate of ``data``.
        emotion: Text used as the figure title.
    """
    _, ax = plt.subplots(figsize=(10, 4))
    ax.set_title(emotion, size=20)
    librosa.display.waveshow(data, sr=sr, ax=ax)
    plt.show()

def spectogram(data, sr, emotion):
    """Draw a dB-scaled STFT spectrogram of an audio signal and show it.

    Args:
        data: Audio samples, passed to ``librosa.stft``.
        sr: Sampling rate of ``data``; used to scale the plot axes.
        emotion: Text used as the figure title.
    """
    x = librosa.stft(data)
    # STFT output is complex; take the magnitude before converting to decibels.
    xdb = librosa.amplitude_to_db(np.abs(x))
    plt.figure(figsize=(11, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    # Render immediately, like waveplot does, so the figure appears where the
    # function is called rather than being deferred to the end of the cell.
    plt.show()

In [None]:
# Inspect one example clip of the chosen emotion: waveform, spectrogram, and playback.
emotion = 'fear'
path = df.loc[df['label'] == emotion, 'speech'].iloc[0]  # first clip carrying that label
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
def extract_mfcc(filename):
    """Return the time-averaged MFCCs of an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file,
    computes 40 MFCCs, and averages each coefficient over all frames.

    Args:
        filename: Path of the audio file to load.

    Returns:
        1-D array with one mean value per MFCC coefficient.
    """
    signal, rate = librosa.load(filename, offset=0.5, duration=3)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
    return np.mean(coefficients.T, axis=0)

def extract_mel(filename):
    """Return the time-averaged mel spectrogram of an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file and
    averages each mel band of the spectrogram over all frames.

    Args:
        filename: Path of the audio file to load.

    Returns:
        1-D array with one mean value per mel band.
    """
    signal, rate = librosa.load(filename, offset=0.5, duration=3)
    mel_spectrogram = librosa.feature.melspectrogram(y=signal, sr=rate)
    return np.mean(mel_spectrogram.T, axis=0)

def extract_chroma(filename):
    """Return the time-averaged chromagram of an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file and
    averages each chroma bin of the STFT chromagram over all frames.

    Args:
        filename: Path of the audio file to load.

    Returns:
        1-D array with one mean value per chroma bin.
    """
    signal, rate = librosa.load(filename, offset=0.5, duration=3)
    chromagram = librosa.feature.chroma_stft(y=signal, sr=rate)
    return np.mean(chromagram.T, axis=0)

In [None]:
def extract_features(filename):
    """Return one combined feature vector (MFCC + mel + chroma) for an audio file.

    The file is loaded once (at most 3 seconds, starting 0.5 seconds in); each
    feature matrix is averaged over its frames and the three mean vectors are
    concatenated in the order MFCC (40), mel (128), chroma (12).

    Args:
        filename: Path of the audio file to load.

    Returns:
        1-D array holding the concatenated per-feature means.
    """
    signal, rate = librosa.load(filename, offset=0.5, duration=3)
    feature_matrices = (
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40),
        librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=128),
        librosa.feature.chroma_stft(y=signal, sr=rate, n_chroma=12),
    )
    # Average every matrix over its frame axis, then join the means end to end.
    return np.concatenate([np.mean(matrix.T, axis=0) for matrix in feature_matrices])



In [None]:
# Sanity-check the MFCC extractor on the clip stored under index label 0.
extract_mfcc(df['speech'][0])

In [None]:
# Sanity-check the mel-spectrogram extractor on the clip stored under index label 0.
extract_mel(df['speech'][0])

In [None]:
# Sanity-check the chroma extractor on the clip stored under index label 0.
extract_chroma(df['speech'][0])

In [None]:
# Compute the combined MFCC + mel + chroma vector for every clip path.
# (An earlier variant of this cell used extract_mfcc alone.)
X_features = df['speech'].apply(extract_features)

In [None]:
# Display the per-clip feature vectors produced by extract_features.
X_features

In [None]:
# Stack the per-clip feature vectors into a single array (one row per clip).
X = np.array(list(X_features))
X.shape

In [None]:
# Add a trailing axis so each clip becomes a (n_features, 1) sequence for the LSTM.
# Guarded on ndim so re-running this cell does not keep appending axes, which
# would break the LSTM input_shape built from X.shape[1] and X.shape[2].
if X.ndim == 2:
    X = np.expand_dims(X, -1)
X.shape

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.utils import to_categorical


# Fit the encoder on the emotion strings and map each one to an integer class id.
# The fitted encoder is kept in `label_encoder` so predictions can be decoded later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df['label'])

# Convert the integer class ids to one-hot rows, the target format expected by
# the categorical cross-entropy loss used for training.
y = to_categorical(y_encoded)

In [None]:
# Dimensions of the one-hot target matrix.
y.shape

In [None]:
# Persist the fitted label encoder so the inference cell can decode class
# indices back to emotion names without refitting.
import pickle

encoder_path = '/content/drive/MyDrive/Datasets/new_label_encoder.pkl'  # destination on the mounted Drive
with open(encoder_path, 'wb') as f:
    pickle.dump(label_encoder, f)

In [None]:
# # Create a dictionary to map encoded labels to emotions
# encoded_emotions = dict(zip(range(len(label_encoder.classes_)), label_encoder.classes_))

# # Print the encoded labels and their corresponding emotions
# for encoded_label, emotion in encoded_emotions.items():
#     print(f"{encoded_label}: {emotion}")


In [None]:
import shutil

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
import numpy as np

# Grid search over (epochs, batch_size). The best checkpoint over ALL runs is
# kept at BEST_MODEL_PATH; each individual run checkpoints to a scratch file.
BEST_MODEL_PATH = '/content/drive/MyDrive/Datasets/new_best_model.h5'
CANDIDATE_MODEL_PATH = '/content/drive/MyDrive/Datasets/new_candidate_model.h5'

# Keras' `validation_split` takes the LAST fraction of the rows without
# shuffling. The rows here follow directory order, so that would validate on
# only a few speaker/emotion folders. Use a shuffled, stratified hold-out
# instead, fixed once so every configuration is scored on the same clips.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y.argmax(axis=1), random_state=42
)

# Size the output layer from the one-hot targets instead of hard-coding 7.
num_classes = y.shape[1]

best_accuracy = 0
best_epochs = 0
best_batch_size = 0

# Define ranges for epochs and batch size
epoch_range = range(50, 101, 10)
batch_size_range = range(16, 41, 4)

for epochs in epoch_range:
    for batch_size in batch_size_range:
        print(f"Training with epochs={epochs} and batch_size={batch_size}...")

        # Build a fresh model for every configuration so runs do not share weights.
        model = Sequential([
            LSTM(256, return_sequences=False, input_shape=(X.shape[1], X.shape[2])),
            Dropout(0.2),
            Dense(128, activation='relu'),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(num_classes, activation='softmax')
        ])

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # A new ModelCheckpoint starts with no "best so far", so it always
        # overwrites its target on the first epoch. Pointing it at the final
        # model path would leave the LAST run's model on disk, not the best
        # one; write to a scratch file and promote it only when the run wins.
        checkpoint = ModelCheckpoint(CANDIDATE_MODEL_PATH, monitor='val_accuracy', mode='max', verbose=0, save_best_only=True)

        # Train the model with checkpoint callback
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[checkpoint],
            verbose=0,
        )

        # Best validation accuracy reached at any epoch of this run; the
        # scratch checkpoint holds the weights from that epoch.
        val_accuracy = max(history.history['val_accuracy'])
        print(f"Validation accuracy: {val_accuracy}")

        # Check if this is the best accuracy so far
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_epochs = epochs
            best_batch_size = batch_size
            shutil.copyfile(CANDIDATE_MODEL_PATH, BEST_MODEL_PATH)

print(f"Best validation accuracy: {best_accuracy}")
print(f"Best epochs: {best_epochs}")
print(f"Best batch size: {best_batch_size}")


In [None]:
# from keras.models import Sequential
# from keras.layers import Dense, LSTM, Dropout
# from keras.callbacks import ModelCheckpoint

# model = Sequential([
#     LSTM(256, return_sequences=False, input_shape=(X.shape[1], X.shape[2])),   #(40,1)
#     Dropout(0.2),
#     Dense(128, activation='relu'),
#     Dropout(0.2),
#     Dense(64, activation='relu'),
#     Dropout(0.2),
#     Dense(7, activation='softmax')
# ])

# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()

# # Create a checkpoint to save the best model based on validation accuracy
# checkpoint = ModelCheckpoint('/content/drive/MyDrive/Datasets/new_best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# # Train the model with checkpoint callback
# history = model.fit(X, y, validation_split=0.2, epochs=50, batch_size=40, callbacks=[checkpoint])


In [None]:
# # Print information about the saved model
# print("Best model training completed.")
# print("Accuracy:", history.history['accuracy'])
# print("Validation Accuracy:", history.history['val_accuracy'])
# print("Loss:", history.history['loss'])
# print("Validation Loss:", history.history['val_loss'])
# print("Label encoder saved as new_label_encoder.pkl.")


In [None]:
import numpy as np
import librosa
from keras.models import load_model
import pickle

# Restore the checkpointed network written by the training cell.
saved_model_path = '/content/drive/MyDrive/Datasets/new_best_model.h5'
loaded_model = load_model(saved_model_path)

# Restore the fitted label encoder.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
encoder_path = '/content/drive/MyDrive/Datasets/new_label_encoder.pkl'
with open(encoder_path, 'rb') as f:
    label_encoder = pickle.load(f)

# Class index -> emotion name, in the encoder's class order.
encoded_emotion = dict(enumerate(label_encoder.classes_))

# List the mapping so predicted indices can be read by eye.
for encoded_label, emotion in encoded_emotion.items():
    print(f"{encoded_label}: {emotion}")

# Feature extractor for inference. It must produce exactly the same vector
# layout as the extractor used to build the training data.
def extract_features(filename):
    """Return one combined feature vector (MFCC + mel + chroma) for an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into the file,
    averages each feature matrix over its frames, and concatenates the means
    in the order MFCC (40), mel (128), chroma (12).

    Args:
        filename: Path of the audio file to load.

    Returns:
        1-D array holding the concatenated per-feature means.
    """
    y, sr = librosa.load(filename, duration=3, offset=0.5)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    # n_mels / n_chroma are pinned to the values used when the training
    # features were built, so the vector length cannot drift from what the
    # saved model expects if library defaults ever change.
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12).T, axis=0)
    return np.concatenate((mfcc, mel, chroma))

def predict_emotion(audio_file):
    """Predict the emotion label of a single audio file.

    Uses the module-level ``loaded_model`` and ``label_encoder``.

    Args:
        audio_file: Path of the audio file to classify.

    Returns:
        The entry of ``label_encoder.classes_`` with the highest predicted
        probability.
    """
    feature_vector = extract_features(audio_file)
    # (n_features,) -> (1, n_features, 1): batch axis in front, trailing axis last.
    model_input = feature_vector[np.newaxis, :, np.newaxis]
    class_probabilities = loaded_model.predict(model_input)
    # Single-sample batch, so the flat argmax is the class index.
    best_index = np.argmax(class_probabilities)
    return label_encoder.classes_[best_index]


In [None]:

# # Define the path to the audio file you want to test
# audio_file_path = '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_happy/OAF_wife_happy.wav'

# # Predict the emotion from the audio file
# predicted_emotion = predict_emotion(audio_file_path)

# print("Predicted Emotion:", predicted_emotion)

# # Define the paths to the audio files you want to test
# audio_files = [
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_sad/YAF_white_sad.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_pleasant_surprised/YAF_yearn_ps.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_neutral/YAF_white_neutral.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_happy/YAF_white_happy.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_fear/YAF_wag_fear.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_disgust/YAF_wire_disgust.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/YAF_angry/YAF_wife_angry.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_Sad/OAF_whip_sad.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_Pleasant_surprise/OAF_week_ps.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_neutral/OAF_when_neutral.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_happy/OAF_wife_happy.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_Fear/OAF_young_fear.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_disgust/OAF_wife_disgust.wav',
#     '/content/drive/MyDrive/Datasets/TESS Toronto emotional speech set data/OAF_angry/OAF_wife_angry.wav',

# ]

# # Define the ground truth emotions for each audio file
# ground_truth_emotions = [
#     'sad',
#     'ps',
#     'neutral',
#     'happy',
#     'fear',
#     'disgust',
#     'angry',
#     'sad',
#     'ps',
#     'neutral',
#     'happy',
#     'fear',
#     'disgust',
#     'angry'

# ]

# # Calculate the validation accuracy of predicted emotions
# total_samples = len(audio_files)
# correct_predictions = 0

# for audio_file, true_emotion in zip(audio_files, ground_truth_emotions):
#     predicted_emotion = predict_emotion(audio_file)
#     print(f"Predicted Emotion: {predicted_emotion}")
#     if predicted_emotion == true_emotion:
#         correct_predictions += 1
# print(f"Correct Predictions: {correct_predictions}")
# validation_accuracy = (correct_predictions / total_samples) * 100
# print(f"Validation Accuracy: {validation_accuracy}%")
