In [None]:
!pip install kaggle


In [None]:
from google.colab import files
# Upload kaggle.json from the local machine. The result is bound to a name so
# the notebook does not echo the returned filename -> content mapping, which
# would otherwise embed the API key in the saved cell output.
uploaded = files.upload()

In [None]:
# Copy kaggle.json (expected in the current working directory) into ~/.kaggle,
# creating that directory if it does not exist yet.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the ejlok1/toronto-emotional-speech-set-tess dataset archive with the Kaggle CLI.
!kaggle datasets download ejlok1/toronto-emotional-speech-set-tess



In [None]:
!unzip toronto-emotional-speech-set-tess.zip

In [None]:
import os

def collect_audio_files(root, limit=2800):
    """Walk ``root`` and collect .wav file paths with their emotion labels.

    The label is the lower-cased text after the last underscore of the file
    name, up to the first dot of that segment.

    Args:
        root: Directory to search recursively.
        limit: Maximum number of files to collect; ``None`` means no cap.

    Returns:
        A ``(paths, labels)`` tuple of two equally long lists.
    """
    found_paths, found_labels = [], []
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            # Returning here stops the whole walk. A `break` would only leave
            # the inner loop and later directories would still be added.
            if limit is not None and len(found_paths) >= limit:
                return found_paths, found_labels
            # Only audio files carry a label in their name.
            if not filename.lower().endswith('.wav'):
                continue
            found_paths.append(os.path.join(dirname, filename))
            label = filename.split('_')[-1].split('.')[0]
            found_labels.append(label.lower())
    return found_paths, found_labels


# NOTE(review): the 2800 cap presumably exists because the extracted tree holds
# more files than wanted - confirm which files should actually be kept.
paths, labels = collect_audio_files('/content/TESS Toronto emotional speech set data')

print('Dataset is Loaded')

In [None]:
# Number of file paths collected by the loading cell.
len(paths)

In [None]:
# Peek at the first five collected file paths.
paths[:5]

In [None]:
import pandas as pd

# Build a two-column table: one row per audio file, pairing its path with its label.
df = pd.DataFrame({'audio_paths': paths, 'labels': labels})
display(df.head())

In [None]:
# Count how many rows carry each distinct label.
df['labels'].value_counts()

In [None]:
import seaborn as sns
# Bar chart of the number of rows per value of the 'labels' column.
sns.countplot(data=df,x='labels')

In [None]:
def waveplot(data, sr, emotion):
    """Show the waveform of ``data`` in a 10x4 figure titled with ``emotion``.

    Args:
        data: Audio samples passed to ``librosa.display.waveshow``.
        sr: Sample rate forwarded to ``waveshow``.
        emotion: Text used as the figure title.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)
    plt.title(emotion, fontsize=20)
    librosa.display.waveshow(data, sr=sr)
    plt.show()

def spectogram(data, sr, emotion):
    """Show a dB-scaled STFT spectrogram of ``data`` titled with ``emotion``.

    Args:
        data: Audio samples passed to ``librosa.stft``.
        sr: Sample rate forwarded to ``librosa.display.specshow``.
        emotion: Text used as the figure title.
    """
    stft_matrix = librosa.stft(data)
    # Magnitude of the complex STFT, converted to decibels for display.
    magnitude_db = librosa.amplitude_to_db(abs(stft_matrix))
    plt.figure(figsize=(11, 4))
    plt.title(emotion, size=20)
    librosa.display.specshow(magnitude_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    # Render at the call site, as waveplot does, instead of relying on the
    # notebook backend to flush the open figure when the cell ends.
    plt.show()


In [None]:
# Print the distinct label values, then the dtype of the label column.
label_column = df['labels']
print(label_column.unique())
print(label_column.dtype)


In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio
import numpy as np

# Take the first file labelled 'fear', plot its waveform and spectrogram,
# and end with an audio player for it (last expression of the cell).
emotion = 'fear'
matching_paths = df.loc[df['labels'] == emotion, 'audio_paths']
path = matching_paths.iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)

In [None]:
# Take the first file labelled 'disgust', plot its waveform and spectrogram,
# and end with an audio player for it (last expression of the cell).
emotion = 'disgust'
matching_paths = df.loc[df['labels'] == emotion, 'audio_paths']
path = matching_paths.iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)


In [None]:
# Take the first file labelled 'neutral', plot its waveform and spectrogram,
# and end with an audio player for it (last expression of the cell).
emotion = 'neutral'
matching_paths = df.loc[df['labels'] == emotion, 'audio_paths']
path = matching_paths.iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)


In [None]:
# Take the first file labelled 'ps' (presumably "pleasant surprise" - confirm
# against the dataset description), plot its waveform and spectrogram, and end
# with an audio player for it (last expression of the cell).
emotion = 'ps'
matching_paths = df.loc[df['labels'] == emotion, 'audio_paths']
path = matching_paths.iloc[0]
data, sampling_rate = librosa.load(path)
waveplot(data, sampling_rate, emotion)
spectogram(data, sampling_rate, emotion)
Audio(path)


In [None]:
# Load the first 'ps' clip and report its sample count and sample rate.
emotion = 'ps'
path = np.array(df['audio_paths'][df['labels'] == emotion])[0]
data, sampling_rate = librosa.load(path)
# A notebook cell only displays its last expression, so a bare `len(data)` on
# its own line would be silently discarded; show both values as one tuple.
len(data), sampling_rate

In [None]:
import librosa
import numpy as np

def extract_features(file_path):
    """Return the time-averaged MFCC vector (40 coefficients) of one audio file.

    Loads up to 3 seconds of audio starting 0.5 seconds in, resampled to
    44.1 kHz, computes 40 MFCCs per frame and averages them over the frames.

    Note: a later cell redefines this function with error handling; that later
    definition is the one in effect during feature extraction.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The per-coefficient mean of the MFCC matrix.
    """
    # res_type='kaiser_fast' is intentionally not passed: it depends on the
    # resampy package, which this notebook only installs in a later cell.
    data, sample_rate = librosa.load(file_path, duration=3, sr=22050*2, offset=0.5)
    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40).T, axis=0)
    return mfccs


In [None]:
import os
import pandas as pd
import librosa
import numpy as np

def extract_features(file_path, sr=22050*2, duration=3, offset=0.5, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file, or None on failure.

    Args:
        file_path: Path of the audio file to load.
        sr: Target sample rate passed to ``librosa.load`` (default 44100).
        duration: Maximum number of seconds to load.
        offset: Number of seconds to skip at the start of the file.
        n_mfcc: Number of MFCC coefficients, i.e. the length of the result.

    Returns:
        The per-coefficient mean of the MFCC matrix, or ``None`` when the file
        cannot be processed. Failures are reported with ``print`` rather than
        raised, so that one bad file does not abort a batch run.
    """
    try:
        data, sample_rate = librosa.load(file_path, duration=duration, sr=sr, offset=offset)
        if data.size == 0:
            # No samples were decoded (presumably the clip is shorter than
            # `offset`); features computed from nothing would be meaningless.
            print(f"Error processing file {file_path}: no audio samples loaded")
            return None
        # mfcc() output is transposed here so that axis 0 is averaged away,
        # leaving one value per coefficient.
        mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=n_mfcc).T, axis=0)
        return mfccs
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Extract one MFCC feature vector per .wav file below base_path and collect
# them, with the emotion label parsed from each file name, into a DataFrame.
features, emotions = [], []
base_path = "/content/TESS Toronto emotional speech set data" # Corrected path

# NOTE(review): unlike the earlier loading cell, this walk has no 2800-file cap.
# Confirm the extracted tree holds no duplicate copies of the clips; otherwise
# identical recordings could land in both the train and the test split.
for dirpath, _, filenames in os.walk(base_path):
    for file in filenames:
        # Case-insensitive so files named *.WAV are not silently ignored.
        if not file.lower().endswith('.wav'):
            continue
        # Lower-cased to match the label parsing of the earlier loading cell,
        # so differently-cased spellings do not become separate classes.
        emotion = file.split('_')[-1].split('.')[0].lower()
        file_path = os.path.join(dirpath, file)
        print(f"Processing: {file_path}") # Added print statement
        data = extract_features(file_path)
        if data is not None:
            features.append(data)
            emotions.append(emotion)
        else:
            print(f"Skipping file due to extraction error: {file_path}") # Added print statement

print(f"Number of features extracted: {len(features)}") # Added print statement

# Note: this rebinds `df`, replacing the path/label table built earlier in the
# notebook. One row per file: feature columns followed by the 'emotion' label.
df = pd.DataFrame(features)
df['emotion'] = emotions

if not df.empty:
    display(df.head())
else:
    print("DataFrame is empty after feature extraction.") # Added print statement

In [None]:
# Install resampy.
# NOTE(review): the extract_features definition in effect no longer passes
# res_type='kaiser_fast', so this install is presumably a leftover - confirm
# whether it is still needed. If it is, it should run before feature extraction.
!pip install resampy

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Inputs are every column except the label.
X = df.drop(columns='emotion')

# Map the label strings to integer class ids, then one-hot encode them.
encoder = LabelEncoder()
class_ids = encoder.fit_transform(df['emotion'])
y = to_categorical(class_ids)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Upgrade librosa and resampy to their latest versions.
# NOTE(review): this runs after librosa has already been imported and used in
# earlier cells; an upgraded version presumably only takes effect after a
# runtime restart - confirm, and consider moving installs to the first cell.
!pip install --upgrade librosa resampy

In [None]:
import resampy

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Fully connected classifier: three ReLU layers (256 -> 128 -> 64) with dropout
# after the first two, and a softmax output with one unit per emotion class.
# Both layer widths at the model boundary are derived from the data so the
# network stays in sync with the feature extraction (e.g. a different n_mfcc).
n_features = X_train.shape[1]
n_classes = y.shape[1]

model = Sequential([
    Dense(256, activation='relu', input_shape=(n_features,)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the test split doubles as validation data here, so the
# validation curves and the later test evaluation use the same samples.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


In [None]:
import matplotlib.pyplot as plt

# Accuracy per epoch for the training data and the validation data, titled and
# labelled in the same way as the loss plot further down.
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Final loss and accuracy of the trained model on the held-out split.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.2f}")


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np

# Convert softmax outputs and one-hot targets back to integer class ids.
y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Pass the full list of class ids explicitly. Otherwise a class that appears in
# neither y_true nor y_pred would be dropped from the matrix, and its shape
# would no longer match the display labels taken from the encoder.
class_ids = np.arange(len(encoder.classes_))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=encoder.classes_)
disp.plot(cmap='Blues', xticks_rotation=45)
plt.show()


In [None]:
# Loss per epoch for the training data and the validation data on a single axes.
ax = plt.subplot(1, 1, 1)
for history_key, legend_text in (('loss', 'Train Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=legend_text)
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
import pickle

# Persist the fitted label encoder so class ids can be mapped back to emotion
# names outside this notebook.
with open("label_encoder.pkl", "wb") as encoder_file:
    pickle.dump(encoder, encoder_file)

print("Label encoder saved successfully as 'label_encoder.pkl'")
