In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files

# Prompt for kaggle.json. The return value of files.upload() is bound to a name
# instead of being left as the cell's last expression, so the uploaded content
# (the Kaggle API credentials) is not echoed into the saved notebook output.
uploaded = files.upload()

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy kaggle.json from the working directory into ~/.kaggle/
# (presumably where the kaggle CLI looks for credentials — confirm).
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# List datasets through the Kaggle CLI — presumably a smoke test that the
# credentials installed above are accepted.
! kaggle datasets list

In [None]:
# Download the ejlok1/toronto-emotional-speech-set-tess dataset with the Kaggle CLI.
! kaggle datasets download -d ejlok1/toronto-emotional-speech-set-tess

In [None]:
! unzip toronto-emotional-speech-set-tess.zip

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display  import Audio
import warnings
warnings.filterwarnings('ignore')
os.environ['CUDA_DEVICE_ORDER']="PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES']="0"

import tensorflow as tf
# Ask TensorFlow for the GPU device name; an empty string means none was found,
# in which case device_name is set to the CPU device instead.
device_name = tf.test.gpu_device_name()
if not device_name:
    device_name = "/device:CPU:0"
    print("No GPU, using {}.".format(device_name))
else:
    print("Found GPU at: {}".format(device_name))

# **LOADING DATASET**

In [None]:
paths = []
labels = []
for dirname, _, filenames in os.walk('/content/TESS Toronto emotional speech set data'):
    for filename in filenames:
        paths.append(os.path.join(dirname, filename))
        label = filename.split('_')[-1]
        sas = filename.split('/')[-1].split('A')[0]
        if sas == 'Y':
            sas = 'Female_'
        else:
            sas = 'Male_'
        label = label.split('.')[0]
        label = sas + label
        labels.append(label.lower())
        print(label)
    
print('Dataset is Loaded')

In [None]:
len(paths)

In [None]:
paths[:5]

In [None]:
labels[:5]

In [None]:
## Create a dataframe
# One row per collected file: 'speech' holds the path, 'label' the matching label.
df = pd.DataFrame().assign(speech=paths, label=labels)
df.head()

In [None]:
df['label'].value_counts()

# **Exploratory Data Analysis**

In [None]:
def waveplot(data, sr, emotion):
    """Plot the waveform of an audio signal on a new 10x4 figure and show it.

    Args:
        data: audio samples, passed straight to librosa's waveform plotter.
        sr: sampling rate of ``data``.
        emotion: text used as the figure title.
    """
    plt.figure(figsize=(10,4))
    plt.title(emotion, size=20)
    # librosa.display.waveplot no longer exists in current librosa releases;
    # waveshow is its replacement and is what the other cells here already call.
    librosa.display.waveshow(data, sr=sr)
    plt.show()
    
def spectogram(data, sr, emotion):
    """Draw a dB-scaled STFT spectrogram of ``data`` on a new 11x4 figure.

    The figure is titled with ``emotion`` and gets a colorbar. This function
    does not call ``plt.show()`` itself.
    """
    magnitude = abs(librosa.stft(data))
    decibels = librosa.amplitude_to_db(magnitude)
    plt.figure(figsize=(11,4))
    plt.title(emotion, size=20)
    librosa.display.specshow(decibels, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()

In [None]:
from IPython.display import display

# (label, position of the clip to preview among the files carrying that label).
# The two "angry" entries preview the second matching clip; all others the first.
PREVIEW_SAMPLES = [
    ('female_fear', 0),
    ('male_fear', 0),
    ('female_angry', 1),
    ('male_angry', 1),
    ('female_disgust', 0),
    ('male_disgust', 0),
    ('female_neutral', 0),
    ('male_neutral', 0),
    ('female_sad', 0),
    ('male_sad', 0),
    ('female_ps', 0),
    ('male_ps', 0),
    ('female_happy', 0),
    ('male_happy', 0),
]

# One loop replaces the previously copy-pasted per-label cells: for each label,
# draw the waveform and the spectrogram, then offer the clip for playback.
for emotion, sample_idx in PREVIEW_SAMPLES:
    path = df.loc[df['label'] == emotion, 'speech'].to_numpy()[sample_idx]
    data, sampling_rate = librosa.load(path)

    plt.figure()
    plt.title(emotion)
    librosa.display.waveshow(data, sr=sampling_rate)
    spectogram(data, sampling_rate, emotion)
    # Flush both figures before the audio widget so each label's output stays together.
    plt.show()
    display(Audio(path))

# **FEATURE EXTRACTION**

In [None]:
def extract_mfcc(filename):
    """Return one averaged 40-coefficient MFCC vector for an audio file.

    Loads at most 3 seconds of audio starting 0.5 seconds into ``filename``,
    computes 40 MFCCs, and averages the transposed MFCC matrix along its
    first axis.
    """
    signal, sample_rate = librosa.load(filename, duration=3, offset=0.5)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
    return np.mean(coeffs.T, axis=0)

In [None]:
extract_mfcc(df['speech'][0])

In [None]:
# Compute the MFCC feature vector of every file listed in the 'speech' column.
X_mfcc = df['speech'].apply(extract_mfcc)

In [None]:
X_mfcc

In [None]:
# Turn the Series of per-file feature vectors into a single NumPy array.
X = np.array(X_mfcc.tolist())
X.shape

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'label' column: fit the encoder on it, then transform it.
enc = OneHotEncoder()
enc.fit(df[['label']])
y = enc.transform(df[['label']])

In [None]:
# Densify the one-hot matrix. It is re-derived from the fitted encoder instead of
# calling y.toarray() on y itself, so this cell can be re-run: once y has been
# replaced by a dense array it no longer has a .toarray() method.
y = enc.transform(df[['label']]).toarray()
y.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# The softmax layer needs one unit per one-hot label column. Reading the width
# from y keeps the model in step with the encoded labels instead of relying on
# a hard-coded class count.
n_classes = y.shape[1]

# Single LSTM layer over the 40-step, 1-feature input, followed by two dense
# layers with dropout and a softmax classifier.
model = Sequential([
    LSTM(256, return_sequences=False, input_shape=(40,1)),
    Dropout(0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(n_classes, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# validation_split carves the validation set off the END of the arrays, before
# any shuffling. X and y are still in directory-walk order (which presumably
# groups clips by emotion folder), so the rows are shuffled with a fixed seed
# first; otherwise validation would cover only the last-visited labels.
shuffle_idx = np.random.default_rng(42).permutation(len(X))
history = model.fit(X[shuffle_idx], y[shuffle_idx], validation_split=0.2, epochs=100, batch_size=64)

In [None]:
# Training vs. validation accuracy per epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Size the x-axis from the recorded history rather than a hard-coded 100, so the
# plot still lines up if the number of trained epochs changes.
epochs = list(range(len(acc)))
plt.plot(epochs, acc, label='train accuracy')
plt.plot(epochs, val_acc, label='val accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch; the x-axis reuses `epochs` defined in
# an earlier cell.
loss = history.history['loss']
val_loss = history.history['val_loss']

for curve, curve_label in ((loss, 'train loss'), (val_loss, 'val loss')):
    plt.plot(epochs, curve, label=curve_label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

list1 = df['label']
print(list1)

# Hold out 30% of the samples; random_state pins the shuffle so the split is
# repeatable. (The mixed-case name YTrain is kept exactly as it was.)
Xtrain, Xtest, YTrain, Ytest = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

Xtrain.shape, Xtest.shape