## Import libraries

In [None]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import cv2
import pickle

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

## Get data from datasets:

In [None]:
main_path = 'D://emotion_recognition_data'
TESS = os.path.join(main_path, "data3/")
RAV = os.path.join(main_path, "data/")
SAVEE = os.path.join(main_path, "data2/")

dir_list = os.listdir(SAVEE)
dir_list[0:5]

### RAVDESS dataset

In [None]:
# Per-file metadata parsed from the RAVDESS file names. All lists are filled in
# lockstep: entry k of each list describes the same file.
lst = []  # not filled in this cell; the name is kept in case other cells expect it
emotion = []
voc_channel = []
full_path = []
modality = []
intensity = []
actors = []
phrase = []

# A RAVDESS file name such as "03-01-06-01-02-01-12.wav" consists of seven
# dash-separated numeric fields:
#   modality - vocal channel - emotion - intensity - statement - repetition - actor
for root, dirs, files in tqdm(os.walk(RAV)):
    for file in files:
        stem = os.path.splitext(file)[0]
        try:
            # Parse whole fields rather than single characters, so two-digit
            # values (e.g. actor 12) are not truncated to their last digit.
            modal, vchan, lab, ints, phr, _repetition, act = (
                int(field) for field in stem.split('-')
            )
        except ValueError:
            # Wrong number of fields or a non-numeric field: not a RAVDESS
            # recording, skip it without touching any of the lists.
            continue

        modality.append(modal)
        voc_channel.append(vchan)
        emotion.append(lab)  # numeric emotion code (1-8), decoded later
        intensity.append(ints)
        phrase.append(phr)
        actors.append(act)

        full_path.append((root, file))  # (directory, file name); joined later

In [None]:
# RAVDESS emotion codes are 1-based:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
emotions_list = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']
emotion_dict = dict(enumerate(emotions_list, start=1))

# One row per parsed file, one column per filename field
df = pd.DataFrame({
    'emotion': emotion,
    'voc_channel': voc_channel,
    'modality': modality,
    'intensity': intensity,
    'actors': actors,
    'phrase': phrase,
    'path': full_path,
})

# Replace the numeric codes with readable values
code_maps = {
    'emotion': emotion_dict,
    'voc_channel': {1: 'speech', 2:'song'},
    'modality': {1: 'full AV', 2:'video only', 3:'audio only'},
    'intensity': {1: 'normal', 2:'strong'},
    'phrase': {1: 'Kids are talking by the door', 2:'Dogs are sitting by the door'},
}
for column, mapping in code_maps.items():
    df[column] = df[column].map(mapping)

# Even actor ids are labelled female, odd ones male
df['actors'] = df['actors'].map(lambda actor_id: 'male' if actor_id % 2 else 'female')
# (directory, file name) -> "directory/file name"
df['path'] = df['path'].map('/'.join)


In [None]:
df.head()

### SAVEE

In [None]:
# File names found in the SAVEE folder
dir_list = os.listdir(SAVEE)

# The emotion is read from the two characters at positions -8 and -7 of the
# file name; anything not listed here is labelled 'Unknown'.
savee_codes = {
    '_a': 'angry_male',
    '_d': 'disgust_male',
    '_f': 'fear_male',
    '_h': 'happy_male',
    '_n': 'neutral_male',
    'sa': 'sad_male',
    'su': 'surprise_male',
}
emotion = [savee_codes.get(fname[-8:-6], 'Unknown') for fname in dir_list]
path = [SAVEE + fname for fname in dir_list]

# Assemble the table and look at the label distribution
SAVEE_df = pd.DataFrame({'emotion_label': emotion, 'source': 'SAVEE', 'path': path})
SAVEE_df['emotion_label'].value_counts()

In [None]:
SAVEE_df.head()

### TESS dataset

In [None]:
# Entries of the TESS folder in sorted order; each one is listed as a folder below
dir_list = sorted(os.listdir(TESS))

# Folder name -> emotion label. The spelling and capitalisation of the folder
# names is matched exactly; any other folder is labelled 'Unknown'.
tess_labels = {
    'OAF_angry': 'angry_female',
    'YAF_angry': 'angry_female',
    'OAF_disgust': 'disgust_female',
    'YAF_disgust': 'disgust_female',
    'OAF_Fear': 'fear_female',
    'YAF_fear': 'fear_female',
    'OAF_happy': 'happy_female',
    'YAF_happy': 'happy_female',
    'OAF_neutral': 'neutral_female',
    'YAF_neutral': 'neutral_female',
    'OAF_Pleasant_surprise': 'surprise_female',
    'YAF_pleasant_surprised': 'surprise_female',
    'OAF_Sad': 'sad_female',
    'YAF_sad': 'sad_female',
}

path = []
emotion = []

for folder in dir_list:
    # The label depends on the folder only, so resolve it once per folder
    label = tess_labels.get(folder, 'Unknown')
    for f in os.listdir(TESS + folder):
        emotion.append(label)
        path.append(TESS + folder + "/" + f)

TESS_df = pd.DataFrame({'emotion_label': emotion, 'source': 'TESS', 'path': path})
TESS_df['emotion_label'].value_counts()

In [None]:
TESS_df.head()

### Combining the datasets:

In [None]:
# Keep the speech recordings only (the song channel is dropped). The explicit
# copy gives RAV_df its own data, so the inserts below never act on a slice of
# `df` and `df` itself stays untouched.
RAV_df = df.loc[df.voc_channel == 'speech'].copy()

# Same label format as the TESS / SAVEE tables: "<emotion>_<gender>"
RAV_df.insert(0, "emotion_label", RAV_df.emotion+'_'+RAV_df.actors, True)
RAV_df.insert(1, "source", "RAV", True)
# Use `columns=` instead of a positional axis argument: the positional form
# was deprecated in pandas 1.3 and is rejected by pandas 2.0.
RAV_df = RAV_df.drop(columns=['emotion', 'voc_channel', 'modality', 'intensity', 'phrase'])
RAV_df.head()

In [None]:
TESS_df.insert(2, "actors", "female", True)
SAVEE_df.insert(2, "actors", "male", True)

In [None]:
SAVEE_df['emotion_label'].unique()

In [None]:
df_combined = pd.concat([TESS_df, RAV_df, SAVEE_df])
df_combined.info()

In [None]:
for col in ['emotion_label', 'source', 'actors']:
    print('\nColumn values for ', col.upper())
    print(df_combined[col].value_counts())

In [None]:
# Collapse the per-dataset labels into a sentiment (negative / neutral /
# positive) combined with the speaker gender. Both spellings that occur in the
# source tables are listed ('fearful' and 'fear', 'surprised' and 'surprise').
# 'Unknown' becomes 'unk' so those rows can be filtered out afterwards; a label
# missing from this dict maps to NaN.
new_labels_dict_comb = {'angry_male':'negative_male', 'angry_female':'negative_female', 
                        'calm_male':'neutral_male', 'calm_female':'neutral_female',
                        'disgust_male':'negative_male', 'disgust_female':'negative_female', 
                        'fearful_male':'negative_male','fearful_female':'negative_female',
                        'fear_male':'negative_male', 'fear_female':'negative_female',
                        'happy_male':'positive_male', 'happy_female':'positive_female',
                        'neutral_male':'neutral_male', 'neutral_female':'neutral_female',
                        'sad_male':'negative_male', 'sad_female':'negative_female',
                        'surprised_male':'positive_male', 'surprised_female':'positive_female',
                        'surprise_male':'positive_male', 'surprise_female':'positive_female',
                        'Unknown': 'unk'}

# 'emotion2' is the 6-class target (sentiment x gender) used by the first models
df_combined['emotion2'] = df_combined['emotion_label'].map(new_labels_dict_comb)
df_combined.head()

In [None]:
# Gender-independent emotion labels. Note the naming used in this column:
# 'calm' is merged into 'neutral', 'disgust' is stored as 'negative' and
# 'happy' as 'positive' (a later cell renames 'positive' -> 'happy' and
# 'negative' -> 'disgust' on the target array before encoding).
# 'Unknown' becomes 'unk'; a label missing from this dict maps to NaN.
addit_labels_dict_comb = {'angry_male':'angry', 'angry_female':'angry', 
                        'calm_male':'neutral', 'calm_female':'neutral',
                        'disgust_male':'negative', 'disgust_female':'negative', 
                        'fearful_male':'fear','fearful_female':'fear',
                        'fear_male':'fear', 'fear_female':'fear',
                        'happy_male':'positive', 'happy_female':'positive',
                        'neutral_male':'neutral', 'neutral_female':'neutral',
                        'sad_male':'sadness', 'sad_female':'sadness',
                        'surprised_male':'surprise', 'surprised_female':'surprise',
                        'surprise_male':'surprise', 'surprise_female':'surprise',
                        'Unknown': 'unk'}

# 'emotion3' is the 7-class emotion target
df_combined['emotion3'] = df_combined['emotion_label'].map(addit_labels_dict_comb)
df_combined.head()

In [None]:
df_combined.emotion2.value_counts()

In [None]:
df_combined.emotion3.value_counts()

In [None]:
df_combined = df_combined.loc[df_combined['emotion2'] != 'unk']

### Save the data:

In [None]:
df_combined.to_csv(os.path.join(main_path,"combined.csv"), index=False)

### (Using the combined DF, MFCC's and column 'emotion2' as target)

In [None]:
import pickle

df_path = 'D://emotion_recognition_data//combined.csv'
# mfccs_path = 'd://itc//final_project//mfccs.pickle'
# new_y_path = 'd://itc//final_project//y.pickle'

# with open('d://downloads//y.pickle', 'wb') as f:
#     pickle.dump(new_y, f)
    
# with open('d://downloads//mfccs.pickle', 'wb') as f:
#     pickle.dump(mfccs, f)

mydf = pd.read_csv(df_path)

# with open(mfccs_path, 'rb') as f:
#     mfccs = pickle.load(f)

# with open(new_y_path, 'rb') as f:
#     new_y = pickle.load(f)

### EDA

In [None]:
mydf.head()

In [None]:
mydf.info()

In [None]:
# Pick one recording at random and plot its waveform.
# NOTE(review): this cell resamples to 44100 Hz while the feature-extraction
# cells load with sr=44000 - confirm which rate is intended.
ind = np.random.randint(0,len(mydf))
data, sampling_rate = librosa.load(mydf['path'][ind], sr=44100)
emotion = mydf['emotion2'][ind]

plt.title(f'Sound wave of- {emotion}')
# librosa.display.waveplot is gone in librosa 0.10+ (waveshow replaced it in
# 0.9). Prefer waveshow when it exists and fall back to waveplot on older
# installs. The sample rate is passed by keyword, which both functions accept.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plot_wave(data, sr=sampling_rate)
plt.show()

#### Linear-scale spectrum

In [None]:
D = np.abs(librosa.stft(data))
librosa.display.specshow(D, sr=sampling_rate, x_axis='time', y_axis='linear');
plt.colorbar()
plt.show()

#### Log-scale Spectrogram

In [None]:
DB = librosa.amplitude_to_db(D, ref=np.max)
librosa.display.specshow(DB, sr=sampling_rate, x_axis='time', y_axis='log');
plt.colorbar(format='%+2.0f db')
plt.show()

#### Log-scale spectrum

In [None]:
a = plt.magnitude_spectrum(data, scale='dB')
plt.show()

In [None]:
a1 = np.log(a[0])
a2 = a[1]

In [None]:
plt.plot(a2,a1)
plt.show()

In [None]:
def hl_envelopes_idx(s, dmin=1, dmax=1, split=False):
    """
    Find the indices of the low and the high envelope of a 1-D signal.

    Input :
    s: 1d array-like, data signal from which to extract high and low envelopes
    dmin, dmax: int, optional, size of chunks, use this if the size of the input signal is too big;
        within every run of dmin consecutive local minima only the lowest one is kept,
        within every run of dmax consecutive local maxima only the highest one is kept
    split: bool, optional, if True, split the signal in half along its mean, might help to generate the envelope in some cases
    Output :
    lmin,lmax : index arrays into s of the LOW envelope (lmin) and the HIGH envelope (lmax), in that order
    """
    # Accept lists/tuples as well: the fancy indexing below needs an ndarray
    s = np.asarray(s)

    # Sign change of the slope: positive -> local minimum, negative -> local maximum.
    # Computed once and shared by both envelopes; +1 re-aligns the indices with s.
    slope_change = np.diff(np.sign(np.diff(s)))
    # locals min
    lmin = (slope_change > 0).nonzero()[0] + 1
    # locals max
    lmax = (slope_change < 0).nonzero()[0] + 1

    if split:
        # s_mid is zero if s centered around x-axis or more generally mean of signal
        s_mid = np.mean(s)
        # keep only the local minima that lie below the mean
        lmin = lmin[s[lmin] < s_mid]
        # keep only the local maxima that lie above the mean
        lmax = lmax[s[lmax] > s_mid]

    # global min of dmin-chunks of locals min
    lmin = lmin[[i + np.argmin(s[lmin[i:i + dmin]]) for i in range(0, len(lmin), dmin)]]
    # global max of dmax-chunks of locals max
    lmax = lmax[[i + np.argmax(s[lmax[i:i + dmax]]) for i in range(0, len(lmax), dmax)]]

    return lmin, lmax

In [None]:
data.shape

In [None]:
# NOTE(review): hl_envelopes_idx returns (lmin, lmax), so the value kept here
# holds the local-minimum (low envelope) indices although it is named
# high_idx; dmin=250 and the plot label 'low' agree with that.
# NOTE(review): the indices are found on the time-domain samples `data`, but
# are then used to index the spectrum arrays a2 / a1 - confirm this is
# intended (the envelope of a1 itself may have been meant).
high_idx, _ = hl_envelopes_idx(data[:len(a1)], dmin=250)

# plot
plt.plot(a2[high_idx], a1[high_idx], 'b', label='low')
plt.show()

### Create DF from MFCC's and 'emotion2' columns as labels

In [None]:
new_y = mydf['emotion2'].copy()

In [None]:
X = []
for i in tqdm(mydf['path']):
    X.append(librosa.load(i, res_type='kaiser_fast', sr=44000))

In [None]:
new_x = []
for ind,i in enumerate(X):
    new_x.append(i[0])

In [None]:
lengths = [len(x) for x in new_x]

plt.title('Lengths distribution')
sns.boxplot(lengths)
plt.show()

In [None]:
thresh = 300000

In [None]:
lengths = np.array(lengths)
print((lengths > thresh).sum())
new_lengths = lengths[lengths < thresh]

sns.boxplot(new_lengths)
plt.show()

In [None]:
new_lengths.mean()

In [None]:
length_chosen = 120378

In [None]:
import math  # left in place so cells that expect `math` to be imported keep working

# Bring every recording to exactly `length_chosen` samples and collect the
# matching label. Recordings of 300000 samples or more are dropped as outliers.
X_new = []
y_new = []
for ind, signal in enumerate(new_x):
    n_samples = signal.shape[0]
    if n_samples >= 300000:
        continue

    if n_samples > length_chosen:
        # Too long: keep the first `length_chosen` samples
        signal = signal[:length_chosen]
    elif n_samples < length_chosen:
        # Too short: pad both ends with the signal's median. The two pad widths
        # always add up to exactly `missing`; padding ceil(missing / 2) on both
        # sides gave length_chosen + 1 samples whenever `missing` was odd, which
        # made the stacked array ragged.
        missing = length_chosen - n_samples
        left = missing // 2
        signal = np.pad(signal, (left, missing - left), mode='median')

    X_new.append(signal)
    y_new.append(new_y[ind])
            

In [None]:
X = np.array(X_new)
y = np.array(y_new)
print(X.shape, y.shape)

In [None]:
mfccs = []
for i in tqdm(X):
    mfcc = librosa.feature.mfcc(y=i, sr=44000, n_mfcc=20)
    mfcc = mfcc.T
    mfccs.append(mfcc)


In [None]:
mfccs = np.array(mfccs)

In [None]:
mydf.shape, mfccs.shape, y.shape

### Save the data:

In [None]:
mfccs_path = 'd://ITC//final_project//mfccs.pickle'
y_path = 'd://ITC//final_project//y.pickle'

with open(mfccs_path, 'wb') as f:
    pickle.dump(mfccs,f)
    
with open(y_path, 'wb') as f:
    pickle.dump(y,f)

# BASELINE MODEL

### At this point, we can perform a normal classification, using our mfccs coefficients as our features.

In [None]:
# random:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(mfccs, y, test_size=0.20)
X_train.shape, X_test.shape, y_train.shape, y_test.shape


### We'll change the values in our target variable and expand the dimension of our features to fit the neural networks.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense

model = Sequential(
    [
     layers.Conv1D(64, 3, activation='relu', input_shape=(236,20)),
     layers.MaxPooling1D(),
     layers.Conv1D(64, 3, activation='relu'),
     layers.MaxPooling1D(),
     layers.Conv1D(64, 3, activation='relu'),
     layers.MaxPooling1D(),
     layers.Flatten(),
     layers.Dense(64, activation="relu"),
     layers.Dense(6, activation="softmax")
    ]
)


In [None]:
model.summary()

In [None]:
model.input_shape, model.output_shape

### Encoding the labels:

In [None]:
set(y)

In [None]:
emotions_encode = {'negative_female':0, 'negative_male':1, 'neutral_female':2, 'neutral_male':3,
                  'positive_female':4, 'positive_male':5}

In [None]:
y_train = pd.Series(y_train).map(emotions_encode)
y_test = pd.Series(y_test).map(emotions_encode)

In [None]:
# The targets are integer class ids, hence the sparse categorical loss.
# `metrics` is given as a list: current Keras releases reject a bare string.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the test split doubles as validation data here, so the
# validation curve and the final test report are computed on the same samples.
model.fit(X_train, y_train, batch_size=16, epochs=50, validation_data=(X_test, y_test))

In [None]:
from sklearn.metrics import classification_report

y_pred = model.predict(X_test)
y_pred = [np.argmax(x) for x in y_pred]
print(classification_report(y_test, y_pred, target_names = list(emotions_encode.keys())))

In [None]:
mydf['emotion2'].unique()

In [None]:
import tensorflow as tf

model2 = Sequential()

model2.add(layers.Conv1D(256, 5,padding='same',
                 input_shape=(236,20)))
model2.add(layers.Activation('relu'))
model2.add(layers.MaxPooling1D(pool_size=(8)))
model2.add(layers.Dropout(0.1))

model2.add(layers.Conv1D(128, 5,padding='same'))
model2.add(layers.Activation('relu'))
model2.add(layers.Dropout(0.1))

model2.add(layers.Flatten())
model2.add(layers.Dense(6))
model2.add(layers.Activation('softmax'))

model2.summary()


In [None]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.00001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=40, 
                                              verbose=1)

weight_path = 'd://ITC//final_project//best_weights.hdf5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=weight_path, 
                                                      save_weights_only=True, 
                                                      monitor='val_accuracy', 
                                                      mode='max', 
                                                      save_best_only=True)


In [None]:
# The targets are integer class ids, hence the sparse categorical loss.
# `metrics` is given as a list: current Keras releases reject a bare string.
model2.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the callbacks (LR schedule, early stopping, best-weights
# checkpoint) are driven by the test split, so scores later reported on
# X_test are optimistic - consider a separate validation split.
model2.fit(X_train, y_train, batch_size=16, epochs=500, validation_data=(X_test, y_test),
           callbacks=[reduce_lr, early_stop, model_checkpoint])

In [None]:
model2.load_weights('d://ITC//final_project//best_weights.hdf5')

In [None]:
y_pred = model2.predict(X_test)
y_pred = [np.argmax(x) for x in y_pred]
print(classification_report(y_test, y_pred, target_names = list(emotions_encode.keys())))

## Classify emotions - first we'll need to create new MFCCs and target arrays:

In [None]:
mydf['emotion3'].unique()

In [None]:
# mydf['emotion3'].replace(['fear_female', 'fear_male'], 'fear', inplace=True)
# mydf['emotion3'].replace(['surprise_female', 'surprise_male'], 'surprise', inplace=True)
# mydf['emotion3'].replace(['sad_female', 'sad_male'], 'sadness', inplace=True)
# mydf['emotion3'].replace(['negative_female', 'negative_male'], 'negetive', inplace=True)
# mydf['emotion3'].replace(['positive_female', 'positive_male'], 'positive', inplace=True)
# mydf['emotion3'].replace(['neutral_female', 'neutral_male'], 'neutral', inplace=True)

In [None]:
plt.title('Emotions distribution')
plt.hist(mydf['emotion3'])
# plt.hist(y)
plt.show()

In [None]:
y = mydf['emotion3'].copy()

In [None]:
mydf.head()

In [None]:
y.shape

In [None]:
X = []
for i in tqdm(mydf['path']):
    X.append(librosa.load(i, res_type='kaiser_fast', sr=44000))

In [None]:
new_x = []
for ind,i in enumerate(X):
    new_x.append(i[0])

In [None]:
lengths = [len(x) for x in new_x]

In [None]:
sns.boxplot(lengths)
plt.show()

In [None]:
lengths = np.array(lengths)
print((lengths > 300000).sum())
new_lengths = lengths[lengths < 300000]

sns.boxplot(new_lengths)
plt.show()

In [None]:
print(new_lengths.mean())

In [None]:
length_chosen = 120378

In [None]:
import math  # left in place so cells that expect `math` to be imported keep working

# Bring every recording to exactly `length_chosen` samples and collect the
# matching label. Recordings of 300000 samples or more are dropped as outliers.
X_new = []
y_new = []
for ind, signal in enumerate(new_x):
    n_samples = signal.shape[0]
    if n_samples >= 300000:
        continue

    if n_samples > length_chosen:
        # Too long: keep the first `length_chosen` samples
        signal = signal[:length_chosen]
    elif n_samples < length_chosen:
        # Too short: pad both ends with the signal's median. The two pad widths
        # always add up to exactly `missing`; padding ceil(missing / 2) on both
        # sides gave length_chosen + 1 samples whenever `missing` was odd, which
        # made the stacked array ragged.
        missing = length_chosen - n_samples
        left = missing // 2
        signal = np.pad(signal, (left, missing - left), mode='median')

    X_new.append(signal)
    y_new.append(y[ind])
            

In [None]:
X = np.array(X_new)
y = np.array(y_new)
print(X.shape, y.shape)

In [None]:
mfccs = []
for i in tqdm(X):
    mfcc = librosa.feature.mfcc(y=i, sr=44000, n_mfcc=40)
    mfcc = mfcc.T
    mfccs.append(mfcc)


In [None]:
mfccs = np.array(mfccs)
mfccs.shape

In [None]:
set(y)

### Save new data:

In [None]:
import pickle 

mfccs2_path = 'D://emotion_recognition_data//mfccs2.pickle'
y2_path = 'D://emotion_recognition_data//y2.pickle'

# with open(mfccs2_path, 'wb') as f:
#     pickle.dump(mfccs,f)
    
# with open(y2_path, 'wb') as f:
#     pickle.dump(y,f)
    
with open(mfccs2_path, 'rb') as f:
    mfccs = pickle.load(f)
    
with open(y2_path, 'rb') as f:
    y = pickle.load(f)

In [None]:
set(y)

In [None]:
y = np.where(y=='positive', 'happy', y)
y = np.where(y=='negative', 'disgust', y)

In [None]:
emotion_enc = {'fear':0, 'disgust':1, 'neutral':2, 'happy':3, 'sadness':4, 'surprise':5, 'angry':6}

In [None]:
X_train, X_val, y_train, y_val = train_test_split(mfccs, y, test_size=0.2, random_state=12)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=15)

X_train.shape, X_val.shape, X_test.shape, y_train.shape, y_val.shape, y_test.shape

In [None]:
y_train = pd.Series(y_train).map(emotion_enc)
y_val = pd.Series(y_val).map(emotion_enc)
y_test = pd.Series(y_test).map(emotion_enc)

In [None]:
y_train.unique(), y_test.unique(), y_val.unique()

In [None]:
model3 = Sequential()

model3.add(layers.Conv1D(256, 5,padding='same',
                 input_shape=(236,40)))
model3.add(layers.Activation('relu'))
model3.add(layers.MaxPooling1D(pool_size=(8)))
model3.add(layers.Dropout(0.2))

model3.add(layers.Conv1D(128, 5,padding='same'))
model3.add(layers.Activation('relu'))
model3.add(layers.MaxPooling1D(pool_size=(4)))
model3.add(layers.Dropout(0.1))

model3.add(layers.Flatten())
model3.add(layers.Dense(64))
model3.add(layers.Dense(7))
model3.add(layers.Activation('softmax'))

model3.summary()


In [None]:
# weight_path2 = 'd://ITC//final_project_data//best_weights3.hdf5'
weight_path2 = 'D://emotion_recognition_data//best_weights2.hdf5'

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.00001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1)

model_checkpoint2 = tf.keras.callbacks.ModelCheckpoint(filepath=weight_path2, 
                                                      save_weights_only=True, 
                                                      monitor='val_accuracy', 
                                                      mode='max', 
                                                      save_best_only=True)


In [None]:
# The targets are integer class ids, hence the sparse categorical loss.
# `metrics` is given as a list: current Keras releases reject a bare string.
model3.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# The validation split (not the test split) drives the LR schedule, early
# stopping and the best-weights checkpoint.
model3.fit(X_train, y_train, batch_size=16, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, model_checkpoint2])

In [None]:
# model3.load_weights(weight_path2)

## Transfer learning

In [None]:
# pretrained_model = tf.keras.applications.DenseNet201(include_top=False, 
#                                                      weights='imagenet', 
#                                                      input_shape=(236,40,3))
# # pretrained_model.trainable = False
# for layer in pretrained_model.layers:
#   if 'conv5' in layer.name:
#     layer.trainable = True
#   else:
#     layer.trainable = False

# pretrained_model.input_shape, pretrained_model.output_shape

In [None]:
# X_train.shape

In [None]:
# X_train_expand = np.expand_dims(X_train, 3)
# X_test_expand = np.expand_dims(X_test, 3)

In [None]:
# X_train_expand.shape, X_test_expand.shape

In [None]:
# before_model = Sequential()

# before_model.add(layers.Conv1D(256, 5,padding='same',
#                  input_shape=(236,40,1)))
# before_model.add(layers.Activation('relu'))
# before_model.add(layers.Dropout(0.2))
# # before_model.add(layers.UpSampling2D(size=2))
# before_model.add(layers.Dense(64))
# before_model.add(layers.Dense(3))

# before_model.summary()

In [None]:
# after_model = tf.keras.models.Sequential()
# after_model.add(before_model)
# after_model.add(pretrained_model)
# after_model.add(tf.keras.layers.GlobalAveragePooling2D())
# after_model.add(tf.keras.layers.Flatten())

# after_model.add(tf.keras.layers.Dense(256))
# after_model.add(tf.keras.layers.Dropout(0.2))

# after_model.add(tf.keras.layers.Dense(128))
# after_model.add(tf.keras.layers.Dropout(0.1))
# after_model.add(tf.keras.layers.Dense(6, activation='softmax'))

# after_model.summary()

In [None]:
# after_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics='accuracy')
# after_model.fit(X_train_expand, y_train, batch_size=32, epochs=500, validation_data=(X_test_expand, y_test),
#            callbacks=[reduce_lr, early_stop, model_checkpoint2])

### Final evaluation:

In [None]:
def report_res_and_plot_matrix(y_test, y_pred, plot_classes):
    """Print the accuracy and draw an annotated confusion-matrix heatmap.

    Args:
        y_test: true labels.
        y_pred: predicted labels.
        plot_classes: class names used as tick labels on both axes.

    Returns:
        list: ``[accuracy, confusion_matrix]``.
    """
    # Headline metric
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc:.4f}")

    # Confusion matrix on a fresh figure
    cnf_matrix = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()

    tick_marks = np.arange(len(plot_classes))
    plt.xticks(ticks=tick_marks, labels=plot_classes, rotation=90)
    plt.yticks(ticks=tick_marks, labels=plot_classes, rotation=90)

    # Every cell is annotated with its raw count and its share of all samples
    counts = cnf_matrix.flatten()
    shares = counts / np.sum(cnf_matrix)
    cell_text = []
    for count, share in zip(counts, shares):
        cell_text.append(f'{count:0.0f}\n({100 * share:0.1f} %)')
    side = int(np.sqrt(len(cell_text)))
    annotations = np.asarray(cell_text).reshape(side, side)
    sns.heatmap(cnf_matrix, annot=annotations, fmt='', cmap='Blues',
                xticklabels=plot_classes, yticklabels=plot_classes)

    ax.xaxis.set_label_position("bottom")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # Hand the numbers back for further use
    return [acc, cnf_matrix]

In [None]:
from sklearn.metrics import classification_report

y_pred = model3.predict(X_test).argmax(axis=1)

print(classification_report(y_test, y_pred, target_names=list(emotion_enc.keys())))
params = report_res_and_plot_matrix(y_test, y_pred, list(emotion_enc.keys()))

### Visualization of our classes

In [None]:
X_test.shape, X_train.shape, y_pred.shape

In [None]:
# Flatten every sample's MFCC matrix into one feature vector. The number of
# rows is taken from the arrays themselves instead of hard-coded sample counts,
# so the cell keeps working when the dataset or the split sizes change.
X_test_new = X_test.reshape(len(X_test), -1).copy()
X_train_new = X_train.reshape(len(X_train), -1).copy()

In [None]:
X_test_new.shape, X_train_new.shape

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = StandardScaler()
scaler.fit(X_train_new)
X_train_scaled = scaler.transform(X_train_new)
X_test_scaled = scaler.transform(X_test_new)


In [None]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit_transform(X_train_scaled)
X_pca = pca.transform(X_test_scaled)


In [None]:
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=7, random_state=0).fit(X_pca)
labels = kmeans.labels_

In [None]:
set(labels)

### Figuring out which label resembles which class:

In [None]:
set(y_pred)

In [None]:
l = pd.get_dummies(labels)
p = pd.get_dummies(y_pred)

l.shape, p.shape

In [None]:
h = pd.merge(l, p, left_index=True, right_index=True)

In [None]:
# Spearman correlation between the one-hot cluster labels and the one-hot
# predicted classes, computed once and reused for the plot.
corr = h.corr(method='spearman')
# Mask the lower triangle and the diagonal so each pair is shown only once.
# The builtin `bool` is used because the `np.bool` alias was removed in NumPy 1.24.
mask = np.tril(np.ones_like(corr, dtype=bool))
plt.figure(figsize=(25,10))
ax = sns.heatmap(corr, annot=True, fmt=".2f", mask = mask, square = True, cmap="Blues")

In [None]:
a = pd.DataFrame(labels)
b= pd.DataFrame(y_pred)
c= pd.DataFrame()
c['labels'] = a[0].copy()
c['preds'] = b[0].copy()

In [None]:
c.head()

In [None]:
plt.figure(figsize=(10,4))
sns.countplot(x='labels', hue="preds", data=c)
plt.xlabel('Labels', fontsize=14)
plt.ylabel('Count', fontsize=14)
plt.legend(title='Classes', bbox_to_anchor = (1,1), labels=list(emotion_enc.keys()), fontsize='large')
plt.show()

### Corresponding classes:

- label 0 - class 5
- label 1 - class 2
- label 2 - class 1
- label 3 - class 3
- label 4 - class 6
- label 5 - class 0
- label 6 - class 4


In [None]:
emotion_enc

In [None]:
colors = {0:'blue', 1:'red', 2:'green', 3:'orange', 4:'black', 5:'grey', 6:'brown'}
lab = ['surprise', 'neutral', 'disgust', 'happy', 'angry', 'fear', 'sadness']


fig = plt.figure(figsize = (15, 5))
ax = fig.add_subplot(111)
ax.scatter(X_pca[:, 0], X_pca[:, 1], c = pd.Series(labels).map(colors), alpha=0.5)
for i in range(7):
    plt.scatter(None, None, color=colors[i], label=lab[i])

plt.title('Emotions divided to clusters', fontsize=20)
plt.legend(fontsize=15, bbox_to_anchor= [1, 1.05])
plt.xlabel('PCA 1', fontsize=15)
plt.ylabel('PCA 2', fontsize=15)
plt.show()

### Save models:

In [None]:
import tensorflow as tf

# model2.save('d://ITC//final_project_data//model2.h5')
model3.save('D://emotion_recognition_data//model3.h5')

model3 = tf.keras.models.load_model('D://emotion_recognition_data//model3.h5')

In [None]:
emotion_enc

#### Check random samples from dataset

In [None]:
x,sr = librosa.load('D://emotion_recognition_data//data//data//Actor_01//03-01-04-02-02-01-01.wav',
             res_type='kaiser_fast', sr=44000)

In [None]:
x.shape, length_chosen

In [None]:
# Bring the clip to exactly `length_chosen` samples
if x.shape[0] > length_chosen:
    # Too long: keep the first `length_chosen` samples
    new = x[:length_chosen]
elif x.shape[0] < length_chosen:
    # Too short: pad both ends with the signal's median. The two pad widths add
    # up to exactly `missing`, so an odd difference no longer yields
    # length_chosen + 1 samples.
    missing = length_chosen - x.shape[0]
    new = np.pad(x, (missing // 2, missing - missing // 2), mode='median')
else:
    new = x



In [None]:
mfcc = librosa.feature.mfcc(y=new, sr=44000, n_mfcc=40)
mfcc = mfcc.T
mfcc.shape

In [None]:
mfcc = mfcc.reshape(1,236,40)
mfcc.shape
p = model3.predict(mfcc)

In [None]:
p.argmax()

In [None]:
emotion_enc