# FIRST INIT

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip -qq /content/drive/MyDrive/RAVDESS.zip -d /content/RAVDESS 

START CODE HERE

In [2]:
import librosa
import librosa.display
import os
from os.path import exists
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm import tqdm
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ReduceLROnPlateau
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Activation, Conv1D, MaxPooling1D, Flatten, BatchNormalization
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import RMSprop
import itertools
import seaborn as sns
from sklearn import svm
import csv
%matplotlib inline

  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)


DATASET


In [None]:
DATASET_PATH = "/content/RAVDESS"
emotion_list = {
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08': 'surprised'
}
arr = []
for root,dirs,files in os.walk(DATASET_PATH):
  # print(f"root = {root} | dirs = {dirs} | files = {files}")
  for f in files:
    # '03-01-06-01-01-01-23.wav'
    # i = 0 -> modality (dont care)
    # i = 1 -> vocal channel (dont care)
    # i = 2 -> emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised)
    # i = 3 -> Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
    # i = 4 -> Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
    # i = 5 -> Repetition (01 = 1st repetition, 02 = 2nd repetition).
    # i = 6 -> Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).
    x = f.split('-')   
    arr.append([x[6].split('.')[0],x[3],x[4],f"{root}/{f}",emotion_list[x[2]]])
      

arr = np.array(arr)
dataset= pd.DataFrame(arr,columns=['actor','intensity','statement','path','emotion'])
dataset.describe()

Unnamed: 0,actor,intensity,statement,path,emotion
count,1440,1440,1440,1440,1440
unique,24,2,2,1440,8
top,18,1,2,/content/RAVDESS/Actor_18/03-01-08-01-02-02-18...,surprised
freq,60,768,720,1,192


Data Preparation Split

In [None]:
# test = dataset.query("actor == '21' | actor == '22' | actor == '23'| actor == '24'")
# train = dataset.drop(test.index)
# train.reset_index(inplace=True)
# test.reset_index(inplace=True)

Data Augmentation

In [None]:
# NOISE
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data
# STRETCH
def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate)
# SHIFT
def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)
# PITCH
def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)

In [None]:
X, y = [], []
# read data
for path, emotion in zip(dataset['path'], dataset['emotion']):
  data,sr = librosa.load(path)
  X.append(data)
  y.append(emotion)

  if (emotion == 'neutral'):
    # data with noise
    X.append(noise(data))
    y.append(emotion)
  
  new_data = stretch(data)
  X.append(pitch(new_data, sr))
  y.append(emotion)

X = np.array(X)
y = np.array(y)
print(X.shape, y.shape)

(2976,) (2976,)




In [None]:
unique, counts = np.unique(y, return_counts=True)

result = np.column_stack((unique, counts)) 
print (result)

[['angry' '384']
 ['calm' '384']
 ['disgust' '384']
 ['fearful' '384']
 ['happy' '384']
 ['neutral' '288']
 ['sad' '384']
 ['surprised' '384']]


Feature Extraction

In [None]:
mfccs = []
mels = []
chromas = []
contrasts = []
tonnetzs = []
sr = 22050
for x in tqdm(X):
  mfcc = librosa.feature.mfcc(x,sr,n_mfcc=40)
  mfcc = np.mean(mfcc,axis=1)
  chrom = librosa.feature.chroma_stft(x,sr)
  chrom = np.mean(chrom,axis=1)
  mel = librosa.feature.melspectrogram(x,sr)
  mel = np.mean(mel,axis=1)
  con = librosa.feature.spectral_contrast(x,sr)
  con = np.mean(con,axis=1)
  tonn = librosa.feature.tonnetz(x,sr)
  tonn = np.mean(tonn,axis=1)
  mfccs.append(mfcc)
  chromas.append(chrom)
  mels.append(mel)  
  tonnetzs.append(tonn)
  contrasts.append(con)

mfccs = np.array(mfccs)  
mels = np.array(mels)  
chromas = np.array(chromas)  
contrasts = np.array(contrasts)  
tonnetzs = np.array(tonnetzs)

  n_fft, y.shape[-1]
100%|██████████| 2976/2976 [13:22<00:00,  3.71it/s]


In [None]:
final = [mfccs,mels,chromas,contrasts,tonnetzs]

0 = MFCC
1 = Mel Spectrogram
2 = Chromagram
3 = Contrast
4 = Tonnetz

In [None]:
from itertools import permutations
l = list(permutations("01234"))
for d in l:
  nama_file = ""
  nama_file = nama_file.join(d)
  X = np.array(final[int(d[0])])
  X = np.concatenate((X,final[int(d[1])]),axis=1)
  X = np.concatenate((X,final[int(d[2])]),axis=1)
  X = np.concatenate((X,final[int(d[3])]),axis=1)
  X = np.concatenate((X,final[int(d[4])]),axis=1)
  np.savez_compressed(f'/content/drive/MyDrive/Experiment/Data/X/'+nama_file,X)
  np.savez_compressed(f'/content/drive/MyDrive/Experiment/Data/y/'+nama_file,y) 


Experiment

In [None]:
use_tpu = True #@param {type:"boolean"}

if use_tpu:
    assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

if 'COLAB_TPU_ADDR' in os.environ:
  TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])
else:
  TF_MASTER=''

In [None]:
# Model specific parameters

# TPU address
tpu_address = TF_MASTER

# Number of epochs
epochs = 50

# Number of steps_per_epoch
steps_per_epoch = 5

# NOTE: Total number of training steps = Number of epochs * Number of steps_per_epochs

In [None]:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TF_MASTER)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

In [None]:
def get_model(shape, loss='categorical_crossentropy', optimizer=RMSprop(lr=0.00001, decay=1e-6), metrics=['accuracy']):
  # with strategy.scope():
  with tf.device('/device:GPU:0'):
        model = Sequential()
        model.add(Conv1D(256, 8, padding='same',input_shape=(shape,1)))  # X_train.shape[1] = No. of Columns
        model.add(Activation('relu'))

        model.add(Conv1D(256, 8, padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(0.25))
        model.add(MaxPooling1D(pool_size=(8)))

        model.add(Conv1D(128, 8, padding='same'))
        model.add(Activation('relu'))
        model.add(Conv1D(128, 8, padding='same'))
        model.add(Activation('relu'))
        model.add(Conv1D(128, 8, padding='same'))
        model.add(Activation('relu'))

        model.add(Conv1D(128, 8, padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(0.25))
        model.add(MaxPooling1D(pool_size=(8)))

        model.add(Conv1D(64, 8, padding='same'))
        model.add(Activation('relu'))

        model.add(Conv1D(64, 8, padding='same'))
        model.add(Activation('relu'))
        model.add(Flatten())

        model.add(Dense(8))
        model.add(Activation('softmax'))
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        return model

  super(RMSprop, self).__init__(name, **kwargs)


Training

In [None]:
from itertools import permutations
l = list(permutations("01234"))

In [None]:
# a = []
# for d in l:
#   nama_file = ""
#   nama_file = nama_file.join(d)
#   a.append([nama_file,0])

# a = np.array(a)
# print(a.shape)
# # a.tofile(f'/content/drive/MyDrive/THESIS/Experiment/Data/hasil.csv',sep=',',)
# # np.savetxt('/content/drive/MyDrive/THESIS/Experiment/Data/hasil.csv', a, delimiter=',', fmt='%s')
# np.savez_compressed(f'/content/drive/MyDrive/Results/hasil',a)

(120, 2)


In [None]:
base_model = get_model(X.shape[1])
model.save('/content/drive/MyDrive/Results/Models/base_model.h5')

In [None]:
batch_size = 64
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.8, verbose=1, patience=15, min_lr=0.000001)
hasil = np.load('/content/drive/MyDrive/Results/hasil.npz')
hasil = hasil['arr_0']
for i,d in enumerate(l):
  epochs = 300
  nama_file = ""
  nama_file = nama_file.join(d)
  # if(float(hasil[i][1])>0):
  #   continue
  print(nama_file)

  X = np.load('/content/drive/MyDrive/Experiment/Data/X/'+nama_file+'.npz')
  y = np.load('/content/drive/MyDrive/Experiment/Data/y/'+nama_file+'.npz')
  X = X['arr_0']
  y = y['arr_0']
  
  encoder = OneHotEncoder()
  y = encoder.fit_transform(y.reshape(-1,1)).toarray()
  x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=0, shuffle=True)
  scaler = StandardScaler()
  x_train = scaler.fit_transform(x_train)
  x_test = scaler.transform(x_test)
  x_train = np.expand_dims(x_train, axis=2)
  x_test = np.expand_dims(x_test, axis=2)

  if (exists("/content/drive/MyDrive/Results/Models/"+nama_file+".h5")):
    model = tf.keras.models.load_model("/content/drive/MyDrive/Results/Models/"+nama_file+".h5")
  else :
    model = tf.keras.models.load_model("/content/drive/MyDrive/Results/Models/base_model.h5")
    history=model.fit(x_train, y_train, batch_size=64, epochs=epochs, validation_split=0.1, callbacks=[rlrp], verbose=0)
    model.save('/content/drive/MyDrive/Results/Models/'+nama_file+'.h5')
  print("Accuracy of our model on test data : " , model.evaluate(x_test,y_test)[1]*100 , "%")
  hasil[i][1] = model.evaluate(x_test,y_test)[1]*100
  epochs = [i for i in range(epochs)]
  fig , ax = plt.subplots(1,2)
  train_acc = history.history['accuracy']
  train_loss = history.history['loss']
  test_acc = history.history['val_accuracy']
  test_loss = history.history['val_loss']

  fig.set_size_inches(20,8)
  ax[0].plot(epochs , train_loss , label = 'Training Loss')
  ax[0].plot(epochs , test_loss , label = 'Testing Loss')
  ax[0].set_title('Training & Testing Loss')
  ax[0].legend()
  ax[0].set_xlabel("Epochs")

  ax[1].plot(epochs , train_acc , label = 'Training Accuracy')
  ax[1].plot(epochs , test_acc , label = 'Testing Accuracy')
  ax[1].set_title('Training & Testing Accuracy')
  ax[1].legend()
  ax[1].set_xlabel("Epochs")
  plt.savefig('/content/drive/MyDrive/Results/Plots/'+nama_file)
  plt.clf()
  pred_test = model.predict(x_test)

  y_pred = encoder.inverse_transform(pred_test)

  y_test = encoder.inverse_transform(y_test)

  df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
  df['Predicted Labels'] = y_pred.flatten()
  df['Actual Labels'] = y_test.flatten()

  cm = confusion_matrix(y_test, y_pred,normalize='true')
  plt.figure(figsize = (12, 10))
  cm = pd.DataFrame(cm , index = [i for i in encoder.categories_] , columns = [i for i in encoder.categories_])
  sns.heatmap(cm, linecolor='white', cmap='Blues', linewidth=1, annot=True, fmt='.2%')
  plt.title('Confusion Matrix', size=20)
  plt.xlabel('Predicted Labels', size=14)
  plt.ylabel('Actual Labels', size=14)
  plt.savefig('/content/drive/MyDrive/Results/Confusion Matrix/'+nama_file)

  np.savez_compressed(f'/content/drive/MyDrive/Results/hasil',hasil)

In [None]:
hasil = np.load('/content/drive/MyDrive/Results/hasil.npz')
hasil = hasil['arr_0']
np.savetxt('/content/drive/MyDrive/Results/hasil.csv', hasil, delimiter=',', fmt='%s')

In [None]:
def precision_calc(cm,j):
  tp = cm[j][j]
  new_cm = np.transpose(cm)
  total = np.sum(new_cm[j],axis=0)
  pre = np.round(tp/total,2)
  return pre

def recall_calc(cm,j):
  tp = cm[j][j]
  total = np.sum(cm[j],axis=0)
  recall = np.round(tp/total,2)
  return recall

In [3]:
from itertools import permutations
l = list(permutations("01234"))
highest = np.zeros(8)
highest_pre = np.zeros(8)
highest_recall = np.zeros(8)

feature_order = np.empty(8,dtype=np.dtype('U5'))
for i,d in enumerate(l):
  nama_file = ""
  nama_file = nama_file.join(d)

  X = np.load('/content/drive/MyDrive/Experiment/Data/X/'+nama_file+'.npz')
  y = np.load('/content/drive/MyDrive/Experiment/Data/y/'+nama_file+'.npz')
  X = X['arr_0']
  y = y['arr_0']

  encoder = OneHotEncoder()
  y = encoder.fit_transform(y.reshape(-1,1)).toarray()
  x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=0, shuffle=True)
  scaler = StandardScaler()
  x_train = scaler.fit_transform(x_train)
  x_test = scaler.transform(x_test)
  x_train = np.expand_dims(x_train, axis=2)
  x_test = np.expand_dims(x_test, axis=2)

  model = tf.keras.models.load_model("/content/drive/MyDrive/Results/Models/"+nama_file+".h5")
  pred_test = model.predict(x_test)
  y_pred = encoder.inverse_transform(pred_test)

  y_test = encoder.inverse_transform(y_test)

  df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
  df['Predicted Labels'] = y_pred.flatten()
  df['Actual Labels'] = y_test.flatten()
  cm = confusion_matrix(y_test, y_pred,normalize='true')
  for i in range(8):
    if((highest[i]<cm[i][i]) and (highest_pre[i]<precision_calc(cm,i))):
      highest[i] = cm[i][i]
      highest_pre[i] = precision_calc(cm,i)
      feature_order[i]=nama_file
  # for i in range(8):
  #   if(((highest[i]+highest_pre[i])/2)<((cm[i][i]+precision_calc(cm,i))/2)):
  #     highest[i] = cm[i][i]
  #     highest_pre[i] = precision_calc(cm,i)
  #     feature_order[i]=nama_file
  # cr = metrics.classification_report(y_test,y_pred,digits=3)
  # print(cr)
  # break

NameError: ignored

In [None]:
print(highest)

In [None]:
print(highest)

[0.79761905 0.91780822 0.91549296 0.86956522 0.70731707 0.7704918
 0.85365854 0.90540541]


In [None]:
print(highest_pre)

[0.91 0.86 0.74 0.79 0.92 0.91 0.73 0.73]


In [None]:
print(feature_order)

['23104' '34210' '42310' '32410' '43210' '34210' '41023' '42103']


In [None]:
from sklearn import metrics
for i,d in enumerate(feature_order):
  nama_file = ""
  nama_file = nama_file.join(d)

  X = np.load('/content/drive/MyDrive/Experiment/Data/X/'+nama_file+'.npz')
  y = np.load('/content/drive/MyDrive/Experiment/Data/y/'+nama_file+'.npz')
  X = X['arr_0']
  y = y['arr_0']

  encoder = OneHotEncoder()
  y = encoder.fit_transform(y.reshape(-1,1)).toarray()
  x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=0, shuffle=True)
  scaler = StandardScaler()
  x_train = scaler.fit_transform(x_train)
  x_test = scaler.transform(x_test)
  x_train = np.expand_dims(x_train, axis=2)
  x_test = np.expand_dims(x_test, axis=2)

  model = tf.keras.models.load_model("/content/drive/MyDrive/Results/Models/"+nama_file+".h5")
  pred_test = model.predict(x_test)
  y_pred = encoder.inverse_transform(pred_test)

  y_test = encoder.inverse_transform(y_test)

  df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
  df['Predicted Labels'] = y_pred.flatten()
  df['Actual Labels'] = y_test.flatten()
  cm = confusion_matrix(y_test, y_pred,normalize='true')
  print(metrics.classification_report(y_test, y_pred, digits=3))
  # print(recall_calc(cm,i))

              precision    recall  f1-score   support

       angry      0.918     0.798     0.854        84
        calm      0.719     0.877     0.790        73
     disgust      0.713     0.803     0.755        71
     fearful      0.878     0.623     0.729        69
       happy      0.779     0.732     0.755        82
     neutral      0.854     0.672     0.752        61
         sad      0.729     0.756     0.743        82
   surprised      0.705     0.905     0.793        74

    accuracy                          0.773       596
   macro avg      0.787     0.771     0.771       596
weighted avg      0.786     0.773     0.773       596

              precision    recall  f1-score   support

       angry      0.851     0.750     0.797        84
        calm      0.859     0.918     0.887        73
     disgust      0.711     0.831     0.766        71
     fearful      0.806     0.783     0.794        69
       happy      0.871     0.659     0.750        82
     neutral      0.887 

In [None]:
from sklearn import metrics
metrics.classification_report(y_test, y_pred, digits=3)

In [None]:
hasil = np.load('/content/drive/MyDrive/Results/hasil.npz')
hasil = hasil['arr_0']

In [None]:
hi = float(0)
fe = ''
for f,a in hasil:
  a = float(a)
  if(float(a)>hi):
    hi=a
    fe=f

print(fe)
print(hi)

34210
80.03355860710144


In [None]:
nama_file = "34210"

X = np.load('/content/drive/MyDrive/Experiment/Data/X/'+nama_file+'.npz')
y = np.load('/content/drive/MyDrive/Experiment/Data/y/'+nama_file+'.npz')
X = X['arr_0']
y = y['arr_0']

encoder = OneHotEncoder()
y = encoder.fit_transform(y.reshape(-1,1)).toarray()
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=0, shuffle=True)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)

model = tf.keras.models.load_model("/content/drive/MyDrive/Results/Models/"+nama_file+".h5")
pred_test = model.predict(x_test)
y_pred = encoder.inverse_transform(pred_test)
y_test = encoder.inverse_transform(y_test)

df = pd.DataFrame(columns=['Predicted Labels', 'Actual Labels'])
df['Predicted Labels'] = y_pred.flatten()
df['Actual Labels'] = y_test.flatten()
cm = confusion_matrix(y_test, y_pred,normalize='true')
print(metrics.classification_report(y_test, y_pred, digits=3))

              precision    recall  f1-score   support

       angry      0.851     0.750     0.797        84
        calm      0.859     0.918     0.887        73
     disgust      0.711     0.831     0.766        71
     fearful      0.806     0.783     0.794        69
       happy      0.871     0.659     0.750        82
     neutral      0.887     0.770     0.825        61
         sad      0.786     0.805     0.795        82
   surprised      0.705     0.905     0.793        74

    accuracy                          0.800       596
   macro avg      0.809     0.803     0.801       596
weighted avg      0.809     0.800     0.800       596

