Conda environment (for when Dan forgets): speech-emotion

Code from <a href="https://data-flair.training/blogs/python-mini-project-speech-emotion-recognition/">here</a>.

Goal is to deploy model in an Android app, so I'm making sure the code is actually reproducible here.

In [1]:
!ls

index.ipynb                             [34mspeech-emotion-recognition-ravdess-data[m[m


In [2]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [3]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate=sound_file.samplerate
        if chroma:
            stft=np.abs(librosa.stft(X))
        result=np.array([])
        if mfcc:
            mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result=np.hstack((result, mfccs))
        if chroma:
            chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result=np.hstack((result, chroma))
        if mel:
            mel=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
            result=np.hstack((result, mel))
    return result

In [4]:
#DataFlair - Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}
#DataFlair - Emotions to observe
observed_emotions=['calm', 'happy', 'fearful', 'disgust']

In [5]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob("speech-emotion-recognition-ravdess-data/Actor_*/*.wav"):
        file_name=os.path.basename(file)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
        x.append(feature)
        y.append(emotion)
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

#load_data()

In [6]:
#DataFlair - Split the dataset
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

#DataFlair - Get the shape of the training and testing datasets
print((x_train.shape[0], x_test.shape[0]))
#DataFlair - Get the number of features extracted
print(f'Features extracted: {x_train.shape[1]}')

(576, 192)
Features extracted: 180


In [40]:
#DataFlair - Initialize the Multi Layer Perceptron Classifier
import tqdm
tests = []
for test in tqdm.trange(1, 100):
    model=MLPClassifier(alpha=0.01, 
                        batch_size=256, 
                        epsilon=1e-08, 
                        hidden_layer_sizes=(300,), 
                        learning_rate='adaptive', 
                        max_iter=500)

    #DataFlair - Train the model
    model.fit(x_train,y_train)

    #DataFlair - Predict for the test set
    y_pred=model.predict(x_test)

    #DataFlair - Calculate the accuracy of our model
    accuracy=accuracy_score(y_true=y_test, y_pred=y_pred)

    #DataFlair - Print the accuracy
    #print("Accuracy: {:.2f}%".format(accuracy*100))
    tests.append(accuracy)

100%|██████████| 99/99 [01:18<00:00,  1.26it/s]


In [15]:
model.get_params()

{'activation': 'relu',
 'alpha': 0.01,
 'batch_size': 256,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (300,),
 'learning_rate': 'adaptive',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 500,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [24]:
model.coefs_

[array([[ 1.40663535e-02, -4.91742298e-02, -7.06286358e-02, ...,
          3.48195092e-02, -6.29562035e-02, -2.55354338e-03],
        [-1.37074710e-02, -9.55608251e-02, -4.83154159e-02, ...,
          1.25360982e-02,  3.09398390e-02,  3.33188122e-03],
        [ 1.15056847e-02,  2.79328467e-02,  7.55160243e-02, ...,
         -1.77551287e-03,  4.85729756e-02, -2.47912627e-02],
        ...,
        [ 8.08643788e-12,  1.24154748e-02, -3.05129909e-02, ...,
         -1.43436807e-14, -3.49724133e-02, -1.60586596e-02],
        [-1.28835633e-12,  7.54134501e-03, -1.77604585e-02, ...,
          1.40991862e-12, -2.24769294e-02, -9.16445319e-03],
        [-1.15172351e-10,  2.80721093e-03, -7.89028584e-03, ...,
         -2.35714341e-09, -9.96569678e-03, -5.30680644e-03]]),
 array([[ 1.37865805e-02,  1.96732023e-03,  1.38864996e-02,
         -4.89990260e-03],
        [ 8.02521231e-03,  1.13644661e-02, -1.00730538e-01,
          2.24345140e-02],
        [-3.37087893e-03, -8.04249563e-02,  1.07863714e

In [29]:
model.intercepts_

[array([ 0.04668475,  0.09886515,  0.0753171 ,  0.01345352,  0.07100046,
        -0.08227346, -0.0254244 , -0.03441039,  0.0234474 , -0.07787076,
         0.05180446, -0.01693423,  0.02104225, -0.01416796,  0.07082697,
         0.00419546,  0.07133234,  0.00552151, -0.0541261 ,  0.01682478,
         0.00423863, -0.04177588,  0.0046737 , -0.01874905, -0.10683817,
        -0.09597538, -0.00469558,  0.06793386, -0.02595527, -0.07821158,
        -0.12113162,  0.06780024, -0.06295834,  0.1059856 , -0.0736345 ,
        -0.07415066, -0.09368884, -0.0541321 ,  0.08860556, -0.0075903 ,
         0.08438605,  0.01164878,  0.11134638,  0.0174646 , -0.04468362,
         0.03873832,  0.07729911, -0.0936893 , -0.01425738, -0.06163256,
         0.11861844, -0.0324714 , -0.0586511 ,  0.07204019, -0.08781674,
        -0.04640388, -0.0109411 , -0.0178484 , -0.07777559,  0.01590799,
         0.00443274,  0.02632007, -0.08695251, -0.0540209 ,  0.11457499,
         0.040539  , -0.0573153 , -0.03854836,  0.0