<a href="https://colab.research.google.com/github/abhi10010/fastai-v3/blob/master/Speech_Emotion_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [181]:
pip install soundfile



In [182]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [183]:
def extract_feature(file_name, mfcc, chroma, mel):
  with soundfile.SoundFile(file_name) as sound_file:
    X = sound_file.read(dtype = "float32")
    sample_rate = sound_file.samplerate
    if chroma:
      stft = np.abs(librosa.stft(X))
    result = np.array([])
    
    if mfcc:
      mfccs = np.mean(librosa.feature.mfcc(y = X, sr = sample_rate, n_mfcc = 40).T, axis = 0)
      result = np.hstack((result, mfccs))

    if chroma:
      chroma = np.mean(librosa.feature.chroma_stft(S = stft, sr = sample_rate).T, axis = 0)
      result = np.hstack((result, chroma))
    
    if mel:
      mel = np.mean(librosa.feature.melspectrogram(X, sr = sample_rate).T, axis = 0)
      result = np.hstack((result, mel))
  return result

In [184]:
emotions = {
    '01': 'Neutral',
    '02': 'Calm',
    '03': 'Happy',
    '04': 'Sad',
    '05': 'Angry',
    '06': 'Fearful',
    '07': 'Disgust',
    '08': 'Surprise'
}
observed_emotions = ['Calm', 'Happy', 'Fearful', 'Angry']

In [185]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [186]:
cd /content/gdrive/My Drive/

/content/gdrive/My Drive


In [187]:
cd 'Colab Notebooks'/

/content/gdrive/My Drive/Colab Notebooks


In [188]:
import glob
def load_data(test_size = 0.2):
  x, y = [], []
  for file in glob.glob("Speech-Emotion-Dataset/Actor_*/*.wav"):
    file_name = os.path.basename(file)
    emotion = emotions[file_name.split('-')[2]]
    if emotion not in observed_emotions:
      continue
    feature = extract_feature(file, mfcc = True, chroma = True, mel = True)
    x.append(feature)
    y.append(emotion)
    np.array(x)
  return train_test_split(np.array(x), y, test_size = test_size, random_state = 1)

In [189]:
x_train, x_test, y_train, y_test = load_data()

In [190]:
print((x_train.shape[0], x_test.shape[0]))

(614, 154)


In [191]:
print(f'Features extracted: {x_train.shape[1]}')

Features extracted: 180


In [192]:
model = MLPClassifier(alpha = 0.01, 
                      batch_size = 256, epsilon = 1e-08,
                      hidden_layer_sizes = (300,), learning_rate = 'adaptive',
                      max_iter = 500)

In [193]:
model.fit(x_train, y_train)

MLPClassifier(activation='relu', alpha=0.01, batch_size=256, beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(300,), learning_rate='adaptive',
              learning_rate_init=0.001, max_fun=15000, max_iter=500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [194]:
y_pred = model.predict(x_test)

In [195]:
accuracy = accuracy_score(y_true = y_test, y_pred = y_pred)
print('Accuracy: {:.2f}%'.format(accuracy*100))

Accuracy: 82.47%
