In [122]:
import soundfile # to read audio file
import numpy as np
import librosa # to extract speech features
import glob
import os
import pickle # to save model after training
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split # for splitting training and testing
from sklearn.neural_network import MLPClassifier # multi-layer perceptron model
from sklearn.metrics import accuracy_score, f1_score # to measure how good we are
from sklearn.tree import DecisionTreeClassifier


import tensorflow as tf
from functools import partial
from tensorflow import keras
from tensorflow.keras.layers import Conv1D, Conv2D,MaxPooling1D,MaxPooling2D, Dense, Flatten, Reshape, Dropout

## Prepare Training Data
Extracting features from every audio file will take some time.

In [29]:
def extract_feature(file_name, **kwargs):
    """
    Extract a 1-D feature vector from audio file `file_name`.

    Each requested feature is computed with librosa, averaged across frames
    and appended to the result in the fixed order: mfcc, chroma, mel, contrast.

        Features supported (truthy keyword flags):
            - MFCC (mfcc), 40 coefficients
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`

    Returns:
        A 1-D numpy array with the concatenated features, or an empty array
        when no feature flag is set (the file is not read in that case).
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")

    result = np.array([])
    if not (mfcc or chroma or mel or contrast):
        # Nothing requested: skip the (slow) audio decoding entirely.
        return result

    # Every feature needs the waveform, so load it whenever any flag is set.
    # (Loading only for chroma/contrast left X undefined for mfcc/mel alone.)
    X, sample_rate = librosa.load(file_name)

    # The magnitude spectrogram is only needed by the STFT-based features.
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))

    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # Pass the signal by keyword: recent librosa releases reject it positionally.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        # Appended last so the layout of the other features is unchanged.
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))

    return result

# all emotions on RAVDESS dataset
# int2emotion = {
#     "01": "neutral",
#     "02": "calm",
#     "03": "happy",
#     "04": "sad",
#     "05": "angry",
#     "06": "fearful",
#     "07": "disgust",
#     "08": "surprised"
# }

# we allow only these emotions ( feel free to tune this on your need )
# AVAILABLE_EMOTIONS = {
#     "angry",
#     "sad",
#     "neutral",
#     "happy"
# }

# Binary sentiment label per RAVDESS emotion code (list index = code - 1):
# 1 marks a positive emotion, 0 a negative one.
EMOTIONS_AVAILABLE = [
    1,  # 01 neutral
    1,  # 02 calm
    1,  # 03 happy
    0,  # 04 sad
    0,  # 05 angry
    0,  # 06 fearful
    0,  # 07 disgust
    1,  # 08 surprised
]

def load_data(test_size=0.2, data_glob="data/Actor_*/*.wav"):
    """
    Build the feature matrix and binary labels, then split them.

    Args:
        test_size: fraction of samples held out for testing, forwarded to
            `train_test_split`.
        data_glob: glob pattern of the audio files to load. The third
            dash-separated field of each file name is read as the 1-based
            emotion code and mapped to a 0/1 label via EMOTIONS_AVAILABLE.

    Returns:
        The `train_test_split` result: X_train, X_test, y_train, y_test.

    Raises:
        ValueError: if no file matches `data_glob`.
    """
    X, y = [], []
    # glob returns files in filesystem-dependent order; sorting makes the
    # seeded split below reproducible across machines.
    for file in sorted(glob.glob(data_glob)):
        # get the base name of the audio file
        basename = os.path.basename(file)
        # map the emotion code in the file name to its positive/negative label
        emotion_code = int(basename.split("-")[2])
        emotion = EMOTIONS_AVAILABLE[emotion_code - 1]
        # extract speech features
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        # add to data
        X.append(features)
        y.append(emotion)
    if not X:
        raise ValueError("No audio files matched pattern: {!r}".format(data_glob))
    # split the data to training and testing and return it
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)

# Run feature extraction over the RAVDESS files: 75% training, 25% testing.
(X_train, X_test,
 y_train, y_test) = load_data(test_size=0.25)

In [30]:
# Save the train/test splits to CSV files so later runs can skip feature extraction.
# The default pandas index column is written on purpose: the loading cell
# discards the first column of every file.
os.makedirs("./prepared-data", exist_ok=True)  # to_csv does not create missing folders
for split_name, split_values in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    pd.DataFrame(split_values).to_csv(os.path.join("./prepared-data", split_name + ".csv"))

If the prepared data is already available in the `prepared-data` folder, you can start the program from here (after running the imports cell).

In [106]:
# Read the four prepared splits back from their CSV files.
prepared = {
    stem: pd.read_csv(os.path.join("./prepared-data/", stem + ".csv")).to_numpy()
    for stem in ("X_train", "X_test", "y_train", "y_test")
}

# Column 0 of every file is the index written by to_csv: drop it from the
# feature matrices and keep only the label column of the targets.
X_train = prepared["X_train"][:, 1:]
X_test = prepared["X_test"][:, 1:]
y_train = prepared["y_train"][:, 1]
y_test = prepared["y_test"][:, 1]

### Random Forest

In [108]:
def en_model_rfc(xtrain,xtest,ytrain,ytest):
    """
    Fit a random forest on the training split and print its test accuracy.

    Args:
        xtrain, ytrain: features and labels used for fitting.
        xtest, ytest: features and labels used only for the printed accuracy.

    Returns:
        The fitted RandomForestClassifier.
    """
    print("\n Random Forest")
    clf = RandomForestClassifier(n_estimators=120, max_leaf_nodes=15,
                                 n_jobs=-1, random_state=42)
    clf.fit(xtrain, ytrain)
    # score() already predicts on xtest internally; no separate predict() pass
    # is needed since the predictions themselves are not used.
    print("Accuracy: ", clf.score(xtest, ytest))
    return clf
    
# Train and score the random forest on the prepared splits.
model_rf = en_model_rfc(xtrain=X_train, xtest=X_test, ytrain=y_train, ytest=y_test)


 Random Forest
Accuracy:  0.6916666666666667


In [109]:
# Persist the fitted random forest. The context manager closes the file even
# if pickling fails (the bare open() call leaked the handle).
os.makedirs("trained-model", exist_ok=True)
with open("trained-model/rf_classifier.model", "wb") as model_file:
    pickle.dump(model_rf, model_file)

### AdaBoost

In [123]:
def en_model_ada(xtrain,xtest,ytrain,ytest):
    """
    Fit an AdaBoost ensemble of depth-3 decision trees and print its test accuracy.

    Args:
        xtrain, ytrain: features and labels used for fitting.
        xtest, ytest: features and labels used only for the printed accuracy.

    Returns:
        The fitted AdaBoostClassifier.
    """
    print("\n AdaBoost")
    # NOTE(review): algorithm="SAMME.R" is deprecated in recent scikit-learn
    # releases - confirm against the installed version before upgrading.
    clf = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=3), n_estimators=100,
            algorithm="SAMME.R", learning_rate=0.5, random_state=42)

    clf.fit(xtrain, ytrain)
    # score() already predicts on xtest internally; no separate predict() pass
    # is needed since the predictions themselves are not used.
    print("Accuracy: ", clf.score(xtest, ytest))
    return clf
    
# Train and score the AdaBoost ensemble on the prepared splits.
model_ada = en_model_ada(xtrain=X_train, xtest=X_test, ytrain=y_train, ytest=y_test)


 AdaBoost
Accuracy:  0.7055555555555556


In [124]:
# Persist the fitted AdaBoost model. The context manager closes the file even
# if pickling fails (the bare open() call leaked the handle).
os.makedirs("trained-model", exist_ok=True)
with open("trained-model/ada_classifier.model", "wb") as model_file:
    pickle.dump(model_ada, model_file)

### Multi-layer Perceptron

In [110]:
# best model, determined by a grid search
model_params = {
    'alpha': 0.01,
    'batch_size': 256,
    'epsilon': 1e-08,
    'hidden_layer_sizes': (300,),
    'learning_rate': 'adaptive',
    'max_iter': 500,
}

# Seed the weight initialisation and batch shuffling so the reported accuracy
# is reproducible, as the random forest and AdaBoost models already are.
model = MLPClassifier(random_state=42, **model_params)
model.fit(X_train, y_train)

# predict 25% of data to measure how good we are
y_pred = model.predict(X_test)

# calculate the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 61.39%


## CNN

In [113]:
# Seed numpy and TensorFlow so weight initialisation and dropout are repeatable.
np.random.seed(42)
tf.random.set_seed(42)

# checkpoint_path = "/content/gdrive/MyDrive/Trained_Model/training_1/cp.ckpt"
checkpoint_path = "trained-model/training-1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Make sure the checkpoint folder exists before training starts writing to it.
os.makedirs(checkpoint_dir, exist_ok=True)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

# Binary classifier over the 180-value feature vector. Only the first layer
# declares an input shape; Keras infers the shape of every later layer, so
# input_shape arguments on hidden layers are ignored and have been dropped.
model = keras.models.Sequential([
    # Kernel spans the whole 180-step input -> one step with 120 channels.
    Conv1D(filters=120, kernel_size=180, input_shape=(180, 1)),
    # Reinterpret the 120 channels as a 120-step, single-channel sequence.
    Reshape((120, 1)),
    Dropout(0.2),
    Conv1D(filters=1, kernel_size=10),   # (120, 1) -> (111, 1)
    MaxPooling1D(pool_size=2),           # (111, 1) -> (55, 1)
    Dropout(0.2),
    Conv1D(filters=1, kernel_size=2),    # (55, 1) -> (54, 1)
    MaxPooling1D(pool_size=2),           # (54, 1) -> (27, 1)
    Flatten(),
    Dense(units=27, activation='relu'),
    Dropout(0.1),
    # Single sigmoid unit: probability of the positive class.
    Dense(units=1, activation='sigmoid'),
])

In [114]:
# Print the layer-by-layer output shapes and parameter counts of the CNN.
model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_36 (Conv1D)           (None, 1, 120)            21720     
_________________________________________________________________
reshape_16 (Reshape)         (None, 120, 1)            0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 120, 1)            0         
_________________________________________________________________
conv1d_37 (Conv1D)           (None, 111, 1)            11        
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 55, 1)             0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 55, 1)             0         
_________________________________________________________________
conv1d_38 (Conv1D)           (None, 54, 1)           

In [115]:
# Conv1D expects (samples, steps, channels): append a trailing channel axis
# of size 1 to the 2-D feature matrices.
X_train_cnn = np.expand_dims(X_train, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

In [116]:
# Configure the CNN for binary classification.
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Train for 80 epochs in batches of 32; the test split is used as validation
# data and the checkpoint callback saves the weights after every epoch.
history = model.fit(
    x=X_train_cnn,
    y=y_train,
    batch_size=32,
    epochs=80,
    callbacks=[cp_callback],
    validation_data=(X_test_cnn, y_test),
)

Epoch 1/80
Epoch 00001: saving model to trained-model/training-1/cp.ckpt
Epoch 2/80
Epoch 00002: saving model to trained-model/training-1/cp.ckpt
Epoch 3/80
Epoch 00003: saving model to trained-model/training-1/cp.ckpt
Epoch 4/80
Epoch 00004: saving model to trained-model/training-1/cp.ckpt
Epoch 5/80
Epoch 00005: saving model to trained-model/training-1/cp.ckpt
Epoch 6/80
Epoch 00006: saving model to trained-model/training-1/cp.ckpt
Epoch 7/80
Epoch 00007: saving model to trained-model/training-1/cp.ckpt
Epoch 8/80
Epoch 00008: saving model to trained-model/training-1/cp.ckpt
Epoch 9/80
Epoch 00009: saving model to trained-model/training-1/cp.ckpt
Epoch 10/80
Epoch 00010: saving model to trained-model/training-1/cp.ckpt
Epoch 11/80
Epoch 00011: saving model to trained-model/training-1/cp.ckpt
Epoch 12/80
Epoch 00012: saving model to trained-model/training-1/cp.ckpt
Epoch 13/80
Epoch 00013: saving model to trained-model/training-1/cp.ckpt
Epoch 14/80
Epoch 00014: saving model to traine

Epoch 30/80
Epoch 00030: saving model to trained-model/training-1/cp.ckpt
Epoch 31/80
Epoch 00031: saving model to trained-model/training-1/cp.ckpt
Epoch 32/80
Epoch 00032: saving model to trained-model/training-1/cp.ckpt
Epoch 33/80
Epoch 00033: saving model to trained-model/training-1/cp.ckpt
Epoch 34/80
Epoch 00034: saving model to trained-model/training-1/cp.ckpt
Epoch 35/80
Epoch 00035: saving model to trained-model/training-1/cp.ckpt
Epoch 36/80
Epoch 00036: saving model to trained-model/training-1/cp.ckpt
Epoch 37/80
Epoch 00037: saving model to trained-model/training-1/cp.ckpt
Epoch 38/80
Epoch 00038: saving model to trained-model/training-1/cp.ckpt
Epoch 39/80
Epoch 00039: saving model to trained-model/training-1/cp.ckpt
Epoch 40/80
Epoch 00040: saving model to trained-model/training-1/cp.ckpt
Epoch 41/80
Epoch 00041: saving model to trained-model/training-1/cp.ckpt
Epoch 42/80
Epoch 00042: saving model to trained-model/training-1/cp.ckpt
Epoch 43/80
Epoch 00043: saving model 

Epoch 59/80
Epoch 00059: saving model to trained-model/training-1/cp.ckpt
Epoch 60/80
Epoch 00060: saving model to trained-model/training-1/cp.ckpt
Epoch 61/80
Epoch 00061: saving model to trained-model/training-1/cp.ckpt
Epoch 62/80
Epoch 00062: saving model to trained-model/training-1/cp.ckpt
Epoch 63/80
Epoch 00063: saving model to trained-model/training-1/cp.ckpt
Epoch 64/80
Epoch 00064: saving model to trained-model/training-1/cp.ckpt
Epoch 65/80
Epoch 00065: saving model to trained-model/training-1/cp.ckpt
Epoch 66/80
Epoch 00066: saving model to trained-model/training-1/cp.ckpt
Epoch 67/80
Epoch 00067: saving model to trained-model/training-1/cp.ckpt
Epoch 68/80
Epoch 00068: saving model to trained-model/training-1/cp.ckpt
Epoch 69/80
Epoch 00069: saving model to trained-model/training-1/cp.ckpt
Epoch 70/80
Epoch 00070: saving model to trained-model/training-1/cp.ckpt
Epoch 71/80
Epoch 00071: saving model to trained-model/training-1/cp.ckpt
Epoch 72/80
Epoch 00072: saving model 

In [117]:
# evaluate() returns the loss followed by the compiled metrics, so `score`
# holds the binary cross-entropy loss and `acc` the accuracy.
score, acc = model.evaluate(X_test_cnn, y_test)
print("Accuracy: ",acc)
# Threshold the sigmoid outputs at 0.5 and flatten (n, 1) -> (n,) so the
# predictions have the same shape as the labels.
y_pred = (model.predict(X_test_cnn, verbose=0) > 0.5).astype(int).ravel()
# f1_score expects the ground truth first; pass by keyword to avoid mix-ups.
print("F1: ", f1_score(y_true=y_test, y_pred=y_pred))

Accuracy:  0.675000011920929
F1:  0.7167070217917676


In [119]:
# Save the trained CNN under trained-model/cnn_classifier.
model.save("trained-model/cnn_classifier")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: trained-model/cnn_classifier/assets
