In [1]:
import librosa
import librosa.display
import IPython.display as ipd
import soundfile
import os, glob
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Emotions to observe and list of file
observedEmotions=['neutral', 'happy', 'angry']
neutral = []
happy = []
angry = []

In [3]:
# Emotions in the RAVDESS dataset
emotions={
  '01':'neutral',
  '02':'calm',
  '03':'happy',
  '04':'sad',
  '05':'angry',
  '06':'fearful',
  '07':'disgust',
  '08':'surprised'
}

# Sum data set and split
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\ravdess-emotional-speech-audio\\Actor_*\\*.wav"):
        fileName=os.path.basename(file)
        emotion=emotions[fileName.split("-")[2]]
        if emotion not in observedEmotions:
            continue
        if emotion == 'neutral':
            neutral += [file]
        elif emotion == 'happy':
            happy += [file]
        elif emotion == 'angry':
            angry += [file]

In [4]:
# Emotions in the TESS Toronto emotional speech set data

# Sum data set and split
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\TESS Toronto emotional speech set data\\*_angry\\*.wav"):
        fileName=os.path.basename(file)
        angry += [file]
        
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\TESS Toronto emotional speech set data\\*_happy\\*.wav"):
        fileName=os.path.basename(file)
        happy += [file]
        
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\TESS Toronto emotional speech set data\\*_neutral\\*.wav"):
        fileName=os.path.basename(file)
        neutral += [file]

In [5]:
# Emotions in the CREMA-D dataset

# Sum data set and split
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\cremad\\AudioWAV\\*_NEU_*.wav"):
        fileName=os.path.basename(file)
        neutral += [file]
        
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\cremad\\AudioWAV\\*_HAP_*.wav"):
        fileName=os.path.basename(file)
        happy += [file]
        
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\cremad\\AudioWAV\\*_ANG_*.wav"):
        fileName=os.path.basename(file)
        angry += [file]

In [6]:
# Emotions in the SAVEE dataset

# Sum data set and split
for file in glob.glob("D:\\belajar IT\Purwadhika JC Data Science\Code\\Final Project\\surrey-audiovisual-expressed-emotion-savee\\ALL\\*.wav"):
        fileName=os.path.basename(file)
        emotion= fileName.split("_")[1][0]
        if emotion=='a':
            angry += [file]
        elif emotion=='h':
            happy += [file]
        elif emotion=='n':
            neutral += [file]

In [7]:
# total file to observe
print(len(neutral), len(happy), len(angry))

# join all file to 1 list for feature extraction
allFile = neutral + happy + angry
print(len(allFile))

1703 1923 1923
5549


In [8]:
# Extract features (mfcc, chroma) from a sound file
def extractFeature(file):
    data , sr = librosa.load(file)
    hasil=np.array([])
    mfccs=np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
    hasil=np.hstack((hasil, mfccs))
    stft=np.abs(librosa.stft(data))
    chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T,axis=0)
    hasil=np.hstack((hasil, chroma))
    return hasil

In [9]:
# check data after feature extraction

x,y=[],[]
# neutral
fileTest = neutral[0]
emotion='neutral'
feature=extractFeature(fileTest)
x.append(feature)
y.append(emotion)


dfCheck = pd.DataFrame(x)
dfCheck['emotions'] = y

dfCheck

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,emotions
0,-697.984192,55.228489,0.323863,12.775377,7.396148,0.866224,-3.550276,-2.828331,-11.305533,-2.524927,...,0.782118,0.739672,0.680501,0.683999,0.728767,0.755843,0.7463,0.744283,0.722983,neutral


In [10]:
# make dataframe for Hyper Parameter Tuning using GridSearchCV

x,y=[],[]
for i in range(0,50):
    # neutral
    fileTest = neutral[i]
    emotion='neutral'
    feature=extractFeature(fileTest)
    x.append(feature)
    y.append(emotion)
    
    # angry
    fileTest = angry[i]
    emotion='angry'
    feature=extractFeature(fileTest)
    x.append(feature)
    y.append(emotion)
    
    # happy
    fileTest = neutral[i]
    emotion='happy'
    feature=extractFeature(fileTest)
    x.append(feature)
    y.append(emotion)


dfTest = pd.DataFrame(x)
dfTest['emotions'] = y
xtr, xts, ytr, yts = train_test_split(
        dfTest[[i for i in range(0,52)]], dfTest['emotions'], test_size=0.8
    )


In [11]:
# hyperparameter tuning for MPLClassifier

penalty = ['l1', 'l2', 'elasticnet', 'none']
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
max_iter = [50, 100, 200]

param = {'penalty': penalty, 'solver': solver, 'max_iter': max_iter}

modeltest = LogisticRegression()

modelgs = GridSearchCV(
    modeltest,
    param)
modelgs.fit(xtr, ytr)

GridSearchCV(cv=None, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'max_iter': [50, 100, 200],
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',
                                    'saga']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=Non

In [12]:
modelgs.best_params_

{'max_iter': 50, 'penalty': 'l2', 'solver': 'lbfgs'}

In [13]:
# Load the data and extract features for each sound file
def loadData(testSize=0.2):
    x,y=[],[]
    for file in neutral:
            try:
                emotion='neutral' # 01 for neutral emotion
                feature=extractFeature(file)
                x.append(feature)
                y.append(emotion)
            except:
                continue
        
    for file in angry:
        try:
            emotion='angry' # 01 for neutral emotion
            feature=extractFeature(file)
            x.append(feature)
            y.append(emotion)
        except:
            continue
    for file in happy:
        try:
            emotion='happy' # 01 for neutral emotion
            feature=extractFeature(file)
            x.append(feature)
            y.append(emotion)
        except:
            continue
    df = pd.DataFrame(x)
    df['emotions'] = y
    xtr, xts, ytr, yts = train_test_split(
        df[[i for i in range(0,52)]], df['emotions'], test_size=testSize
    )
    return xtr, xts, ytr, yts

In [14]:
# Split the dataset
xtr,xts,ytr,yts=loadData(testSize=0.2)

In [15]:
# Get the shape of the training and testing datasets
print((xtr.shape[0], xts.shape[0]))

(4439, 1110)


In [16]:
# Get the number of features extracted
print(f'Features extracted: {xtr.shape[1]}')

Features extracted: 52


In [17]:
# Making model
model = LogisticRegression(max_iter= 50, penalty= 'l2', solver= 'lbfgs')

In [18]:
# Train the model
model.fit(xtr,ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=50,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [19]:
# Predict for the test data
yPred=model.predict(xts)

In [20]:
# Calculate the accuracy of model
accuracy=accuracy_score(y_true=yts, y_pred=yPred)
print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 64.95%


In [21]:
print(classification_report(yts, yPred))

              precision    recall  f1-score   support

       angry       0.68      0.69      0.68       394
       happy       0.59      0.51      0.55       388
     neutral       0.68      0.76      0.72       328

    accuracy                           0.65      1110
   macro avg       0.65      0.66      0.65      1110
weighted avg       0.65      0.65      0.65      1110

