In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import librosa
import sklearn
import soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

In [2]:
DATADIR = "data"
CATEGORIES = ["Segmented_Laugh", "Segmented_NonLaugh"]

In [3]:
n_mfcc = 3

In [4]:
training_data = []
def create_training_data():
    count=0
    error_count = 0
    #Enter time in seconds
    t_window = 1
    t_frame = 0.025
    t_shift = 0.01
    n_mfccs = n_mfcc
    laugh_counter = 0
    nonlaugh_counter = 0
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category) 
        class_num = CATEGORIES.index(category)
        print('New Folder')

        for aud in os.listdir(path):
            if aud == '.DS_Store':
                continue
            
            aud_array , sr = librosa.load(os.path.join(path,aud), sr=None)
            count+=1

            mfccs = []
            
            try:
                mfcc_temp = (librosa.feature.mfcc(aud_array, sr=sr,  n_mfcc=n_mfccs,  win_length = int(sr*t_frame), hop_length = int(sr*t_shift))) 
                mfcc_delta = librosa.feature.delta(mfcc_temp)
                
                mean_mfccs = np.mean(np.asarray(mfcc_temp),axis = 1)
                var_mfccs = np.var(np.asarray(mfcc_temp), axis = 1)
                std_deltamfccs = np.std(np.asarray(mfcc_delta), axis = 1)
                
                mfccs.append(mean_mfccs)
                mfccs.append(var_mfccs)
                mfccs.append(std_deltamfccs)
                mfccs = np.asarray(mfccs).reshape(n_mfccs*3,1)
                training_data.append([mfccs.reshape(-1,1), class_num])
                if category == 'Segmented_Laugh':
                    laugh_counter +=1
                else:
                    nonlaugh_counter += 1

            except ValueError:
                print("Oops!  That was no valid number.  Try again...")
        
    print(laugh_counter, nonlaugh_counter)

In [5]:

create_training_data()
data = np.array(training_data)

New Folder
New Folder
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
Oops!  That was no valid number.  Try again...
616 604


In [6]:
# mfcc array | class
random.shuffle(training_data)

In [7]:
data[0,0].shape

(9, 1)

In [8]:

X = []
Y = []
for features, label in training_data:
    X.append(features)
    Y.append(label)

    
X = np.array(X).reshape(-1,n_mfcc*3)
#-1 corresponds to how many features we have
Y = np.array(Y)
#X = X/250
#print(X.shape)
#print(Y)
X.shape

(1220, 9)

In [273]:

X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                                                    test_size=0.7, 
                                                    random_state=101)

In [274]:
from sklearn.mixture import GaussianMixture

In [275]:
gm = GaussianMixture(n_components=2)

In [276]:
gm.fit(X_train,y_train)

GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
                means_init=None, n_components=2, n_init=1, precisions_init=None,
                random_state=None, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [277]:
y_pred = gm.predict(X_test)

In [278]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[372  53]
 [192 237]]
              precision    recall  f1-score   support

           0       0.66      0.88      0.75       425
           1       0.82      0.55      0.66       429

    accuracy                           0.71       854
   macro avg       0.74      0.71      0.71       854
weighted avg       0.74      0.71      0.71       854



In [257]:
correct

237

In [258]:
total

429

<function sklearn.model_selection._split.train_test_split(*arrays, **options)>