In [None]:
import csv
import itertools
import os
import random

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
import seaborn as sns
import sklearn
from scipy.special import comb
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Root folder that holds one sub-folder of audio clips per category.
DATADIR = "data"

# Sub-folder names under DATADIR. create_training_data uses a name's position
# in this list as its integer class label (via CATEGORIES.index).
CATEGORIES = [
    "Segmented_Laugh",
    "Segmented_NonLaugh",
]

In [None]:
def createcombinations(n, n_elements=6):
    """Return every size-``n`` combination of the indices ``0 .. n_elements-1``.

    Parameters
    ----------
    n : int
        Number of indices per combination.
    n_elements : int, optional
        Size of the index pool to choose from. Defaults to 6, which reproduces
        the original hard-coded pool ``[0, 1, 2, 3, 4, 5]``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(number_of_combinations, n)``; rows are in the
        lexicographic order produced by ``itertools.combinations``.

    Raises
    ------
    ValueError
        If ``n`` is negative (raised by ``itertools.combinations``).
    """
    combos = list(itertools.combinations(range(n_elements), n))
    # Reshape with the explicit row count so the result stays 2-D even when
    # there are no combinations (n > n_elements) or n == 0.
    return np.asarray(combos, dtype=int).reshape(len(combos), n)

In [None]:
def create_training_data(t_frame, t_shift, n_mfccs, component, training_data, n_features):
    """Append one MFCC-statistics feature vector per audio file to ``training_data``.

    For each entry of ``CATEGORIES`` every file in ``DATADIR/<category>`` is
    loaded at its native sample rate, 6 MFCCs are computed, the rows listed in
    ``component`` are selected, and the per-row mean and variance over frames
    are stacked (all means first, then all variances) into a column vector.

    Parameters
    ----------
    t_frame : float
        Analysis window length; multiplied by the sample rate to obtain
        ``win_length`` in samples.
    t_shift : float
        Hop between windows; multiplied by the sample rate to obtain
        ``hop_length`` in samples.
    n_mfccs : int
        Number of selected coefficients.
    component : sequence of int
        Row indices into the 6-row MFCC matrix.
    training_data : list
        Output list, mutated in place. Each appended item is
        ``[features, class_num]`` where ``features`` has shape
        ``(n_mfccs * n_features, 1)`` and ``class_num`` is the category's
        position in ``CATEGORIES``.
    n_features : int
        Number of statistics per coefficient. Exactly two are computed here
        (mean and variance), so ``n_mfccs * n_features`` must equal
        ``2 * len(component)``; otherwise the reshape fails and the file is
        counted as skipped.

    Returns
    -------
    dict
        Counts for this call: ``"files"`` (files loaded), ``"skipped"`` (files
        dropped because feature extraction raised ``ValueError``), ``"laugh"``
        and ``"nonlaugh"`` (samples appended per category).
    """
    count = 0
    error_count = 0
    laugh_counter = 0
    nonlaugh_counter = 0

    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        # Sorted so that the order of appended samples does not depend on the
        # file system's directory ordering.
        for aud in sorted(os.listdir(path)):
            if aud == '.DS_Store':
                continue

            aud_array, sr = librosa.load(os.path.join(path, aud), sr=None)
            count += 1

            try:
                # The signal must be passed as ``y=``: recent librosa releases
                # make it keyword-only and reject a positional argument.
                mfcc = librosa.feature.mfcc(
                    y=aud_array,
                    sr=sr,
                    n_mfcc=6,
                    win_length=int(sr * t_frame),
                    hop_length=int(sr * t_shift),
                )
                selected = np.asarray(mfcc[component, :])
                stats = [np.mean(selected, axis=1), np.var(selected, axis=1)]
                features = np.asarray(stats).reshape(n_mfccs * n_features, 1)
            except ValueError:
                # Best effort: a file whose features cannot be extracted is
                # dropped, but it is counted so the caller can notice.
                error_count += 1
                continue

            training_data.append([features, class_num])
            if category == 'Segmented_Laugh':
                laugh_counter += 1
            else:
                nonlaugh_counter += 1

    return {
        "files": count,
        "skipped": error_count,
        "laugh": laugh_counter,
        "nonlaugh": nonlaugh_counter,
    }
    

In [None]:
def createaccuracy(file, n_iterations):
    """Summarise per-run accuracies from ``data/knn/knn<file>.csv``.

    The input file is expected to hold one header row followed by blocks of
    ``n_iterations`` consecutive rows, one block per MFCC combination. Column 0
    is the combination label and column 5 is the accuracy of a single run.
    For every block the mean and (population) standard deviation of the
    accuracy are written to ``data/knn/knnAccuracy<file>.csv``.

    Parameters
    ----------
    file : int or str
        Suffix used to build both the input and the output file name.
    n_iterations : int
        Number of consecutive rows that belong to one combination.

    Raises
    ------
    ValueError
        If ``n_iterations`` is not positive, or the number of data rows is not
        a multiple of ``n_iterations``.
    """
    if n_iterations <= 0:
        raise ValueError("n_iterations must be a positive integer")

    # csv.reader handles quoting and line endings, unlike a plain split(',').
    with open('data/knn/knn' + str(file) + '.csv', 'r', newline='') as f:
        rows = [row for row in csv.reader(f) if row]

    data_rows = rows[1:]  # drop the header row
    if len(data_rows) % n_iterations != 0:
        raise ValueError(
            "expected a multiple of %d data rows, found %d"
            % (n_iterations, len(data_rows))
        )

    row_list = [["MFCCs", "Mean_Accuracy", "STD_Accuracy"]]
    # The number of combinations is taken from the file itself rather than
    # recomputed from a hard-coded pool size.
    for start in range(0, len(data_rows), n_iterations):
        group = data_rows[start:start + n_iterations]
        accuracy = np.asarray([float(row[5]) for row in group])
        row_list.append(
            [group[0][0], float(np.mean(accuracy)), float(np.std(accuracy))]
        )

    with open('data/knn/knnAccuracy' + str(file) + '.csv', 'w', newline='') as newfile:
        writer = csv.writer(newfile)
        writer.writerows(row_list)
    

In [6]:
# Choose these parameters. Results are computed for every combination of the
# six MFCC coefficients (indices 0-5), taken n_mfcc at a time.
n_iterations = 10   # random train/test splits per coefficient combination
n_features = 2      # statistics per coefficient (mean and variance)
t_frame = 0.025     # analysis window, multiplied by the sample rate downstream
t_shift = 0.01      # window hop, multiplied by the sample rate downstream
SEED = 0

# Seed both generators: random.shuffle uses `random`, and train_test_split
# falls back to NumPy's global state when no random_state is given.
random.seed(SEED)
np.random.seed(SEED)

# Same directory that createaccuracy reads from and writes to.
os.makedirs('data/knn', exist_ok=True)

for n_mfcc in range(2, 7):
    print("n_mfcc = ", n_mfcc)

    component = createcombinations(n_mfcc)

    row_list = [["MFCCs", "Actual(0)Predicted(0)", "Actual(0)Predicted(1)", "Actual(1)Predicted(0)", "Actual(1)Predicted(1)","Accuracy","(0) f1", "(0) Precision", "(0) Recall","(1) f1", "(1) Precision", "(1) Recall", "Number of Entries (0)", "Number of Entries (1)",  ]]

    for components in component:
        # Load the data for this coefficient combination.
        training_data = []
        create_training_data(t_frame, t_shift, n_mfcc, components, training_data, n_features)

        # Shuffle, then split into a feature matrix and a label vector.
        random.shuffle(training_data)
        X = np.array([features for features, _ in training_data]).reshape(-1, n_mfcc * n_features)
        Y = np.array([label for _, label in training_data])
        X = X / 100  # fixed rescaling kept from the original analysis

        for i in range(n_iterations):
            X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1)
            knn = KNeighborsClassifier()
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)

            cmatrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
            # labels=[0, 1] guarantees both class entries exist even when a
            # split happens to contain a single class.
            report = classification_report(y_test, y_pred, labels=[0, 1], output_dict=True)
            class0, class1 = report["0"], report["1"]

            # Metrics are looked up by name so that every value lands under
            # its matching header, independent of the report's column order.
            row_list.append([
                components,
                cmatrix[0, 0], cmatrix[0, 1], cmatrix[1, 0], cmatrix[1, 1],
                accuracy_score(y_test, y_pred),
                class0["f1-score"], class0["precision"], class0["recall"],
                class1["f1-score"], class1["precision"], class1["recall"],
                class0["support"], class1["support"],
            ])

    # Write the per-run results, then the per-combination summary.
    with open('data/knn/knn' + str(n_mfcc) + '.csv', 'w', newline='') as results_file:
        writer = csv.writer(results_file)
        writer.writerows(row_list)

    createaccuracy(n_mfcc, n_iterations)

n_mfcc =  2
n_mfcc =  3
n_mfcc =  4
n_mfcc =  5
n_mfcc =  6
