In [1]:
import os,librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers as opt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import losses
import pandas as pd
import seaborn as sns
from sklearn.metrics import precision_score,recall_score,f1_score, cohen_kappa_score

In [2]:
# Experiment-wide settings shared by the cells below.
seed = 15      # random_state handed to the stratified k-fold splitter
n_MFCC = 13    # number of MFCC coefficients extracted per frame
# One feature column per coefficient: 'MFCC_1' ... 'MFCC_<n_MFCC>'.
column_names = ['MFCC_{}'.format(coeff) for coeff in range(1, n_MFCC + 1)]

In [3]:
# Root folder of the dataset; get_all_mfcc reads each sub-folder inside it as one label.
DATA_PATH="./Vowel_Data/Vowel/"

In [4]:
def get_mfcc(file_path):
    """Compute the MFCC frames of one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read.

    Returns
    -------
    pandas.DataFrame
        One row per analysis frame and one column per coefficient, named by
        the module-level ``column_names`` (``n_MFCC`` columns).
    """
    # sr=None keeps the file's native sampling rate instead of resampling;
    # mono=True asks librosa for a single-channel signal.
    wave, sr = librosa.load(file_path, mono=True, sr=None)
    # The signal is passed as ``y=`` because librosa 0.10+ made the audio
    # argument keyword-only; the keyword form also works on older releases.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=n_MFCC)
    # The result is transposed so that frames become rows and coefficients columns.
    return pd.DataFrame(mfcc.T, columns=column_names)

def get_all_mfcc(path=DATA_PATH):
    """Collect the MFCC frames of every audio file under ``path``.

    Each sub-directory of ``path`` is one class. Sub-directories are visited
    in sorted order and their position in that order becomes the integer label.

    Parameters
    ----------
    path : str
        Dataset root; defaults to ``DATA_PATH``. A trailing separator is optional.

    Returns
    -------
    pandas.DataFrame
        One row per MFCC frame with the coefficient columns produced by
        ``get_mfcc`` plus an integer ``label`` column. Empty when no audio
        file is found.
    """
    # Only directories are labels; a stray file next to them would otherwise
    # be handed to os.listdir and would also shift the label indices.
    labels = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

    # Gather the per-folder frames in a list and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied all rows on every call.
    folder_frames = []
    for label_index, label in enumerate(labels):
        folder = os.path.join(path, label)
        speech_files = [os.path.join(folder, file) for file in sorted(os.listdir(folder))]

        file_frames = [
            get_mfcc(file_path=file)
            for file in tqdm(speech_files, "Reading Speech of label -'{}'".format(label))
        ]
        if not file_frames:
            continue  # empty folder: there is nothing to label

        folder_mfcc = pd.concat(file_frames, ignore_index=True)
        folder_mfcc['label'] = label_index  # integer-encoded class label
        folder_frames.append(folder_mfcc)

    if not folder_frames:
        return pd.DataFrame()
    return pd.concat(folder_frames, ignore_index=True)

def get_train_test(split_ratio=0.8,random_state=42):
    """Load every MFCC frame and return standardized features with labels.

    Despite its name this function does not split the data; the caller is
    expected to create the train/test partitions.

    Parameters
    ----------
    split_ratio : float
        Unused; kept so existing calls keep working.
    random_state : int
        Unused; kept so existing calls keep working.

    Returns
    -------
    X : numpy.ndarray
        Column-wise standardized MFCC features, one row per frame.
    y : numpy.ndarray
        Label of each row of ``X``, taken from the ``label`` column.
    """
    all_mfccs = get_all_mfcc()

    y = all_mfccs['label'].values
    only_mfcc = all_mfccs.drop(columns=['label'])

    # NOTE(review): mean/std are computed over the full dataset, so any later
    # train/test split is scaled with statistics that include its test rows.
    column_std = only_mfcc.std()
    # A constant column has std 0 (and a single-row frame has std NaN); dividing
    # by it would fill the column with NaN. Divide by 1 so such columns become 0.
    column_std = column_std.mask((column_std == 0) | column_std.isna(), 1.0)
    X = ((only_mfcc - only_mfcc.mean()) / column_std).values

    assert X.shape[0] == len(y)
    return X, y

In [5]:
# Build the standardized feature matrix X and the label vector y from all recordings.
X,y=get_train_test()

Reading Speech of label -'vowel1': 100%|██████████| 40/40 [00:00<00:00, 42.96it/s]
Reading Speech of label -'vowel2': 100%|██████████| 40/40 [00:00<00:00, 101.50it/s]
Reading Speech of label -'vowel3': 100%|██████████| 40/40 [00:00<00:00, 95.53it/s]
Reading Speech of label -'vowel4': 100%|██████████| 40/40 [00:00<00:00, 110.66it/s]
Reading Speech of label -'vowel5': 100%|██████████| 40/40 [00:00<00:00, 103.53it/s]
Reading Speech of label -'vowel6': 100%|██████████| 40/40 [00:00<00:00, 113.76it/s]
Reading Speech of label -'vowel7': 100%|██████████| 40/40 [00:00<00:00, 114.34it/s]


In [19]:
# Look up the name of the metric at index 2 of the compiled model (recorded output: 'auc').
# NOTE(review): in the visible cells model_v is only created by the cross-validation cell,
# so that cell has to run before this one on a fresh kernel.
model_v.metrics_names[2]

'auc'

In [20]:
# 4-fold stratified cross-validation of a small dense softmax classifier on the MFCC frames.
# NOTE(review): X was standardized over the whole dataset in get_train_test, so each
# fold's test rows contributed to the scaling statistics.
kfold=StratifiedKFold(n_splits=4,shuffle=True,random_state=seed)
cv_acc=[]
cv_AUC=[]
cv_f1=[]
cv_kappa=[]

# The one-hot targets do not depend on the fold, so encode them once up front.
y_hot=to_categorical(y)
n_classes=y_hot.shape[1]   # size the output layer from the data instead of hard-coding 7
n_features=X.shape[1]

for train,test in kfold.split(X,y):
    # A fresh model per fold so no weights carry over between folds.
    model_v=Sequential()
    model_v.add(Dense(32, activation='tanh',input_shape=(n_features,)))
    model_v.add(Dense(16, activation='tanh'))
    model_v.add(Dense(n_classes, activation='softmax'))

    model_v.compile(
        optimizer=opt.Adam(learning_rate=0.005),
        loss='categorical_crossentropy',
        # Flat list: the model has a single output, so no per-output nesting is needed.
        metrics=['accuracy','AUC']
    )
    # batch_size equals the number of training rows, i.e. full-batch gradient descent.
    model_v.fit(X[train], y_hot[train], epochs=50, batch_size=X[train].shape[0],verbose=0)

    # scores follows model_v.metrics_names: index 1 is accuracy, index 2 is AUC.
    scores=model_v.evaluate(X[test],y_hot[test],verbose=0)

    print("%s: %.2f" % (model_v.metrics_names[1],scores[1]))
    print("%s: %.2f" % (model_v.metrics_names[2],scores[2]))

    # Sequential.predict_classes was removed in TensorFlow 2.6; the argmax over the
    # softmax outputs yields the same class indices.
    yhat_classes = np.argmax(model_v.predict(X[test], verbose=0), axis=-1)

    f1 = f1_score(y[test], yhat_classes,average='weighted')
    print('F1: %.2f' % f1)

    kappa = cohen_kappa_score(y[test], yhat_classes)
    print('Cohens kappa: %.2f' % kappa)

    cv_acc.append(scores[1])
    cv_AUC.append(scores[2])
    cv_f1.append(f1)
    cv_kappa.append(kappa)

    print(' ')

# Mean and standard deviation of each score across the folds.
print("Overall acc: %.2f (+/- %.2f)" % (np.mean(cv_acc),np.std(cv_acc)))
print("Overall AUC: %.2f (+/- %.2f)" % (np.mean(cv_AUC),np.std(cv_AUC)))
print("Overall f1: %.2f (+/- %.2f)" % (np.mean(cv_f1),np.std(cv_f1)))
print("Overall kappa: %.2f (+/- %.2f)" % (np.mean(cv_kappa),np.std(cv_kappa)))

accuracy: 0.73
auc: 0.96
F1: 0.72
Cohens kappa: 0.69
 
accuracy: 0.75
auc: 0.96
F1: 0.75
Cohens kappa: 0.71
 
accuracy: 0.73
auc: 0.96
F1: 0.72
Cohens kappa: 0.69
 
accuracy: 0.74
auc: 0.96
F1: 0.73
Cohens kappa: 0.69
 
Overall acc: 0.74 (+/- 0.01)
Overall AUC: 0.96 (+/- 0.00)
Overall f1: 0.73 (+/- 0.01)
Overall kappa: 0.69 (+/- 0.01)


In [44]:
# Class predictions for X[test]; model_v and test are whatever the cross-validation loop
# left behind. Sequential.predict_classes was removed in TensorFlow 2.6, so take the
# argmax over the softmax outputs instead (same class indices).
yhat_classes = np.argmax(model_v.predict(X[test], verbose=0), axis=-1)

In [47]:
# Raw outputs of the softmax layer for X[test]; model_v and test come from the
# cross-validation cell.
yhat_probs = model_v.predict(X[test], verbose=0)

In [50]:
# Weighted-average precision of yhat_classes against y[test].
# NOTE(review): `test` presumably still holds the indices of the last cross-validation fold,
# which would make this a single-fold figure rather than a cross-validated average.
precision = precision_score(y[test], yhat_classes,average='weighted')
print('Precision: %f' % precision)

Precision: 0.729089


In [51]:
# Cohen's kappa of yhat_classes against y[test]; this reassigns the `kappa` variable that
# the cross-validation loop also writes.
kappa = cohen_kappa_score(y[test], yhat_classes)
print('Cohens kappa: %f' % kappa)

Cohens kappa: 0.689937


In [15]:
# Per-fold cross-validation scores gathered into one table: one row per fold,
# one column per score.
cv_data_df = pd.DataFrame(
    dict(
        zip(
            ('acc', 'AUC', 'F1', 'Kappa'),
            (cv_acc, cv_AUC, cv_f1, cv_kappa),
        )
    )
)

In [16]:
# Display the per-fold score table.
# NOTE(review): the recorded output shows columns x/y rather than acc/AUC/F1/Kappa, so it
# looks stale — re-run after the cross-validation cell.
cv_data_df

Unnamed: 0,x,y
0,0.746602,0.708625
1,0.730126,0.69087
2,0.734819,0.703409
3,0.750483,0.71393


In [30]:
# Train model_w for 50 epochs using the whole training set as a single batch, with
# (X_test, y_test_hot) passed as validation data; the returned history is kept in history_w.
# NOTE(review): model_w, X_train, y_train_hot, X_test and y_test_hot are not defined in the
# visible cells — confirm the cell that creates them runs before this one.
history_w=model_w.fit(X_train, y_train_hot, epochs=50, batch_size=X_train.shape[0], validation_data=(X_test,y_test_hot))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [21]:
# Tag describing the experiment configuration; it mirrors the cross-validation model
# (standardized MFCCs, 32/16 tanh layers, softmax output, Adam at 0.005, full batch).
# Presumably used to name saved artifacts — the consuming cell is not visible here.
save_specifier='standard_mfcc_32_16_tanh_softmax_adam_0.005_batchfull'

In [67]:
# Show the last five rows of the comparison table (the recorded output has loss/acc/auc
# columns with _v and _w suffixes).
# NOTE(review): all_compare_df is not built in the visible cells — confirm where it is defined.
all_compare_df.tail()

Unnamed: 0,loss_v,val_loss_v,acc_v,val_acc_v,auc_v,val_auc_v,loss_w,val_loss_w,acc_w,val_acc_w,auc_w,val_auc_w
45,1.087982,1.079178,0.578892,0.576316,0.914252,0.91413,1.465496,1.461363,0.433841,0.435846,0.808475,0.810116
46,1.073246,1.063786,0.580471,0.586316,0.916134,0.91638,1.46093,1.456924,0.435345,0.437049,0.809607,0.811218
47,1.058198,1.051638,0.585735,0.577368,0.918239,0.917834,1.456578,1.452782,0.435445,0.443865,0.81085,0.812264
48,1.04317,1.040124,0.585867,0.578421,0.920454,0.919898,1.452407,1.448659,0.439755,0.447875,0.812117,0.813237
49,1.030735,1.028717,0.590604,0.581053,0.922445,0.921268,1.448298,1.44454,0.444968,0.446672,0.813397,0.814203


In [45]:
# Show the last five rows of the comparison table again.
# NOTE(review): same statement as an earlier cell but with different recorded values, so
# the two outputs presumably come from different runs — keep only the current one.
all_compare_df.tail()

Unnamed: 0,loss_v,val_loss_v,acc_v,val_acc_v,auc_v,val_auc_v,loss_w,val_loss_w,acc_w,val_acc_w,auc_w,val_auc_w
45,0.747012,0.753977,0.741413,0.727895,0.960994,0.959738,1.281573,1.291772,0.509824,0.50401,0.859364,0.856465
46,0.740706,0.747679,0.742203,0.727895,0.961514,0.96033,1.277932,1.287836,0.511628,0.503609,0.860304,0.857371
47,0.734629,0.741603,0.743387,0.728421,0.96197,0.96081,1.274361,1.283999,0.512831,0.503609,0.861195,0.858353
48,0.728761,0.735736,0.745493,0.731053,0.962445,0.961305,1.270858,1.280256,0.513031,0.506816,0.862042,0.859321
49,0.723083,0.730062,0.745756,0.734211,0.962883,0.96181,1.26742,1.276603,0.514635,0.510024,0.862903,0.860176
