# Decoding EEG Data of Imagined Speech - Jonathan Buckingham

In [None]:
#Import relevant libraries.
import numpy as np
import os
import re
import csv
import string
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import svm
from sklearn import metrics
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.decomposition import PCA
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
import seaborn as sns
sns.set()
sns.set_style("ticks")
sns.set_context("poster")
sns.set_palette("husl")
#This library is used to calculate the MFCCs.
from python_speech_features import mfcc
import scipy.io.wavfile as wav
#This widens the display, if so desired.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))
display(HTML("<style>div.output_scroll { height: 70em; }</style>"))

In [None]:
def csv_x_y(participant):
    '''Loads one participant's imagined-speech recording together with its labels.

    The epoch identifiers, the participant's processed csv and the target words
    are read from the Dissertation_ML folder under the current working directory.

    Returns X (one array per epoch, restricted to the first 14 columns), y (the
    target words) and the semantic / syllable labels that categoriser derives from y.
    '''
    root = os.getcwd()
    with open(root+'/Dissertation_ML/epochs.csv') as handle:
        # Only the first field of every row is used as the epoch identifier.
        epoch_ids = [row[0] for row in csv.reader(handle)]
    frame = pd.read_csv(root+'/Dissertation_ML/processed/Imagined/'+ participant +'.csv')
    frame['Epoch'] = epoch_ids
    # One array per epoch group, stacked along a new leading axis.
    per_epoch = frame.groupby('Epoch').apply(pd.DataFrame.to_numpy)
    X = np.array(list(per_epoch))[:,:,:14]
    y = np.loadtxt(root+'/Dissertation_ML/target.csv', dtype=object)
    # Finds relevant semantic/syllabic groups.
    y_sem, y_leng = categoriser(y)
    return X, y, y_sem, y_leng

def categoriser(y):
    '''Maps every word in y to its semantic category and to its syllable label.

    Words that do not belong to the paradigm contribute no entry to either
    returned array.
    '''
    semantic = ['fruit','animal','outdoor','social']
    length = ['one','one','two','three','four']
    paradigm = [['date','juice','berry','gooseberry','watermelon'],
                ['goose','moose','hedgehog','waterfowl','caterpillar'],
                ['spruce','night','water','wilderness','environment'],
                ['knight','queen','daughter','relative','ambassador']]
    # Row of the paradigm gives the semantic category, column the syllable label.
    sem_of = {}
    leng_of = {}
    for group, row in zip(semantic, paradigm):
        for syllables, entry in zip(length, row):
            sem_of[entry] = group
            leng_of[entry] = syllables
    y_sem = np.array([sem_of[word] for word in y if word in sem_of])
    y_leng = np.array([leng_of[word] for word in y if word in leng_of])
    return y_sem, y_leng

def dueller(X, y, choices):
    '''Extracts the data belonging to the chosen classes only.

    Parameters
    ----------
    X : array whose first axis is aligned with y.
    y : numpy array of labels.
    choices : sequence of labels to keep. Any number of labels is accepted;
        an empty sequence leaves the data unfiltered.

    Returns
    -------
    X, y restricted to the entries whose label appears in choices.
    '''
    if len(choices) == 0:
        return X, y
    # A single mask is built so that X and y are always filtered identically.
    keep = np.zeros(np.shape(y), dtype=bool)
    for choice in choices:
        keep |= (y == choice)
    return X[keep], y[keep]

def tt_split(X,y):
    '''Holds out 20% of the samples for testing and keeps 80% for training.

    The split is drawn over the sample indices with a fixed random_state and
    the same indices are applied to both X and y.
    '''
    sample_ids = list(range(len(y)))
    idx_train, idx_test = train_test_split(sample_ids, test_size=0.2, random_state=1)
    return X[idx_train,:,:], X[idx_test,:,:], y[idx_train], y[idx_test]

def clf_mfcc(X_train, X_test, y_train, y_test, window_len, window_step, num_cep):
    '''Builds PCA-reduced MFCC feature sets for one train/test split.

    No classifier is fitted here; the returned arrays are meant to be handed
    to a model afterwards.

    Parameters
    ----------
    X_train, X_test, y_train, y_test : the raw split.
    window_len, window_step, num_cep : forwarded unchanged to mfcc_calc_old.

    Returns
    -------
    X_train, X_test : MFCC features after the PCA projection.
    y_train, y_test : the label arrays returned by mfcc_calc_old.
    '''
    #Calculates MFCCs on test and training sets separately.
    X_train, y_train = mfcc_calc_old(X_train, y_train, window_len, window_step, num_cep)
    X_test, y_test = mfcc_calc_old(X_test, y_test, window_len, window_step, num_cep)
    #Conducts PCA to reduce the dimension. The projection is fitted once, on the
    #training features only, and then re-used for the test features.
    pca = PCA(.95, random_state = 1)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    return X_train, X_test, y_train, y_test

def model_runner(X_train, X_test, y_train, y_test, model):
    '''Trains model on the training split and returns its accuracy on the test split.'''
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return accuracy_score(y_test, predictions)

def mfcc_calc_old(x, y, winlen_, winstep_,num_cep_):
    '''Calculates num_cep_ MFCCs per channel for every analysis window of every epoch.

    For each epoch the first 14 channels are passed to mfcc (samplerate=128) one
    at a time; the per-channel coefficient matrices are placed side by side and
    the epochs are stacked on top of each other.

    Parameters
    ----------
    x : array indexed as x[epoch, :, channel]; channels 0-13 are used.
    y : labels, one per epoch.
    winlen_, winstep_, num_cep_ : forwarded to mfcc as winlen, winstep and numcep.

    Returns
    -------
    arrr : 2-D array with one row per MFCC window.
    epochs : array giving, for every row of arrr, the label of its epoch.

    Raises
    ------
    ValueError : if y is empty.
    '''
    if len(y) == 0:
        raise ValueError('mfcc_calc_old needs at least one epoch.')
    epoch_blocks = []
    epoch_tracker = []
    for i in range(len(y)):
        channel_feats = [mfcc(x[i,:,j],
                              samplerate=128,
                              winlen=winlen_,
                              winstep=winstep_,
                              numcep=num_cep_,
                             )
                         for j in range(14)]
        # Channels side by side: one row per window, num_cep_ columns per channel.
        mfcc_all = np.hstack(channel_feats)
        epoch_blocks.append(mfcc_all)
        # Every window of this epoch inherits the epoch's label.
        epoch_tracker.extend([y[i]] * mfcc_all.shape[0])
    # Stacking once at the end avoids re-copying the growing result per epoch.
    arrr = np.vstack(epoch_blocks)
    epochs = np.array(epoch_tracker)
    return arrr, epochs

def linear_features(X, y):
    '''Summarises every epoch with six statistics per channel and reduces them with PCA.

    For each of the len(y) epochs the mean, median, max, min, variance and sum
    are taken along the first axis of X[i] and written into consecutive blocks
    of 14 columns. The resulting table is then projected with a PCA(.95) that
    is fitted on that same table.
    '''
    stats = (np.mean, np.median, np.max, np.min, np.var, np.sum)
    n_epochs = len(y)
    X_lf = np.empty((n_epochs, 14*len(stats)))
    for row in range(n_epochs):
        for block, stat in enumerate(stats):
            start = block*14
            X_lf[row, start:start+14] = stat(X[row], axis=0)
    #Runs PCA
    reducer = PCA(.95)
    reducer.fit(X_lf)
    return reducer.transform(X_lf)

def clf_lf(X, y, p, opt):
    '''Grid-searches an SVM and a random forest on the linear feature set.

    Parameters
    ----------
    X, y : data and labels; X is first converted with linear_features.
    p : identifier stored as the first entry of every result tuple.
    opt : when truthy, decision_function_shape ('ovo'/'ovr') is added to the
        SVM grid; otherwise the SVM grid covers kernel, C and gamma only.

    Returns
    -------
    List with two tuples, SVM first and random forest second, each holding
    (p, best_params_, best_score_, std_test_score of the best candidate).
    '''
    X_lf = linear_features(X, y)
    # NOTE(review): linear_features fits its PCA on all of X before the folds
    # below are drawn, so the projection has seen the validation samples.
    kf = KFold(n_splits=4, random_state=98, shuffle=True)

    parameters_svm = {'kernel':('sigmoid', 'rbf'),
                      'C':[0.01, 0.1, 1, 10],
                      'gamma': [0.001,0.01,0.1]}
    if opt:
        parameters_svm['decision_function_shape'] = ('ovo','ovr')
    parameters_rf = {'max_depth': [80, 100, 120],
                     'n_estimators': [200, 600, 1000],
                     'criterion': ('gini','entropy')}
    candidates = [(svm.SVC(random_state=98), parameters_svm),
                  (RandomForestClassifier(random_state=98), parameters_rf)]

    r_list = []
    for estimator, grid in candidates:
        search = GridSearchCV(estimator, grid, cv=kf, scoring='accuracy')
        search.fit(X_lf,y)
        r_list.append((p,
                       search.best_params_,
                       search.best_score_,
                       search.cv_results_['std_test_score'][search.best_index_]))
    return r_list

In [None]:
#Uses functions to create arrays of EEG data and labels for each participant.
participants = ['alpha','bravo','charlie','delta','echo','foxtrot','golf','hotel']
#Each entry is the (X, y, y_sem, y_leng) tuple returned by csv_x_y.
p_data = [csv_x_y(name) for name in participants]
#Choice of words and types to examine.
fruit = ['date','juice','berry','gooseberry','watermelon']
animal = ['goose','moose','hedgehog','waterfowl','caterpillar']
outdoor = ['spruce','night','water','wilderness','environment']
social = ['knight','queen','daughter','relative','ambassador']
semantic = ['fruit','animal','outdoor','social']
syllable = ['one','two','three','four']
rhyme = ['moose','goose','spruce','juice']

In [None]:
#Models and hyper-parameters to examine. Loops to be used for MFCC feature sets as alternate to grid search.
c_p = [0.1, 1, 10]
g_p = [0.001,0.01,0.1]
k_p = ['rbf','sigmoid']
max_d = [80, 100, 120]
n_est = [200, 600, 1000]
crit = ['gini','entropy']
#SVMs: the default model followed by every (C, gamma) combination.
models_svm = [SVC(random_state=1)]
models_svm += [SVC(C=c, gamma=g, random_state=1)
               for c in c_p for g in g_p]
#Random forests: the default model followed by every
#(max_depth, n_estimators, criterion) combination.
models_rf = [RandomForestClassifier(random_state=1)]
models_rf += [RandomForestClassifier(max_depth=m, n_estimators=n, criterion=cr, random_state=1)
              for m in max_d for n in n_est for cr in crit]

#Using OnevsOne on SVM.
#The kernel is passed through so that each entry of k_p yields its own models;
#without it every (C, gamma) pair would be built twice with the default kernel.
models_opt = [SVC(class_weight='balanced', random_state=1)]
models_opt += [SVC(kernel=k, C=c, gamma=g, decision_function_shape='ovo', random_state=1)
               for k in k_p for c in c_p for g in g_p]

### Confusion Matrices used in the Dissertation

In [None]:
#Confusion matrix for best model on mix of words from syllabic groups.
label_choice = ['queen', 'goose', 'watermelon', 'environment']
rf = RandomForestClassifier(n_estimators = 200, max_depth = 80, criterion = 'entropy', random_state=98)
kf = KFold(n_splits=4, shuffle=True, random_state=98)
X, y, y_sem, y_leng  = p_data[4]
X, y = dueller(X, y, label_choice)
X = linear_features(X, y)
#NOTE(review): linear_features already ends with its own PCA(.95), so this is a
#second PCA pass on that output - confirm it is intended.
pca = PCA(.95)
pca.fit(X)
X = pca.transform(X)
#One confusion matrix per fold, added up into a single matrix over all folds.
cm_list = []
cm_all = 0
for train_ind, test_ind in kf.split(X, y):
    rf.fit(X[train_ind], y[train_ind])
    fold_cm = confusion_matrix(y[test_ind], rf.predict(X[test_ind]), labels=label_choice)
    cm_list.append(fold_cm)
    cm_all = cm_all + fold_cm
ConfusionMatrixDisplay(cm_all, display_labels = label_choice).plot(xticks_rotation = 45, colorbar = False, cmap = 'Blues')

In [None]:
#Confusion matrix for best model on mix of words from semantic groups.
cm_labels = ['watermelon', 'gooseberry','waterfowl', 'caterpillar']
rf = RandomForestClassifier(n_estimators = 200, max_depth = 80, criterion = 'gini', random_state=98)
kf = KFold(n_splits=4, shuffle=True, random_state=98)
X, y, y_sem, y_leng  = p_data[4]
X, y = dueller(X, y, cm_labels)
X = linear_features(X, y)
#NOTE(review): linear_features already ends with its own PCA(.95), so this is a
#second PCA pass on that output - confirm it is intended.
pca = PCA(.95)
pca.fit(X)
X = pca.transform(X)
#One confusion matrix per fold, added up into a single matrix over all folds.
cm_list = []
cm_all = 0
for train_ind, test_ind in kf.split(X, y):
    rf.fit(X[train_ind], y[train_ind])
    fold_cm = confusion_matrix(y[test_ind], rf.predict(X[test_ind]), labels=cm_labels)
    cm_list.append(fold_cm)
    cm_all = cm_all + fold_cm
ConfusionMatrixDisplay(cm_all, display_labels = cm_labels).plot(xticks_rotation = 45, colorbar = False, cmap = 'Blues')

### Classification of MFCC Features.

These first results relate to classification based on semantic properties.

In [None]:
#Semantic groups classification - MFCC.
print('Semantic Groups')
#Mean / std of the fold accuracies: one row per participant, one column per model.
sem_rfmean = np.zeros((len(p_data),len(models_rf)))
sem_rfstd = np.zeros((len(p_data),len(models_rf)))
sem_svmmean = np.zeros((len(p_data),len(models_opt)))
sem_svmstd = np.zeros((len(p_data),len(models_opt)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_opt), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y_sem)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y_sem[train_ind], y_sem[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_opt)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_opt[m])
    for m in range(len(models_opt)):
        sem_svmmean[p][m] += np.mean(svm_arr[m])
        sem_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        sem_rfmean[p][m] += np.mean(rf_arr[m])
        sem_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
sem_rf_mean = []
sem_svm_mean = []
sem_rf_std = []
sem_svm_std = []
for p in range(len(p_data)):
    sem_rf_mean.append(np.max(sem_rfmean[p]))
    sem_svm_mean.append(np.max(sem_svmmean[p]))
    sem_rf_std.append(sem_rfstd[p][np.argmax(sem_rfmean[p])])
    sem_svm_std.append(sem_svmstd[p][np.argmax(sem_svmmean[p])])
print('Best means from RF: \n',sem_rf_mean)
print('Best means from SVM: \n',sem_svm_mean)
print('Std of mean from RF: \n',sem_rf_std)
print('Std of mean from SVM: \n',sem_svm_std)

#Fruits classification - MFCC.
print('Fruits')
#Mean / std of the fold accuracies: one row per participant, one column per model.
fruit_rfmean = np.zeros((len(p_data),len(models_rf)))
fruit_rfstd = np.zeros((len(p_data),len(models_rf)))
fruit_svmmean = np.zeros((len(p_data),len(models_svm)))
fruit_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['date', 'juice', 'watermelon', 'gooseberry'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        fruit_svmmean[p][m] += np.mean(svm_arr[m])
        fruit_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        fruit_rfmean[p][m] += np.mean(rf_arr[m])
        fruit_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
fruit_rf_mean = []
fruit_svm_mean = []
fruit_rf_std = []
fruit_svm_std = []
for p in range(len(p_data)):
    fruit_rf_mean.append(np.max(fruit_rfmean[p]))
    fruit_svm_mean.append(np.max(fruit_svmmean[p]))
    fruit_rf_std.append(fruit_rfstd[p][np.argmax(fruit_rfmean[p])])
    fruit_svm_std.append(fruit_svmstd[p][np.argmax(fruit_svmmean[p])])
print('Best means from RF: \n',fruit_rf_mean)
print('Best means from SVM: \n',fruit_svm_mean)
print('Std of mean from RF: \n',fruit_rf_std)
print('Std of mean from SVM: \n',fruit_svm_std)


#Animals classification - MFCC.
print('Animals')
#Mean / std of the fold accuracies: one row per participant, one column per model.
animal_rfmean = np.zeros((len(p_data),len(models_rf)))
animal_rfstd = np.zeros((len(p_data),len(models_rf)))
animal_svmmean = np.zeros((len(p_data),len(models_svm)))
animal_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['goose', 'hedgehog', 'waterfowl', 'caterpillar'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        animal_svmmean[p][m] += np.mean(svm_arr[m])
        animal_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        animal_rfmean[p][m] += np.mean(rf_arr[m])
        animal_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
animal_rf_mean = []
animal_svm_mean = []
animal_rf_std = []
animal_svm_std = []
for p in range(len(p_data)):
    animal_rf_mean.append(np.max(animal_rfmean[p]))
    animal_svm_mean.append(np.max(animal_svmmean[p]))
    animal_rf_std.append(animal_rfstd[p][np.argmax(animal_rfmean[p])])
    animal_svm_std.append(animal_svmstd[p][np.argmax(animal_svmmean[p])])
print('Best means from RF: \n',animal_rf_mean)
print('Best means from SVM: \n',animal_svm_mean)
print('Std of mean from RF: \n',animal_rf_std)
print('Std of mean from SVM: \n',animal_svm_std)


#Two fruits and two animals classification - MFCC.
print('2 Fruits 2 Animals')
#Mean / std of the fold accuracies: one row per participant, one column per model.
a2f2_rfmean = np.zeros((len(p_data),len(models_rf)))
a2f2_rfstd = np.zeros((len(p_data),len(models_rf)))
a2f2_svmmean = np.zeros((len(p_data),len(models_svm)))
a2f2_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['watermelon', 'gooseberry', 'waterfowl', 'caterpillar'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        a2f2_svmmean[p][m] += np.mean(svm_arr[m])
        a2f2_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        a2f2_rfmean[p][m] += np.mean(rf_arr[m])
        a2f2_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
a2f2_rf_mean = []
a2f2_svm_mean = []
a2f2_rf_std = []
a2f2_svm_std = []
for p in range(len(p_data)):
    a2f2_rf_mean.append(np.max(a2f2_rfmean[p]))
    a2f2_svm_mean.append(np.max(a2f2_svmmean[p]))
    a2f2_rf_std.append(a2f2_rfstd[p][np.argmax(a2f2_rfmean[p])])
    a2f2_svm_std.append(a2f2_svmstd[p][np.argmax(a2f2_svmmean[p])])
print('Best means from RF: \n',a2f2_rf_mean)
print('Best means from SVM: \n',a2f2_svm_mean)
print('Std of mean from RF: \n',a2f2_rf_std)
print('Std of mean from SVM: \n',a2f2_svm_std)


#Homophones classification - MFCC.
print('Homophones')
#Mean / std of the fold accuracies: one row per participant, one column per model.
kni_rfmean = np.zeros((len(p_data),len(models_rf)))
kni_rfstd = np.zeros((len(p_data),len(models_rf)))
kni_svmmean = np.zeros((len(p_data),len(models_svm)))
kni_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['knight', 'night'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted once per fold; the same score is stored and printed.
        for m in range(len(models_rf)):
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
            print(score)
        for m in range(len(models_svm)):
            score = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
            svm_arr[m][fold] = score
            print(score)
    for m in range(len(models_svm)):
        kni_svmmean[p][m] += np.mean(svm_arr[m])
        kni_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        kni_rfmean[p][m] += np.mean(rf_arr[m])
        kni_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
kni_rf_mean = []
kni_svm_mean = []
kni_rf_std = []
kni_svm_std = []
for p in range(len(p_data)):
    kni_rf_mean.append(np.max(kni_rfmean[p]))
    kni_svm_mean.append(np.max(kni_svmmean[p]))
    kni_rf_std.append(kni_rfstd[p][np.argmax(kni_rfmean[p])])
    kni_svm_std.append(kni_svmstd[p][np.argmax(kni_svmmean[p])])
print('Best means from RF: \n',kni_rf_mean)
print('Best means from SVM: \n',kni_svm_mean)
print('Std of mean from RF: \n',kni_rf_std)
print('Std of mean from SVM: \n',kni_svm_std)

These next results relate to classification based on rhymes.

In [None]:
#Rhyme classification - MFCC.
print('spruce/goose/moose')
#Mean / std of the fold accuracies: one row per participant, one column per model.
rhyme1_rfmean = np.zeros((len(p_data),len(models_rf)))
rhyme1_rfstd = np.zeros((len(p_data),len(models_rf)))
rhyme1_svmmean = np.zeros((len(p_data),len(models_svm)))
rhyme1_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['spruce','goose','moose'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        rhyme1_svmmean[p][m] += np.mean(svm_arr[m])
        rhyme1_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        rhyme1_rfmean[p][m] += np.mean(rf_arr[m])
        rhyme1_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
rhyme1_rf_mean = []
rhyme1_svm_mean = []
rhyme1_rf_std = []
rhyme1_svm_std = []
for p in range(len(p_data)):
    rhyme1_rf_mean.append(np.max(rhyme1_rfmean[p]))
    rhyme1_svm_mean.append(np.max(rhyme1_svmmean[p]))
    rhyme1_rf_std.append(rhyme1_rfstd[p][np.argmax(rhyme1_rfmean[p])])
    rhyme1_svm_std.append(rhyme1_svmstd[p][np.argmax(rhyme1_svmmean[p])])
print('Best means from RF: \n',rhyme1_rf_mean)
print('Best means from SVM: \n',rhyme1_svm_mean)
print('Std of mean from RF: \n',rhyme1_rf_std)
print('Std of mean from SVM: \n',rhyme1_svm_std)

print('spruce/goose/juice')
#Mean / std of the fold accuracies: one row per participant, one column per model.
rhyme2_rfmean = np.zeros((len(p_data),len(models_rf)))
rhyme2_rfstd = np.zeros((len(p_data),len(models_rf)))
rhyme2_svmmean = np.zeros((len(p_data),len(models_svm)))
rhyme2_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Progress output: index of the participant being processed.
    print(p)
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['spruce','goose','juice'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        rhyme2_svmmean[p][m] += np.mean(svm_arr[m])
        rhyme2_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        rhyme2_rfmean[p][m] += np.mean(rf_arr[m])
        rhyme2_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
rhyme2_rf_mean = []
rhyme2_svm_mean = []
rhyme2_rf_std = []
rhyme2_svm_std = []
for p in range(len(p_data)):
    rhyme2_rf_mean.append(np.max(rhyme2_rfmean[p]))
    rhyme2_svm_mean.append(np.max(rhyme2_svmmean[p]))
    rhyme2_rf_std.append(rhyme2_rfstd[p][np.argmax(rhyme2_rfmean[p])])
    rhyme2_svm_std.append(rhyme2_svmstd[p][np.argmax(rhyme2_svmmean[p])])
print('Best means from RF: \n',rhyme2_rf_mean)
print('Best means from SVM: \n',rhyme2_svm_mean)
print('Std of mean from RF: \n',rhyme2_rf_std)
print('Std of mean from SVM: \n',rhyme2_svm_std)

print('moose/spruce/juice')
#Mean / std of the fold accuracies: one row per participant, one column per model.
rhyme3_rfmean = np.zeros((len(p_data),len(models_rf)))
rhyme3_rfstd = np.zeros((len(p_data),len(models_rf)))
rhyme3_svmmean = np.zeros((len(p_data),len(models_svm)))
rhyme3_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['moose','spruce','juice'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        rhyme3_svmmean[p][m] += np.mean(svm_arr[m])
        rhyme3_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        rhyme3_rfmean[p][m] += np.mean(rf_arr[m])
        rhyme3_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
rhyme3_rf_mean = []
rhyme3_svm_mean = []
rhyme3_rf_std = []
rhyme3_svm_std = []
for p in range(len(p_data)):
    rhyme3_rf_mean.append(np.max(rhyme3_rfmean[p]))
    rhyme3_svm_mean.append(np.max(rhyme3_svmmean[p]))
    rhyme3_rf_std.append(rhyme3_rfstd[p][np.argmax(rhyme3_rfmean[p])])
    rhyme3_svm_std.append(rhyme3_svmstd[p][np.argmax(rhyme3_svmmean[p])])
print('Best means from RF: \n',rhyme3_rf_mean)
print('Best means from SVM: \n',rhyme3_svm_mean)
print('Std of mean from RF: \n',rhyme3_rf_std)
print('Std of mean from SVM: \n',rhyme3_svm_std)
    
print('moose/goose/juice')
#Mean / std of the fold accuracies: one row per participant, one column per model.
rhyme4_rfmean = np.zeros((len(p_data),len(models_rf)))
rhyme4_rfstd = np.zeros((len(p_data),len(models_rf)))
rhyme4_svmmean = np.zeros((len(p_data),len(models_svm)))
rhyme4_svmstd = np.zeros((len(p_data),len(models_svm)))
for p in range(len(p_data)):
    #Accuracy of every model on each of the 4 folds for this participant.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng  = p_data[p]
    X, y = dueller(X, y, ['moose','goose','juice'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is fitted and scored exactly once per fold.
        for m in range(len(models_rf)):
            rf_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    for m in range(len(models_svm)):
        rhyme4_svmmean[p][m] += np.mean(svm_arr[m])
        rhyme4_svmstd[p][m] += np.std(svm_arr[m])
    for m in range(len(models_rf)):
        rhyme4_rfmean[p][m] += np.mean(rf_arr[m])
        rhyme4_rfstd[p][m] += np.std(rf_arr[m])
#For every participant keep the best mean accuracy and the std of that same model.
rhyme4_rf_mean = []
rhyme4_svm_mean = []
rhyme4_rf_std = []
rhyme4_svm_std = []
for p in range(len(p_data)):
    rhyme4_rf_mean.append(np.max(rhyme4_rfmean[p]))
    rhyme4_svm_mean.append(np.max(rhyme4_svmmean[p]))
    rhyme4_rf_std.append(rhyme4_rfstd[p][np.argmax(rhyme4_rfmean[p])])
    rhyme4_svm_std.append(rhyme4_svmstd[p][np.argmax(rhyme4_svmmean[p])])
print('Best means from RF: \n',rhyme4_rf_mean)
print('Best means from SVM: \n',rhyme4_svm_mean)
print('Std of mean from RF: \n',rhyme4_rf_std)
print('Std of mean from SVM: \n',rhyme4_svm_std)
    

These next results relate to classification based on syllabic groups.

In [None]:
#Syllabic Groups.
print('Quadrisyllabic words classification - MFCC')
#Per-participant (row) by per-model (column) tables holding the mean and the
#standard deviation of the fold scores. Rows are sized from p_data so the
#tables always agree with the participant loop below.
syll4_rfmean = np.zeros((len(p_data), len(models_rf)))
syll4_rfstd = np.zeros((len(p_data), len(models_rf)))
syll4_svmmean = np.zeros((len(p_data), len(models_svm)))
syll4_svmstd = np.zeros((len(p_data), len(models_svm)))
for p in range(len(p_data)):
    #Fold scores for this participant: one row per model, one column per fold.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['watermelon','environment','caterpillar','ambassador'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        #NOTE(review): clf_mfcc presumably turns each split into MFCC features - confirm.
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        for m in range(len(models_rf)):
            #Each model is run exactly once per fold and its score stored directly.
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    #Collapse the fold scores of every model into a mean and a standard deviation.
    for m in range(len(models_svm)):
        syll4_svmmean[p][m] = np.mean(svm_arr[m])
        syll4_svmstd[p][m] = np.std(svm_arr[m])
    for m in range(len(models_rf)):
        syll4_rfmean[p][m] = np.mean(rf_arr[m])
        syll4_rfstd[p][m] = np.std(rf_arr[m])
#For each participant keep the highest model mean together with the standard
#deviation that belongs to that same model.
syll4_rf_mean = []
syll4_svm_mean = []
syll4_rf_std = []
syll4_svm_std = []
for p in range(len(p_data)):
    best_rf = np.argmax(syll4_rfmean[p])
    best_svm = np.argmax(syll4_svmmean[p])
    syll4_rf_mean.append(syll4_rfmean[p][best_rf])
    syll4_svm_mean.append(syll4_svmmean[p][best_svm])
    syll4_rf_std.append(syll4_rfstd[p][best_rf])
    syll4_svm_std.append(syll4_svmstd[p][best_svm])
print('Best means from RF: \n',syll4_rf_mean)
print('Best means from SVM: \n',syll4_svm_mean)
print('Std of mean from RF: \n',syll4_rf_std)
print('Std of mean from SVM: \n',syll4_svm_std)

print('Trisyllabic words classification - MFCC')
#Per-participant (row) by per-model (column) tables holding the mean and the
#standard deviation of the fold scores. Rows are sized from p_data so the
#tables always agree with the participant loop below.
syll3_rfmean = np.zeros((len(p_data), len(models_rf)))
syll3_rfstd = np.zeros((len(p_data), len(models_rf)))
syll3_svmmean = np.zeros((len(p_data), len(models_svm)))
syll3_svmstd = np.zeros((len(p_data), len(models_svm)))
for p in range(len(p_data)):
    #Fold scores for this participant: one row per model, one column per fold.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['gooseberry','waterfowl','wilderness','relative'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        #NOTE(review): clf_mfcc presumably turns each split into MFCC features - confirm.
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        for m in range(len(models_rf)):
            #Each model is run exactly once per fold and its score stored directly.
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    #Collapse the fold scores of every model into a mean and a standard deviation.
    for m in range(len(models_svm)):
        syll3_svmmean[p][m] = np.mean(svm_arr[m])
        syll3_svmstd[p][m] = np.std(svm_arr[m])
    for m in range(len(models_rf)):
        syll3_rfmean[p][m] = np.mean(rf_arr[m])
        syll3_rfstd[p][m] = np.std(rf_arr[m])
#For each participant keep the highest model mean together with the standard
#deviation that belongs to that same model.
syll3_rf_mean = []
syll3_svm_mean = []
syll3_rf_std = []
syll3_svm_std = []
for p in range(len(p_data)):
    best_rf = np.argmax(syll3_rfmean[p])
    best_svm = np.argmax(syll3_svmmean[p])
    syll3_rf_mean.append(syll3_rfmean[p][best_rf])
    syll3_svm_mean.append(syll3_svmmean[p][best_svm])
    syll3_rf_std.append(syll3_rfstd[p][best_rf])
    syll3_svm_std.append(syll3_svmstd[p][best_svm])
print('Best means from RF: \n',syll3_rf_mean)
print('Best means from SVM: \n',syll3_svm_mean)
print('Std of mean from RF: \n',syll3_rf_std)
print('Std of mean from SVM: \n',syll3_svm_std)

print('Disyllabic words classification - MFCC')
#Per-participant (row) by per-model (column) tables holding the mean and the
#standard deviation of the fold scores. Rows are sized from p_data so the
#tables always agree with the participant loop below.
syll2_rfmean = np.zeros((len(p_data), len(models_rf)))
syll2_rfstd = np.zeros((len(p_data), len(models_rf)))
syll2_svmmean = np.zeros((len(p_data), len(models_svm)))
syll2_svmstd = np.zeros((len(p_data), len(models_svm)))
for p in range(len(p_data)):
    #Fold scores for this participant: one row per model, one column per fold.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['berry','hedgehog','water','daughter'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        #NOTE(review): clf_mfcc presumably turns each split into MFCC features - confirm.
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        for m in range(len(models_rf)):
            #Each model is run exactly once per fold and its score stored directly.
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    #Collapse the fold scores of every model into a mean and a standard deviation.
    for m in range(len(models_svm)):
        syll2_svmmean[p][m] = np.mean(svm_arr[m])
        syll2_svmstd[p][m] = np.std(svm_arr[m])
    for m in range(len(models_rf)):
        syll2_rfmean[p][m] = np.mean(rf_arr[m])
        syll2_rfstd[p][m] = np.std(rf_arr[m])
#For each participant keep the highest model mean together with the standard
#deviation that belongs to that same model.
syll2_rf_mean = []
syll2_svm_mean = []
syll2_rf_std = []
syll2_svm_std = []
for p in range(len(p_data)):
    best_rf = np.argmax(syll2_rfmean[p])
    best_svm = np.argmax(syll2_svmmean[p])
    syll2_rf_mean.append(syll2_rfmean[p][best_rf])
    syll2_svm_mean.append(syll2_svmmean[p][best_svm])
    syll2_rf_std.append(syll2_rfstd[p][best_rf])
    syll2_svm_std.append(syll2_svmstd[p][best_svm])
print('Best means from RF: \n',syll2_rf_mean)
print('Best means from SVM: \n',syll2_svm_mean)
print('Std of mean from RF: \n',syll2_rf_std)
print('Std of mean from SVM: \n',syll2_svm_std)

print('Monosyllabic words classification - MFCC')
#Per-participant (row) by per-model (column) tables holding the mean and the
#standard deviation of the fold scores. Rows are sized from p_data so the
#tables always agree with the participant loop below.
syll1_rfmean = np.zeros((len(p_data), len(models_rf)))
syll1_rfstd = np.zeros((len(p_data), len(models_rf)))
syll1_svmmean = np.zeros((len(p_data), len(models_svm)))
syll1_svmstd = np.zeros((len(p_data), len(models_svm)))
for p in range(len(p_data)):
    #Fold scores for this participant: one row per model, one column per fold.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['date','goose','queen','night'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        #NOTE(review): clf_mfcc presumably turns each split into MFCC features - confirm.
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        for m in range(len(models_rf)):
            #Each model is run exactly once per fold and its score stored directly.
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
        for m in range(len(models_svm)):
            svm_arr[m][fold] = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
    #Collapse the fold scores of every model into a mean and a standard deviation.
    for m in range(len(models_svm)):
        syll1_svmmean[p][m] = np.mean(svm_arr[m])
        syll1_svmstd[p][m] = np.std(svm_arr[m])
    for m in range(len(models_rf)):
        syll1_rfmean[p][m] = np.mean(rf_arr[m])
        syll1_rfstd[p][m] = np.std(rf_arr[m])
#For each participant keep the highest model mean together with the standard
#deviation that belongs to that same model.
syll1_rf_mean = []
syll1_svm_mean = []
syll1_rf_std = []
syll1_svm_std = []
for p in range(len(p_data)):
    best_rf = np.argmax(syll1_rfmean[p])
    best_svm = np.argmax(syll1_svmmean[p])
    syll1_rf_mean.append(syll1_rfmean[p][best_rf])
    syll1_svm_mean.append(syll1_svmmean[p][best_svm])
    syll1_rf_std.append(syll1_rfstd[p][best_rf])
    syll1_svm_std.append(syll1_svmstd[p][best_svm])
print('Best means from RF: \n',syll1_rf_mean)
print('Best means from SVM: \n',syll1_svm_mean)
print('Std of mean from RF: \n',syll1_rf_std)
print('Std of mean from SVM: \n',syll1_svm_std)


print('Two monosyllabic words and two quadrisyllabic words classification - MFCC.')
#Per-participant (row) by per-model (column) tables holding the mean and the
#standard deviation of the fold scores. Rows are sized from p_data so the
#tables always agree with the participant loop below.
syll_mix_rfmean = np.zeros((len(p_data), len(models_rf)))
syll_mix_rfstd = np.zeros((len(p_data), len(models_rf)))
syll_mix_svmmean = np.zeros((len(p_data), len(models_svm)))
syll_mix_svmstd = np.zeros((len(p_data), len(models_svm)))
for p in range(len(p_data)):
    #Fold scores for this participant: one row per model, one column per fold.
    svm_arr = np.zeros((len(models_svm), 4))
    rf_arr = np.zeros((len(models_rf), 4))
    X, y, y_sem, y_leng = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['queen', 'goose', 'watermelon', 'environment'])
    kf = KFold(n_splits=4, random_state=1, shuffle=True)
    for fold, (train_ind, test_ind) in enumerate(kf.split(X, y)):
        #NOTE(review): clf_mfcc presumably turns each split into MFCC features - confirm.
        X_train, X_test, y_train, y_test = clf_mfcc(X[train_ind], X[test_ind], y[train_ind], y[test_ind], 0.5, 0.25, 13)
        #Each model is run exactly once per fold; the same score is both stored
        #and printed, so the printed value is the one that enters the statistics.
        for m in range(len(models_rf)):
            score = model_runner(X_train, X_test, y_train, y_test, models_rf[m])
            rf_arr[m][fold] = score
            print(score)
        for m in range(len(models_svm)):
            score = model_runner(X_train, X_test, y_train, y_test, models_svm[m])
            svm_arr[m][fold] = score
            print(score)
    #Collapse the fold scores of every model into a mean and a standard deviation.
    for m in range(len(models_svm)):
        syll_mix_svmmean[p][m] = np.mean(svm_arr[m])
        syll_mix_svmstd[p][m] = np.std(svm_arr[m])
    for m in range(len(models_rf)):
        syll_mix_rfmean[p][m] = np.mean(rf_arr[m])
        syll_mix_rfstd[p][m] = np.std(rf_arr[m])
#For each participant keep the highest model mean together with the standard
#deviation that belongs to that same model.
syll_mix_rf_mean = []
syll_mix_svm_mean = []
syll_mix_rf_std = []
syll_mix_svm_std = []
for p in range(len(p_data)):
    best_rf = np.argmax(syll_mix_rfmean[p])
    best_svm = np.argmax(syll_mix_svmmean[p])
    syll_mix_rf_mean.append(syll_mix_rfmean[p][best_rf])
    syll_mix_svm_mean.append(syll_mix_svmmean[p][best_svm])
    syll_mix_rf_std.append(syll_mix_rfstd[p][best_rf])
    syll_mix_svm_std.append(syll_mix_svmstd[p][best_svm])
print('Best means from RF: \n',syll_mix_rf_mean)
print('Best means from SVM: \n',syll_mix_svm_mean)
print('Std of mean from RF: \n',syll_mix_rf_std)
print('Std of mean from SVM: \n',syll_mix_svm_std)

### Classification of Linear Features.

All results are displayed in the cell below for linear features.

In [None]:
#Collects one list of per-participant results for every linear-feature experiment.
results_linear = []
#Semantic groups classification - Linear.
semantic_results = []
print('semantic_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #The semantic labels (y_sem) are the targets here and the data is not filtered by word.
    semantic_results.append(clf_lf(X, y_sem, participants[p],opt = True))
results_linear.append(semantic_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(semantic_results)):
        print(semantic_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(semantic_results)):
        print(semantic_results[j][k][3])

#Fruits classification - Linear.
fru_results = []
print('fru_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['date', 'juice', 'watermelon', 'gooseberry'])
    fru_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(fru_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(fru_results)):
        print(fru_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(fru_results)):
        print(fru_results[j][k][3])

#Animals classification - Linear.
ani_results = []
print('ani_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['goose', 'hedgehog', 'waterfowl', 'caterpillar'])
    ani_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(ani_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(ani_results)):
        print(ani_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(ani_results)):
        print(ani_results[j][k][3])

#Two fruits and two animals classification - Linear.
both4_cat_results = []
print('both4_cat_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['watermelon', 'gooseberry', 'waterfowl', 'caterpillar'])
    both4_cat_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(both4_cat_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(both4_cat_results)):
        print(both4_cat_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(both4_cat_results)):
        print(both4_cat_results[j][k][3])

#Homophones classification - Linear.
kni_ni_results = []
print('kni_ni_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['knight', 'night'])
    #This is the only linear experiment that calls clf_lf with opt = False.
    kni_ni_results.append(clf_lf(X, y, participants[p],opt = False))
results_linear.append(kni_ni_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(kni_ni_results)):
        print(kni_ni_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(kni_ni_results)):
        print(kni_ni_results[j][k][3])

#Rhyme combinations classification - Linear.
#Combination 1: moose / spruce / goose.
rhymecomb1_results = []
print('rhymecomb1_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['moose','spruce','goose'])
    rhymecomb1_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(rhymecomb1_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(rhymecomb1_results)):
        print(rhymecomb1_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(rhymecomb1_results)):
        print(rhymecomb1_results[j][k][3])

#Rhyme combination 2 (spruce / goose / juice) - Linear.
rhymecomb2_results = []
print('rhymecomb2_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['spruce','goose','juice'])
    rhymecomb2_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(rhymecomb2_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(rhymecomb2_results)):
        print(rhymecomb2_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(rhymecomb2_results)):
        print(rhymecomb2_results[j][k][3])
    
#Rhyme combination 3 (moose / spruce / juice) - Linear.
rhymecomb3_results = []
print('rhymecomb3_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['moose','spruce','juice'])
    rhymecomb3_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(rhymecomb3_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(rhymecomb3_results)):
        print(rhymecomb3_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(rhymecomb3_results)):
        print(rhymecomb3_results[j][k][3])

#Rhyme combination 4 (moose / goose / juice) - Linear.
rhymecomb4_results = []
print('rhymecomb4_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['moose','goose','juice'])
    rhymecomb4_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(rhymecomb4_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(rhymecomb4_results)):
        print(rhymecomb4_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(rhymecomb4_results)):
        print(rhymecomb4_results[j][k][3])
    
#Quadrisyllabic words classification - Linear.
syll4_results = []
print('syll4_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #The four-syllable group is watermelon / environment / caterpillar / ambassador,
    #the same set used for the MFCC quadrisyllabic run; 'wilderness' belongs to the
    #trisyllabic group and must not appear here.
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['watermelon','environment','caterpillar','ambassador'])
    syll4_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(syll4_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(syll4_results)):
        print(syll4_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(syll4_results)):
        print(syll4_results[j][k][3])

#Trisyllabic words classification - Linear.
syll3_results = []
print('syll3_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['gooseberry','waterfowl','wilderness','relative'])
    syll3_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(syll3_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(syll3_results)):
        print(syll3_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(syll3_results)):
        print(syll3_results[j][k][3])

#Disyllabic words classification - Linear.
syll2_results = []
print('syll2_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['berry','hedgehog','water','daughter'])
    syll2_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(syll2_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(syll2_results)):
        print(syll2_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(syll2_results)):
        print(syll2_results[j][k][3])

#Monosyllabic words classification - Linear.
syll1_results = []
print('syll1_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['date','goose','queen','night'])
    syll1_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(syll1_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(syll1_results)):
        print(syll1_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(syll1_results)):
        print(syll1_results[j][k][3])

#Two monosyllabic words and two quadrisyllabic words classification - Linear.
syll_mix_results = []
print('syll_mix_results')
for p in range(len(p_data)):
    X, y, y_sem, y_leng  = p_data[p]
    #NOTE(review): dueller presumably keeps only the trials of the listed words - confirm.
    X, y = dueller(X, y, ['queen', 'goose', 'watermelon', 'environment'])
    syll_mix_results.append(clf_lf(X, y, participants[p],opt = True))
results_linear.append(syll_mix_results)
#Each clf_lf result is read at [k][2] and [k][3] for k = 0, 1.
#NOTE(review): k presumably selects the classifier family (RF / SVM) - confirm against clf_lf.
#The inner loops cover exactly the participants that were processed above.
print('Best Mean for each subject.')
for k in range(2):
    for j in range(len(syll_mix_results)):
        print(syll_mix_results[j][k][2])
print('Standard deviation for best mean.')
for k in range(2):
    for j in range(len(syll_mix_results)):
        print(syll_mix_results[j][k][3])

#Back up the results: every entry of results_linear becomes one CSV row.
#newline="" is what the csv module expects for files it writes to.
with open("results_linear.csv", "w", newline="") as backup_file:
    csv.writer(backup_file).writerows(results_linear)

One may print out the results in full here.

In [None]:
#results_linear holds one list per experiment, and each of those holds one
#clf_lf result per participant, so the experiment level has to be walked
#first before the [participant][k][field] indexing used in the cell above.
#Print out all means.
for experiment_results in results_linear:
    for k in range(2):
        for j in range(len(experiment_results)):
            print(experiment_results[j][k][2])
#Print out all standard deviations.
for experiment_results in results_linear:
    for k in range(2):
        for j in range(len(experiment_results)):
            print(experiment_results[j][k][3])

### Calculating Cohen's Kappa Coefficient.

In [None]:
def k_score(acc, chance):
    '''Return the Kappa value for an attained accuracy given the chance accuracy.

    Both values are percentages: the margin of acc over chance is divided by
    the largest margin possible, 100 - chance.
    '''
    margin_over_chance = acc - chance
    largest_margin = 100 - chance
    return margin_over_chance / largest_margin

### Graph used in dissertation to show relationship between frequency and Mel-scale. 

In [None]:
def mel_conv(f):
    '''Return the Mel-scale value of frequency f, computed as 1125*ln(1 + f/700).'''
    scaled_frequency = f / 700
    return 1125 * np.log(1 + scaled_frequency)
#Scatter the Mel-scale value of the 50 evenly spaced frequencies from 1 to 50.
plt.figure(figsize=(16, 10))
frequencies = np.linspace(1, 50, num=50)
for x in frequencies:
    #One scatter call per point, exactly as many calls as there are frequencies.
    plt.scatter(x, mel_conv(x))
plt.xlabel('Frequency - f (Hz)')
plt.ylabel('Mel Scale - M(f)')
plt.grid(True)
plt.show()