In [46]:
from sklearn.model_selection import train_test_split
import numpy as np
import data_loader

### setup

In [47]:
xs, ys = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True)
xs = np.array(xs)
ys = np.array(ys)
trainx, testx, trainy, testy = train_test_split(xs, ys, test_size=0.2)

albert
chaonan_12_5
daniel
haobin_11_22
isa_12_5
janet
joanne
kelly_11_7
kevin_11_7
ruocheng
russell_11_20_stand
russell_11_7
solomon
wenzhou_12_5
yiheng_11_30
yongxu_11_30


In [48]:
# train_subjects = data_loader.VERIFIED_SUBJECTS[:-2]
# test_subjects = data_loader.VERIFIED_SUBJECTS[-2:]
# train_subjects , test_subjects

In [49]:
# trainx, trainy = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True, subjects=train_subjects)
# testx, testy = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True, subjects=test_subjects)
# trainx = np.array(trainx)
# trainy = np.array(trainy)
# testx = np.array(testx)
# testy = np.array(testy)

In [50]:
trainx.shape, trainy.shape, testx.shape, testy.shape

((6425, 300), (6425,), (1607, 300), (1607,))

In [51]:
def get_acc(pred, testy):
    correct_pred = (pred-testy == 0).astype(int)
    acc = np.sum(correct_pred) / pred.shape[0]
    return acc

### SVM

In [52]:
from sklearn import svm

In [53]:
kernels = ['poly']
svc_acc = {}

for kernel in kernels:
    print(f'Training svc with {kernel} kernel')
    clf = svm.SVC(kernel=kernel, gamma='auto', max_iter=25000)
    clf.fit(trainx, trainy)
    pred = clf.predict(testx)
    svc_acc[kernel] = get_acc(pred, testy)

Training svc with poly kernel


In [54]:
svc_acc

{'poly': 0.6770379589296827}

### KNN

In [55]:
from sklearn.neighbors import KNeighborsClassifier

In [56]:
test_acc = {}

for num_neighbor in range(2, 10):
    print(f'Running KNN with n={num_neighbor}')
    
    clf = KNeighborsClassifier(n_neighbors=num_neighbor)
    clf.fit(trainx, trainy)
    
    pred = clf.predict(testx)
    test_acc[num_neighbor] = get_acc(pred, testy)

Running KNN with n=2
Running KNN with n=3
Running KNN with n=4
Running KNN with n=5
Running KNN with n=6
Running KNN with n=7
Running KNN with n=8
Running KNN with n=9


In [57]:
test_acc

{2: 0.7554449284380834,
 3: 0.7747355320472931,
 4: 0.761045426260112,
 5: 0.760423148724331,
 6: 0.7573117610454263,
 7: 0.7479775980087119,
 8: 0.7380211574362165,
 9: 0.7255756067205974}

### Nearest Centroid

In [58]:
from sklearn.neighbors.nearest_centroid import NearestCentroid

In [59]:
clf = NearestCentroid()
clf.fit(trainx, trainy)

NearestCentroid(metric='euclidean', shrink_threshold=None)

In [60]:
pred = clf.predict(testx)
correct_pred = (pred-testy == 0).astype(int)
acc = np.sum(correct_pred) / pred.shape[0]
acc

0.11200995644057249

### Lasso: linear regression

In [61]:
from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1, max_iter=5000)
clf.fit(trainx, trainy)
pred = clf.predict(testx)
pred = np.rint(pred)
acc = get_acc(pred, testy)

In [62]:
acc

0.05164903546981954

### Gaussian Naive Bayes

In [63]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)

In [64]:
acc

0.14561294337274425

### Forest of randomized trees

In [65]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)



In [66]:
acc

0.5650280024891101

### Basic NN (multi-layer perceptron)

In [67]:
from sklearn.neural_network import MLPClassifier

In [68]:
structures = [
    (100,50),
    (100,100,50),
    (200,50),
    (200,100,50),
]

activations = ['logistic', 'tanh', 'relu']

l2_reg_const = [0.0001 * x for x in [1, 5, 10]]

nn_acc = {}

In [69]:
for structure in structures:
    for act in activations:
        for alpha in l2_reg_const:
            name = f'nn-{str(structure)}-{act}-{alpha}'
            print(name)
            
            clf = MLPClassifier(
                hidden_layer_sizes=structure,
                activation=act,
                alpha=alpha,
                max_iter=5000
            )
            clf.fit(trainx, trainy)
            pred = clf.predict(testx)
            
            nn_acc[name] = get_acc(pred, testy)

nn-(100, 50)-logistic-0.0001
nn-(100, 50)-logistic-0.0005
nn-(100, 50)-logistic-0.001
nn-(100, 50)-tanh-0.0001
nn-(100, 50)-tanh-0.0005
nn-(100, 50)-tanh-0.001
nn-(100, 50)-relu-0.0001
nn-(100, 50)-relu-0.0005
nn-(100, 50)-relu-0.001
nn-(100, 100, 50)-logistic-0.0001
nn-(100, 100, 50)-logistic-0.0005
nn-(100, 100, 50)-logistic-0.001
nn-(100, 100, 50)-tanh-0.0001
nn-(100, 100, 50)-tanh-0.0005
nn-(100, 100, 50)-tanh-0.001
nn-(100, 100, 50)-relu-0.0001
nn-(100, 100, 50)-relu-0.0005
nn-(100, 100, 50)-relu-0.001
nn-(200, 50)-logistic-0.0001
nn-(200, 50)-logistic-0.0005
nn-(200, 50)-logistic-0.001
nn-(200, 50)-tanh-0.0001
nn-(200, 50)-tanh-0.0005
nn-(200, 50)-tanh-0.001
nn-(200, 50)-relu-0.0001
nn-(200, 50)-relu-0.0005
nn-(200, 50)-relu-0.001
nn-(200, 100, 50)-logistic-0.0001
nn-(200, 100, 50)-logistic-0.0005
nn-(200, 100, 50)-logistic-0.001
nn-(200, 100, 50)-tanh-0.0001
nn-(200, 100, 50)-tanh-0.0005
nn-(200, 100, 50)-tanh-0.001
nn-(200, 100, 50)-relu-0.0001
nn-(200, 100, 50)-relu-0.0005
nn-

In [70]:
nn_acc

{'nn-(100, 50)-logistic-0.0001': 0.5743621655258245,
 'nn-(100, 50)-logistic-0.0005': 0.5892968263845675,
 'nn-(100, 50)-logistic-0.001': 0.5799626633478532,
 'nn-(100, 50)-tanh-0.0001': 0.47666459240821407,
 'nn-(100, 50)-tanh-0.0005': 0.492221530802738,
 'nn-(100, 50)-tanh-0.001': 0.4878655880522713,
 'nn-(100, 50)-relu-0.0001': 0.6596141879278158,
 'nn-(100, 50)-relu-0.0005': 0.6533914125700062,
 'nn-(100, 50)-relu-0.001': 0.6365899191039204,
 'nn-(100, 100, 50)-logistic-0.0001': 0.4953329184816428,
 'nn-(100, 100, 50)-logistic-0.0005': 0.46172993154947106,
 'nn-(100, 100, 50)-logistic-0.001': 0.4940883634100809,
 'nn-(100, 100, 50)-tanh-0.0001': 0.5488487865588052,
 'nn-(100, 100, 50)-tanh-0.0005': 0.552582451773491,
 'nn-(100, 100, 50)-tanh-0.001': 0.6036092097075295,
 'nn-(100, 100, 50)-relu-0.0001': 0.7056627255756067,
 'nn-(100, 100, 50)-relu-0.0005': 0.6981953951462352,
 'nn-(100, 100, 50)-relu-0.001': 0.6820161792159303,
 'nn-(200, 50)-logistic-0.0001': 0.6204107031736155,
 '