In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
import data_loader

### setup

In [2]:
xs, ys = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True)

haobin_11_22
kevin_11_7
russell_11_7
kelly_11_7
russell_11_20_stand


In [3]:
xs = np.array(xs)
ys = np.array(ys)

In [4]:
trainx, testx, trainy, testy = train_test_split(xs, ys, test_size=0.2)

In [5]:
trainx.shape, trainy.shape, testx.shape, testy.shape

((2088, 300), (2088,), (522, 300), (522,))

In [6]:
def get_acc(pred, testy):
    correct_pred = (pred-testy == 0).astype(int)
    acc = np.sum(correct_pred) / pred.shape[0]
    return acc

### SVM

In [7]:
from sklearn import svm

In [8]:
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
svc_acc = {}

for kernel in kernels:
    print(f'Training svc with {kernel} kernel')
    clf = svm.SVC(kernel=kernel, gamma='auto')
    clf.fit(trainx, trainy)
    pred = clf.predict(testx)
    svc_acc[kernel] = get_acc(pred, testy)

Training svc with linear kernel
Training svc with poly kernel
Training svc with rbf kernel
Training svc with sigmoid kernel


In [9]:
svc_acc

{'linear': 0.7413793103448276,
 'poly': 0.7873563218390804,
 'rbf': 0.03639846743295019,
 'sigmoid': 0.03639846743295019}

### KNN

In [10]:
from sklearn.neighbors import KNeighborsClassifier

In [11]:
knn_acc = {}

for num_neighbor in range(2, 15):
    print(f'Running KNN with n={num_neighbor}')
    
    clf = KNeighborsClassifier(n_neighbors=num_neighbor)
    clf.fit(trainx, trainy)
    
    pred = clf.predict(testx)
    knn_acc[num_neighbor] = get_acc(pred, testy)

Running KNN with n=2
Running KNN with n=3
Running KNN with n=4
Running KNN with n=5
Running KNN with n=6
Running KNN with n=7
Running KNN with n=8
Running KNN with n=9
Running KNN with n=10
Running KNN with n=11
Running KNN with n=12
Running KNN with n=13
Running KNN with n=14


In [12]:
knn_acc

{2: 0.789272030651341,
 3: 0.8007662835249042,
 4: 0.8026819923371648,
 5: 0.7969348659003831,
 6: 0.7720306513409961,
 7: 0.7624521072796935,
 8: 0.7471264367816092,
 9: 0.7547892720306514,
 10: 0.7394636015325671,
 11: 0.7260536398467433,
 12: 0.710727969348659,
 13: 0.7088122605363985,
 14: 0.7030651340996169}

### Nearest Centroid

In [13]:
from sklearn.neighbors.nearest_centroid import NearestCentroid

In [14]:
clf = NearestCentroid()
clf.fit(trainx, trainy)

NearestCentroid(metric='euclidean', shrink_threshold=None)

In [15]:
pred = clf.predict(testx)
correct_pred = (pred-testy == 0).astype(int)
acc = np.sum(correct_pred) / pred.shape[0]
acc

0.28544061302681994

### Lasso: linear regression

In [16]:
from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1, max_iter=5000)
clf.fit(trainx, trainy)
pred = clf.predict(testx)
pred = np.rint(pred)
acc = get_acc(pred, testy)

In [17]:
acc

0.040229885057471264

### Gaussian Naive Bayes

In [18]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)

In [19]:
acc

0.4061302681992337

### Forest of randomized trees

In [20]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)



In [21]:
acc

0.6130268199233716

### Basic NN (multi-layer perceptron)

In [23]:
from sklearn.neural_network import MLPClassifier

In [25]:
structures = [
    (100,50),
    (100,100,50),
    (200,50),
    (200,100,50),
]

activations = ['logistic', 'tanh', 'relu']

l2_reg_const = [0.0001 * x for x in [1, 5, 10]]

nn_acc = {}

In [27]:
for structure in structures:
    for act in activations:
        for alpha in l2_reg_const:
            name = f'nn-{str(structure)}-{act}-{alpha}'
            print(name)
            
            clf = MLPClassifier(
                hidden_layer_sizes=structure,
                activation=act,
                alpha=alpha,
                max_iter=5000
            )
            clf.fit(trainx, trainy)
            pred = clf.predict(testx)
            
            nn_acc[name] = get_acc(pred, testy)

nn-(100, 50)-logistic-0.0001
nn-(100, 50)-logistic-0.0005
nn-(100, 50)-logistic-0.001
nn-(100, 50)-tanh-0.0001
nn-(100, 50)-tanh-0.0005
nn-(100, 50)-tanh-0.001
nn-(100, 50)-relu-0.0001
nn-(100, 50)-relu-0.0005
nn-(100, 50)-relu-0.001
nn-(100, 100, 50)-logistic-0.0001
nn-(100, 100, 50)-logistic-0.0005
nn-(100, 100, 50)-logistic-0.001
nn-(100, 100, 50)-tanh-0.0001
nn-(100, 100, 50)-tanh-0.0005
nn-(100, 100, 50)-tanh-0.001
nn-(100, 100, 50)-relu-0.0001
nn-(100, 100, 50)-relu-0.0005
nn-(100, 100, 50)-relu-0.001
nn-(200, 50)-logistic-0.0001
nn-(200, 50)-logistic-0.0005
nn-(200, 50)-logistic-0.001
nn-(200, 50)-tanh-0.0001
nn-(200, 50)-tanh-0.0005
nn-(200, 50)-tanh-0.001
nn-(200, 50)-relu-0.0001
nn-(200, 50)-relu-0.0005
nn-(200, 50)-relu-0.001
nn-(200, 100, 50)-logistic-0.0001
nn-(200, 100, 50)-logistic-0.0005
nn-(200, 100, 50)-logistic-0.001
nn-(200, 100, 50)-tanh-0.0001
nn-(200, 100, 50)-tanh-0.0005
nn-(200, 100, 50)-tanh-0.001
nn-(200, 100, 50)-relu-0.0001
nn-(200, 100, 50)-relu-0.0005
nn-

In [28]:
nn_acc

{'nn-(100, 50)-logistic-0.0001': 0.6800766283524904,
 'nn-(100, 50)-logistic-0.0005': 0.6666666666666666,
 'nn-(100, 50)-logistic-0.001': 0.6685823754789272,
 'nn-(100, 50)-tanh-0.0001': 0.6513409961685823,
 'nn-(100, 50)-tanh-0.0005': 0.6666666666666666,
 'nn-(100, 50)-tanh-0.001': 0.6954022988505747,
 'nn-(100, 50)-relu-0.0001': 0.7298850574712644,
 'nn-(100, 50)-relu-0.0005': 0.7203065134099617,
 'nn-(100, 50)-relu-0.001': 0.7260536398467433,
 'nn-(100, 100, 50)-logistic-0.0001': 0.5363984674329502,
 'nn-(100, 100, 50)-logistic-0.0005': 0.5766283524904214,
 'nn-(100, 100, 50)-logistic-0.001': 0.5478927203065134,
 'nn-(100, 100, 50)-tanh-0.0001': 0.6877394636015326,
 'nn-(100, 100, 50)-tanh-0.0005': 0.6915708812260536,
 'nn-(100, 100, 50)-tanh-0.001': 0.6800766283524904,
 'nn-(100, 100, 50)-relu-0.0001': 0.7777777777777778,
 'nn-(100, 100, 50)-relu-0.0005': 0.7911877394636015,
 'nn-(100, 100, 50)-relu-0.001': 0.7509578544061303,
 'nn-(200, 50)-logistic-0.0001': 0.7528735632183908,
 '