In [ ]:
from sklearn.model_selection import train_test_split
import numpy as np
import data_loader

### Setup

In [ ]:
# Load the resampled, flattened samples (xs) and their labels (ys) for the
# verified subjects, then hold out 20% of them as a test set.
xs, ys = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True)
xs = np.asarray(xs)
ys = np.asarray(ys)
# Seed the shuffle so the split -- and therefore every accuracy computed
# from it -- is the same on every run of the notebook.
trainx, testx, trainy, testy = train_test_split(
    xs, ys, test_size=0.2, random_state=42
)

In [ ]:
# train_subjects = data_loader.VERIFIED_SUBJECTS[:-2]
# test_subjects = data_loader.VERIFIED_SUBJECTS[-2:]
# train_subjects , test_subjects

In [ ]:
# trainx, trainy = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True, subjects=train_subjects)
# testx, testy = data_loader.verified_subjects_calibrated_yprs(resampled=True, flatten=True, subjects=test_subjects)
# trainx = np.array(trainx)
# trainy = np.array(trainy)
# testx = np.array(testx)
# testy = np.array(testy)

In [ ]:
# Sanity check: shapes of the train/test features and labels, in the order
# (trainx, trainy, testx, testy).
tuple(part.shape for part in (trainx, trainy, testx, testy))

In [ ]:
def get_acc(pred, testy):
    """Return the fraction of predictions that equal the true labels.

    Args:
        pred: Predicted labels (any array-like).
        testy: Ground-truth labels (any array-like) with the same shape as
            ``pred``.

    Returns:
        The share of matching entries as a float in ``[0, 1]`` (``nan`` when
        the inputs are empty).

    Raises:
        ValueError: If ``pred`` and ``testy`` do not have the same shape.
    """
    pred = np.asarray(pred)
    testy = np.asarray(testy)
    if pred.shape != testy.shape:
        # Fail loudly: broadcasting e.g. (n,) against (n, 1) would silently
        # compare every prediction with every label and report garbage.
        raise ValueError(
            f'shape mismatch: pred {pred.shape} vs testy {testy.shape}'
        )
    # Compare with == instead of subtracting so that non-numeric labels
    # (strings, booleans) are supported as well.
    return np.mean(pred == testy)

### SVM

In [ ]:
from sklearn import svm

In [ ]:
# Fit one support-vector classifier per kernel and store its test accuracy
# in svc_acc, keyed by kernel name.
svc_acc = {}
kernels = ['poly']

for kernel in kernels:
    print(f'Training svc with {kernel} kernel')
    clf = svm.SVC(max_iter=25000, gamma='auto', kernel=kernel)
    # fit() returns the estimator itself, so training and prediction chain.
    pred = clf.fit(trainx, trainy).predict(testx)
    svc_acc[kernel] = get_acc(pred, testy)

In [ ]:
# Show the recorded test accuracy for each SVM kernel.
svc_acc

### KNN

In [ ]:
from sklearn.neighbors import KNeighborsClassifier

In [ ]:
# Sweep the neighbour count from 2 to 9 and record the test accuracy of each
# k-nearest-neighbours classifier, keyed by the neighbour count.
test_acc = {}

for num_neighbor in range(2, 10):
    print(f'Running KNN with n={num_neighbor}')
    clf = KNeighborsClassifier(n_neighbors=num_neighbor)
    # fit() returns the estimator itself, so training and prediction chain.
    pred = clf.fit(trainx, trainy).predict(testx)
    test_acc[num_neighbor] = get_acc(pred, testy)

In [ ]:
# Show the recorded KNN test accuracy for each neighbour count.
test_acc

### Nearest Centroid

In [ ]:
# Import from the public package: the private module path
# `sklearn.neighbors.nearest_centroid` is no longer importable in current
# scikit-learn releases.
from sklearn.neighbors import NearestCentroid

In [ ]:
# Fit a nearest-centroid classifier with default settings on the training
# split. The trailing fit() call is also what the notebook displays.
clf = NearestCentroid()
clf.fit(trainx, trainy)

In [ ]:
# Score the fitted classifier on the test split with the shared get_acc
# helper instead of repeating its arithmetic inline.
pred = clf.predict(testx)
acc = get_acc(pred, testy)
acc

### Lasso: linear regression

In [ ]:
from sklearn import linear_model

# Lasso is a regressor, so its continuous outputs are rounded to the nearest
# integer before being compared with the labels
# (assumes integer-coded labels -- TODO confirm against data_loader).
clf = linear_model.Lasso(max_iter=5000, alpha=0.1)
pred = np.rint(clf.fit(trainx, trainy).predict(testx))
acc = get_acc(pred, testy)

In [ ]:
# Show the accuracy currently stored in `acc`.
acc

### Gaussian Naive Bayes

In [ ]:
from sklearn.naive_bayes import GaussianNB

# Train a Gaussian naive Bayes classifier with default settings and score it
# on the held-out split.
clf = GaussianNB()
pred = clf.fit(trainx, trainy).predict(testx)
acc = get_acc(pred, testy)

In [ ]:
# Show the accuracy currently stored in `acc`.
acc

### Forest of randomized trees

In [ ]:
from sklearn.ensemble import RandomForestClassifier

# Train a random forest with default hyper-parameters and score it on the
# held-out split.
clf = RandomForestClassifier()
pred = clf.fit(trainx, trainy).predict(testx)
acc = get_acc(pred, testy)

In [ ]:
# Show the accuracy currently stored in `acc`.
acc

### Basic NN (multi-layer perceptron)

In [ ]:
from sklearn.neural_network import MLPClassifier

In [ ]:
# Hyper-parameter grid for the MLP sweep.
# Hidden-layer layouts: one tuple entry per hidden layer, giving its width.
structures = [(100, 50), (100, 100, 50), (200, 50), (200, 100, 50)]

# Hidden-layer activation functions to try.
activations = ['logistic', 'tanh', 'relu']

# L2 regularisation strengths: 1x, 5x and 10x a base value of 0.0001.
l2_reg_const = [0.0001 * factor for factor in (1, 5, 10)]

# Filled by the sweep: configuration name -> test accuracy.
nn_acc = {}

In [ ]:
# Train one MLP for every (layout, activation, L2 strength) combination and
# store its test accuracy in nn_acc under a name describing the combination.
for structure in structures:
    for act in activations:
        for alpha in l2_reg_const:
            name = f'nn-{str(structure)}-{act}-{alpha}'
            print(name)

            clf = MLPClassifier(
                max_iter=5000,
                alpha=alpha,
                activation=act,
                hidden_layer_sizes=structure,
            )
            # fit() returns the estimator itself, so training and
            # prediction chain.
            pred = clf.fit(trainx, trainy).predict(testx)
            nn_acc[name] = get_acc(pred, testy)

In [ ]:
# Show the recorded test accuracy for each MLP configuration.
nn_acc