In [1]:
from sklearn.model_selection import train_test_split
import numpy as np
import data_loader

### setup

In [32]:
# Unpack the six arrays returned by the loader: the three feature sets
# (train / dev / test) followed by the three matching label sets.
# Alternative loader kept for reference:
#   data_loader.load_all_subject_split()
(
    trainx, devx, testx,
    trainy, devy, testy,
) = data_loader.load_all_classic_random_split()

Processing albert
Processing canon_12_5
Processing chen_12_7
Processing daniel
Processing haobin_11_22
Processing isa_12_5
Processing janet
Processing joanne
Processing jq_12_6
Processing kelly_11_7
Processing kevin_11_7
Processing ruocheng
Processing russell_11_20_stand
Processing russell_11_7
Processing russell_random_12_7
Processing solomon
Processing wenzhou_12_5
Processing yiheng_11_30
Processing yiheng_12_5
Processing yongxu_11_30
Processing zhaoye_12_7
Splitting out test set
Splitting out dev and train set


In [33]:
# Sanity check: show the shape of every split, features first, then labels.
tuple(split.shape for split in (trainx, devx, testx, trainy, devy, testy))

((8320, 300), (1040, 300), (1041, 300), (8320,), (1040,), (1041,))

In [34]:
def get_acc(pred, testy):
    """Return the fraction of predictions that exactly match the true labels.

    Parameters
    ----------
    pred : array-like
        Predicted labels.
    testy : array-like
        Ground-truth labels; must have the same shape as ``pred``.

    Returns
    -------
    numpy.float64
        Mean of the element-wise comparison ``pred == testy``.

    Raises
    ------
    ValueError
        If ``pred`` and ``testy`` do not have the same shape.
    """
    pred = np.asarray(pred)
    testy = np.asarray(testy)

    # Reject mismatched shapes up front: NumPy would otherwise broadcast
    # e.g. (n,) against (n, 1) into an (n, n) matrix and report a bogus score.
    if pred.shape != testy.shape:
        raise ValueError(
            f'pred and testy must have the same shape, '
            f'got {pred.shape} and {testy.shape}'
        )

    # Compare directly instead of subtracting, so labels that do not support
    # arithmetic (strings, booleans) are handled as well.
    return np.mean(pred == testy)

### SVM

In [26]:
from sklearn import svm

In [27]:
# Kernels to evaluate. Accuracies are stored in svc_acc under '<kernel>' for
# the test split and '<kernel>train' for the training split.
kernels = ['poly']
svc_acc = {}

for kernel in kernels:
    print(f'Training svc with {kernel} kernel')
    clf = svm.SVC(kernel=kernel, gamma='auto', max_iter=30000).fit(trainx, trainy)

    # Score the test split first, then the training split, so a large gap
    # between the two numbers is visible side by side.
    for suffix, features, labels in (('', testx, testy), ('train', trainx, trainy)):
        pred = clf.predict(features)
        svc_acc[kernel + suffix] = get_acc(pred, labels)

Training svc with poly kernel


In [28]:
# Test accuracy per kernel; keys ending in 'train' hold training-set accuracy.
svc_acc

{'poly': 0.15561959654178675, 'polytrain': 0.9998805399593836}

### KNN

In [35]:
from sklearn.neighbors import KNeighborsClassifier

In [36]:
# Sweep n_neighbors over 2..9 and record the test-set accuracy for each value.
test_acc = {}

for num_neighbor in range(2, 10):
    print(f'Running KNN with n={num_neighbor}')

    clf = KNeighborsClassifier(n_neighbors=num_neighbor).fit(trainx, trainy)
    pred = clf.predict(testx)
    test_acc[num_neighbor] = get_acc(pred, testy)

Running KNN with n=2
Running KNN with n=3
Running KNN with n=4
Running KNN with n=5
Running KNN with n=6
Running KNN with n=7
Running KNN with n=8
Running KNN with n=9


In [37]:
# Test-set accuracy keyed by the number of neighbours.
test_acc

{2: 0.7214217098943324,
 3: 0.7175792507204611,
 4: 0.7175792507204611,
 5: 0.7070124879923151,
 6: 0.6974063400576369,
 7: 0.6926032660902978,
 8: 0.675312199807877,
 9: 0.6618635926993276}

### Nearest Centroid

In [ ]:
# Import from the public package path; the private
# sklearn.neighbors.nearest_centroid module is not available in current
# scikit-learn releases.
from sklearn.neighbors import NearestCentroid

In [ ]:
# Fit a nearest-centroid classifier on the training split and display it.
clf = NearestCentroid().fit(trainx, trainy)
clf

In [ ]:
# Score the fitted classifier on the test split with the shared get_acc
# helper rather than repeating its arithmetic inline.
pred = clf.predict(testx)
acc = get_acc(pred, testy)
acc

### Lasso: linear regression

In [ ]:
from sklearn import linear_model
# Lasso is a regressor, so its continuous outputs are rounded to the nearest
# integer (np.rint) before being compared with testy.
clf = linear_model.Lasso(alpha=0.1, max_iter=5000).fit(trainx, trainy)
pred = np.rint(clf.predict(testx))
acc = get_acc(pred, testy)

In [ ]:
# Test-set accuracy of the Lasso model after rounding its predictions.
acc

### Gaussian Naive Bayes

In [ ]:
from sklearn.naive_bayes import GaussianNB
# Fit Gaussian naive Bayes on the training split and score it on the test split.
clf = GaussianNB().fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)

In [ ]:
# Test-set accuracy of the Gaussian naive Bayes classifier.
acc

### Forest of randomized trees

In [ ]:
from sklearn.ensemble import RandomForestClassifier
# Fit a random forest on the training split and score it on the test split.
# The seed is fixed because bootstrap sampling and per-split feature sampling
# are random; without it the reported accuracy changes on every run.
clf = RandomForestClassifier(random_state=0)
clf.fit(trainx, trainy)
pred = clf.predict(testx)
acc = get_acc(pred, testy)

In [ ]:
# Test-set accuracy of the random-forest classifier.
acc

### Basic NN (multi-layer perceptron)

In [ ]:
from sklearn.neural_network import MLPClassifier

In [ ]:
# Hyper-parameter grid for the MLP sweep.

# Hidden-layer widths, one tuple per candidate architecture.
structures = [(100, 50), (100, 100, 50), (200, 50), (200, 100, 50)]

# Hidden-unit activation functions to try.
activations = ['logistic', 'tanh', 'relu']

# Regularisation strengths passed as `alpha`: 1x, 5x and 10x a base of 0.0001.
l2_reg_const = [0.0001 * scale for scale in (1, 5, 10)]

# Filled in by the sweep: configuration name -> accuracy.
nn_acc = {}

In [ ]:
# Train one MLP per (structure, activation, alpha) combination and record its
# test-set accuracy in nn_acc under a descriptive 'nn-...' key.
for structure in structures:
    for act in activations:
        for alpha in l2_reg_const:
            name = f'nn-{structure}-{act}-{alpha}'
            print(name)

            clf = MLPClassifier(
                hidden_layer_sizes=structure,
                activation=act,
                alpha=alpha,
                max_iter=5000,
                # Fixed seed: weight initialisation and mini-batch sampling
                # are random, so unseeded runs are not comparable.
                random_state=0,
            )
            clf.fit(trainx, trainy)
            pred = clf.predict(testx)

            nn_acc[name] = get_acc(pred, testy)

In [ ]:
# Test-set accuracy for each MLP configuration, keyed by its 'nn-...' name.
nn_acc