# Various sklearn models

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score, f1_score

Xtrain = np.load(os.getcwd()+'/../feature_representations/feature_representation_2_train.npy')
ytrain = np.load(os.getcwd()+'/../feature_representations/ytrain.npy')
Xtest = np.load(os.getcwd()+'/../feature_representations/feature_representation_2_test.npy')
ytest = np.load(os.getcwd()+'/../feature_representations/ytest.npy')

In [2]:
Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape

((124800, 17), (124800,), (20800, 17), (20800,))

I have noticed better performance so far by removing the last 10 features for FR1.

In [3]:
Xtrain = Xtrain[:,:8]
Xtest = Xtest[:,:8]
Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape

((124800, 8), (124800,), (20800, 8), (20800,))

# KNN

In [3]:
from sklearn.neighbors import KNeighborsClassifier

k = 3
model = KNeighborsClassifier(n_neighbors=k)
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [4]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.6920673076923077
F1 score is 0.6933827875749089


# Naive Bayes

In [5]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [6]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.4758173076923077
F1 score is 0.46572068454345134


# Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [8]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.7613942307692307
F1 score is 0.7601282692316084


# Support Vector Machine

This may take a little while to run...

In [9]:
from sklearn.svm import SVC

model = SVC()
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [10]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.8121634615384615
F1 score is 0.8120494817065604


In [3]:
model = SVC(kernel='linear')
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [4]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.692451923076923
F1 score is 0.6906966772755812


In [5]:
model = SVC(kernel='poly')
model.fit(Xtrain, ytrain)
yhat = model.predict(Xtest)

In [6]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.7886057692307692
F1 score is 0.7885673466668843


# Neural Network

In [12]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(
    hidden_layer_sizes = [25,50,100,50],
    activation='logistic',
    solver='adam',
    batch_size=16,
    shuffle=True,
    random_state=4622,
)
model.fit(Xtrain, ytrain)



MLPClassifier(activation='logistic', batch_size=16,
              hidden_layer_sizes=[25, 50, 100, 50], random_state=4622)

In [13]:
acc = accuracy_score(ytest, yhat)
f1 = f1_score(ytest, yhat, average='macro')
print(f'Accuracy is {acc}')
print(f'F1 score is {f1}')

Accuracy is 0.8121634615384615
F1 score is 0.8120494817065604
