In [45]:
# import packages
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

In [2]:
# load npy data
feature_raw_data_path = os.path.join('..', 'Data', 'X.npy')
feature_raw = np.load(feature_raw_data_path)

label_raw_data_path = os.path.join('..', 'Data', 'Y.npy')
label_raw = np.load(label_raw_data_path)

In [3]:
print("feature matrix shape: ", feature_raw.shape)
print("label matrix shape: ", label_raw.shape)

feature matrix shape:  (2062, 64, 64)
label matrix shape:  (2062, 10)


# Split data into training and testing

In [17]:
X = feature_raw.reshape((len(feature_raw), -1)) 
X_train, X_test, y_train, y_test = train_test_split(X, label_raw, test_size=0.2, random_state=0)
print("train", X_train.shape, y_train.shape)
print("test", X_test.shape, y_test.shape)

train (1649, 4096) (1649, 10)
test (413, 4096) (413, 10)


# Model selection and training

In [39]:
# -------knn-------

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# cross validation
scores = cross_val_score(knn, X_train, y_train, cv=10)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

0.61 accuracy with a standard deviation of 0.03


In [None]:
# ----------random forest------------
rf = RandomForestClassifier(random_state=0)
rf.fit(X_train, y_train)

# cross validation
scores = cross_val_score(rf, X_train, y_train, cv=10)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

0.39 accuracy with a standard deviation of 0.04


In [47]:
# -------svm-------

# reshape one hot encoding
y_train_1d = np.argmax(y_train, axis=1)

svm = SVC()
svm.fit(X_train, y_train_1d)

# cross validation
scores = cross_val_score(svm, X_train, y_train_1d, cv=10)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

0.83 accuracy with a standard deviation of 0.03


# test on the testing data

In [48]:
# knn
y_pred = knn.predict(X_test)
print("knn accuracy: ", np.mean(y_pred == y_test))


knn accuracy:  0.946004842615012


In [49]:
# random forest
y_pred = rf.predict(X_test)
print("random forest accuracy: ", np.mean(y_pred == y_test))

random forest accuracy:  0.9399515738498789


In [50]:
# svm
y_pred = svm.predict(X_test)
print("svm accuracy: ", np.mean(y_pred == np.argmax(y_test, axis=1)))

svm accuracy:  0.847457627118644
