# Machine learning tests are run here
Some code blocks always have to be run, some can be skipped if only certain tests are of interest.

# Imports (mandatory)
We start with importing some necessary packages.

In [33]:
import numpy as np
# import matplotlib as plt

# Make the dataset (mandatory)
Here, we make the dataset that we will use.

In [34]:
# load the features
dftrain = np.load('features.npy')
# dftrain = dftrain[:,17:135] # discard some peripheral patients and the chin landmarks
# load the corresponding labels
y_train = np.load('labels.npy')
# y_train = y_train[50:202] # discard the same peripheral patients

dftrain = dftrain / 900.0

classes = ['peripheral palsy', 'central palsy', 'healthy']


# Print the dataset (optional)
See the dataset that we're working with.

In [35]:
# print("Features:", dftrain[0])
# print("Labels:", classes[y_train[0]])
print("Features shape:",dftrain.shape)
print("Labels shape:",y_train.shape)

Features shape: (203, 136)
Labels shape: (203,)


# Model 1 (optional)
This is the first experiment. A DNN is made with many different amounts of hidden layers and nodes. The final accuracy is printed using LOOCV.

In [10]:
# imports:
import tensorflow as tf

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    # print("Run", i+1, "of", len(dftrain))
    x_test = dftrain[i]
    x_test = np.reshape(x_test, (1,-1))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model1 = tf.keras.Sequential([
        # tf.keras.layers.Dense(136, activation='relu'),
        tf.keras.layers.Dense(108800, activation='relu'),
        # tf.keras.layers.Dense(10880, activation='relu'),
        tf.keras.layers.Dense(3, activation='sigmoid'),
    ])

    model1.compile(
        optimizer = tf.keras.optimizers.Adam(),
        loss = tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics = tf.keras.metrics.CategoricalAccuracy(),
    )
    model1.fit(x_train_loocv, y_train_loocv, epochs=1, verbose=0, batch_size=len(x_train_loocv), shuffle=True)
    prediction = np.argmax(model1.predict(x_test))
    # print("Prediction:", classes[prediction.astype(int)])
    # print("Actual:    ", classes[y_test.astype(int)])
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.5073891625615764


# Model 2 (optional)
This is the second experiment. A support vector machine (SVM) will be built.

In [12]:
# todo: higher weight for central palsy (most severe condition)

# imports:
from sklearn import svm

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    x_test = dftrain[i]
    x_test = np.reshape(x_test, ([1,-1]))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model2 = svm.SVC(kernel='poly',degree=5)
    model2.fit(x_train_loocv, y_train_loocv)
    prediction = model2.predict(x_test)
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.8078817733990148


# Model 3 (optional)
This is the third experiment. This model uses a K-nearest neighbors (KNN) algorithm to classify the data.

In [30]:
# imports:
from sklearn import neighbors

# model settings:
n_neighbors = 5
weight = 'distance'

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    x_test = dftrain[i]
    x_test = np.reshape(x_test, ([1,-1]))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model3 = neighbors.KNeighborsClassifier(n_neighbors, weights=weight)
    model3.fit(x_train_loocv, y_train_loocv)
    prediction = model3.predict(x_test)
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.7142857142857143


# Model 4 (optional)
This is the fourth model. A Random Forest classification system is used.

In [38]:
# imports:
from sklearn import ensemble

# model settings:
n_estimators = 136

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    x_test = dftrain[i]
    x_test = np.reshape(x_test, ([1,-1]))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model4 = ensemble.RandomForestClassifier(n_estimators)
    model4.fit(x_train_loocv, y_train_loocv)
    prediction = model4.predict(x_test)
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.7586206896551724


# Model 5 (optional)
This is the fifth model. A Gaussian Naive Bayes model is built here.

In [44]:
# imports:
from sklearn import naive_bayes

# model settings:

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    x_test = dftrain[i]
    x_test = np.reshape(x_test, ([1,-1]))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model5 = naive_bayes.GaussianNB()
    model5.fit(x_train_loocv, y_train_loocv)
    prediction = model5.predict(x_test)
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.6009852216748769


# Model 6 (optional)
This is the sixth model. A decision tree is used in the following code block.

In [None]:
# imports:
from sklearn import tree

# model settings:
depth = 3

# use leave-one-out cross-validation to test the accuracy of the model
correct = 0
n = len(y_train)
for i in range(len(y_train)):
    x_test = dftrain[i]
    x_test = np.reshape(x_test, ([1,-1]))
    y_test = y_train[i]
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model6 = tree.DecisionTreeClassifier(max_depth=depth)
    model6.fit(x_train_loocv, y_train_loocv)
    prediction = model6.predict(x_test)
    if (prediction == y_test):
        correct += 1
print("Final accuracy =", correct/n)