# Machine learning tests are run here
Code blocks marked "(mandatory)" must always be run; blocks marked "(optional)" can be skipped if only certain tests are of interest.

# Imports (mandatory)
We start with importing some necessary packages.

In [1]:
import numpy as np
# import matplotlib as plt

# Make the dataset (mandatory)
Here, we make the dataset that we will use.

In [39]:
# dataset settings:
#   full_data     - True keeps every sample, False keeps only rows 50..201
#   full_features - True keeps every column, False drops the first 34 columns
full_data = True
full_features = True

# read the feature matrix and the label vector that belongs to it
dftrain = np.load('features.npy')
y_train = np.load('labels.npy')

# (potentially) discard some data
if not full_data:
    # skip the first 50 peripheral patients for a more balanced dataset
    # NOTE(review): the stop index 202 also drops every row from index 202
    # onward (one row for a 203-row dataset) - confirm this is intended.
    dftrain, y_train = dftrain[50:202, :], y_train[50:202]
if not full_features:
    # skip the chin landmarks (the first 34 columns) for more significant data
    dftrain = dftrain[:, 34:]

# the coordinates live on a 900x900 grid, so dividing by 900
# brings every 'coordinate' into the range 0..1
dftrain = dftrain / 900.0

# human-readable class names, indexed by label, for printing predictions
classes = ['peripheral palsy', 'central palsy', 'healthy']

# report how much data is left after the optional filtering
print("Features shape:", dftrain.shape)
print("Labels shape:", y_train.shape)


Features shape: (203, 136)
Labels shape: (203,)


# Model 1 (optional)
This is the first experiment. A dense neural network (DNN) is built; several numbers of hidden layers and nodes were tried. The final accuracy is estimated with leave-one-out cross-validation (LOOCV) and printed.

In [10]:
# results: not working well. probably not enough datapoints available.

# imports:
import tensorflow as tf

# model settings:
# width of the single hidden layer (136 was also tried, as was an extra
# hidden layer of 10880 nodes)
hidden_units = 108800
# NOTE(review): with one epoch and a batch that holds the whole training
# fold, every model receives exactly one gradient update - raise `epochs`
# if the network is supposed to actually converge.
epochs = 1

# use leave-one-out cross-validation to test the accuracy of the model:
# every sample is held out once, a fresh network is trained on all other
# samples and then asked to classify the held-out one.
correct = 0
n = len(y_train)
for i in range(n):
    # print("Run", i+1, "of", len(dftrain))

    # a new model is built in every fold; release the global Keras state of
    # the previous one so memory use does not grow over the folds
    tf.keras.backend.clear_session()

    # held-out sample, reshaped to a batch of one: (1, n_features)
    x_test = np.reshape(dftrain[i], (1, -1))
    y_test = y_train[i]
    # everything except sample i is the training fold
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)

    model1 = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_units, activation='relu'),
        # the three classes are mutually exclusive, so the output layer uses
        # softmax (a probability distribution over the classes), not sigmoid
        tf.keras.layers.Dense(3, activation='softmax'),
    ])

    model1.compile(
        optimizer=tf.keras.optimizers.Adam(),
        # the labels are class indices (not one-hot vectors), hence the
        # "sparse" variants of both the loss and the accuracy metric
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    model1.fit(
        x_train_loocv,
        y_train_loocv,
        epochs=epochs,
        verbose=0,
        batch_size=len(x_train_loocv),
        shuffle=True,
    )
    # the index of the highest class score is the predicted label;
    # verbose=0 keeps Keras from printing a progress bar in every fold
    prediction = np.argmax(model1.predict(x_test, verbose=0))
    # print("Prediction:", classes[prediction.astype(int)])
    # print("Actual:    ", classes[y_test.astype(int)])
    if prediction == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.3092105263157895


# Model 2 (optional)
This is the second experiment. A support vector machine (SVM) will be built.

In [40]:
# todo: higher weight for central palsy (most severe condition)

# results:
    # full data, full features: .823
    # full data, part features: .
    # part data, full features: .
    # part data, part features: .

# imports:
from sklearn import svm

# model settings: polynomial kernel of the given degree
kernel = 'poly'
degree = 5

# leave-one-out cross-validation: each sample is held out once, an SVM is
# fitted on the remaining samples and scored on the held-out one
n = len(y_train)
correct = 0
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = dftrain[i].reshape(1, -1)
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, axis=0)
    y_train_loocv = np.delete(y_train, i, axis=0)
    model2 = svm.SVC(kernel=kernel, degree=degree)
    model2.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model2.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.8226600985221675


# Model 3 (optional)
This is the third experiment. This model uses a K-nearest neighbors (KNN) algorithm to classify the data.

In [41]:
# results:
    # full data, full features: .700
    # full data, part features: .
    # part data, full features: .
    # part data, part features: .

# imports:
from sklearn import neighbors

# model settings: number of neighbours consulted and how their votes
# are weighted
n_neighbors = 5
weight = 'distance'

# leave-one-out cross-validation: each sample is held out once, a KNN
# classifier is fitted on the remaining samples and scored on the held-out one
n = len(y_train)
correct = 0
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = dftrain[i].reshape(1, -1)
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, axis=0)
    y_train_loocv = np.delete(y_train, i, axis=0)
    model3 = neighbors.KNeighborsClassifier(n_neighbors, weights=weight)
    model3.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model3.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.6995073891625616


# Model 4 (optional)
This is the fourth model. A Random Forest classification system is used.

In [42]:
# results:
    # full data, full features: .773 (measured with an unseeded forest)
    # full data, part features: .
    # part data, full features: .
    # part data, part features: .

# imports:
from sklearn import ensemble

# model settings:
n_estimators = 136
# a random forest is randomised; pinning the seed makes the accuracy printed
# below reproducible from run to run
random_state = 1

# leave-one-out cross-validation: each sample is held out once, a forest is
# fitted on the remaining samples and scored on the held-out one
correct = 0
n = len(y_train)
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = np.reshape(dftrain[i], (1, -1))
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model4 = ensemble.RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    model4.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model4.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.7733990147783252


# Model 5 (optional)
This is the fifth model. A Gaussian Naive Bayes model is built here.

In [43]:
# results:
    # full data, full features: .601
    # full data, part features: .
    # part data, full features: .
    # part data, part features: .

# imports:
from sklearn import naive_bayes

# model settings:
# (none - GaussianNB is used with its default parameters)

# leave-one-out cross-validation: each sample is held out once, a Gaussian
# Naive Bayes model is fitted on the remaining samples and scored on the
# held-out one
n = len(y_train)
correct = 0
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = dftrain[i].reshape(1, -1)
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, axis=0)
    y_train_loocv = np.delete(y_train, i, axis=0)
    model5 = naive_bayes.GaussianNB()
    model5.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model5.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.6009852216748769


# Model 6 (optional)
This is the sixth model. A decision tree is used in the following code block.

In [44]:
# results:
    # full data, full features: .700 (measured with an unseeded tree)
    # full data, part features: .
    # part data, full features: .
    # part data, part features: .

# imports:
from sklearn import tree

# model settings:
depth = 10
# scikit-learn permutes the features randomly at every split, so the tree is
# only deterministic when the seed is pinned; this keeps the accuracy printed
# below reproducible from run to run
random_state = 1

# leave-one-out cross-validation: each sample is held out once, a decision
# tree is fitted on the remaining samples and scored on the held-out one
correct = 0
n = len(y_train)
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = np.reshape(dftrain[i], (1, -1))
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model6 = tree.DecisionTreeClassifier(
        max_depth=depth,
        random_state=random_state,
    )
    model6.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model6.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)

Final accuracy = 0.6995073891625616


# Model 7 (optional)
This is the seventh model. A DNN is implemented here as well, this time with scikit-learn's `MLPClassifier` instead of Keras.

In [69]:
# todo: not working well yet, lots of errors
# NOTE(review): the "errors" are presumably convergence warnings from the
# optimizer stopping at its default iteration limit - confirm, and raise
# max_iter on the classifier if so.

# imports:
from sklearn import neural_network

# model settings:
solver = 'adam'
alpha = 1e-5
# one hidden layer with 5 nodes; the trailing comma makes this a real tuple
# (a bare `(5)` is just the integer 5)
hidden_layers = (5,)
# seed for weight initialisation, so repeated runs give the same result
# NOTE(review): this name would shadow the stdlib `random` module if that
# is ever imported in this notebook.
random = 1

# leave-one-out cross-validation: each sample is held out once, a network is
# fitted on the remaining samples and scored on the held-out one
correct = 0
n = len(y_train)
for i in range(n):
    # held-out sample as a single-row 2-D array, plus its true label
    x_test = np.reshape(dftrain[i], (1, -1))
    y_test = y_train[i]
    # training fold: all rows except row i
    x_train_loocv = np.delete(dftrain, i, 0)
    y_train_loocv = np.delete(y_train, i, 0)
    model7 = neural_network.MLPClassifier(
        solver=solver,
        alpha=alpha,
        hidden_layer_sizes=hidden_layers,
        random_state=random,
    )
    model7.fit(x_train_loocv, y_train_loocv)
    # predict returns a one-element array for the single test row
    prediction = model7.predict(x_test)
    if prediction[0] == y_test:
        correct += 1
print("Final accuracy =", correct/n)



Final accuracy = 0.3881578947368421


