In [None]:
from __future__ import print_function
from __future__ import division
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

filepath=os.path.abspath(os.getcwd())+"\\feature_matrix\\Combined Data CSV.csv" # Code assumes it is found in Postural_sway_measures / Change name of csv file as needed
data=pd.read_csv(filepath,sep=',')
data=data.set_index('name')
data['sex']=(data['sex'] == 'male').astype(int) # 1 for male and 0 for female

In [None]:
## Predictors 
# layer 1: [0]="1" / [1]="2" 
data['layer 1']=(data['layer 1'] == 2).astype(int) # 1 for "2" in layer1 and 0 for "1" in layer1
# layer 2: [0,0]="1" / [1,0]="3" / [0,1]="4"
data.insert(5,"layer 2-2",data['layer 2'])
data['layer 2']=(data['layer 2'] == 3).astype(int) # 1 for "3" in layer2
data['layer 2-2']=(data['layer 2-2'] == 4).astype(int) # 1 for "4" in layer2
# layer 3: [0,0,0]="1" / [1,0,0]="4" / [0,1,0]="5" / [0,0,1]="6"
data.insert(7,"layer 3-2",data['layer 3'])
data.insert(8,"layer 3-3",data['layer 3'])
data['layer 3']=(data['layer 3'] == 4).astype(int) # 1 for "4" in layer2
data['layer 3-2']=(data['layer 3-2'] == 5).astype(int) # 1 for "5" in layer2
data['layer 3-3']=(data['layer 3-3'] == 6).astype(int) # 1 for "6" in layer2

## Data Division
young_data=data.drop(data[data.age>=65].index)
old_data=data.drop(data[data.age<65].index)

## Features
all_features=list(data.columns[0:2])+list(data.columns[9:]) # all features, not including the diagnosis
features=all_features[:]
data

In [None]:
# Learning
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA

In [6]:
## (!!! LAYER 1 !!!)
features=all_features[:]
diagnosis=data['layer 1']

# Most Correlated Features
spearman_correlations=data[features].corrwith(diagnosis,method='spearman')
features_layer1=[]
minimum=0.2 # Change this for different results
for i in range(len(spearman_correlations)):
    if abs(spearman_correlations[i])>minimum:
        features_layer1.append(spearman_correlations.index[i])
#features=features_layer1
features

['sex',
 'age',
 'Open eye pressure center position (left and right)',
 'Center position of the eye pressure (front and back)',
 'Eye circumference area',
 'Eye opening effective value area',
 'Total eye-open path length',
 'Eye opening unit area locus length',
 'Eye opening average speed',
 'Closed foot pressure center position (left and right)',
 'Eye pressure center position (front and back)',
 'Peripheral area with eyes closed',
 'Eye closing effective value area',
 'Total eye-closure length',
 'Eye-closing unit area locus length',
 'Eye closing average speed',
 'Romberg rate (peripheral area)',
 'Romberg rate (total track length)',
 'Romberg rate (average speed)']

In [5]:
# Feature Ranking (SVR)
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
estimator = SVR(kernel="linear")
selector = RFECV(estimator,step=1,cv=5)
selector = selector.fit(data[features],diagnosis)
selector.ranking_

KeyboardInterrupt: 

In [None]:
# Feature Ranking (RF)
from sklearn.feature_selection import RFECV
estimator = RandomForestClassifier(random_state=42, warm_start=True, n_jobs=-1)
selector = RFECV(estimator,step=1,cv=5)
selector = selector.fit(data[features],diagnosis)
selector.ranking_

In [None]:
# Feature Ranking (SVC)
from sklearn.feature_selection import RFECV
estimator = LinearSVC()
selector = RFECV(estimator,step=1,cv=5)
selector = selector.fit(data[features],diagnosis)
selector.ranking_

In [27]:
# SVC: Train-Test Split 
LSVC=LinearSVC()
scaler=RobustScaler()
scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
scaler.fit(scaled_train)
scaled_train=scaler.transform(scaled_train)
scaled_test=scaler.transform(scaled_test)

LSVC=LSVC.fit(scaled_train,diagnosis_train)
diagnosis_predictions=LSVC.predict(scaled_test)

TP, FP, TN, FN = 0, 0, 0, 0
for i in range(len(diagnosis_test)):   
    if diagnosis_test[i]==diagnosis_predictions[i]==1:
        TP=TP+1
    elif diagnosis_test[i]==diagnosis_predictions[i]==0:
        TN=TN+1
    elif diagnosis_test[i]==1 and diagnosis_predictions[i]==0:
        FP=FP+1
    elif diagnosis_test[i]==0 and diagnosis_predictions[i]==1:
        FN=FN+1
Accuracy=(TP+TN)/(len(diagnosis_test))
Sensitivity=TP/(TP+FN)
Specificity=TN/(TN+FP)
Precision=TP/(TP+FP)
print("Train-Test Split Results:\nTP=%d / FP=%d / TN=%d / FN=%d \nAccuracy = %f  \nSensitivity = %f \nSpecificity = %f \nPrecision = %f" %(TP,FP,TN,FN,Accuracy,Sensitivity,Specificity,Precision))
best_accuracy=Accuracy
best_precision=Precision

Train-Test Split Results:
TP=79 / FP=43 / TN=76 / FN=23 
Accuracy = 0.701357  
Sensitivity = 0.774510 
Specificity = 0.638655 
Precision = 0.647541


In [28]:
# SVC: Train-Test Split (Ranking) 
loops=len(features)-1
check=True
iteration=0
feature_list=features[:]
removed_features=[]
while(iteration<loops and check==True):
    iteration=iteration+1
    accuracies=[]
    precisions=[]
    final_list=feature_list[:]
    for i in range(len(feature_list)):
        feature_list=final_list[:]
        del feature_list[i]
        LSVC=LinearSVC()
        scaler=RobustScaler()
        scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[feature_list], diagnosis,test_size=0.3, random_state=42)
        scaler.fit(scaled_train)
        scaled_train=scaler.transform(scaled_train)
        scaled_test=scaler.transform(scaled_test)

        LSVC=LSVC.fit(scaled_train,diagnosis_train)
        diagnosis_predictions=LSVC.predict(scaled_test)

        TP, FP, TN, FN = 0, 0, 0, 0
        for j in range(len(diagnosis_test)):   
            if diagnosis_test[j]==diagnosis_predictions[j]==1:
                TP=TP+1
            elif diagnosis_test[j]==diagnosis_predictions[j]==0:
                TN=TN+1
            elif diagnosis_test[j]==1 and diagnosis_predictions[j]==0:
                FP=FP+1
            elif diagnosis_test[j]==0 and diagnosis_predictions[j]==1:
                FN=FN+1
        Accuracy=(TP+TN)/(len(diagnosis_test))
        #Sensitivity=TP/(TP+FN)
        #Specificity=TN/(TN+FP)
        Precision=TP/(TP+FP)
        accuracies.append(Accuracy)
        precisions.append(Precision)
    if(max(accuracies)>best_accuracy):
        best_accuracy=max(accuracies)
        best_precision=precisions[accuracies.index(max(accuracies))]
        feature_list=final_list[:]
        removed_features.append(feature_list[accuracies.index(max(accuracies))])
        del feature_list[accuracies.index(max(accuracies))]
    else:
        print("N/A")
        check=False
print(final_list)

N/A
['age', 'Center position of the eye pressure (front and back)', 'Eye circumference area', 'Eye opening effective value area', 'Total eye-open path length', 'Closed foot pressure center position (left and right)', 'Eye pressure center position (front and back)', 'Peripheral area with eyes closed', 'Eye closing effective value area', 'Total eye-closure length', 'Eye-closing unit area locus length', 'Eye closing average speed', 'Romberg rate (peripheral area)', 'Romberg rate (total track length)', 'Romberg rate (average speed)']


In [29]:
print(iteration)
print(best_accuracy)
print(best_precision)
print(removed_features)


5
0.755656108597
0.72131147541
['sex', 'Open eye pressure center position (left and right)', 'Eye opening unit area locus length', 'Eye opening average speed']


In [30]:
# ////////////////////////////////////////////////////////////
LSVC=LinearSVC()
scaler=RobustScaler()
scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[final_list], diagnosis,test_size=0.3, random_state=42)
scaler.fit(scaled_train)
scaled_train=scaler.transform(scaled_train)
scaled_test=scaler.transform(scaled_test)

LSVC=LSVC.fit(scaled_train,diagnosis_train)
diagnosis_predictions=LSVC.predict(scaled_test)

TP, FP, TN, FN = 0, 0, 0, 0
for i in range(len(diagnosis_test)):   
    if diagnosis_test[i]==diagnosis_predictions[i]==1:
        TP=TP+1
    elif diagnosis_test[i]==diagnosis_predictions[i]==0:
        TN=TN+1
    elif diagnosis_test[i]==1 and diagnosis_predictions[i]==0:
        FP=FP+1
    elif diagnosis_test[i]==0 and diagnosis_predictions[i]==1:
        FN=FN+1
Accuracy=(TP+TN)/(len(diagnosis_test))
Sensitivity=TP/(TP+FN)
Specificity=TN/(TN+FP)
Precision=TP/(TP+FP)
print("Train-Test Split Results:\nTP=%d / FP=%d / TN=%d / FN=%d \nAccuracy = %f  \nSensitivity = %f \nSpecificity = %f \nPrecision = %f" %(TP,FP,TN,FN,Accuracy,Sensitivity,Specificity,Precision))

Train-Test Split Results:
TP=87 / FP=35 / TN=79 / FN=20 
Accuracy = 0.751131  
Sensitivity = 0.813084 
Specificity = 0.692982 
Precision = 0.713115


In [21]:
# Alternative SVC: Train-Test Split
LSVC=SVC(kernel='rbf')
scaler=RobustScaler()
scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
scaler.fit(scaled_train)
scaled_train=scaler.transform(scaled_train)
scaled_test=scaler.transform(scaled_test)

LSVC=LSVC.fit(scaled_train,diagnosis_train)
diagnosis_predictions=LSVC.predict(scaled_test)

TP, FP, TN, FN = 0, 0, 0, 0
for i in range(len(diagnosis_test)):   
    if diagnosis_test[i]==diagnosis_predictions[i]==1:
        TP=TP+1
    elif diagnosis_test[i]==diagnosis_predictions[i]==0:
        TN=TN+1
    elif diagnosis_test[i]==1 and diagnosis_predictions[i]==0:
        FP=FP+1
    elif diagnosis_test[i]==0 and diagnosis_predictions[i]==1:
        FN=FN+1
Accuracy=(TP+TN)/(len(diagnosis_test))
Sensitivity=TP/(TP+FN)
Specificity=TN/(TN+FP)
Precision=TP/(TP+FP)
print("Train-Test Split Results:\nTP=%d / FP=%d / TN=%d / FN=%d \nAccuracy = %f  \nSensitivity = %f \nSpecificity = %f \nPrecision = %f" %(TP,FP,TN,FN,Accuracy,Sensitivity,Specificity,Precision))
best_accuracy=Accuracy
best_precision=Precision

Train-Test Split Results:
TP=79 / FP=43 / TN=82 / FN=17 
Accuracy = 0.728507  
Sensitivity = 0.822917 
Specificity = 0.656000 
Precision = 0.647541




In [22]:
# Alternative SVC: Train-Test Split (Ranking) 
loops=len(features)-1
check=True
iteration=0
feature_list=features[:]
removed_features=[]
while(iteration<loops and check==True):
    iteration=iteration+1
    accuracies=[]
    precisions=[]
    final_list=feature_list[:]
    for i in range(len(feature_list)):
        feature_list=final_list[:]
        del feature_list[i]
        LSVC=SVC(kernel='rbf')
        scaler=RobustScaler()
        scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
        scaler.fit(scaled_train)
        scaled_train=scaler.transform(scaled_train)
        scaled_test=scaler.transform(scaled_test)

        LSVC=LSVC.fit(scaled_train,diagnosis_train)
        diagnosis_predictions=LSVC.predict(scaled_test)

        TP, FP, TN, FN = 0, 0, 0, 0
        for j in range(len(diagnosis_test)):   
            if diagnosis_test[j]==diagnosis_predictions[j]==1:
                TP=TP+1
            elif diagnosis_test[j]==diagnosis_predictions[j]==0:
                TN=TN+1
            elif diagnosis_test[j]==1 and diagnosis_predictions[j]==0:
                FP=FP+1
            elif diagnosis_test[j]==0 and diagnosis_predictions[j]==1:
                FN=FN+1
        Accuracy=(TP+TN)/(len(diagnosis_test))
        #Sensitivity=TP/(TP+FN)
        #Specificity=TN/(TN+FP)
        Precision=TP/(TP+FP)
        accuracies.append(Accuracy)
        precisions.append(Precision)
    if(max(accuracies)>best_accuracy):
        best_accuracy=max(accuracies)
        best_precision=precisions[accuracies.index(max(accuracies))]
        feature_list=final_list[:]
        removed_features.append(feature_list[accuracies.index(max(accuracies))])
        del feature_list[accuracies.index(max(accuracies))]
    else:
        print("N/A")
        check=False
print(final_list)

N/A
['sex', 'age', 'Open eye pressure center position (left and right)', 'Center position of the eye pressure (front and back)', 'Eye circumference area', 'Eye opening effective value area', 'Total eye-open path length', 'Eye opening unit area locus length', 'Eye opening average speed', 'Closed foot pressure center position (left and right)', 'Eye pressure center position (front and back)', 'Peripheral area with eyes closed', 'Eye closing effective value area', 'Total eye-closure length', 'Eye-closing unit area locus length', 'Eye closing average speed', 'Romberg rate (peripheral area)', 'Romberg rate (total track length)', 'Romberg rate (average speed)']


In [23]:
print(iteration)
print(best_accuracy)
print(best_precision)
print(removed_features)

1
0.72850678733
0.647540983607
[]


In [24]:
# K-nearest Neighbors: Train-Test Split
neigh = KNeighborsClassifier(n_neighbors=3)
scaler=RobustScaler()
scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
scaler.fit(scaled_train)
scaled_train=scaler.transform(scaled_train)
scaled_test=scaler.transform(scaled_test)

neigh.fit(scaled_train,diagnosis_train)
diagnosis_predictions=neigh.predict(scaled_test)

TP, FP, TN, FN = 0, 0, 0, 0
for i in range(len(diagnosis_test)):   
    if diagnosis_test[i]==diagnosis_predictions[i]==1:
        TP=TP+1
    elif diagnosis_test[i]==diagnosis_predictions[i]==0:
        TN=TN+1
    elif diagnosis_test[i]==1 and diagnosis_predictions[i]==0:
        FP=FP+1
    elif diagnosis_test[i]==0 and diagnosis_predictions[i]==1:
        FN=FN+1
Accuracy=(TP+TN)/(len(diagnosis_test))
Sensitivity=TP/(TP+FN)
Specificity=TN/(TN+FP)
Precision=TP/(TP+FP)
print("Train-Test Split Results:\nTP=%d / FP=%d / TN=%d / FN=%d \nAccuracy = %f  \nSensitivity = %f \nSpecificity = %f \nPrecision = %f" %(TP,FP,TN,FN,Accuracy,Sensitivity,Specificity,Precision))
best_accuracy=Accuracy
best_precision=Precision

Train-Test Split Results:
TP=81 / FP=41 / TN=71 / FN=28 
Accuracy = 0.687783  
Sensitivity = 0.743119 
Specificity = 0.633929 
Precision = 0.663934


In [25]:
# K-nearest Neighbor: Train-Test Split (Ranking) 
loops=len(features)-1
check=True
iteration=0
feature_list=features[:]
removed_features=[]
while(iteration<loops and check==True):
    iteration=iteration+1
    accuracies=[]
    precisions=[]
    final_list=feature_list[:]
    for i in range(len(feature_list)):
        feature_list=final_list[:]
        del feature_list[i]
        neigh = KNeighborsClassifier(n_neighbors=3)
        scaler=RobustScaler()
        scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
        scaler.fit(scaled_train)
        scaled_train=scaler.transform(scaled_train)
        scaled_test=scaler.transform(scaled_test)

        neigh.fit(scaled_train,diagnosis_train)
        diagnosis_predictions=neigh.predict(scaled_test)

        TP, FP, TN, FN = 0, 0, 0, 0
        for j in range(len(diagnosis_test)):   
            if diagnosis_test[j]==diagnosis_predictions[j]==1:
                TP=TP+1
            elif diagnosis_test[j]==diagnosis_predictions[j]==0:
                TN=TN+1
            elif diagnosis_test[j]==1 and diagnosis_predictions[j]==0:
                FP=FP+1
            elif diagnosis_test[j]==0 and diagnosis_predictions[j]==1:
                FN=FN+1
        Accuracy=(TP+TN)/(len(diagnosis_test))
        #Sensitivity=TP/(TP+FN)
        #Specificity=TN/(TN+FP)
        Precision=TP/(TP+FP)
        accuracies.append(Accuracy)
        precisions.append(Precision)
    if(max(accuracies)>best_accuracy):
        best_accuracy=max(accuracies)
        best_precision=precisions[accuracies.index(max(accuracies))]
        feature_list=final_list[:]
        removed_features.append(feature_list[accuracies.index(max(accuracies))])
        del feature_list[accuracies.index(max(accuracies))]
    else:
        print("N/A")
        check=False
print(final_list)

N/A
['sex', 'age', 'Open eye pressure center position (left and right)', 'Center position of the eye pressure (front and back)', 'Eye circumference area', 'Eye opening effective value area', 'Total eye-open path length', 'Eye opening unit area locus length', 'Eye opening average speed', 'Closed foot pressure center position (left and right)', 'Eye pressure center position (front and back)', 'Peripheral area with eyes closed', 'Eye closing effective value area', 'Total eye-closure length', 'Eye-closing unit area locus length', 'Eye closing average speed', 'Romberg rate (peripheral area)', 'Romberg rate (total track length)', 'Romberg rate (average speed)']


In [26]:
print(iteration)
print(best_accuracy)
print(best_precision)
print(removed_features)

1
0.68778280543
0.66393442623
[]


In [18]:
# Random Forest: Train-Test Split
scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
RF = RandomForestClassifier(random_state=42, warm_start=True, n_jobs=-1)
#RF.set_params(n_estimators=100) # Tree number
RF.fit(scaled_train,diagnosis_train)
diagnosis_predictions=RF.predict(scaled_test)

TP, FP, TN, FN = 0, 0, 0, 0
for i in range(len(diagnosis_test)): 
    if diagnosis_test[i]==diagnosis_predictions[i]==1:
        TP=TP+1
    elif diagnosis_test[i]==diagnosis_predictions[i]==0:
        TN=TN+1
    elif diagnosis_test[i]==1 and diagnosis_predictions[i]==0:
        FP=FP+1
    elif diagnosis_test[i]==0 and diagnosis_predictions[i]==1:
        FN=FN+1
Accuracy=(TP+TN)/(len(diagnosis_test))
Sensitivity=TP/(TP+FN)
Specificity=TN/(TN+FP)
Precision=TP/(TP+FP)
print("Train-Test Split Results:\nTP=%d / FP=%d / TN=%d / FN=%d \nAccuracy = %f  \nSensitivity = %f \nSpecificity = %f \nPrecision = %f" %(TP,FP,TN,FN,Accuracy,Sensitivity,Specificity,Precision))
best_accuracy=Accuracy
best_precision=Precision

Train-Test Split Results:
TP=71 / FP=51 / TN=82 / FN=17 
Accuracy = 0.692308  
Sensitivity = 0.806818 
Specificity = 0.616541 
Precision = 0.581967


In [19]:
# Random Forest: Train-Test Split (Ranking) 
loops=len(features)-1
check=True
iteration=0
feature_list=features[:]
removed_features=[]
while(iteration<loops and check==True):
    iteration=iteration+1
    accuracies=[]
    precisions=[]
    final_list=feature_list[:]
    for i in range(len(feature_list)):
        feature_list=final_list[:]
        del feature_list[i]
        scaled_train, scaled_test, diagnosis_train, diagnosis_test = train_test_split(data[features], diagnosis,test_size=0.3, random_state=42)
        RF = RandomForestClassifier(random_state=42, warm_start=True, n_jobs=-1)
        RF.fit(scaled_train,diagnosis_train)
        diagnosis_predictions=RF.predict(scaled_test)

        TP, FP, TN, FN = 0, 0, 0, 0
        for j in range(len(diagnosis_test)):   
            if diagnosis_test[j]==diagnosis_predictions[j]==1:
                TP=TP+1
            elif diagnosis_test[j]==diagnosis_predictions[j]==0:
                TN=TN+1
            elif diagnosis_test[j]==1 and diagnosis_predictions[j]==0:
                FP=FP+1
            elif diagnosis_test[j]==0 and diagnosis_predictions[j]==1:
                FN=FN+1
        Accuracy=(TP+TN)/(len(diagnosis_test))
        #Sensitivity=TP/(TP+FN)
        #Specificity=TN/(TN+FP)
        Precision=TP/(TP+FP)
        accuracies.append(Accuracy)
        precisions.append(Precision)
    if(max(accuracies)>best_accuracy):
        best_accuracy=max(accuracies)
        best_precision=precisions[accuracies.index(max(accuracies))]
        feature_list=final_list[:]
        removed_features.append(feature_list[accuracies.index(max(accuracies))])
        del feature_list[accuracies.index(max(accuracies))]
    else:
        print("N/A")
        check=False
print(final_list)

N//A
['sex', 'age', 'Open eye pressure center position (left and right)', 'Center position of the eye pressure (front and back)', 'Eye circumference area', 'Eye opening effective value area', 'Total eye-open path length', 'Eye opening unit area locus length', 'Eye opening average speed', 'Closed foot pressure center position (left and right)', 'Eye pressure center position (front and back)', 'Peripheral area with eyes closed', 'Eye closing effective value area', 'Total eye-closure length', 'Eye-closing unit area locus length', 'Eye closing average speed', 'Romberg rate (peripheral area)', 'Romberg rate (total track length)', 'Romberg rate (average speed)']


In [20]:
print(iteration)
print(best_accuracy)
print(best_precision)
print(removed_features)

1
0.692307692308
0.581967213115
[]
