# Packages

In [3]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pandas import ExcelWriter
from pandas import ExcelFile
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneGroupOut

# Functions

In [4]:
def read_feature_data(file_name):
    file_path = 'Feature/'+file_name
    data = pd.read_excel(file_path, sheet_name=0)
    data.to_csv(file_name + '.csv', encoding='utf-8')
    return data

In [5]:
def outer_cross_val(X, Y, groups, split_model, model, params):

    print("Outer Cross Validation")

    for train_index, test_index in split_model.split(X, Y, groups):
        
        X_train = X[train_index[0]]
        X_test = X[test_index[0]]
        Y_train = Y[train_index[0]]
        Y_test = Y[test_index[0]]
        
        for i in train_index[1:]:
            X_train.append(X[i], sort=False)
            Y_train.append(X[i], sort=False)
        
        for i in test_index[1:]:
            X_test.append(X[i])
            Y_test.append(X[i])
        
        cv = len(train_index)
        best_inner_model = nested_cross_val(X_train, Y_train, cv, LeaveOneGroupOut(), model, params)

        best_inner_model.fit(X_train,Y_train.values.ravel())

        train_score = best_inner_model.score(X_train,Y_train.values.ravel())
        test_score =best_inner_model.score(X_test,Y_test.values.ravel())

        
        print ("Training score:{} ".format(train_score) )
        print ("Test score :{}".format(test_score))

def nested_cross_val(X, Y, cv, split_model, model, params):
    print("\nInner Cross Validation")
    
    test_model = GridSearchCV(model, params, cv = cv ).fit(X, Y.values.ravel())
    
    print("{} = {} % with {}".format(str(test_model.best_estimator_).split('(')[0],
                                     round(test_model.best_score_,2)*100,
                                     test_model.best_params_))
    return test_model.best_estimator_

# Initailizers

In [6]:
s1_feature = read_feature_data('S1_feature.xlsx')
s2_feature = read_feature_data('S2_feature.xlsx')
s3_feature = read_feature_data('S3_feature.xlsx')
c1_feature = read_feature_data('C1_feature.xlsx')
c2_feature = read_feature_data('C2_feature.xlsx')
c3_feature = read_feature_data('C3_feature.xlsx')

In [7]:
param_range = [10.0**-x for x in range(-7,6)]
param_range

[10000000.0,
 1000000.0,
 100000.0,
 10000.0,
 1000.0,
 100.0,
 10.0,
 1.0,
 0.1,
 0.01,
 0.001,
 0.0001,
 1e-05]

In [8]:
param_odds = list(range(1, 10, 2))
param_odds

[1, 3, 5, 7, 9]

In [9]:
groups = [1,2,3,4,5,6]
split_model = LeaveOneGroupOut()

In [10]:
n_estimators = list(range(10,150,10))
n_estimators

[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140]

In [11]:
X = [s1_feature[['min_acc','max_acc']],s2_feature[['min_acc','max_acc']],s3_feature[['min_acc','max_acc']], \
     c1_feature[['min_acc','max_acc']],c2_feature[['min_acc','max_acc']],c3_feature[['min_acc','max_acc']]]
Y = [s1_feature[['fall_value']],s2_feature[['fall_value']],s3_feature[['fall_value']],\
     c1_feature[['fall_value']],c2_feature[['fall_value']],c3_feature[['fall_value']]]

# Nested SVM

In [12]:
model = svm.SVC(kernel="rbf")
params = {'C':param_range,'gamma':param_range}

In [13]:
outer_cross_val(X, Y, groups, split_model, model, params)

Outer Cross Validation

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.973384030418251

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.9651324965132496

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.9669902912621359

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.9544117647058824

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.9467849223946785

Inner Cross Validation
SVC = 97.0 % with {'C': 10000000.0, 'gamma': 1e-05}
Training score:1.0 
Test score :0.9564270152505446


# Nested Random Forest

In [59]:
model = RandomForestClassifier()
params = {'n_estimators':n_estimators}

In [60]:
outer_cross_val(X, Y, groups, split_model, model, params)

Outer Cross Validation

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 10}
Training score:0.99721059972106 
Test score :0.9860583016476553

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 60}
Training score:1.0 
Test score :0.9888423988842399

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 80}
Training score:1.0 
Test score :0.9728155339805825

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 60}
Training score:1.0 
Test score :0.9764705882352941

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 60}
Training score:1.0 
Test score :0.9667405764966741

Inner Cross Validation
RandomForestClassifier = 99.0 % with {'n_estimators': 110}
Training score:1.0 
Test score :0.9651416122004357


# Nested KNN 

In [51]:
model = KNeighborsClassifier()
params = {'n_neighbors':param_odds}

In [56]:
outer_cross_val(X, Y, groups, split_model, model, params)

Outer Cross Validation

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 9}
Training score:0.9916317991631799 
Test score :0.9885931558935361

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 5}
Training score:0.9898605830164765 
Test score :0.9888423988842399

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 5}
Training score:0.9898605830164765 
Test score :0.9825242718446602

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 5}
Training score:0.9898605830164765 
Test score :0.9735294117647059

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 5}
Training score:0.9898605830164765 
Test score :0.9733924611973392

Inner Cross Validation
KNeighborsClassifier = 99.0 % with {'n_neighbors': 5}
Training score:0.9898605830164765 
Test score :0.971677559912854


# Nested LASSO

In [57]:
    model = LogisticRegression(solver='lbfgs',penalty='l2')
    params = {'C':param_range}

In [58]:
outer_cross_val(X, Y, groups, split_model, model, params)

Outer Cross Validation

Inner Cross Validation
LogisticRegression = 99.0 % with {'C': 100000.0}
Training score:0.99302649930265 
Test score :0.9873257287705957

Inner Cross Validation
LogisticRegression = 98.0 % with {'C': 100000.0}
Training score:0.9873257287705957 
Test score :0.9888423988842399

Inner Cross Validation
LogisticRegression = 98.0 % with {'C': 100000.0}
Training score:0.9873257287705957 
Test score :0.9805825242718447

Inner Cross Validation
LogisticRegression = 98.0 % with {'C': 100000.0}
Training score:0.9873257287705957 
Test score :0.9735294117647059

Inner Cross Validation
LogisticRegression = 98.0 % with {'C': 100000.0}
Training score:0.9873257287705957 
Test score :0.975609756097561

Inner Cross Validation
LogisticRegression = 98.0 % with {'C': 100000.0}
Training score:0.9873257287705957 
Test score :0.9586056644880174
