In [1]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pandas import ExcelWriter
from pandas import ExcelFile
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Lasso
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression

In [2]:
def read_feature_data(file_name):
    file_path = 'Feature/'+file_name
    data = pd.read_excel(file_path, sheet_name=0)
    data.to_csv(file_name + '.csv', encoding='utf-8')
    return data

In [3]:
s1_feature = read_feature_data('S1_feature.xlsx')
s2_feature = read_feature_data('S2_feature.xlsx')
c1_feature = read_feature_data('C1_feature.xlsx')
c2_feature = read_feature_data('C2_feature.xlsx')
c3_feature = read_feature_data('C3_feature.xlsx')

In [4]:
feature = s1_feature.append([s1_feature,c1_feature,c2_feature,c3_feature])

In [5]:
feature = shuffle(feature)
c3_feature = shuffle(c3_feature)

In [7]:
x_train = feature[['min_acc','max_acc']]
y_train = feature[['fall_value']]
x_test = c3_feature[['min_acc','max_acc']]
y_test = c3_feature[['fall_value']]

In [8]:
param_range = [10.0**-x for x in range(-5,6)]
param_range

[100000.0, 10000.0, 1000.0, 100.0, 10.0, 1.0, 0.1, 0.01, 0.001, 0.0001, 1e-05]

In [9]:
param_odds = list(range(1, 10, 2))
param_odds

[1, 3, 5, 7, 9]

In [10]:
n_estimators = list(range(10,150,10))
n_estimators

[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140]

In [11]:
grid = [ 
         GridSearchCV(LogisticRegression(solver='lbfgs',penalty='l2'),{'C':param_range},cv=5).fit(x_train,y_train.values.ravel()),
         GridSearchCV(svm.SVC(kernel="rbf"), {'C':param_range,'gamma':param_range},cv=5).fit(x_train, y_train.values.ravel()),
         GridSearchCV(KNeighborsClassifier(), {'n_neighbors':param_odds},cv=5).fit(x_train,y_train.values.ravel()),
         GridSearchCV(RandomForestClassifier(), {'n_estimators':n_estimators},cv=5).fit(x_train,y_train.values.ravel())]

for model in enumerate(grid):
    print("{} = {} % with {}".format(str(model[1].best_estimator_).split('(')[0],
                                     round(model[1].best_score_,2)*100,
                                     model[1].best_params_))


LogisticRegression = 97.0 % with {'C': 100000.0}
SVC = 98.0 % with {'C': 100.0, 'gamma': 1e-05}
KNeighborsClassifier = 98.0 % with {'n_neighbors': 7}
RandomForestClassifier = 98.0 % with {'n_estimators': 20}


In [137]:
lasso = LogisticRegression(C=0.001, solver='lbfgs',penalty= 'l2') 
lasso.fit(x_train,y_train.values.ravel())

LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [138]:
train_score_lasso=lasso.score(x_train,y_train)
test_score_lasso=lasso.score(x_test,y_test)

print ("training score for alpha=0.0001:", train_score_lasso )
print ("test score for alpha =0.0001: ", test_score_lasso)

training score for alpha=0.0001: 0.9722222222222222
test score for alpha =0.0001:  0.9477124183006536


In [114]:
svmc = svm.SVC(C=10,kernel="rbf",gamma = 'scale')
svmc.fit(x_train,y_train.values.ravel())

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [115]:
train_score_lasso=svmc.score(x_train,y_train)
test_score_lasso=svmc.score(x_test,y_test)

print ("training score for alpha=0.0001:", train_score_lasso )
print ("test score for alpha =0.0001: ", test_score_lasso)

training score for alpha=0.0001: 0.9993686868686869
test score for alpha =0.0001:  0.9956427015250545


In [108]:
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train,y_train.values.ravel())

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [109]:
train_score_knn=knn.score(x_train,y_train)
test_score_knn=knn.score(x_test,y_test)

print ("training score for alpha=0.0001:", train_score_knn )
print ("test score for alpha =0.0001: ", test_score_knn)

training score for alpha=0.0001: 1.0
test score for alpha =0.0001:  1.0


In [154]:

clf = RandomForestClassifier(n_estimators=10, max_depth=2,random_state=0)
clf.fit(x_train,y_train.values.ravel())

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [155]:
train_score_lasso=clf.score(x_train,y_train)
test_score_lasso=clf.score(x_test,y_test)

print ("training score for alpha=0.0001:", train_score_lasso )
print ("test score for alpha =0.0001: ", test_score_lasso)

training score for alpha=0.0001: 0.9830195644149132
test score for alpha =0.0001:  0.9651416122004357
