In [4]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score,f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV

In [5]:
def Data(dframe):
    """Read a CSV file and split it into training and test partitions.

    Parameters
    ----------
    dframe : value handed straight to ``pd.read_csv`` (e.g. a file path).

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``. The features are every column
        except the last one (as a NumPy array); the target is the last column
        (as a pandas Series). 20% of the rows are held out, using a fixed
        ``random_state`` of 42.
    """
    table = pd.read_csv(dframe)

    # The last column is the target; everything before it is a feature.
    target_pos = table.shape[1] - 1
    features = table.iloc[:, :-1].values
    target = table.iloc[:, target_pos]

    train_X, test_X, train_y, test_y = train_test_split(
        features, target, test_size=.2, random_state=42
    )
    return train_X, test_X, train_y, test_y

In [6]:
def GridSearch(est,param,x_train,y_train):
    """Tune ``est`` over ``param`` with stratified 10-fold cross-validation.

    Parameters
    ----------
    est : estimator instance passed to ``GridSearchCV``.
    param : dict or list of dicts describing the parameter grid.
    x_train, y_train : training features and labels used for the search.

    Returns
    -------
    tuple
        ``(best_score, best_params)`` taken from the fitted ``GridSearchCV``:
        the best mean cross-validated score and the parameter dict that
        produced it.
    """
    # ``random_state`` only takes effect together with ``shuffle=True``;
    # current scikit-learn raises ValueError if it is set without shuffling.
    skf=StratifiedKFold(n_splits=10,shuffle=True,random_state=42)
    gs_clf=GridSearchCV(estimator=est,param_grid=param,cv=skf,n_jobs=-1)
    gs_clf.fit(x_train,y_train)
    # The fitted attribute is ``best_params_`` (plural).
    return gs_clf.best_score_,gs_clf.best_params_

In [7]:
def Score(y_test,y_pred):
    """Summarise classification quality for a set of predictions.

    Parameters
    ----------
    y_test : true labels.
    y_pred : predicted labels.

    Returns
    -------
    tuple
        ``(acc, prec_mean, rec_mean, f1, cm_new)`` where

        * ``acc`` is the accuracy in percent, rounded to 2 decimals,
        * ``prec_mean`` / ``rec_mean`` are the unweighted means of the
          per-class precision / recall (percent, 2 decimals),
        * ``f1`` is the macro-averaged F1 score (fraction, not rounded),
        * ``cm_new`` is a string array holding the confusion matrix with a
          header row, a label column, a trailing ``precision`` row and a
          trailing ``recall`` column; the bottom-right cell is the accuracy.
    """
    # NOTE(review): these names are attached to the sorted class labels in
    # this order -- confirm it matches the label encoding of the dataset.
    activities=['Walking','Upstairs','Downstairs','Standing','Sitting','Laying']

    cm=confusion_matrix(y_test,y_pred)
    ac=accuracy_score(y_test,y_pred)
    # The default ``average='binary'`` raises on multiclass targets; 'macro'
    # matches the unweighted per-class means computed below.
    f1=f1_score(y_test,y_pred,average='macro')

    correct=np.diag(cm).astype(float)*100
    predicted_tot=cm.sum(axis=0).astype(float)
    actual_tot=cm.sum(axis=1).astype(float)
    # A class that was never predicted (or never present) has a zero total;
    # report 0.0 for it instead of dividing by zero and producing NaN.
    prec=np.around(np.divide(correct,predicted_tot,out=np.zeros_like(correct),
                             where=predicted_tot>0),decimals=2)
    rec=np.around(np.divide(correct,actual_tot,out=np.zeros_like(correct),
                            where=actual_tot>0),decimals=2)
    acc=np.around(ac*100,decimals=2)

    # Append the precision row, then the recall column (plus accuracy in the corner).
    cm_new=np.vstack((cm,prec))
    cm_new=np.hstack((cm_new,np.append(rec,acc).reshape(len(cm_new),1)))

    n_cls=cm.shape[0]
    if n_cls==len(activities):
        names=activities
    else:
        # Fall back to the observed label values when the class count
        # differs from the six named activities.
        seen=np.unique(np.concatenate((np.ravel(y_test),np.ravel(y_pred))))
        names=[str(v) for v in seen]

    act_col=np.array([' ']+names+['recall']).reshape(1,n_cls+2)
    act_index=np.array(names+['precision']).reshape(n_cls+1,1)
    # hstack/vstack take a single sequence of arrays, not separate arguments.
    cm_new=np.hstack((act_index,cm_new))
    cm_new=np.vstack((act_col,cm_new))

    prec_mean=np.around(np.mean(prec),decimals=2)
    rec_mean=np.around(np.mean(rec),decimals=2)
    return acc,prec_mean,rec_mean,f1,cm_new
    

In [8]:
def svc(x_train,x_test,y_train,y_test):
    """Grid-search an SVC, refit it with the best parameters and print its scores.

    Parameters
    ----------
    x_train, y_train : data used for the grid search and the final fit.
    x_test, y_test : held-out data used for the printed evaluation.

    Returns
    -------
    None. The best CV score/parameters and the metrics from ``Score`` are
    printed.
    """
    svc_param=svc_params()
    svc_clf=SVC()
    svc_best_score,svc_best_param=GridSearch(svc_clf,svc_param,x_train,y_train)
    print('svc_best_score',svc_best_score)
    print('svc_best_param',svc_best_param)

    # The linear-kernel grid carries no 'gamma' entry, so indexing
    # svc_best_param['gamma'] would raise KeyError when a linear kernel wins.
    # Unpacking passes exactly the parameters the search selected.
    svc_new=SVC(degree=3,**svc_best_param)
    svc_new.fit(x_train,y_train)
    # Predict with the refitted model, not the unfitted search template.
    y_pred=svc_new.predict(x_test)

    svc_ac,svc_prec,svc_rec,svc_f1,svc_cm_new=Score(y_test,y_pred)
    print('svc_acuracy',svc_ac)
    print('svc_precision',svc_prec)
    print('svc_recall',svc_rec)
    print('svc_f1_score',svc_f1)
    print('svc_matrix',svc_cm_new)
    
def svc_params():
    """Return the SVC parameter grid: one linear-kernel grid and one RBF grid.

    Both grids share the same list of ``C`` values; only the RBF grid also
    varies ``gamma``.
    """
    penalties = [0.1, 0.316, 1, 3.16, 10, 31.6, 100, 316, 1000, 3160, 10000]
    widths = [
        1.0000e-08, 6.3096e-08, 3.9811e-07, 2.5119e-06, 1.5849e-05, 0.0001,
        0.00063096, 0.0039811, 0.025119, 0.15849, 1.0000,
    ]

    linear_grid = dict(C=penalties, kernel=['linear'])
    rbf_grid = dict(C=penalties, kernel=['rbf'], gamma=widths)
    return [linear_grid, rbf_grid]
    

In [9]:
def rfc(x_train,x_test,y_train,y_test):
    """Grid-search a random forest, refit it with the best parameters and print its scores.

    Parameters
    ----------
    x_train, y_train : data used for the grid search and the final fit.
    x_test, y_test : held-out data used for the printed evaluation.

    Returns
    -------
    None. The best CV score/parameters, the indices of the ten most
    important features and the metrics from ``Score`` are printed.
    """
    rfc_param=rfc_params()
    # Fixed seed so repeated runs give the same forest.
    rfc_clf=RandomForestClassifier(random_state=42)
    rfc_best_score,rfc_best_param=GridSearch(rfc_clf,rfc_param,x_train,y_train)

    print('rfc_best_score', rfc_best_score)
    print('rfc_best_param',rfc_best_param)

    rfc_new=RandomForestClassifier(n_estimators=rfc_best_param['n_estimators'],
                                   criterion=rfc_best_param['criterion'],
                                   max_depth=rfc_best_param['max_depth'], min_samples_split=2,
                                   min_samples_leaf=rfc_best_param['min_samples_leaf'],
                                   min_weight_fraction_leaf=0.0,
                                   max_features=rfc_best_param['max_features'],
                                   random_state=42)
    rfc_new.fit(x_train,y_train)
    # argsort is ascending, so walk backwards from the end for the top ten.
    feature_imp=np.argsort(rfc_new.feature_importances_)
    print('rfc_10most_imp_features', feature_imp[-1:-11:-1])
    y_pred=rfc_new.predict(x_test)

    acc,prec_mean,rec_mean,f1,cm_new=Score(y_test,y_pred)
    print('rfc_acuracy',acc)
    print('rfc_precision',prec_mean)
    print('rfc_recall',rec_mean)
    print('rfc_f1_score',f1)
    print('rfc_matrix',cm_new)
    
def rfc_params():
    """Return the parameter grid searched for the random forest classifier."""
    return {
        'criterion': ['gini', 'entropy'],
        'n_estimators': [100, 500, 1000],
        'max_depth': [None, 6, 8],
        'max_features': [9, 24, 561],
        'min_samples_leaf': [1, 4, 6],
    }
    

In [10]:
def abc(x_train,x_test,y_train,y_test):
    """Grid-search an AdaBoost classifier, refit it with the best parameters and print its scores.

    Parameters
    ----------
    x_train, y_train : data used for the grid search and the final fit.
    x_test, y_test : held-out data used for the printed evaluation.

    Returns
    -------
    None. The best CV score/parameters, the indices of the ten most
    important features and the metrics from ``Score`` are printed.
    """
    abc_param=abc_params()
    # Search with the same depth-2 base learner the final model uses, so the
    # tuned n_estimators / learning_rate apply to the model actually fitted.
    # The base learner is passed positionally because its keyword name
    # differs between scikit-learn releases.
    abc_clf=AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),random_state=42)
    abc_best_score,abc_best_params=GridSearch(abc_clf,abc_param,x_train,y_train)

    print('abc_best_score', abc_best_score)
    print('abc_best_param',abc_best_params)

    abc_new=AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                               n_estimators=abc_best_params['n_estimators'],
                               learning_rate=abc_best_params['learning_rate'],
                               random_state=42)

    abc_new.fit(x_train,y_train)
    # argsort is ascending, so walk backwards from the end for the top ten.
    feature_imp=np.argsort(abc_new.feature_importances_)
    print('abc_10most_imp_features', feature_imp[-1:-11:-1])
    y_pred=abc_new.predict(x_test)

    acc,prec_mean,rec_mean,f1,cm_new=Score(y_test,y_pred)
    print('abc_acuracy',acc)
    print('abc_precision',prec_mean)
    print('abc_recall',rec_mean)
    print('abc_f1_score',f1)
    print('abc_matrix',cm_new)
    
def abc_params():
    """Return the parameter grid searched for the AdaBoost classifier."""
    return {
        'n_estimators': [5, 10, 25, 50, 100, 500, 1000],
        'learning_rate': [0.1, 0.3, 0.5, 0.7, 0.9, 1],
    }
    

In [None]:
# Driver: load and split the dataset, then run the SVC experiment.
if __name__=='__main__':
    # Relative path to the input CSV.
    file='../Data/Dframe.csv'
    x_train,x_test,y_train,y_test=Data(file)
    
    # Only the SVC pipeline is invoked here; rfc() and abc() are not called.
    svc(x_train,x_test,y_train,y_test)
