In [None]:
import pandas as pd
import numpy
import numpy as np
from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import svm
from sklearn.model_selection import LeavePOut
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV


# Classification Training

- Data format: The feature matrix x has the general shape (m*n), where m is the number of samples and n is the number of features. Here the features are the mean values of each FSR and the x and y values of the accelerometer. The labels are given by the vector y1. Details about the data can be found in Section 3.2.

- Hyperparameter Tuning: details of all possible choices can be found in Section 4.1

- Class "Classifier_hypertuning" can be used for hyperparameter tuning of the random forest and support vector machine algorithms. 
- Class "Classifier_training" can be used for training the algorithms. 


# Parameter choice for the Classifier_hypertuning class

- min_samples_split: type required is a python list of integers. 
- min_samples_leaf : type required is a python list of integers.
- bootstrap: a boolean list: [True, False]
- start_tree, stop_tree, num_tree, min_depth, max_depth, num_depth, n_iter, cv: integer
- norm_condition: should be set to either 'False' or 'True' (as a string; the class compares it against the string 'True').
- list_C: type required is a python list of numbers (integers or floats).
- list_gamma: type required is a python list of floats.

# Explanation of the Functions in the class

- RF_hyper_parameter_setup(self,x): used to set up all the possible hyperparameter values for the tuning
- RF_hyper_parameter_tuning(self,x,y): used to find the optimized hyperparameter values for RF.
- SVM_hyperparameter_tuning(self,x,y): used to find the optimized hyperparameter values for SVC.
- LOOV(self,x,y): used to build the training and test data sets based on leave-one-out validation.

In [None]:
# These values are *ONLY* an example; adapt them to your own data set.

# --- Random forest search space ---
# Candidate values for the minimum number of samples needed to split a node
min_samples_split = [2, 5, 7, 10]
# Candidate values for the minimum number of samples required at a leaf node
min_samples_leaf = [1, 2, 4]
# Whether each tree is trained on a bootstrap sample or not
bootstrap = [True, False]
# Number of trees: num_tree values between start_tree and stop_tree
start_tree, stop_tree, num_tree = 5, 100, 10
# Tree depth: num_depth values between min_depth and max_depth
min_depth, max_depth, num_depth = 2, 10, 2

# --- Search settings ---
# Number of sampled parameter combinations and number of cross-validation folds
n_iter, cv = 500, 5
# Kept as a string: the tuning class compares it against the string 'True'
norm_condition = 'False'

# --- Support vector classifier search space ---
list_C = [0.1, 1, 10, 100, 1000]
list_gamma = [1, 0.1, 0.01, 0.001, 0.0001]


In [None]:
class Classifier_hypertuning:
    """Hyperparameter search for RF / SVC plus leave-one-out data splitting.

    Parameters
    ----------
    min_samples_split, min_samples_leaf : list of int
        Candidate values for the random forest parameters of the same name.
    bootstrap : list of bool
        Candidate values for the random forest ``bootstrap`` parameter.
    start_tree, stop_tree, num_tree : int
        ``num_tree`` candidate tree counts are taken evenly between
        ``start_tree`` and ``stop_tree`` (truncated to integers).
    min_depth, max_depth, num_depth : int
        ``num_depth`` candidate depths are taken evenly between ``min_depth``
        and ``max_depth``; ``None`` (no depth limit) is always added.
    n_iter : int
        Number of parameter settings sampled by the randomized RF search.
    cv : int
        Cross-validation setting forwarded to both searches.
    norm_condition : str or bool
        ``'True'`` (or the boolean ``True``) standardizes the features with
        ``StandardScaler`` before tuning / splitting; anything else leaves
        them untouched.
    list_C, list_gamma : list of numbers
        Candidate ``C`` and ``gamma`` values for the RBF support vector
        classifier.
    """

    def __init__(self,min_samples_split,min_samples_leaf,bootstrap,start_tree,stop_tree,num_tree,min_depth,max_depth,num_depth,n_iter,cv,norm_condition, list_C, list_gamma):
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.bootstrap = bootstrap
        self.start_tree = start_tree
        self.stop_tree = stop_tree
        self.num_tree = num_tree
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.num_depth = num_depth
        self.n_iter = n_iter
        self.cv = cv
        self.norm_condition = norm_condition  # 'True' / 'False' (bool True also accepted)
        self.list_C = list_C
        self.list_gamma = list_gamma

    def _normalize_if_requested(self, x):
        """Return ``x`` standardized with StandardScaler if normalization is on."""
        if self.norm_condition is True or self.norm_condition == 'True':
            return StandardScaler().fit_transform(x)
        return x

    def RF_hyper_parameter_setup(self, x):
        """Build the random-forest search grid from the instance settings.

        ``x`` is accepted for interface compatibility but is not used.

        Returns
        -------
        dict
            Mapping of RandomForestClassifier parameter names to the list of
            candidate values to sample from.
        """
        # Number of trees in the random forest
        n_estimators = [
            int(n) for n in np.linspace(self.start_tree, self.stop_tree, self.num_tree)
        ]
        # Number of features to consider at every split. 'sqrt' replaces the
        # former 'auto' option, which meant sqrt for classifiers and is no
        # longer accepted by recent scikit-learn releases.
        max_features = ['sqrt', 'log2', None]
        # Maximum number of levels in a tree; None means unlimited depth
        depth_choices = [
            int(d) for d in np.linspace(self.min_depth, self.max_depth, self.num_depth)
        ]
        depth_choices.append(None)

        # Use the values given to the constructor, not notebook-level globals.
        return {
            'n_estimators': n_estimators,
            'max_features': max_features,
            'min_samples_split': self.min_samples_split,
            'min_samples_leaf': self.min_samples_leaf,
            'max_depth': depth_choices,
            'bootstrap': self.bootstrap,
        }

    def RF_hyper_parameter_tuning(self, x, y):
        """Run a randomized search over the RF grid and return the best parameters.

        Returns the ``best_params_`` dict of the fitted RandomizedSearchCV.
        """
        x = self._normalize_if_requested(x)

        rf_random = RandomizedSearchCV(
            estimator=RandomForestClassifier(),
            param_distributions=self.RF_hyper_parameter_setup(x),
            n_iter=self.n_iter,
            cv=self.cv,
            verbose=2,
            random_state=42,
            n_jobs=-1,
        )
        rf_random.fit(x, y)
        return rf_random.best_params_

    def SVM_hyperparameter_tuning(self, x, y):
        """Run a grid search over C and gamma for an RBF SVC.

        Returns the ``best_params_`` dict of the fitted GridSearchCV.
        """
        x = self._normalize_if_requested(x)

        param_grid_svm = {
            'C': self.list_C,
            'gamma': self.list_gamma,
            'kernel': ['rbf'],
        }

        # cv is forwarded so both searches honour the same setting.
        clf_random = GridSearchCV(
            SVC(), param_grid_svm, refit=True, verbose=2, cv=self.cv
        )
        clf_random.fit(x, y)
        return clf_random.best_params_

    def LOOV(self, x, y):
        """Split ``x`` and ``y`` into leave-one-out training / test folds.

        ``x`` and ``y`` are indexed with integer index arrays, so they are
        expected to support that (e.g. numpy arrays) and to have equal length.

        Returns
        -------
        tuple of four lists
            ``(train_, test_, Y_train, Y_test)`` with one entry per fold.
        """
        x = self._normalize_if_requested(x)

        train_, test_, Y_train, Y_test = [], [], [], []
        # One pass over the splitter keeps features and labels aligned.
        for train_idx, test_idx in LeavePOut(p=1).split(x):
            train_.append(x[train_idx])
            test_.append(x[test_idx])
            Y_train.append(y[train_idx])
            Y_test.append(y[test_idx])

        return train_, test_, Y_train, Y_test

    
    

    

# Classifier_training

- best_param: set of optimal hyperparameters; this can be obtained using the Classifier_hypertuning class.
- model_name: should be chosen from 'rf', 'svc' or 'gdb'.
- MC_iteration: number of Monte Carlo iterations (integer) used to propagate noise on the input values.

In [None]:
class Classifier_training:
    """Train a classifier fold by fold and collect its test predictions.

    Parameters
    ----------
    best_param : dict
        Keyword arguments for the underlying estimator. Used for ``'rf'``
        (RandomForestClassifier) and ``'svc'`` (SVC); the ``'gdb'`` model uses
        fixed settings and ignores it.
    model_name : str
        One of ``'rf'``, ``'svc'`` or ``'gdb'``.
    MC_iteration : int
        Number of noisy refits per fold in ``train_model_added_noise``.
    """

    def __init__(self, best_param, model_name, MC_iteration):
        self.best_param = best_param
        self.model_name = model_name
        self.MC_iteration = MC_iteration

    def _build_model(self):
        """Create the estimator selected by ``model_name``."""
        if self.model_name == 'rf':
            return RandomForestClassifier(**self.best_param)
        if self.model_name == 'svc':
            return OutputCodeClassifier(svm.SVC(**self.best_param), code_size=20)
        if self.model_name == 'gdb':
            return OneVsRestClassifier(
                GradientBoostingClassifier(max_depth=5, min_samples_split=5)
            )
        # Fail early with a clear message instead of an unbound-variable error.
        raise ValueError(
            "model_name must be 'rf', 'svc' or 'gdb', got %r" % (self.model_name,)
        )

    def train_model(self, train_, test_, Y_train, Y_test):
        """Fit on every training fold and predict the matching test fold.

        ``Y_test`` is accepted for interface symmetry but is not used.

        Returns
        -------
        list
            One prediction array per fold, in fold order.

        Raises
        ------
        ValueError
            If ``model_name`` is not one of the supported names.
        """
        model = self._build_model()

        pred_model = []
        for fold_x, fold_y, fold_test in zip(train_, Y_train, test_):
            model.fit(fold_x, fold_y)
            pred_model.append(model.predict(fold_test))

        return pred_model

    def train_model_added_noise(self, train_, test_, Y_train, Y_test, noise_std=0.01):
        """Like ``train_model`` but refit ``MC_iteration`` times on noisy data.

        For every fold, zero-mean Gaussian noise with standard deviation
        ``noise_std`` is drawn independently for each training value and added
        before fitting; the test fold is predicted after each refit.
        ``Y_test`` is accepted for interface symmetry but is not used.

        Returns
        -------
        list
            ``len(train_) * MC_iteration`` prediction arrays, grouped by fold.

        Raises
        ------
        ValueError
            If ``model_name`` is not one of the supported names.
        """
        model = self._build_model()

        pred_model_noise = []
        for fold_x, fold_y, fold_test in zip(train_, Y_train, test_):
            clean = np.asarray(fold_x, dtype=float)
            for _ in range(self.MC_iteration):
                # loc=0, scale=noise_std, one draw per element. The previous
                # call normal(.01, 0) had scale 0, i.e. a constant 0.01 shift.
                noisy = clean + np.random.normal(0.0, noise_std, size=clean.shape)
                model.fit(noisy, fold_y)
                pred_model_noise.append(model.predict(fold_test))

        return pred_model_noise
        

        


# Example of Training

The following cell is an example of training for the support vector classifier. First, "Classifier_hypertuning" is instantiated. Then the "best" parameters are calculated with SVM_hyperparameter_tuning(x, y1). Calling LOOV(x, y1) on the same object generates the training and test data sets. Finally, the classifier is trained by calling the Classifier_training class.

In [None]:
# x (features) and y1 (labels) are not defined in this cell and must already exist.
model_tune = Classifier_hypertuning(min_samples_split,min_samples_leaf,bootstrap,start_tree,stop_tree,num_tree,min_depth,max_depth,num_depth,n_iter,cv,norm_condition, list_C, list_gamma)
# The tuner object is model_tune; the previous call used an undefined name `model`.
best_parameter_svm = model_tune.SVM_hyperparameter_tuning(x,y1)
# Build the leave-one-out training / test folds
train_, test_ ,Y_train, Y_test = model_tune.LOOV(x,y1)
# NOTE: model_tune is rebound here from the tuner to the trainer object.
model_tune = Classifier_training(best_parameter_svm, 'svc', 500)
result = model_tune.train_model(train_, test_ ,Y_train, Y_test)

# Evaluation 


In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Accuracy of the predictions against the labels y1, rounded to two decimals
accuracy = accuracy_score(y1, result)
print("The accuracy of classifier is:", np.round(accuracy, 2))

# Classification report using the seven display names for the classes
target_names = ['N', 'FC', 'FW', 'RC','RW','LC','LW']
report = classification_report(y1, result, target_names=target_names)
print(report)