In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics as mt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.ensemble import RandomForestClassifier
from boruta import BorutaPy
from sklearn.metrics import roc_auc_score


In [2]:
# Inclusive [start, end] bounds on the 'Distance' column, one pair per corner (corners 1..6).
curveList = [[103.9, 209.3], [316.6, 399.6], [425.3, 517.9], [590.5, 756.9], [1048.7, 1110.5], [1212.3, 1437.1]]


def generate_data(label, level=0):
    """Cut every processed lap file of ``label`` into corners and write one CSV per corner.

    Reads ``beginner_expert_processedData/<label>/<label>_<i>_new2.csv`` for
    i = 1..19, drops rows containing NaN, casts everything to float and adds a
    ``level`` column. For each entry of ``curveList`` the rows whose
    ``Distance`` lies inside the (inclusive) range are gathered from all laps
    and written to ``cornerData/corner1_<corner>_<label>.csv``.

    Parameters
    ----------
    label : str
        Name of the sub-directory and file prefix of the laps to process.
    level : int, optional
        Value stored in the ``level`` column of every row. Defaults to 0, the
        value that used to be hard-coded for every label; pass a different
        value for the other class so the two classes can be told apart.

    Returns
    -------
    None
        The result is only written to disk. The ``cornerData`` directory must
        already exist.
    """
    in_prefix = 'beginner_expert_processedData/'+label+'/'+label+'_'
    num_label = 19

    # A lap file does not depend on the corner, so read and clean each one once
    # instead of once per corner.
    laps = []
    for idx in range(1, num_label+1):
        lap = pd.read_csv(in_prefix+str(idx)+'_new2'+'.csv').dropna().astype(float)
        lap['level'] = level
        laps.append(lap)

    for curve_num, (start, end) in enumerate(curveList):
        # Collect the slices first and concatenate once; the original row
        # index of every lap is kept, exactly as before.
        parts = [lap[(lap['Distance'] >= start) & (lap['Distance'] <= end)] for lap in laps]
        corner_df = pd.concat(parts)
        # NOTE(review): this writes 'corner1_<n>_...' while load_data() reads
        # 'corner_<n>_...'; confirm which prefix is intended.
        corner_df.to_csv('cornerData/corner1_'+str(curve_num+1)+'_'+label+'.csv')


In [3]:
def load_data(left_column,corner_number):
    """Load beginner and expert corner CSVs and split them into features and target.

    Parameters
    ----------
    left_column : list of str
        Columns to keep. Must contain ``'level'``, which is removed from the
        features and returned as the target.
    corner_number : str or int
        ``'all'`` loads corners 1 to 6 and stacks them; any other value is
        treated as a single corner id and converted with ``str()``, so both
        ``'2'`` and ``2`` work.

    Returns
    -------
    X : pandas.DataFrame
        The selected columns without ``'level'``.
    y : pandas.Series
        The ``'level'`` column.

    Notes
    -----
    Files are read from ``cornerData/corner_<id>_begin.csv`` and
    ``cornerData/corner_<id>_expert.csv``. For each corner the beginner rows
    come before the expert rows. The row index of each file is kept, so index
    labels repeat across files.
    """
    if corner_number == 'all':
        corner_ids = range(1, 7)
    else:
        corner_ids = [corner_number]

    # Gather every frame first and concatenate once, rather than growing a
    # DataFrame inside the loop.
    frames = []
    for corner_id in corner_ids:
        prefix = 'cornerData/corner_'+str(corner_id)
        frames.append(pd.read_csv(prefix+'_begin.csv'))
        frames.append(pd.read_csv(prefix+'_expert.csv'))
    combined = pd.concat(frames)

    # Work on an explicit copy so that pop() never touches the loaded data.
    selected = combined.loc[:, left_column].copy()
    y = selected.pop('level')
    X = selected
    return X,y

In [4]:
def processing_data(X,y, test_size=0.2, random_state=None):
    """Split the data into stratified train/test parts and standardise the features.

    Parameters
    ----------
    X : array-like
        Feature matrix; converted to a NumPy array.
    y : array-like
        Target labels; converted to a NumPy array and used for stratification.
    test_size : float, optional
        Share of the samples put into the test split (default 0.2).
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        previous unseeded behaviour; pass an integer to make the split, and
        everything computed from it, reproducible.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The feature arrays are scaled with a ``StandardScaler`` fitted on the
        training part only; the targets are returned unchanged.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    # Fit the scaler on the training part only so that no statistics of the
    # test part leak into training.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    return X_train, X_test, y_train, y_test

In [5]:
def evaluate_model(X_train,X_test,y_train,y_test,kernel='rbf', C=1, gamma=0.01):
    """Fit an SVC, print its scores and return the test labels with the predictions.

    Parameters
    ----------
    X_train, y_train : array-like
        Data the classifier is fitted on.
    X_test, y_test : array-like
        Held-out data used for the test score and the returned predictions.
    kernel, C, gamma
        Passed straight to ``sklearn.svm.SVC``.

    Returns
    -------
    y_test, y_pred
        ``y_test`` unchanged and the labels predicted for ``X_test``.

    Prints, in this order: the score on the training data, the score on the
    test data, and the 5-fold cross-validation scores.
    """
    svm_model = SVC(kernel=kernel, C=C, gamma=gamma)
    svm_model.fit(X_train, y_train)
    y_pred = svm_model.predict(X_test)
    print(svm_model.score(X_train,y_train))
    print(svm_model.score(X_test,y_test))

    # Cross-validate on the training split. Doing this on the test split (as
    # before) re-trains the model on held-out data and uses only a fifth of the
    # samples, so the printed scores say little about the model fitted above.
    scoreSet = cross_val_score(svm_model, X_train, y_train, cv=5)
    print("\n\rUsing model_selection library    ")
    print("score set : ",end="")
    print(scoreSet)
    return y_test,y_pred

In [6]:
def confusion_matrix(y_test,y_pred):
    """Print the standard classification metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels.

    Prints accuracy, the confusion matrix, precision, recall (rounded to three
    decimals), the F1 score and the full classification report. Nothing is
    returned. Precision, recall and F1 are called with their default
    arguments; presumably the labels are binary (verify against the caller).
    """
    truth, guess = y_test, y_pred

    # Every metric is computed, in this order, before anything is printed.
    accuracy = mt.accuracy_score(truth, guess)        # accuracy
    matrix = mt.confusion_matrix(truth, guess)        # confusion matrix
    precision = mt.precision_score(truth, guess)      # precision
    recall = mt.recall_score(truth, guess)            # recall
    f1score = mt.f1_score(truth, guess)               # F1 score
    scores = mt.classification_report(truth, guess)   # full report

    # One tuple per print call; the pieces are passed as separate arguments.
    report_lines = [
        ("정확도: ", accuracy, "\n"),
        ("오차행렬\n", matrix, "\n"),
        ("정밀도: ", precision, "\n"),
        ("재현율: ", round(recall, 3), "\n"),
        ("F1스코어: ", f1score, "\n"),
        ("결과\n", scores),
    ]
    for parts in report_lines:
        print(*parts)

In [7]:
def draw_ROC(y_test,y_pred, title='ROC curve for beginner vs. expert classifier'):
    """Plot the ROC curve for the given labels and scores and print the ROC AUC.

    Parameters
    ----------
    y_test : array-like
        True binary labels.
    y_pred : array-like
        Scores or predicted labels for the same samples. With hard 0/1
        predictions the curve has a single interior point, so continuous
        scores give a more informative curve.
    title : str, optional
        Figure title. The previous fixed title referred to a "Pulsar Star"
        classifier, which does not match the data used in this notebook.

    Returns
    -------
    None
        The figure is shown and the AUC is printed with four decimals.
    """
    fpr, tpr, _ = roc_curve(y_test, y_pred)

    # Scope the font size to this figure. Assigning plt.rcParams directly
    # leaked the setting into every later plot, and because it happened after
    # the figure had been created the first figure could be drawn partly with
    # the previous setting.
    with plt.rc_context({'font.size': 12}):
        plt.figure(figsize=(6,4))
        plt.plot(fpr, tpr, linewidth=2)
        plt.plot([0,1], [0,1], 'k--' )  # chance-level diagonal
        plt.title(title)
        plt.xlabel('False Positive Rate (1 - Specificity)')
        plt.ylabel('True Positive Rate (Sensitivity)')
        plt.show()

    # compute ROC AUC
    ROC_AUC = roc_auc_score(y_test, y_pred)
    print('ROC AUC : {:.4f}'.format(ROC_AUC))

In [8]:
def grid_searching(X_train,y_train):
    """Run a 5-fold accuracy grid search over RBF-kernel SVC settings and print the outcome.

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Prints the best cross-validated score, the best parameter combination and
    the corresponding estimator. Nothing is returned.
    """
    c_values = [0.01,0.1,1, 10, 100]
    gamma_values = [0.001,0.01,0.1, 0.3,0.4,0.5,0.6,0.8, 1,10]

    # Only the RBF grid is active. Disabled alternatives that were tried
    # before: a linear kernel over the same C values, and a poly kernel with
    # degree 2-4 and gamma in [0.01, 0.1, 1, 10].
    param_grid = [
        {'C': c_values, 'kernel': ['rbf'], 'gamma': gamma_values},
    ]

    searcher = GridSearchCV(
        estimator=SVC(),
        param_grid=param_grid,
        scoring='accuracy',
        cv=5,
        verbose=0,
    )
    searcher.fit(X_train, y_train)

    best_score = searcher.best_score_
    best_params = searcher.best_params_
    best_model = searcher.best_estimator_

    # Best mean cross-validated score found by the search.
    print('GridSearch CV best score : {:.4f}\n\n'.format(best_score))
    # Parameter combination that achieved it.
    print('Parameters that give the best results :','\n\n', best_params)
    # Estimator built from those parameters.
    print('\n\nEstimator that was chosen by the search :','\n\n', best_model)

In [9]:
# Full column set passed to load_data() as `left_column`. The last entry,
# 'level', is the target that load_data() pops off as y; every other entry is
# used as a feature. 'Time' and 'Distance' are commented out and therefore not
# selected.
all_column = [
#'Time',
#    'Distance',
    'GPS Latitude','GPS Longitude','Damper Velocity (Calc) FL','Damper Velocity (Calc) FR','Damper Velocity (Calc) RL',
'Damper Velocity (Calc) RR','Corr Dist','Corr Dist (Unstretched)','Corr Speed','Brake Pos',
'CG Accel Lateral','CG Accel Longitudinal','CG Accel Vertical','CG Height','Camber FL','Camber FR','Camber RL','Camber RR','Car Coord X',
'Car Coord Y','Car Coord Z','Car Pos Norm','Chassis Pitch Angle','Chassis Pitch Rate','Chassis Roll Angle','Chassis Roll Rate',
'Chassis Velocity X','Chassis Velocity Y','Chassis Velocity Z','Chassis Yaw Rate','Drive Train Speed','Engine RPM','Ground Speed',
'Ride Height FL','Ride Height FR','Ride Height RL','Ride Height RR','Road Temp','Self Align Torque FL','Self Align Torque FR',
'Self Align Torque RL','Self Align Torque RR','Session Time Left','Steering Angle','Suspension Travel FL','Suspension Travel FR',
'Suspension Travel RL','Suspension Travel RR','Tire Load FL','Tire Load FR','Tire Load RL','Tire Load RR','Tire Loaded Radius FL',
'Tire Loaded Radius FR','Tire Loaded Radius RL','Tire Loaded Radius RR','Tire Pressure FL','Tire Pressure FR','Tire Pressure RL','Tire Pressure RR',
'Tire Rubber Grip FL','Tire Rubber Grip FR','Tire Rubber Grip RL','Tire Rubber Grip RR','Tire Slip Angle FL','Tire Slip Angle FR',
'Tire Slip Angle RL','Tire Slip Angle RR','Tire Slip Ratio FL','Tire Slip Ratio FR','Tire Slip Ratio RL','Tire Slip Ratio RR',
'Tire Temp Core FL','Tire Temp Core FR','Tire Temp Core RL','Tire Temp Core RR','Tire Temp Inner FL','Tire Temp Inner FR',
'Tire Temp Inner RL','Tire Temp Inner RR','Tire Temp Middle FL','Tire Temp Middle FR','Tire Temp Middle RL',
'Tire Temp Middle RR','Tire Temp Outer FL','Tire Temp Outer FR','Tire Temp Outer RL','Tire Temp Outer RR','Toe In FL',
'Toe In FR','Toe In RL','Toe In RR','Wheel Angular Speed FL','Wheel Angular Speed FR','Wheel Angular Speed RL','Wheel Angular Speed RR',
'CG Distance','Lateral Velocity','Longitudinal Velocity','Lateral Acceleration','Longitudinal Acceleration','level']

# Smaller, hand-picked column set; it also ends with the 'level' target.
# NOTE(review): 'Throttle Pos' is listed here but does not appear in
# all_column above - confirm the corner CSVs actually contain that column.
selected_column = ['Brake Pos', 'Ground Speed', 'Steering Angle', 'Throttle Pos', 'Chassis Yaw Rate', 'Chassis Velocity X',
                   'Chassis Velocity Y','Chassis Velocity Z','Lateral Velocity','Longitudinal Velocity','Lateral Acceleration','Longitudinal Acceleration',
                   'CG Distance','level']

def run_experiment(column, corner, kernel='linear', C=1, gamma=0.01):
    """Run one complete train/evaluate/report cycle for a column set and corner.

    Parameters
    ----------
    column : list of str
        Columns to load; must include ``'level'`` (the target).
    corner : str
        Corner id, or ``'all'`` for every corner.
    kernel, C, gamma
        SVC settings forwarded to ``evaluate_model``. The defaults are the
        values that used to be hard-coded here, so existing calls behave as
        before; pass other values to evaluate, for example, the settings found
        by the grid search.

    The data is loaded, split and scaled, an SVC is fitted and scored, the
    classification metrics are printed and the ROC curve is drawn. Nothing is
    returned.
    """
    X,y = load_data(column,corner)
    X_train, X_test, y_train, y_test = processing_data(X,y)
    y_test,y_pred = evaluate_model(X_train, X_test, y_train, y_test, kernel, C, gamma)
    confusion_matrix(y_test,y_pred)
    draw_ROC(y_test,y_pred)

def finding_hyperparameter(column,corner):
    """Grid-search SVC hyper-parameters for one column set and corner selection.

    The data is loaded and split exactly as for an experiment run, but only
    the training part is handed to ``grid_searching``; the test part is not
    used. The search results are printed and nothing is returned.
    """
    features, target = load_data(column, corner)
    train_features, _, train_target, _ = processing_data(features, target)
    grid_searching(train_features, train_target)

In [None]:
# Run every experiment: each column set is evaluated on each single corner
# and on all corners combined.
columns = dict(all=all_column, select=selected_column)
corners = [str(n) for n in range(1, 7)] + ['all']

for key, value in columns.items():
    # Banner naming the column set that the following runs use.
    print('******************************'+key+'*****************************')
    for corner in corners:
        # Banner naming the corner selection of this run.
        print('************'+corner+'***************')
        run_experiment(value, corner)

In [10]:
# Grid-search SVC hyper-parameters on selected_column with corners 1-6 combined.
finding_hyperparameter(selected_column, 'all')

GridSearch CV best score : 0.9036


Parameters that give the best results : 

 {'C': 10, 'gamma': 0.8, 'kernel': 'rbf'}


Estimator that was chosen by the search : 

 SVC(C=10, gamma=0.8)


In [None]:
# Recorded grid-search results, kept as comments so that this cell is valid
# Python and does not stop a "Restart & Run All".
#
# (the call that produced this first result was not recorded)
# GridSearch CV best score : 0.9203
#
#
# Parameters that give the best results : 
#
#  {'C': 10, 'gamma': 0.3, 'kernel': 'rbf'}
#
#
# Estimator that was chosen by the search : 
#
#  SVC(C=10, gamma=0.3)
#
#
# finding_hyperparameter(selected_column, '2')
# GridSearch CV best score : 0.9147
# Parameters that give the best results : 
#
#  {'C': 10, 'gamma': 0.4, 'kernel': 'rbf'}
# Estimator that was chosen by the search : 
#
#  SVC(C=10, gamma=0.4)
#
#
#
# finding_hyperparameter(selected_column, 'all')
# GridSearch CV best score : 0.9036
#
#
# Parameters that give the best results : 
#
#  {'C': 10, 'gamma': 0.8, 'kernel': 'rbf'}
#
#
# Estimator that was chosen by the search : 
#
#  SVC(C=10, gamma=0.8)