In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,f1_score,roc_auc_score


In [2]:
def model6(x_train,y_train,x_val,y_val,x_test,y_test):
    svc=SVC()
    svc.fit(x_train,y_train)                                                  #fitting the training data into the model
    y_test_pred=svc.predict(x_test)                                           #predicting the test data
    print("Model-6: Support Vector Machine ")
    acc_t=round(accuracy_score(y_test_pred,y_test),4)*100                     #accuracy of the predicting model
    print("Accuracy of Model 6 on test set before tuning: ",acc_t)
    pre=round(precision_score(y_test_pred,y_test,average='macro'),4)*100      #precision of the predicting model
    print("Precision of Model 6 on test set before tuning: ",pre)
    rec=round(recall_score(y_test_pred,y_test,average='macro'),4)*100         #recall_Score of the model
    print("Recall of Model 6 on test set before tuning: ",rec)
    f_score=round(f1_score(y_test_pred,y_test,average='macro'),4)*100         #f1_score of the model
    print("F1-score of Model-6 on test set before tuning: ",f_score)
    con_mat=confusion_matrix(y_test,y_test_pred,labels=[1,2,3,4,5,6,7,8])     #confusion matrix of the model
    print("Confusion Matrix of Model-6 on test set before tuning: ")
    print(con_mat)
    print("---------------------------------------------------------------------------------------------------")
    print("Tuning the hyperparameter using validation set")
    C=[1,10,100,1000]                   #first hyper-parameter list containing different regularization value
    gamma=[0.001,0.01,0.1,1,10]                #second hyper-parameter list containg different gamma values
    poly=[2,3]                          #third hyper-parameter list containing different degree values of kernel 'poly'
    feature=[]                        #list that will contain different combination of hyperparameter
    a=[]                              #list will contain different accuracy by combining different parameters
    for i in C:
        for j in gamma:
            for k in poly:
                svc=SVC(C=i,gamma=j,degree=k,kernel='poly')
                svc.fit(x_train,y_train)
                y_val_pred=svc.predict(x_val)
                acc=round(accuracy_score(y_val_pred,y_val),4)*100
                feature.append((i,j,k))
                a.append(acc)
    m=max(a)                        #getting the maximum accuracy
    p=a.index(m)                    #getting the index of max accuracy to obtain the best hyperparameters
    svc=SVC(C=feature[p][0],gamma=feature[p][1],degree=feature[p][2],kernel='poly')
    x_train=np.concatenate((x_train,x_val))  #Now combining both training and validation data
    y_train=np.concatenate((y_train,y_val))  #Now combing the target values of training and validation data
    svc.fit(x_train,y_train)        #fitting the training data into the model
    y_test_pred=svc.predict(x_test) #predicting the test data
    print("Model-6: Support Vector Machine ")
    acc_t=round(accuracy_score(y_test_pred,y_test),4)*100                    #accuracy of the predicting model
    print("Accuracy of Model 6 on test set after tuning: ",acc_t)
    pre=round(precision_score(y_test_pred,y_test,average='macro'),4)*100     #precision of the predicting model
    print("Precision of Model 6 on test set after tuning: ",pre)
    rec=round(recall_score(y_test_pred,y_test,average='macro'),4)*100        #recall_Score of the model
    print("Recall of Model 6 on test set after tuning: ",rec)
    f_score=round(f1_score(y_test_pred,y_test,average='macro'),4)*100        #f1_score of the model
    print("F1-score of Model-6 on test set after tuning: ",f_score)
    con_mat=confusion_matrix(y_test,y_test_pred,labels=[1,2,3,4,5,6,7,8])    #confusion matrix of the model
    print("Confusion Matrix of Model-6 on test set after tuning: ")
    print(con_mat)
    

In [3]:
if __name__ == '__main__':
    train=pd.read_excel('cTTD_features_with_Labels/S2/trainset_60.xls')  #reading the xls file into dataframe
    validate=pd.read_excel('cTTD_features_with_Labels/S2/validate_20.xls')
    test=pd.read_excel('cTTD_features_with_Labels/S2/testset_20.xls')
    
    x_tr=train.drop(43,axis=1)    #separating the target values
    y_tr=train[43]
    x_v=validate.drop(43,axis=1)
    y_v=validate[43]
    x_te=test.drop(43,axis=1)
    y_te=test[43]
    
    x_train=x_tr.to_numpy()        # converting dataframe to numpy array
    y_train=y_tr.to_numpy()
    x_val=x_v.to_numpy()
    y_val=y_v.to_numpy()
    x_test=x_te.to_numpy()
    y_test=y_te.to_numpy()
    
    model6(x_train,y_train,x_val,y_val,x_test,y_test)
    print("==========================================================================================================")
    




Model-6: Support Vector Machine 
Accuracy of Model 6 on test set before tuning:  90.48
Precision of Model 6 on test set before tuning:  90.57
Recall of Model 6 on test set before tuning:  90.51
F1-score of Model-6 on test set before tuning:  90.48
Confusion Matrix of Model-6 on test set before tuning: 
[[869   0   7   2   1   0   0   0]
 [  0 880   0   3   0   4   1   0]
 [  1   0 846  20   8   3   0   0]
 [  0   1  41 641  15  23 150  26]
 [  0   3  13  10 796  47   5   0]
 [  0  17  13  32  38 743  40   0]
 [  0   0   1  56  10  59 772   0]
 [  0   0   0  20   0   0   0 823]]
---------------------------------------------------------------------------------------------------
Tuning the hyperparameter using validation set
Model-6: Support Vector Machine 
Accuracy of Model 6 on test set after tuning:  98.78
Precision of Model 6 on test set after tuning:  98.8
Recall of Model 6 on test set after tuning:  98.8
F1-score of Model-6 on test set after tuning:  98.8
Confusion Matrix of Model-6

In the above code,the SVM classifier is done.The accuracy on the test set was 91 percent and after tuning the hyperparameters of  the SVC,the accuracy rises to 98.78 percent.The precision before tuning the hyperparameter is 91.69 and after tuning it become 98.8 percent.Similarly recall before tuning was 91.72 percent and after tuning it become 98.8 and F1 score after tuning increases by 6.13 percent.In my opinion both accuracy and F1 score can be better option for metric evaluation.

Summary
 1. This is the total analysis done using SVM classifier.
 2. First ,datasets was converted into numpy array so that it can be easily put in the model.
 3. Then we normally train the model without tuning any of its hyperparameter and then test it on test dataset and observe the       metrics 
 4. After that we tuned the hyperparameters by varying with different values and checked it on validation set and after gettig       the best hyperparameters and after that we combine both train and validate and train as a whole.
 5. After that we checked the accuracy,precision,recall,confusion-matrix as well as F1 score of the testing data with predicted     value
 