In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,f1_score,roc_auc_score


In [2]:
def model7(x_train,y_train,x_val,y_val,x_test,y_test):
    sgd=SGDClassifier()
    sgd.fit(x_train,y_train)        #fitting the training data into the model
    y_test_pred=sgd.predict(x_test) #predicting the test data
    print("Model-7: SGD Method ")
    acc_t=round(accuracy_score(y_test_pred,y_test),4)*100         #accuracy of the predicting model
    print("Accuracy of Model 7 on test set before tuning: ",acc_t)
    pre=round(precision_score(y_test_pred,y_test,average='macro'),4)*100    #precision of the predicting model
    print("Precision of Model 7 on test set before tuning : ",pre)
    rec=round(recall_score(y_test_pred,y_test,average='macro'),4)*100        #recall_Score of the model
    print("Recall of Model 7 on test set before tuning: ",rec)
    f_score=round(f1_score(y_test_pred,y_test,average='macro'),4)*100        #f1_score of the model
    print("F1-score of Model-7 on test set before tuning : ",f_score)
    con_mat=confusion_matrix(y_test,y_test_pred,labels=[1,2,3,4,5,6,7,8])    #confusion matrix of the model
    print("Confusion Matrix of Model-7 on test set before tuning: ")
    print(con_mat)
    print("------------------------------------------------------------------------------------------------------------------")
    print("Tuning the hyperparameters with the help of validation set")
    loss=['squared_hinge','modified_huber','perceptron','log','hinge']       #first hyper-parameter list containing the various method to calculate loss
    alpha=[0.00001,0.0001,0.00000001]                                      #second hyper-parameter list containing different alpha
    learning_rate=['adaptive','optimal','invscaling']                        #third hyper-parameter is the list containing different learning rate
    feature=[]                                                          #list that will contain different combination of hyperparameter
    a=[]                                                                #list will contain different accuracy by combining different parameters
    for i in loss:
        for j in alpha:
            for k in learning_rate:
                    sgd=SGDClassifier(loss=i,alpha=j,learning_rate=k,eta0=1,max_iter=1000000,penalty='l1')
                    sgd.fit(x_train,y_train)
                    y_val_pred=sgd.predict(x_val)
                    acc=round(accuracy_score(y_val_pred,y_val),4)*100
                    feature.append((i,j,k))
                    a.append(acc)
    m=max(a)                        #getting the maximum accuracy
    p=a.index(m)                    #getting the index of max accuracy to obtain the best hyperparameters
    sgd=SGDClassifier(loss=feature[p][0],alpha=feature[p][1],learning_rate=feature[p][2],max_iter=1000000,eta0=1,penalty='l1') #declaring the model with best parameter
    x_train=np.concatenate((x_train,x_val))  #Now combining both training and validation data
    y_train=np.concatenate((y_train,y_val))  #Now combing the target values of training and validation data
    sgd.fit(x_train,y_train)        #fitting the training data into the model
    y_test_pred=sgd.predict(x_test) #predicting the test data
    acc_t=round(accuracy_score(y_test_pred,y_test),4)*100         #accuracy of the predicting model
    print("Accuracy of Model 7 on test set after tuning : ",acc_t)
    pre=round(precision_score(y_test_pred,y_test,average='macro'),4)*100    #precision of the predicting model
    print("Precision of Model 7 on test set after tuning : ",pre)
    rec=round(recall_score(y_test_pred,y_test,average='macro'),4)*100        #recall_Score of the model
    print("Recall of Model 7 on test set after tuning : ",rec)
    f_score=round(f1_score(y_test_pred,y_test,average='macro'),4)*100        #f1_score of the model
    print("F1-score of Model-7 on test set after tuning : ",f_score)
    con_mat=confusion_matrix(y_test,y_test_pred,labels=[1,2,3,4,5,6,7,8])    #confusion matrix of the model
    print("Confusion Matrix of Model-7 on test set after tuning : ")
    print(con_mat)
    
    

In [3]:
if __name__ == '__main__':
    train=pd.read_excel('cTTD_features_with_Labels/S2/trainset_60.xls')  #reading the xls file into dataframe
    validate=pd.read_excel('cTTD_features_with_Labels/S2/validate_20.xls')
    test=pd.read_excel('cTTD_features_with_Labels/S2/testset_20.xls')
    
    x_tr=train.drop(43,axis=1)    #separating the target values
    y_tr=train[43]
    x_v=validate.drop(43,axis=1)
    y_v=validate[43]
    x_te=test.drop(43,axis=1)
    y_te=test[43]
    
    x_train=x_tr.to_numpy()        # converting dataframe to numpy array
    y_train=y_tr.to_numpy()
    x_val=x_v.to_numpy()
    y_val=y_v.to_numpy()
    x_test=x_te.to_numpy()
    y_test=y_te.to_numpy()
    
    model7(x_train,y_train,x_val,y_val,x_test,y_test)
    print("==========================================================================================================")
    


Model-7: SGD Method 
Accuracy of Model 7 on test set before tuning:  89.52
Precision of Model 7 on test set before tuning :  89.64999999999999
Recall of Model 7 on test set before tuning:  90.12
F1-score of Model-7 on test set before tuning :  89.03999999999999
Confusion Matrix of Model-7 on test set before tuning: 
[[876   0   3   0   0   0   0   0]
 [  0 888   0   0   0   0   0   0]
 [  6   1 838   0  16   6   0  11]
 [  0  25  32 485   6  27 220 102]
 [  0   6   7   2 826  30   2   1]
 [  0  51  11   4  40 733  44   0]
 [  0   1   0  25  15  41 814   2]
 [  0   0   1   0   0   0   0 842]]
------------------------------------------------------------------------------------------------------------------
Tuning the hyperparameters with the help of validation set
Accuracy of Model 7 on test set after tuning :  94.52000000000001
Precision of Model 7 on test set after tuning :  94.58
Recall of Model 7 on test set after tuning :  94.53
F1-score of Model-7 on test set after tuning :  94.54


This is the total analysis done using SGD Classifier.The model is properly tuned with the help of validation set.Several parameters were tuned to increase the model effciency.As it can be seen that accuracy before tuning the parameter was 89 percent  and after tuning it jumps to 94.52 percent.The precision of the model also raise to 94.58 percent after tuning the model.From here,I choose F1 score is a right metric as it seeks a balance between precision and recall.Accuracy will be also right as there is much difference between precision and recall

Summary
 1. This is the total analysis done using SGD classifier.
 2. First ,datasets was converted into numpy array so that it can be easily put in the model.
 3. Then we normally train the model without tuning any of its hyperparameter and then test it on test dataset and observe the       metrics 
 4. After that we tuned the hyperparameters by varying with different values and checked it on validation set and after gettig       the best hyperparameters and after that we combine both train and validate and train as a whole.
 5. After that we checked the accuracy,precision,recall,confusion-matrix as well as F1 score of the testing data with predicted     value
 