## Support vector machine
### Aravind Dudam
### U04440266


In [1]:
# Import Libraries

In [2]:
import pandas as pd
from sklearn.svm import SVC
from matplotlib import pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report  
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Seed NumPy's global random number generator so a top-to-bottom run is repeatable.
# NOTE(review): the later cells pass no explicit random_state, so they presumably
# depend on this global seed (and on cell execution order) — confirm.
np.random.seed(1)

In [3]:
# Loading data

In [4]:
# Read the riding-mower data (the same data used in the logistic regression example)
mowers_csv = "E:/RidingMowers.csv"
df = pd.read_csv(mowers_csv)
df.head()  # preview the first five rows

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,Owner
1,85.5,16.8,Owner
2,64.8,21.6,Owner
3,61.5,20.8,Owner
4,87.0,23.6,Owner


In [5]:
# Get the number of rows, columns, all elements

In [6]:
# (number of rows, number of columns) of the loaded frame
df.shape

(24, 3)

In [7]:
# Label-encode the Ownership column as integers, overwriting the column in df
label_encoder = preprocessing.LabelEncoder()
ownership_codes = label_encoder.fit_transform(df['Ownership'])
df['Ownership'] = ownership_codes

df  # display the encoded frame

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,1
1,85.5,16.8,1
2,64.8,21.6,1
3,61.5,20.8,1
4,87.0,23.6,1
5,110.1,19.2,1
6,108.0,17.6,1
7,82.8,22.4,1
8,69.0,20.0,1
9,93.0,20.8,1


In [8]:
# Splitting of dependent variable and independent variable

In [9]:
# Every column left of the final one is a predictor; the final column is the label.
last_col = df.shape[1] - 1
X = df.iloc[:, :last_col]
y = df.iloc[:, last_col]

In [10]:
# using the train test split function

In [11]:
# Hold out 30% of the rows for testing and train on the rest.
# NOTE(review): no random_state is passed, so the shuffle presumably depends on
# NumPy's global RNG state at the moment this cell runs — re-running the cell
# on its own may give a different split.
split_parts = train_test_split(X, y, test_size=0.30)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Model the data

In [13]:
# Empty comparison table; each evaluation cell appends one row per model.
metric_columns = ["model", "Accuracy", "Precision", "Recall", "F1"]
performance = pd.DataFrame({column: [] for column in metric_columns})

### Fit an SVM classification model using a linear kernel

In [14]:
# Linear-kernel support vector classifier, fitted on the training split.
# The target is flattened to a 1-D array before fitting.
svm_lin_model = SVC(probability=True, kernel="linear")
train_labels = np.asarray(y_train).ravel()
_ = svm_lin_model.fit(X_train, train_labels)

In [15]:
# Score the linear-kernel model on the held-out rows.
model_preds = svm_lin_model.predict(X_test)
# labels=[0, 1] pins the matrix to 2x2 even if the small test split happens to
# contain a single class; without it the [1][1] lookup would raise IndexError.
# Assumes Ownership was label-encoded to 0/1 earlier in the notebook.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])
# Rows are actual classes, columns are predicted classes; class 1 is the positive class.
TN, FP, FN, TP = c_matrix.ravel()
linear_scores = pd.DataFrame({'model': "linear svm",
                              'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                              'Precision': [TP/(TP+FP)],
                              'Recall': [TP/(TP+FN)],
                              'F1': [2*TP/(2*TP+FP+FN)]
                              }, index=[0])
# Append this model's row to the running comparison table.
performance = pd.concat([performance, linear_scores])
print(performance)

        model  Accuracy  Precision  Recall   F1
0  linear svm       1.0        1.0     1.0  1.0


### Fit an SVM classification model using an RBF kernel

In [16]:
# RBF-kernel support vector classifier (C=10, gamma='scale'), fitted on the training split.
rbf_settings = {"kernel": "rbf", "C": 10, "gamma": 'scale', "probability": True}
svm_rbf_model = SVC(**rbf_settings)
_ = svm_rbf_model.fit(X_train, np.asarray(y_train).ravel())

In [17]:
# Score the RBF-kernel model on the held-out rows.
model_preds = svm_rbf_model.predict(X_test)
# labels=[0, 1] pins the matrix to 2x2 even if the small test split happens to
# contain a single class; without it the [1][1] lookup would raise IndexError.
# Assumes Ownership was label-encoded to 0/1 earlier in the notebook.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])
# Rows are actual classes, columns are predicted classes; class 1 is the positive class.
TN, FP, FN, TP = c_matrix.ravel()
rbf_scores = pd.DataFrame({'model': "rbf svm",
                           'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                           'Precision': [TP/(TP+FP)],
                           'Recall': [TP/(TP+FN)],
                           'F1': [2*TP/(2*TP+FP+FN)]
                           }, index=[0])
# Append this model's row to the running comparison table.
performance = pd.concat([performance, rbf_scores])
print(performance)

        model  Accuracy  Precision    Recall        F1
0  linear svm      1.00   1.000000  1.000000  1.000000
0     rbf svm      0.75   0.666667  0.666667  0.666667


### Fit an SVM classification model using a polynomial kernel

In [18]:
# Degree-3 polynomial-kernel support vector classifier (coef0=1, C=10),
# fitted on the training split.
poly_settings = {"kernel": "poly", "degree": 3, "coef0": 1, "C": 10, "probability": True}
svm_poly_model = SVC(**poly_settings)
_ = svm_poly_model.fit(X_train, np.asarray(y_train).ravel())

In [19]:
# Score the polynomial-kernel model on the held-out rows.
model_preds = svm_poly_model.predict(X_test)
# labels=[0, 1] pins the matrix to 2x2 even if the small test split happens to
# contain a single class; without it the [1][1] lookup would raise IndexError.
# Assumes Ownership was label-encoded to 0/1 earlier in the notebook.
c_matrix = confusion_matrix(y_test, model_preds, labels=[0, 1])
# Rows are actual classes, columns are predicted classes; class 1 is the positive class.
TN, FP, FN, TP = c_matrix.ravel()
poly_scores = pd.DataFrame({'model': "poly svm",
                            'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                            'Precision': [TP/(TP+FP)],
                            'Recall': [TP/(TP+FN)],
                            'F1': [2*TP/(2*TP+FP+FN)]
                            }, index=[0])
# Append this model's row to the running comparison table.
performance = pd.concat([performance, poly_scores])
print(performance)

        model  Accuracy  Precision    Recall        F1
0  linear svm     1.000   1.000000  1.000000  1.000000
0     rbf svm     0.750   0.666667  0.666667  0.666667
0    poly svm     0.875   1.000000  0.666667  0.800000


In [20]:
# Summary

In [21]:
# Show the accumulated metrics table (one row per fitted kernel)
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,1.0,1.0,1.0,1.0
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8


In [22]:
#Results of each of these models (accuracy, precision, recall, and F1)

In [23]:
# Rank the models from lowest to highest accuracy (ascending is the default order)
performance.sort_values(by="Accuracy")

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,linear svm,1.0,1.0,1.0,1.0


In [24]:
# Rank the models from lowest to highest precision (ascending is the default order)
performance.sort_values(by="Precision")

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,linear svm,1.0,1.0,1.0,1.0
0,poly svm,0.875,1.0,0.666667,0.8


In [25]:
# Rank the models from lowest to highest recall (ascending is the default order)
performance.sort_values(by="Recall")

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,linear svm,1.0,1.0,1.0,1.0


In [26]:
# Rank the models from lowest to highest F1 score (ascending is the default order)
performance.sort_values(by="F1")

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8
0,linear svm,1.0,1.0,1.0,1.0


### Inference: Using train_test_split, we split the data into 70% train and 30% test and fit SVM classification models with linear, RBF, and polynomial kernels. Based on the metric results (accuracy, precision, recall, F1), the linear model scores a perfect 1.0 on every metric, which we take as a sign that it is overfitting. Looking at the metrics above, the polynomial kernel shows the best results among the remaining models, so we consider it the winning model for further prediction.

In [27]:
#Saving winning model to pickle file

# pickle is used in the next cell to serialise the chosen model to disk.
import pickle

In [28]:
# Persist the fitted polynomial-kernel model. The context manager guarantees the
# file handle is flushed and closed even if pickling raises.
# NOTE(review): the target keeps its original name, but the contents are a binary
# pickle, not CSV — consider renaming to a .pkl extension.
with open('C:/USF/DSP/pickle.csv', 'wb') as model_file:
    pickle.dump(svm_poly_model, model_file)