### SVM Classifier

##### Importing necessary packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn import metrics

%matplotlib inline

##### Reading the data

In [2]:
# Load the Social Network Ads dataset from the local Datasets folder
# (path is relative to the notebook's working directory).
sn_data = pd.read_csv(filepath_or_buffer="./Datasets/Social_Network_Ads.csv")

In [3]:
# Display the loaded DataFrame as the cell result to eyeball its columns and rows.
sn_data

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


##### Separating the independent variables and dependent variables

In [4]:
# Independent variables (predictors): every column except the first (User ID)
# and the last (Purchased), i.e. Gender, Age and EstimatedSalary.
X = sn_data.iloc[:, 1:-1].to_numpy()

# Dependent variable (target): the last column, Purchased.
y = sn_data.iloc[:, -1].to_numpy()

In [5]:
# Inspect the raw feature array; Gender is still a string here, so the dtype is object.
X

array([['Male', 19, 19000],
       ['Male', 35, 20000],
       ['Female', 26, 43000],
       ...,
       ['Female', 50, 20000],
       ['Male', 36, 33000],
       ['Female', 49, 36000]], dtype=object)

In [6]:
# Inspect the target vector taken from the last column of sn_data.
y

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,

##### Encoding categorical values

In [7]:
# One-hot encode the categorical Gender column (column 0 of X).
#
# OneHotEncoder accepts string categories directly, so no LabelEncoder pass is
# needed. The `categorical_features` argument was deprecated in scikit-learn
# 0.20 and removed in 0.22; instead, only the column to encode is handed to
# the encoder and the numeric columns are re-attached afterwards.
onehotencoder = OneHotEncoder()
gender_dummies = onehotencoder.fit_transform(X[:, [0]]).toarray()  # categories are sorted: Female, Male

# Resulting layout is [Female, Male, Age, EstimatedSalary], all as floats,
# which matches what the previous categorical_features=[0] call produced.
X = np.hstack([gender_dummies, X[:, 1:].astype(float)])

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [8]:
# Inspect the encoded array: two gender dummy columns followed by Age and EstimatedSalary.
X

array([[0.0e+00, 1.0e+00, 1.9e+01, 1.9e+04],
       [0.0e+00, 1.0e+00, 3.5e+01, 2.0e+04],
       [1.0e+00, 0.0e+00, 2.6e+01, 4.3e+04],
       ...,
       [1.0e+00, 0.0e+00, 5.0e+01, 2.0e+04],
       [0.0e+00, 1.0e+00, 3.6e+01, 3.3e+04],
       [1.0e+00, 0.0e+00, 4.9e+01, 3.6e+04]])

##### Avoiding the dummy variable trap

In [9]:
# Drop the first gender dummy (Female) so the remaining dummy (1 = Male) is not
# perfectly collinear with it.
#
# The encoded array has exactly 4 columns: 2 gender dummies + Age +
# EstimatedSalary. Slicing only while that layout is present keeps the cell
# idempotent: re-running it can no longer strip the Male dummy or Age.
if X.shape[1] == 4:
    X = X[:, 1:]
X

array([[1.0e+00, 1.9e+01, 1.9e+04],
       [1.0e+00, 3.5e+01, 2.0e+04],
       [0.0e+00, 2.6e+01, 4.3e+04],
       ...,
       [0.0e+00, 5.0e+01, 2.0e+04],
       [1.0e+00, 3.6e+01, 3.3e+04],
       [0.0e+00, 4.9e+01, 3.6e+04]])

##### Splitting the data into train and test sets

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [11]:
# Print each split under its caption. sep="\n" places the array on the line
# after the caption, and the leading "\n\n" leaves a gap between sections.
for _caption, _split in (
    ("X_train:", X_train),
    ("\n\nX_test:", X_test),
    ("\n\ny_train:", y_train),
    ("\n\ny_test:", y_test),
):
    print(_caption, _split, sep="\n")

X_train:
[[0.00e+00 2.70e+01 5.70e+04]
 [1.00e+00 4.60e+01 2.80e+04]
 [0.00e+00 3.90e+01 1.34e+05]
 [0.00e+00 4.40e+01 3.90e+04]
 [0.00e+00 5.70e+01 2.60e+04]
 [1.00e+00 3.20e+01 1.20e+05]
 [1.00e+00 4.10e+01 5.20e+04]
 [1.00e+00 4.80e+01 7.40e+04]
 [1.00e+00 2.60e+01 8.60e+04]
 [1.00e+00 2.20e+01 8.10e+04]
 [1.00e+00 4.90e+01 8.60e+04]
 [0.00e+00 3.60e+01 5.40e+04]
 [1.00e+00 4.00e+01 5.90e+04]
 [0.00e+00 4.10e+01 8.00e+04]
 [1.00e+00 2.60e+01 1.60e+04]
 [0.00e+00 3.90e+01 7.90e+04]
 [1.00e+00 5.90e+01 1.30e+05]
 [1.00e+00 4.20e+01 6.40e+04]
 [0.00e+00 5.30e+01 1.43e+05]
 [1.00e+00 3.40e+01 1.12e+05]
 [0.00e+00 5.70e+01 1.22e+05]
 [0.00e+00 3.90e+01 7.10e+04]
 [1.00e+00 4.70e+01 2.50e+04]
 [1.00e+00 2.40e+01 1.90e+04]
 [0.00e+00 3.60e+01 5.00e+04]
 [0.00e+00 3.20e+01 1.50e+05]
 [0.00e+00 4.80e+01 2.90e+04]
 [1.00e+00 3.00e+01 1.07e+05]
 [1.00e+00 6.00e+01 3.40e+04]
 [1.00e+00 3.80e+01 6.10e+04]
 [1.00e+00 3.30e+01 3.10e+04]
 [1.00e+00 3.90e+01 7.10e+04]
 [1.00e+00 5.50e+01 3.90e+04]
 

##### Feature scaling

In [12]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same scaling to both splits so that no statistics from
# the test data influence preprocessing.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

##### Modeling our classifier

In [14]:
# Linear-kernel support vector classifier; all other hyperparameters are left
# at their scikit-learn defaults.
svm_classifier = SVC(
    kernel="linear",
    random_state=42,
)

# fit() stays as the final expression so the fitted estimator is echoed below.
svm_classifier.fit(X=X_train, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=42,
    shrinking=True, tol=0.001, verbose=False)

##### Predicting target values

In [16]:
# Predict a class label for every row of the scaled test set, then show the
# predictions as the cell result.
y_pred = svm_classifier.predict(X=X_test)
y_pred

array([0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0], dtype=int64)

##### Checking the performance of our model

In [17]:
# Per-class precision, recall and F1 on the held-out test set.
print(
    "Classification Report:",
    metrics.classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.96      0.90        52
           1       0.90      0.68      0.78        28

    accuracy                           0.86        80
   macro avg       0.88      0.82      0.84        80
weighted avg       0.87      0.86      0.86        80



In [18]:
# Confusion matrix on the test set: rows are true classes, columns are
# predicted classes.
print(
    "Confusion Matrix:",
    metrics.confusion_matrix(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

Confusion Matrix:
[[50  2]
 [ 9 19]]


In [19]:
# Overall fraction of test rows whose predicted label matches the true label.
print(
    "Accuracy:",
    metrics.accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.8625
