### SVM with Linear Kernel
 

In [5]:
import numpy as np
import pandas as pd

Here I will use the scaled dataset

In [6]:
data = pd.read_csv('scaleddata.csv')
data

Unnamed: 0,X1,X2,y
0,-1.529905,1.524954,1
1,-1.285864,1.309072,1
2,-1.675753,1.421765,1
3,-1.467577,1.351920,1
4,-1.529905,1.524954,1
...,...,...,...
2220,1.243449,1.431132,0
2221,1.384919,1.207115,0
2222,1.357108,1.224123,0
2223,1.210627,1.385059,0


In [7]:
X = data.drop('y', 1)
y = data['y']

In [8]:
# splitting the dataset into the training and test set

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [9]:
from sklearn.svm import SVC
svc = SVC(kernel='linear')

In [10]:
svc.fit(X_train,y_train)

SVC(kernel='linear')

In [11]:
y_pred = svc.predict(X_test)

In [12]:
from sklearn.metrics import *

In [14]:
confusion_matrix(y_test,y_pred)

array([[250,   0],
       [115,  80]], dtype=int64)

In [13]:
accuracy_score(y_test,y_pred)

0.7415730337078652

### Hyper Parameter Tuning

In [15]:
paralist = {
    'C': [0.01, 0.1, 1, 10, 100], 
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree':[2,3,4,5,6],
    'tol':[1e-2,1e-3,1e-4,1e-5,1e-6],
    'shrinking' : [True,False],
}

In [16]:
from sklearn.model_selection import RandomizedSearchCV
randsearch = RandomizedSearchCV(estimator=svc,param_distributions=paralist,cv=5,n_jobs=-1,verbose=1)

In [17]:
randsearch.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    6.4s finished


RandomizedSearchCV(cv=5, estimator=SVC(kernel='linear'), n_jobs=-1,
                   param_distributions={'C': [0.01, 0.1, 1, 10, 100],
                                        'degree': [2, 3, 4, 5, 6],
                                        'gamma': [1, 0.1, 0.01, 0.001],
                                        'kernel': ['linear', 'poly', 'rbf',
                                                   'sigmoid'],
                                        'shrinking': [True, False],
                                        'tol': [0.01, 0.001, 0.0001, 1e-05,
                                                1e-06]},
                   verbose=1)

In [19]:
randsearch.best_params_

{'tol': 0.0001,
 'shrinking': False,
 'kernel': 'poly',
 'gamma': 1,
 'degree': 6,
 'C': 100}

In [20]:
new_svc = randsearch.best_estimator_
new_svc

SVC(C=100, degree=6, gamma=1, kernel='poly', shrinking=False, tol=0.0001)

In [22]:
ny_pred = new_svc.predict(X_test)

In [25]:
confusion_matrix(y_test,ny_pred)

array([[233,  17],
       [ 10, 185]], dtype=int64)

In [26]:
accuracy_score(y_test,ny_pred)

0.9393258426966292