Demonstrate the working of an SVM classifier on a suitable data set

In [1]:
from sklearn import svm, datasets

In [2]:
# Load the Iris dataset bundled with scikit-learn. Later cells read its
# .data, .feature_names, .target and .target_names attributes.
iris_data = datasets.load_iris()

In [3]:
# Inspect which container type load_iris() returned.
type(iris_data)

sklearn.utils.Bunch

In [4]:
import pandas as pd

In [5]:
# Build the feature table (one column per measurement) and preview the
# first rows.
iris = pd.DataFrame(
    data=iris_data.data,
    columns=iris_data.feature_names,
)
iris.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [6]:
# Attach the class labels from the dataset as a new 'target' column,
# then preview the result.
iris['target'] = iris_data.target
iris.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
# Show the species names that correspond to the class codes.
iris_data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [8]:
# Lookup table from positional class code to species name.
target_names = dict(enumerate(iris_data.target_names))

In [9]:
# Display the code -> species-name lookup built in the previous cell.
target_names

{0: 'setosa', 1: 'versicolor', 2: 'virginica'}

In [10]:
# Replace the integer class codes with species names.
# The column is rebuilt from the original iris_data.target codes rather than
# from its current contents, so re-running this cell is idempotent. Mapping
# the already-mapped strings through the int-keyed dict would otherwise turn
# every label into NaN. Bracket assignment is used instead of attribute
# assignment so the write always targets the DataFrame column.
iris['target'] = pd.Series(iris_data.target, index=iris.index).map(target_names)

In [11]:
# Preview the frame now that 'target' holds species names.
iris.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Split the frame into the label column (y) and the remaining
# predictor columns (x).
y = iris['target']
x = iris.drop(columns=['target'])

In [14]:
# Hold out a test partition; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
)

In [15]:
# First candidate: support-vector classifier with an RBF kernel.
model = svm.SVC(C=1, kernel='rbf')

In [16]:
# Train the RBF model on the training partition.
model.fit(X=x_train, y=y_train)

SVC(C=1)

In [17]:
# Score the RBF model on the held-out test partition.
model.score(X=x_test, y=y_test)

0.9736842105263158

In [18]:
# Second candidate: same C, but with a linear kernel for comparison.
model2 = svm.SVC(C=1, kernel='linear')

In [19]:
# Train the linear model on the same training partition.
model2.fit(X=x_train, y=y_train)

SVC(C=1, kernel='linear')

In [20]:
# Score the linear model on the held-out test partition.
model2.score(X=x_test, y=y_test)

0.9736842105263158

<h3>Grid Search</h3>
<p>params:<br>
    C: 1,10,20<br>
    kernel: linear, rbf</p>

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
# Unconfigured SVC used as the base estimator for the hyper-parameter
# searches below; C and kernel are supplied by the search itself.
svm_grid = svm.SVC()

In [23]:
# Exhaustive search over every (C, kernel) combination, each evaluated
# with 5-fold cross-validation. Training-fold scores are not recorded.
grid_search = GridSearchCV(
    estimator=svm_grid,
    param_grid={'C': [1, 10, 20], 'kernel': ['linear', 'rbf']},
    cv=5,
    return_train_score=False,
)

In [24]:
# Run the grid search on the training partition only.
grid_search.fit(X=x_train, y=y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [1, 10, 20], 'kernel': ['linear', 'rbf']})

In [25]:
# Show the cross-validation setting the search was configured with.
grid_search.cv

5

In [26]:
# Summarise the winning configuration: fitted estimator, its parameters,
# and its best score.
(
    grid_search.best_estimator_,
    grid_search.best_params_,
    grid_search.best_score_,
)

(SVC(C=1, kernel='linear'), {'C': 1, 'kernel': 'linear'}, 0.9731225296442687)

In [27]:
# Load the per-candidate cross-validation results into a DataFrame
# for easier inspection.
df = pd.DataFrame(grid_search.cv_results_)

In [28]:
# Display the full grid-search results table.
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004116,0.003523,0.001096,0.00098,1,linear,"{'C': 1, 'kernel': 'linear'}",1.0,0.956522,1.0,0.954545,0.954545,0.973123,0.021957,1
1,0.003605,0.002947,0.0,0.0,1,rbf,"{'C': 1, 'kernel': 'rbf'}",1.0,0.956522,1.0,0.909091,0.954545,0.964032,0.033918,5
2,0.001642,0.002037,0.001573,0.001346,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,1.0,0.909091,0.954545,0.972727,0.036364,3
3,0.002427,0.002005,0.001938,0.002236,10,rbf,"{'C': 10, 'kernel': 'rbf'}",1.0,0.956522,1.0,0.954545,0.954545,0.973123,0.021957,1
4,0.002255,0.003225,0.004227,0.002873,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,0.956522,1.0,0.909091,0.954545,0.964032,0.033918,5
5,0.003201,0.003257,0.003755,0.003373,20,rbf,"{'C': 20, 'kernel': 'rbf'}",1.0,0.913043,1.0,0.954545,0.954545,0.964427,0.032761,4


In [29]:
# Narrow the results to the searched parameters and the mean test score.
df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.973123
1,1,rbf,0.964032
2,10,linear,0.972727
3,10,rbf,0.973123
4,20,linear,0.964032
5,20,rbf,0.964427


### Randomized Search CV

In [30]:
from sklearn.model_selection import RandomizedSearchCV

In [31]:
# Randomized search: sample n_iter=2 of the (C, kernel) combinations and
# evaluate each with 5-fold cross-validation.
# random_state is fixed so the sampled candidates, and therefore the best
# result, are the same on every Restart & Run All. Without it the candidates
# change from run to run. The candidates sampled with this seed may differ
# from those in previously saved output.
rand_cv = RandomizedSearchCV(
    svm_grid,
    {
        'C': [1, 10, 20],
        'kernel': ['linear', 'rbf'],
    },
    cv=5,
    n_iter=2,
    return_train_score=False,
    random_state=0,
)

In [32]:
# Run the randomized search on the training partition only.
rand_cv.fit(X=x_train, y=y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=2,
                   param_distributions={'C': [1, 10, 20],
                                        'kernel': ['linear', 'rbf']})

In [33]:
# Load the randomized-search cross-validation results into a DataFrame.
rand_cv_df = pd.DataFrame(rand_cv.cv_results_)

In [34]:
# Narrow the results to the sampled parameters and the mean test score.
rand_cv_df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,linear,0.964032
1,10,linear,0.972727


In [35]:
# Summarise the best sampled candidate: its score, the fitted estimator,
# and its parameters.
(
    rand_cv.best_score_,
    rand_cv.best_estimator_,
    rand_cv.best_params_,
)

(0.9727272727272727, SVC(C=10, kernel='linear'), {'kernel': 'linear', 'C': 10})