In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.svm import SVC

In [2]:
df = pd.read_csv('data/mushrooms.csv')
df.columns

Index(['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object')

In [13]:
feats = ['cap-shape' , 
         'cap-surface' ,  
         'cap-color', 
         'ring-number',
         'stalk-shape',
         'habitat'
        ]
X = pd.get_dummies(df[feats])
y = df['class']
y.shape, X.shape

((8124,), (8124, 32))

In [14]:
x_train, x_test, y_train,y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
x_train.shape, y_train.shape

((6499, 32), (6499,))

### Base model

In [15]:
model = SVC().fit(x_train, y_train)
model

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [16]:
model.score(x_train, y_train)

0.9399907678104323

In [17]:
y_pred = model.predict(x_test)
accuracy_score(y_test, y_pred)

0.9372307692307692

### GridSearch

In [43]:
params = {"kernel": ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
         'C':[0.1,0.1,3],
         'gamma':[1,1,10],
         'degree': [1,1,10],
         'decision_function_shape' : ['ovo', 'ovr']
         }

model = SVC()

model_search = GridSearchCV(model, params, cv=5,n_jobs=2).fit(x_train, y_train)
best_params = model_search.best_params_
best_params

{'C': 0.1,
 'decision_function_shape': 'ovo',
 'degree': 4,
 'gamma': 1,
 'kernel': 'poly'}

In [44]:
best_model = model_search.best_estimator_
best_model

SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovo', degree=4, gamma=1, kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [45]:
best_model.score(x_train, y_train)

0.8318202800430835

In [46]:
y_pred = best_model.predict(x_test)
accuracy_score(y_test, y_pred)

0.8313846153846154