In [10]:
import pandas as pd
import numpy as np

from sklearn import preprocessing

from sklearn import svm

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

In [11]:
df = pd.read_csv('../mushrooms.csv')

# One Hot Encoding - only chosen features

In [15]:
x = pd.get_dummies(df.drop(['habitat','population','spore-print-color','stalk-root','stalk-surface-above-ring',
                 'stalk-surface-below-ring','stalk-color-above-ring','stalk-color-below-ring','veil-type',
                'class','gill-attachment','gill-spacing','gill-size','odor','ring-type','ring-number',
                            'stalk-shape','gill-color'],axis=1))
a = pd.get_dummies(df['class'])
y = a.drop(['p'],axis=1).to_numpy().ravel()

# Split Train - Test 

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

# Looking for the best parameters 

In [7]:
%%time

params = {"kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
         'C':[0.1,1,10],
         'gamma':[3,4,5]}

clf = svm.SVC()

clf_classifier_search = GridSearchCV(clf, params, cv=5,n_jobs=4)
clf_classifier_search.fit(x_train, y_train)
clf_classifier_search.best_params_

CPU times: user 953 ms, sys: 39.8 ms, total: 992 ms
Wall time: 1min 5s


{'C': 0.1, 'gamma': 3, 'kernel': 'poly'}

In [8]:
%%time

best_svm_classifier = \
    svm.SVC(kernel=clf_classifier_search.best_params_["kernel"])

best_svm_classifier.fit(x_train, y_train)
print(best_svm_classifier.score(x_test, y_test))
y_pred = best_svm_classifier.predict(x_test)
print(y_pred)
print(accuracy_score(y_test,y_pred))

0.9138381201044387
[0 0 1 ... 1 1 1]
0.9138381201044387
CPU times: user 554 ms, sys: 15.6 ms, total: 569 ms
Wall time: 568 ms


In [9]:
from joblib import dump, load
dump(best_svm_classifier, 'model.joblib')

['model.joblib']

In [16]:
print(x.columns)

Index(['cap-shape_b', 'cap-shape_c', 'cap-shape_f', 'cap-shape_k',
       'cap-shape_s', 'cap-shape_x', 'cap-surface_f', 'cap-surface_g',
       'cap-surface_s', 'cap-surface_y', 'cap-color_b', 'cap-color_c',
       'cap-color_e', 'cap-color_g', 'cap-color_n', 'cap-color_p',
       'cap-color_r', 'cap-color_u', 'cap-color_w', 'cap-color_y', 'bruises_f',
       'bruises_t', 'veil-color_n', 'veil-color_o', 'veil-color_w',
       'veil-color_y'],
      dtype='object')


Index(['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'veil-color'], dtype='object')