In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC
from collections import Counter

In [2]:
# Load the car-evaluation table from disk and preview its first five rows.
csv_path = "car_evaluation.csv"
data = pd.read_csv(csv_path)
data.head(5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [3]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(1728, 7)

In [4]:
# Features are every column except the last one; the target is the `outcome` column.
# .copy() gives X its own data, so the in-place column encoding performed on X
# later in the notebook does not operate on a slice of `data` (which is what
# triggers pandas' SettingWithCopyWarning) and `data` itself stays untouched.
X = data.iloc[:, :-1].copy()
y = data["outcome"]

In [5]:
# Preview the first three rows of the feature frame.
X.head(3)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high


In [6]:
# Replace each string-valued feature with integer codes, one column at a time.
# The same encoder object is re-fitted for every column, so after this cell
# `enc` holds the classes of the last column processed (`safety`).
# NOTE(review): the codes follow sorted label order rather than the natural
# ranking (in the preview, safety low/med/high becomes 1/2/0) — confirm that
# this ordering is acceptable for a distance-based model.
enc = LabelEncoder()
for column in ("buying", "maint", "lug_boot", "safety"):
    X[column] = enc.fit_transform(X[column])
X.head(3)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,2,2,2,1
1,3,3,2,2,2,2
2,3,3,2,2,2,0


In [7]:
# Hold out 25% of the rows for evaluation (train_test_split's default, spelled
# out here); the fixed seed keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=10
)

In [8]:
# RBF-kernel SVM with hand-picked hyperparameters.
# Ranges worth exploring: C from 0.1 to 1000, gamma from 0.0001 to 10.
model = SVC(kernel='rbf', C=2, gamma=1)
model.fit(X_train, y_train)

# Predict the held-out rows and report the fraction classified correctly.
y_predict = model.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_predict)

0.9930555555555556

In [9]:
# Class frequencies among the held-out labels, followed by a table of
# actual labels (rows) against predicted labels (columns).
print(Counter(y_test))
pd.crosstab(index=y_test, columns=y_predict)

Counter({'unacc': 307, 'acc': 84, 'vgood': 22, 'good': 19})


col_0,acc,good,unacc,vgood
outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acc,83,0,1,0
good,0,19,0,0
unacc,0,0,307,0
vgood,2,0,0,20


### GridSearchCV

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
# Exhaustive search over 4 values of C x 4 values of gamma for an RBF SVM
# (16 candidates), each scored with GridSearchCV's default cross-validation.
parameters = dict(
    kernel=['rbf'],
    C=[1, 10, 100, 500],
    gamma=[0.01, 0.1, 0.5, 1.0],
)
grid_model = GridSearchCV(estimator=SVC(), param_grid=parameters, verbose=3)
grid_model.fit(X_train, y_train)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.696, total=   0.1s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.699, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.699, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.695, total=   0.0s

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.695, total=   0.1s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.815, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.861, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.795, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.838, total=   0.1s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.822, total=   0.0s
[CV] C=1, gamma=0.5, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.5, kernel=rbf, score=0.942, total=   0.1s
[CV] 

[CV] ....... C=500, gamma=0.01, kernel=rbf, score=0.973, total=   0.1s
[CV] C=500, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=500, gamma=0.01, kernel=rbf, score=0.961, total=   0.1s
[CV] C=500, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=500, gamma=0.1, kernel=rbf, score=0.985, total=   0.1s
[CV] C=500, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=500, gamma=0.1, kernel=rbf, score=0.988, total=   0.1s
[CV] C=500, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=500, gamma=0.1, kernel=rbf, score=0.996, total=   0.1s
[CV] C=500, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=500, gamma=0.1, kernel=rbf, score=0.977, total=   0.1s
[CV] C=500, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=500, gamma=0.1, kernel=rbf, score=0.977, total=   0.1s
[CV] C=500, gamma=0.5, kernel=rbf ....................................
[CV] .

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    6.0s finished


GridSearchCV(estimator=SVC(),
             param_grid={'C': [1, 10, 100, 500], 'gamma': [0.01, 0.1, 0.5, 1.0],
                         'kernel': ['rbf']},
             verbose=3)

In [12]:
# Mean cross-validated score of the best candidate found by the grid search.
grid_model.best_score_

0.9845678645678646

In [13]:
# Parameter combination that achieved the best cross-validated score.
grid_model.best_params_

{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}

In [None]:
# NOTE(review): none of these column names appear among the feature columns
# previewed earlier in this notebook (buying, maint, doors, persons, lug_boot,
# safety); the cell looks carried over from a different dataset — confirm
# whether it can simply be deleted.
#
# Each listed column is label-encoded in place with a shared encoder, exactly
# as before, but only when the column actually exists in X. Previously the
# first missing column raised AttributeError, which broke Restart & Run All.
extra_columns = [
    "battery_power", "blue", "clock_speed", "dual_sim", "fc",
    "four_g", "int_memory", "m_dep", "mobile_wt", "n_cores",
    "pc", "px_height", "px_width", "ram", "sc_h",
    "sc_w", "talk_time", "three_g", "touch_screen", "wifi",
]

enc = LabelEncoder()
for column in extra_columns:
    if column in X.columns:
        X[column] = enc.fit_transform(X[column])