In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer, make_column_selector
import os

In [2]:
# Load the vehicle data and separate the 'Class' target from the predictor columns.
vehicle = pd.read_csv("D:/Training/Academy/ML(Python)/Cases/Vehicle Silhouettes/Vehicle.csv")
y = vehicle['Class']
X = vehicle.drop(columns='Class')
# Hold out 30% of the rows for testing, stratified on y, with a fixed seed so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=25
)

#### Linear kernel without scaling

In [4]:
# Baseline: linear-kernel SVC fitted directly on the raw (unscaled) features.
svm = SVC(kernel="linear").fit(X_train, y_train)  # fit() hands back the fitted estimator
y_pred = svm.predict(X_test)
# Accuracy on the held-out test split.
print(accuracy_score(y_test, y_pred))

0.8464566929133859


In [5]:
# Same linear-kernel SVC on unscaled features, now explicitly requesting the
# one-vs-one ('ovo') decision-function shape.
# NOTE(review): the saved output shows the same accuracy as the default-shape
# run; the option appears to affect only the shape of decision_function(),
# not predict() - confirm against the scikit-learn SVC documentation.
svm = SVC(kernel="linear", decision_function_shape='ovo').fit(X_train, y_train)
y_pred = svm.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.8464566929133859


In [6]:
# Linear-kernel SVC behind a StandardScaler: sweep C and the decision-function
# shape, recording test-split accuracy for every combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
std_scaler = StandardScaler()  # one scaler object, placed in every pipeline built below
Cs = np.linspace(start=0.01, stop=5, num=15)
dfs = ['ovo', 'ovr']
scores = []
for penalty in Cs:
    for shape in dfs:
        svm = SVC(kernel='linear', C=penalty, decision_function_shape=shape)
        pipe = Pipeline(steps=[('SCL', std_scaler), ('SVM', svm)])
        y_pred = pipe.fit(X_train, y_train).predict(X_test)
        test_accuracy = accuracy_score(y_test, y_pred)
        scores.append([penalty, shape, test_accuracy])
df_scores = pd.DataFrame(data=scores, columns=['C', 'decision', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,decision,score
25,4.287143,ovr,0.84252
24,4.287143,ovo,0.84252
9,1.435714,ovr,0.838583
17,2.861429,ovr,0.838583
16,2.861429,ovo,0.838583
22,3.930714,ovo,0.838583
23,3.930714,ovr,0.838583
8,1.435714,ovo,0.838583
12,2.148571,ovo,0.834646
14,2.505,ovo,0.834646


#### Polynomial Kernel

In [7]:
# Polynomial-kernel SVC on standardized features: sweep the polynomial degree,
# C and the decision-function shape, recording test-split accuracy for each
# combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
Cs = np.linspace(0.01, 5, 10)
deg = [2, 3, 4]
dfs = ['ovo', 'ovr']
scores = []
for d in tqdm(deg):
    for c in Cs:
        for s in dfs:
            svm = SVC(kernel='poly', C=c, degree=d, decision_function_shape=s)
            # Build a fresh scaler for each pipeline instead of borrowing the
            # `std_scaler` object created in a different cell: this cell no
            # longer depends on that cell's state, and pipelines do not share
            # one mutable transformer. The scaler is refitted on X_train either
            # way, so the scores are unchanged.
            pipe = Pipeline([('SCL', StandardScaler()), ('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, s, d, accuracy_score(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['C', 'decision', 'degree', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values('score', ascending=False)

100%|██████████| 3/3 [00:01<00:00,  2.49it/s]


Unnamed: 0,C,decision,degree,score
39,5.0,ovr,3,0.791339
38,5.0,ovo,3,0.791339
36,4.445556,ovo,3,0.771654
37,4.445556,ovr,3,0.771654
34,3.891111,ovo,3,0.76378
35,3.891111,ovr,3,0.76378
33,3.336667,ovr,3,0.759843
32,3.336667,ovo,3,0.759843
31,2.782222,ovr,3,0.751969
29,2.227778,ovr,3,0.751969


#### Radial Kernel

In [8]:
# RBF-kernel SVC on standardized features: sweep gamma, C and the
# decision-function shape, recording test-split accuracy for each combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
Cs = np.linspace(0.01, 5, 10)
Gs = np.linspace(0.01, 5, 10)
dfs = ['ovo', 'ovr']
scores = []
for g in tqdm(Gs):
    for c in Cs:
        for s in dfs:
            svm = SVC(kernel='rbf', C=c, gamma=g, decision_function_shape=s)
            # Build a fresh scaler for each pipeline instead of borrowing the
            # `std_scaler` object created in a different cell: this cell no
            # longer depends on that cell's state, and pipelines do not share
            # one mutable transformer. The scaler is refitted on X_train either
            # way, so the scores are unchanged.
            pipe = Pipeline([('SCL', StandardScaler()), ('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, s, g, accuracy_score(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['C', 'decision', 'gamma', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values('score', ascending=False)

100%|██████████| 10/10 [00:07<00:00,  1.41it/s]


Unnamed: 0,C,decision,gamma,score
19,5.000000,ovr,0.010000,0.779528
18,5.000000,ovo,0.010000,0.779528
16,4.445556,ovo,0.010000,0.771654
17,4.445556,ovr,0.010000,0.771654
15,3.891111,ovr,0.010000,0.763780
...,...,...,...,...
140,0.010000,ovo,3.891111,0.255906
141,0.010000,ovr,3.891111,0.255906
160,0.010000,ovo,4.445556,0.255906
161,0.010000,ovr,4.445556,0.255906


#### Glass Identification

In [11]:
# Switch the working directory to the Glass Identification case folder, then
# read the CSV by its relative name.
# NOTE(review): os.chdir changes the working directory for every later cell.
os.chdir("D:/Training/Academy/ML(Python)/Cases/Glass_Identification")
glass = pd.read_csv("Glass.csv")
# 'Type' is the target; every remaining column is a predictor.
y = glass['Type']
X = glass.drop(columns='Type')
# Hold out 30% of the rows for testing, stratified on y, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=25
)

In [12]:
# Glass data, linear-kernel SVC behind a StandardScaler: sweep C and the
# decision-function shape, recording test-split accuracy for every combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
std_scaler = StandardScaler()  # one scaler object, placed in every pipeline built below
Cs = np.linspace(start=0.01, stop=5, num=15)
dfs = ['ovo', 'ovr']
scores = []
for penalty in Cs:
    for shape in dfs:
        svm = SVC(kernel='linear', C=penalty, decision_function_shape=shape)
        pipe = Pipeline(steps=[('SCL', std_scaler), ('SVM', svm)])
        y_pred = pipe.fit(X_train, y_train).predict(X_test)
        test_accuracy = accuracy_score(y_test, y_pred)
        scores.append([penalty, shape, test_accuracy])
df_scores = pd.DataFrame(data=scores, columns=['C', 'decision', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,decision,score
29,5.0,ovr,0.615385
26,4.643571,ovo,0.615385
2,0.366429,ovo,0.615385
3,0.366429,ovr,0.615385
28,5.0,ovo,0.615385
27,4.643571,ovr,0.615385
22,3.930714,ovo,0.6
21,3.574286,ovr,0.6
20,3.574286,ovo,0.6
19,3.217857,ovr,0.6


##### Polynomial

In [13]:
# Glass data, polynomial-kernel SVC on standardized features: sweep the
# polynomial degree, C and the decision-function shape, recording test-split
# accuracy for each combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
Cs = np.linspace(0.01, 5, 10)
deg = [2, 3, 4]
dfs = ['ovo', 'ovr']
scores = []
for d in tqdm(deg):
    for c in Cs:
        for s in dfs:
            svm = SVC(kernel='poly', C=c, degree=d, decision_function_shape=s)
            # Build a fresh scaler for each pipeline instead of borrowing the
            # `std_scaler` object created in a different cell: this cell no
            # longer depends on that cell's state, and pipelines do not share
            # one mutable transformer. The scaler is refitted on X_train either
            # way, so the scores are unchanged.
            pipe = Pipeline([('SCL', StandardScaler()), ('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, s, d, accuracy_score(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['C', 'decision', 'degree', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values('score', ascending=False)

100%|██████████| 3/3 [00:00<00:00,  7.42it/s]


Unnamed: 0,C,decision,degree,score
14,3.891111,ovo,2,0.630769
15,3.891111,ovr,2,0.630769
19,5.0,ovr,2,0.630769
18,5.0,ovo,2,0.630769
17,4.445556,ovr,2,0.630769
16,4.445556,ovo,2,0.630769
10,2.782222,ovo,2,0.615385
11,2.782222,ovr,2,0.615385
12,3.336667,ovo,2,0.615385
13,3.336667,ovr,2,0.615385


##### Radial

In [14]:
# Glass data, RBF-kernel SVC on standardized features: sweep gamma, C and the
# decision-function shape, recording test-split accuracy for each combination.
# NOTE(review): the settings are ranked on the test split itself, so the top
# score is not an independent estimate of generalization.
Cs = np.linspace(0.01, 5, 10)
Gs = np.linspace(0.01, 5, 10)
dfs = ['ovo', 'ovr']
scores = []
for g in tqdm(Gs):
    for c in Cs:
        for s in dfs:
            svm = SVC(kernel='rbf', C=c, gamma=g, decision_function_shape=s)
            # Build a fresh scaler for each pipeline instead of borrowing the
            # `std_scaler` object created in a different cell: this cell no
            # longer depends on that cell's state, and pipelines do not share
            # one mutable transformer. The scaler is refitted on X_train either
            # way, so the scores are unchanged.
            pipe = Pipeline([('SCL', StandardScaler()), ('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, s, g, accuracy_score(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['C', 'decision', 'gamma', 'score'])
# Highest accuracy first; the bare expression is what the cell displays.
df_scores.sort_values('score', ascending=False)

100%|██████████| 10/10 [00:01<00:00,  6.96it/s]


Unnamed: 0,C,decision,gamma,score
6,1.673333,ovo,0.010000,0.707692
7,1.673333,ovr,0.010000,0.707692
26,1.673333,ovo,0.564444,0.692308
27,1.673333,ovr,0.564444,0.692308
4,1.118889,ovo,0.010000,0.692308
...,...,...,...,...
121,0.010000,ovr,3.336667,0.353846
140,0.010000,ovo,3.891111,0.353846
141,0.010000,ovr,3.891111,0.353846
160,0.010000,ovo,4.445556,0.353846
