In [19]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.pipeline import Pipeline
from tqdm import tqdm

In [8]:
glass = pd.read_csv("C:/Python/Cases/Glass_Identification/Glass.csv")
y = glass['Type']
X = glass.drop('Type', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25,
                                                    stratify=y)

In [11]:
svm = SVC(kernel='linear', C=1, decision_function_shape='ovr')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
print( classification_report(y_test, y_pred) )

                                      precision    recall  f1-score   support

    building_windows_float_processed       0.64      0.76      0.70        21
building_windows_non_float_processed       0.61      0.74      0.67        23
                          containers       0.40      0.50      0.44         4
                           headlamps       1.00      0.67      0.80         9
                           tableware       1.00      0.33      0.50         3
     vehicle_windows_float_processed       0.00      0.00      0.00         5

                            accuracy                           0.65        65
                           macro avg       0.61      0.50      0.52        65
                        weighted avg       0.63      0.65      0.62        65



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [12]:
svm.decision_function(X_test)

array([[ 5.29278099,  4.298937  , -0.30691158,  0.70802884,  2.05800162,
         3.23952419],
       [ 4.28119527,  5.29363161, -0.2903951 ,  1.75633485,  0.71970439,
         3.22133033],
       [ 4.2910082 ,  5.29030707, -0.28456425,  1.73206112,  0.71357976,
         3.23571882],
       [ 4.2891609 ,  5.29108752,  0.72330663,  0.72457003,  0.71643957,
         3.21848647],
       [ 4.2883828 ,  5.29054543, -0.29497612,  1.75351267,  0.722913  ,
         3.2307326 ],
       [ 5.29871608,  4.2960304 , -0.29952916,  0.71943733,  1.71372487,
         3.26234829],
       [ 5.28794473,  4.29700451, -0.29889564,  0.70636814,  1.89486817,
         3.2228325 ],
       [ 5.29282246,  4.29189026, -0.29966796,  0.729708  ,  1.74399441,
         3.23007527],
       [ 5.29319911,  4.29088182, -0.2908677 ,  0.73357027,  1.71542858,
         3.23837078],
       [ 4.26456208,  5.29266242, -0.28622185,  0.73662004,  1.81908819,
         2.99493212],
       [ 3.26366692,  5.29312761, -0.2759505 ,  4.

In [15]:
Cs = np.linspace(0.001, 5, 20) 
dfs = ['ovo','ovr']
scores = []
for c in Cs:
    for d in dfs:
        svm = SVC(kernel='linear', C=c, decision_function_shape=d)
        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)
        scores.append([c, d ,accuracy_score(y_test, y_pred)] )

df_scores = pd.DataFrame( scores, columns=['C','dfs' ,'score'] )
df_scores.sort_values( 'score', ascending=False )

Unnamed: 0,C,dfs,score
11,1.316526,ovr,0.661538
10,1.316526,ovo,0.661538
39,5.0,ovr,0.646154
9,1.053421,ovr,0.646154
17,2.105842,ovr,0.646154
16,2.105842,ovo,0.646154
15,1.842737,ovr,0.646154
14,1.842737,ovo,0.646154
13,1.579632,ovr,0.646154
12,1.579632,ovo,0.646154


In [16]:
scaler = StandardScaler()
Cs = np.linspace(0.001, 5, 20) 
dfs = ['ovo','ovr']
scores = []
for c in Cs:
    for d in dfs:
        svm = SVC(kernel='linear', C=c, decision_function_shape=d)
        pipe = Pipeline([('SCL',scaler),('SVM',svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([c, d ,accuracy_score(y_test, y_pred)] )

df_scores = pd.DataFrame( scores, columns=['C','dfs' ,'score'] )
df_scores.sort_values( 'score', ascending=False )

Unnamed: 0,C,dfs,score
39,5.0,ovr,0.615385
2,0.264105,ovo,0.615385
3,0.264105,ovr,0.615385
38,5.0,ovo,0.615385
37,4.736895,ovr,0.615385
36,4.736895,ovo,0.615385
17,2.105842,ovr,0.6
26,3.421368,ovo,0.6
25,3.158263,ovr,0.6
24,3.158263,ovo,0.6


In [20]:
scaler = StandardScaler()
Cs = np.linspace(0.001, 5, 20) 
Gs = np.linspace(0.001, 5, 20) 
dfs = ['ovo','ovr']
scores = []
for c in tqdm(Cs):
    for g in Gs:
        for d in dfs:
            svm = SVC(kernel='rbf', C=c, decision_function_shape=d, gamma=g)
            pipe = Pipeline([('SCL',scaler),('SVM',svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, g, d ,accuracy_score(y_test, y_pred)] )

df_scores = pd.DataFrame( scores, columns=['C','gamma' ,'dfs' ,'score'] )
df_scores.sort_values( 'score', ascending=False )

100%|██████████| 20/20 [00:05<00:00,  3.53it/s]


Unnamed: 0,C,gamma,dfs,score
325,2.105842,0.527211,ovr,0.707692
443,2.895158,0.264105,ovr,0.707692
323,2.105842,0.264105,ovr,0.707692
322,2.105842,0.264105,ovo,0.707692
362,2.368947,0.264105,ovo,0.707692
...,...,...,...,...
32,0.001000,4.210684,ovo,0.353846
31,0.001000,3.947579,ovr,0.353846
30,0.001000,3.947579,ovo,0.353846
29,0.001000,3.684474,ovr,0.353846


In [32]:
from ucimlrepo import fetch_ucirepo 
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [31]:
car_evaluation = fetch_ucirepo(id=19) 

X = car_evaluation.data.features 
y = car_evaluation.data.targets 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25,
                                                    stratify=y)

In [30]:
le = LabelEncoder()
y = le.fit_transform( y['class'] )
le.classes_

array(['acc', 'good', 'unacc', 'vgood'], dtype=object)

In [33]:
ohe = OneHotEncoder(drop='first')
Cs = np.linspace(0.001, 5, 20) 
dfs = ['ovo','ovr']
scores = []
for c in Cs:
    for d in dfs:
        svm = SVC(kernel='linear', C=c, decision_function_shape=d)
        pipe = Pipeline([('OHE',ohe),('SVM',svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([c, d ,accuracy_score(y_test, y_pred)] )

df_scores = pd.DataFrame( scores, columns=['C','dfs' ,'score'] )
df_scores.sort_values( 'score', ascending=False )

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Unnamed: 0,C,dfs,score
20,2.632053,ovo,0.932563
21,2.632053,ovr,0.932563
31,3.947579,ovr,0.932563
30,3.947579,ovo,0.932563
38,5.0,ovo,0.928709
37,4.736895,ovr,0.928709
36,4.736895,ovo,0.928709
35,4.473789,ovr,0.928709
34,4.473789,ovo,0.928709
39,5.0,ovr,0.928709


In [35]:
ohe = OneHotEncoder(drop='first')
Cs = np.linspace(0.001, 5, 20) 
Gs = np.linspace(0.001, 5, 20) 
dfs = ['ovo','ovr']
scores = []
for c in tqdm(Cs):
    for g in Gs:
        for d in dfs:
            svm = SVC(kernel='rbf', C=c, decision_function_shape=d, gamma=g)
            pipe = Pipeline([('OHE',ohe),('SVM',svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([c, g ,d ,accuracy_score(y_test, y_pred)] )

df_scores = pd.DataFrame( scores, columns=['C','gamma','dfs' ,'score'] )
df_scores.sort_values( 'score', ascending=False )

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Unnamed: 0,C,gamma,dfs,score
762,5.000000,0.264105,ovo,0.982659
763,5.000000,0.264105,ovr,0.982659
723,4.736895,0.264105,ovr,0.978805
682,4.473789,0.264105,ovo,0.978805
603,3.947579,0.264105,ovr,0.978805
...,...,...,...,...
310,1.842737,3.947579,ovo,0.699422
311,1.842737,3.947579,ovr,0.699422
312,1.842737,4.210684,ovo,0.699422
313,1.842737,4.210684,ovr,0.699422
