# SVC

# Linear Kernel

In [25]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.svm import  SVC


## Wisconsin (Breast Cancer) Dataset

In [2]:
wisconsin = pd.read_csv("./Cases/Wisconsin/BreastCancer.csv", index_col=0)
wisconsin.head()

Unnamed: 0_level_0,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
61634,5,4,3,1,2,2,2,3,1,Benign
63375,9,1,2,6,4,10,7,7,2,Malignant
76389,10,4,7,2,2,8,6,1,1,Malignant
95719,6,10,10,10,8,10,7,10,7,Malignant
128059,1,1,1,1,2,5,5,1,1,Benign


In [3]:
X = wisconsin.drop('Class', axis=1)
y = wisconsin['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.3, random_state=24, stratify=y)


In [4]:
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9714285714285714


In [5]:
Cs = [0.001,0.5,1,3,5,10]
scores = []
for c in Cs:
    svm = SVC(kernel='linear', C=c)
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

i_max = np.argmax(scores)
print("Best k : ", Cs[i_max])
print("Best score: ", scores[i_max])

Best k :  0.5
Best score:  0.9714285714285714


## HR Dataset

In [6]:
hr = pd.read_csv("./Cases/human-resources-analytics/HR_comma_sep.csv")
hr.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.1,0.77,6,247,4,0,1,0,sales,low
3,0.92,0.85,5,259,5,0,1,0,sales,low
4,0.89,1.0,5,224,5,0,1,0,sales,low


In [7]:
X = hr.drop("left", axis=1)
y = hr['left']

In [8]:
X_dum = pd.get_dummies(X, drop_first=True)

In [9]:
# X_train, X_test, y_train, y_test = train_test_split(X_dum,y, test_size=0.3, random_state=24, stratify=y)
# Cs = [0.001,0.5,1,3,5,10]
# scores = []
# for c in Cs:
#     svm = SVC(kernel='linear', C=c)
#     svm.fit(X_train, y_train)
#     y_pred = svm.predict(X_test)
#     scores.append(accuracy_score(y_test, y_pred))

# i_max = np.argmax(scores)
# print("Best k : ", Cs[i_max])
# print("Best score: ", scores[i_max])

## Kyphosis Dataset

In [10]:
kyp = pd.read_csv("./Cases/Kyphosis/Kyphosis.csv")
kyp.head()

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [11]:
from tqdm import tqdm

In [12]:
X = kyp.drop('Kyphosis', axis = 1)
y = kyp['Kyphosis']
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=24, stratify=y)

In [13]:

Cs = [0.001,0.5,1,3,5,10]
scores = []
for c in tqdm(range(len(Cs))):
    svm = SVC(kernel='linear', C=Cs[c])
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

i_max = np.argmax(scores)
print("Best k : ", Cs[i_max])
print("Best score: ", scores[i_max])

100%|██████████| 6/6 [00:00<00:00, 38.13it/s]

Best k :  0.001
Best score:  0.8





### Scaling

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [20]:
scaler = MinMaxScaler()
Cs = [0.001,0.5,1,2,3,5,10]
scores = []
for c in tqdm(range(len(Cs))):
    svm = SVC(kernel='linear', C=Cs[c])
    pipe = Pipeline([('SCL', scaler), ('SVM', svm)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

i_max = np.argmax(scores)
print("Best k : ", Cs[i_max])
print("Best score: ", scores[i_max])




100%|██████████| 7/7 [00:00<00:00, 137.34it/s]

Best k :  2
Best score:  0.88





In [21]:
scaler = StandardScaler()
Cs = [0.001,0.5,1,2,3,5,10]
scores = []
for c in tqdm(range(len(Cs))):
    svm = SVC(kernel='linear', C=Cs[c])
    pipe = Pipeline([('SCL', scaler), ('SVM', svm)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

i_max = np.argmax(scores)
print("Best k : ", Cs[i_max])
print("Best score: ", scores[i_max])

100%|██████████| 7/7 [00:00<00:00, 112.98it/s]

Best k :  0.001
Best score:  0.8





# Radial Basis Function (kernel = 'rbf)

## Kyphosis Dataset

In [38]:
Cs = np.linspace(0.001,4,10)
Gs = np.linspace(0.001,4,10)
scaler = MinMaxScaler()
scores = []

for i in range(len(Cs)):
    for j in range(len(Gs)):
    
        svm = SVC(kernel='rbf', C=Cs[i], gamma=Gs[j])
        pipe = Pipeline([('SCL', scaler), ('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([i,j,f1_score(y_test, y_pred, pos_label='present')])
scores_df = pd.DataFrame(scores, columns=['C','Gamma','F1_scores'])
scores_df = scores_df.sort_values('F1_scores', ascending=False)
scores_df.head()


Unnamed: 0,C,Gamma,F1_scores
99,9,9,0.615385
77,7,7,0.615385
34,3,4,0.615385
36,3,6,0.615385
37,3,7,0.615385


## Sonar Dataset

In [39]:
sonar = pd.read_csv("./Cases/Sonar/Sonar.csv")
sonar.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V52,V53,V54,V55,V56,V57,V58,V59,V60,Class
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [40]:
X = sonar.drop('Class', axis=1)
y = sonar['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [43]:
Cs = np.linspace(0.001,4,10)
Gs = np.linspace(0.001,4,10)
scores = []

for i in range(len(Cs)):
    for j in range(len(Gs)):
        svm = SVC(kernel='rbf', C=Cs[i], gamma=Gs[j])
        pipe = Pipeline([('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i],Gs[j],f1_score(y_test, y_pred, pos_label='R')])
scores_df = pd.DataFrame(scores, columns=['C','Gamma','F1_scores'])
scores_df = scores_df.sort_values('F1_scores', ascending=False)
scores_df.head()

Unnamed: 0,C,Gamma,F1_scores
57,2.222667,3.111333,0.881356
66,2.667,2.667,0.881356
37,1.334,3.111333,0.881356
87,3.555667,3.111333,0.881356
46,1.778333,2.667,0.881356


# Polynomial Kernel

## Kyphosis Dataset

In [47]:
X = kyp.drop('Kyphosis', axis = 1)
y = kyp['Kyphosis']
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=24, stratify=y)

In [48]:
Cs = np.linspace(0.001,4,10)
Ds = [2,3,4]
scaler = MinMaxScaler()
scores = []

for i in range(len(Cs)):
    for j in range(len(Ds)):
    
        svm = SVC(kernel='poly', C=Cs[i], degree=Ds[j])
        pipe = Pipeline([('SCL', scaler), ('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i],Ds[j],f1_score(y_test, y_pred, pos_label='present')])
scores_df = pd.DataFrame(scores, columns=['C','Gamma','F1_scores'])
scores_df = scores_df.sort_values('F1_scores', ascending=False)
scores_df.head()

Unnamed: 0,C,Gamma,F1_scores
27,4.0,2,0.4
24,3.555667,2,0.4
21,3.111333,2,0.4
15,2.222667,2,0.285714
13,1.778333,3,0.285714


## Sonar Dataset


In [49]:
X = sonar.drop('Class', axis=1)
y = sonar['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [52]:
Cs = np.linspace(0.001,4,10)
Ds = [2,3,4]

scores = []

for i in range(len(Cs)):
    for j in range(len(Ds)):
    
        svm = SVC(kernel='poly', C=Cs[i], degree=Ds[j])
        pipe = Pipeline([('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i],Ds[j],f1_score(y_test, y_pred, pos_label='R')])
scores_df = pd.DataFrame(scores, columns=['C','Gamma','F1_scores'])
scores_df = scores_df.sort_values('F1_scores', ascending=False)
scores_df.head()

Unnamed: 0,C,Gamma,F1_scores
4,0.445333,3,0.862069
9,1.334,2,0.833333
15,2.222667,2,0.827586
8,0.889667,4,0.827586
21,3.111333,2,0.827586


#  OVO and OVR Classification

## Glass dataset

In [59]:
glass = pd.read_csv("./Cases/Glass Identification/Glass.csv")
glass.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,building_windows_float_processed
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,building_windows_float_processed
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,building_windows_float_processed
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,building_windows_float_processed
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,building_windows_float_processed


In [60]:
X = glass.drop('Type', axis=1)
y = glass['Type']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=24, stratify=y)

In [61]:
scaler = MinMaxScaler()
Cs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for d in DFs:
        svm = SVC(kernel='linear', C=Cs[i], decision_function_shape=d)
        pipe = Pipeline([('SCL',scaler),('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,DFS,accuracy
10,2.222667,ovo,0.584615
11,2.222667,ovr,0.584615
18,4.0,ovo,0.584615
17,3.555667,ovr,0.584615
16,3.555667,ovo,0.584615


In [62]:
scaler = StandardScaler()
Cs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for d in DFs:
        svm = SVC(kernel='linear', C=Cs[i], decision_function_shape=d)
        pipe = Pipeline([('SCL',scaler),('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,DFS,accuracy
19,4.0,ovr,0.630769
12,2.667,ovo,0.630769
18,4.0,ovo,0.630769
17,3.555667,ovr,0.630769
16,3.555667,ovo,0.630769


## Image Segmentation

In [63]:
image_df = pd.read_csv("./Cases/Image Segmentation/Image_Segmentation.csv")
image_df.head()

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [64]:
X = image_df.drop('Class', axis=1)
y = image_df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [65]:
scaler = StandardScaler()
Cs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for d in DFs:
        svm = SVC(kernel='linear', C=Cs[i], decision_function_shape=d)
        pipe = Pipeline([('SCL',scaler),('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,DFS,accuracy
19,4.0,ovr,0.873016
2,0.445333,ovo,0.873016
3,0.445333,ovr,0.873016
4,0.889667,ovo,0.873016
5,0.889667,ovr,0.873016


In [66]:
scaler = MinMaxScaler()
Cs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for d in DFs:
        svm = SVC(kernel='linear', C=Cs[i], decision_function_shape=d)
        pipe = Pipeline([('SCL',scaler),('SVM', svm)])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        scores.append([Cs[i], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,DFS,accuracy
19,4.0,ovr,0.857143
18,4.0,ovo,0.857143
17,3.555667,ovr,0.857143
16,3.555667,ovo,0.857143
15,3.111333,ovr,0.84127


### Radial 

In [67]:
scaler = StandardScaler()
Cs = np.linspace(0.001,4,10)
Gs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for j in range(len(Gs)):
        for d in DFs:
            svm = SVC(kernel='rbf', C=Cs[i],gamma=Gs[j] , decision_function_shape=d)
            pipe = Pipeline([('SCL',scaler),('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([Cs[i],Gs[j], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','G','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,G,DFS,accuracy
146,3.111333,1.334,ovo,0.84127
166,3.555667,1.334,ovo,0.84127
147,3.111333,1.334,ovr,0.84127
167,3.555667,1.334,ovr,0.84127
186,4.0,1.334,ovo,0.84127


In [68]:
scaler = MinMaxScaler()
Cs = np.linspace(0.001,4,10)
Gs = np.linspace(0.001,4,10)
DFs = ['ovo','ovr']
scores = []
for i in range(len(Cs)):
    for j in range(len(Gs)):
        for d in DFs:
            svm = SVC(kernel='rbf', C=Cs[i],gamma=Gs[j] , decision_function_shape=d)
            pipe = Pipeline([('SCL',scaler),('SVM', svm)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([Cs[i],Gs[j], d, accuracy_score(y_test, y_pred)])

scores_df = pd.DataFrame(scores, columns=['C','G','DFS','accuracy'])
scores_df = scores_df.sort_values('accuracy', ascending=False)
scores_df.head()

Unnamed: 0,C,G,DFS,accuracy
166,3.555667,1.334,ovo,0.888889
186,4.0,1.334,ovo,0.888889
187,4.0,1.334,ovr,0.888889
147,3.111333,1.334,ovr,0.888889
167,3.555667,1.334,ovr,0.888889
