In [16]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.svm import SVC

In [3]:
# Root folder that holds the case-study datasets. It defaults to the original
# location; set the CASES_DIR environment variable to run the notebook on a
# machine where the data lives somewhere else.
CASES_DIR = Path(os.environ.get("CASES_DIR", "C:/Python/Cases"))

# Load the Kyphosis data and separate the label column from the predictors.
kyph = pd.read_csv(CASES_DIR / "Kyphosis" / "Kyphosis.csv")
y = kyph['Kyphosis']
X = kyph.drop('Kyphosis', axis=1)

In [5]:
# Hold out 30% of the rows for evaluation. The split is stratified on the
# label and uses a fixed seed so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=25,
)

In [8]:
# Baseline: linear-kernel SVM with C=1, fitted on the unscaled training data.
svm = SVC(C=1, kernel='linear').fit(X_train, y_train)
y_pred = svm.predict(X_test)

# Full per-class report, then the F1 of the 'present' class on its own.
print(classification_report(y_test, y_pred))
print(f1_score(y_test, y_pred, pos_label='present'))

              precision    recall  f1-score   support

      absent       0.87      1.00      0.93        20
     present       1.00      0.40      0.57         5

    accuracy                           0.88        25
   macro avg       0.93      0.70      0.75        25
weighted avg       0.90      0.88      0.86        25

0.5714285714285714


In [13]:
# Sweep 20 evenly spaced values of C for a linear-kernel SVM on the unscaled
# features and record the F1 of the 'present' class for each one.
# NOTE(review): every C is scored on the test split, so the best score is an
# optimistic estimate; consider cross-validating on X_train instead.
Cs = np.linspace(0.001, 5, num=20)
scores = []
for c in Cs:
    svm = SVC(C=c, kernel='linear').fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    scores.append(
        [c, f1_score(y_test, y_pred, pos_label='present')]
    )

# Tabulate the sweep and show the best-scoring C values first.
df_scores = pd.DataFrame(scores, columns=['C', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,score
10,2.632053,0.571429
1,0.264105,0.571429
18,4.736895,0.571429
17,4.473789,0.571429
16,4.210684,0.571429
15,3.947579,0.571429
14,3.684474,0.571429
13,3.421368,0.571429
12,3.158263,0.571429
11,2.895158,0.571429


In [15]:
# Same C sweep as before, but the features go through a StandardScaler inside
# a Pipeline before reaching the linear SVM. The pipeline is fitted on the
# training split only and then used to predict the test split.
scaler = StandardScaler()
Cs = np.linspace(0.001, 5, num=20)
scores = []
for c in Cs:
    svm = SVC(C=c, kernel='linear')
    pipe = Pipeline(steps=[('SCL', scaler), ('SVM', svm)]).fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores.append(
        [c, f1_score(y_test, y_pred, pos_label='present')]
    )

# Tabulate the sweep and show the best-scoring C values first.
df_scores = pd.DataFrame(scores, columns=['C', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,score
10,2.632053,0.571429
11,2.895158,0.571429
18,4.736895,0.571429
17,4.473789,0.571429
16,4.210684,0.571429
15,3.947579,0.571429
14,3.684474,0.571429
13,3.421368,0.571429
12,3.158263,0.571429
19,5.0,0.571429


In [18]:
# Repeat the C sweep with a MinMaxScaler in front of the linear SVM so the
# effect of the scaling method can be compared with the StandardScaler run.
scaler = MinMaxScaler()
Cs = np.linspace(0.001, 5, num=20)
scores = []
for c in Cs:
    svm = SVC(C=c, kernel='linear')
    pipe = Pipeline(steps=[('SCL', scaler), ('SVM', svm)]).fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores.append(
        [c, f1_score(y_test, y_pred, pos_label='present')]
    )

# Tabulate the sweep and show the best-scoring C values first.
df_scores = pd.DataFrame(scores, columns=['C', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,score
0,0.001,0.0
1,0.264105,0.0
18,4.736895,0.0
17,4.473789,0.0
16,4.210684,0.0
15,3.947579,0.0
14,3.684474,0.0
13,3.421368,0.0
12,3.158263,0.0
11,2.895158,0.0


### Breast Cancer 

In [20]:
# Root folder that holds the case-study datasets. It defaults to the original
# location; set the CASES_DIR environment variable to run the notebook on a
# machine where the data lives somewhere else.
CASES_DIR = Path(os.environ.get("CASES_DIR", "C:/Python/Cases"))

# Load the Wisconsin breast-cancer data, using the first CSV column as the index.
bcancer = pd.read_csv(CASES_DIR / "Wisconsin" / "BreastCancer.csv", index_col=0)
X = bcancer.drop('Class', axis=1)
y = bcancer['Class']

# Hold out 30% of the rows for evaluation, stratified on the label and with a
# fixed seed. This rebinds X/y and the train/test names used by earlier cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25,
                                                    stratify=y)

In [24]:
# Sweep 50 evenly spaced values of C for a linear-kernel SVM on the unscaled
# features and record the F1 of the 'Malignant' class for each one.
# NOTE(review): every C is scored on the test split, so the best score is an
# optimistic estimate; consider cross-validating on X_train instead.
Cs = np.linspace(0.001, 5, num=50)
scores = []
for c in Cs:
    svm = SVC(C=c, kernel='linear').fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    scores.append(
        [c, f1_score(y_test, y_pred, pos_label='Malignant')]
    )

# Tabulate the sweep and show the best-scoring C values first.
df_scores = pd.DataFrame(scores, columns=['C', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,score
25,2.55151,0.957746
26,2.653531,0.957746
28,2.857571,0.957746
29,2.959592,0.957746
30,3.061612,0.957746
31,3.163633,0.957746
32,3.265653,0.957746
33,3.367673,0.957746
34,3.469694,0.957746
35,3.571714,0.957746


In [25]:
# C sweep for a linear SVM with the features standardised inside a Pipeline.
# The pipeline is fitted on the training split only, then the F1 of the
# 'Malignant' class is measured on the test split.
scaler = StandardScaler()
Cs = np.linspace(0.001, 5, num=20)
scores = []
for c in Cs:
    svm = SVC(C=c, kernel='linear')
    pipe = Pipeline(steps=[('SCL', scaler), ('SVM', svm)]).fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    scores.append(
        [c, f1_score(y_test, y_pred, pos_label='Malignant')]
    )

# Tabulate the sweep and show the best-scoring C values first.
df_scores = pd.DataFrame(scores, columns=['C', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,score
10,2.632053,0.957746
11,2.895158,0.957746
18,4.736895,0.957746
17,4.473789,0.957746
16,4.210684,0.957746
15,3.947579,0.957746
14,3.684474,0.957746
13,3.421368,0.957746
12,3.158263,0.957746
19,5.0,0.957746


### Radial Basis Function / Radial Kernel

In [30]:
# Baseline RBF-kernel SVM with fixed C=1 and gamma=0.5 on the unscaled features.
svm = SVC(C=1, gamma=0.5, kernel='rbf').fit(X_train, y_train)
y_pred = svm.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

      Benign       1.00      0.88      0.93       138
   Malignant       0.81      1.00      0.89        72

    accuracy                           0.92       210
   macro avg       0.90      0.94      0.91       210
weighted avg       0.93      0.92      0.92       210



In [31]:
# Exhaustive 20 x 20 grid over C and gamma for an RBF-kernel SVM (400 fits).
# Each row records one (C, gamma) pair with the F1 of the 'Malignant' class.
# NOTE(review): the grid is scored on the test split, so the best score is an
# optimistic estimate; consider cross-validating on X_train instead.
Cs = np.linspace(0.001, 5, num=20)
Gs = np.linspace(0.001, 5, num=20)
scores = []
for c in Cs:
    for g in Gs:
        svm = SVC(C=c, gamma=g, kernel='rbf').fit(X_train, y_train)
        y_pred = svm.predict(X_test)
        scores.append(
            [c, g, f1_score(y_test, y_pred, pos_label='Malignant')]
        )

# Tabulate the grid and show the best-scoring combinations first.
df_scores = pd.DataFrame(scores, columns=['C', 'gamma', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,C,gamma,score
200,2.632053,0.001000,0.957143
140,1.842737,0.001000,0.957143
40,0.527211,0.001000,0.957143
360,4.736895,0.001000,0.957143
340,4.473789,0.001000,0.957143
...,...,...,...
29,0.264105,2.368947,0.000000
28,0.264105,2.105842,0.000000
27,0.264105,1.842737,0.000000
26,0.264105,1.579632,0.000000


### Polynomial Kernel

In [33]:
# Degree-4 polynomial-kernel SVM with C=1; gamma is not passed, so the
# library default applies.
svm = SVC(C=1, degree=4, kernel='poly').fit(X_train, y_train)
y_pred = svm.predict(X_test)

# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      Benign       0.93      0.99      0.96       138
   Malignant       0.98      0.85      0.91        72

    accuracy                           0.94       210
   macro avg       0.95      0.92      0.93       210
weighted avg       0.95      0.94      0.94       210

