In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import matplotlib as plt
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
file_path = './data./Data Set for Chapter - Sheet1.csv'
df = pd.read_csv(file_path)
df.dropna(inplace=True)

In [3]:
df

Unnamed: 0,S.no,Age,Partners,Protection,Symptoms,Location,Education,Testing,STD Status
0,1,45.0,4,0,0,2,1,1,1
1,2,43.0,4,0,0,2,1,1,1
2,3,39.0,5,0,0,3,1,1,1
3,4,35.0,5,2,0,3,1,0,1
4,5,32.0,1,1,1,2,1,0,0
...,...,...,...,...,...,...,...,...,...
540,541,36.0,2,0,1,2,0,1,1
541,542,45.0,4,0,0,2,0,1,1
542,543,43.0,4,0,0,3,0,0,0
543,544,39.0,5,0,0,3,0,0,0


for protection usage:
0: never
1: sometimes
2: always

for STD testing history:
0: no
1: yes


In [4]:

x = df.loc[:,'Age':'Testing']
y = df['STD Status']

scaler = StandardScaler()
#x = scaler.fit_transform(x)

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=41)


In [5]:
x

Unnamed: 0,Age,Partners,Protection,Symptoms,Location,Education,Testing
0,45.0,4,0,0,2,1,1
1,43.0,4,0,0,2,1,1
2,39.0,5,0,0,3,1,1
3,35.0,5,2,0,3,1,0
4,32.0,1,1,1,2,1,0
...,...,...,...,...,...,...,...
540,36.0,2,0,1,2,0,1
541,45.0,4,0,0,2,0,1
542,43.0,4,0,0,3,0,0
543,39.0,5,0,0,3,0,0


In [6]:
param_grid = {'C': [0.1,0.2,0.15,0.3,0.05,0.25,0.001,0.01,1,2,3,4,5, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001,5,10,15],
              'kernel': ['rbf','linear'],
              }

rbf_linear_grid = GridSearchCV(SVC(), param_grid,verbose=10)

# fitting the model for grid search
rbf_linear_grid.fit(X_train, y_train)
print("\n \n")
print(rbf_linear_grid.best_params_)
print(rbf_linear_grid.best_estimator_)

Fitting 5 folds for each of 256 candidates, totalling 1280 fits
[CV 1/5; 1/256] START C=0.1, gamma=1, kernel=rbf................................
[CV 1/5; 1/256] END .C=0.1, gamma=1, kernel=rbf;, score=0.526 total time=   0.0s
[CV 2/5; 1/256] START C=0.1, gamma=1, kernel=rbf................................
[CV 2/5; 1/256] END .C=0.1, gamma=1, kernel=rbf;, score=0.526 total time=   0.0s
[CV 3/5; 1/256] START C=0.1, gamma=1, kernel=rbf................................
[CV 3/5; 1/256] END .C=0.1, gamma=1, kernel=rbf;, score=0.526 total time=   0.0s
[CV 4/5; 1/256] START C=0.1, gamma=1, kernel=rbf................................
[CV 4/5; 1/256] END .C=0.1, gamma=1, kernel=rbf;, score=0.539 total time=   0.0s
[CV 5/5; 1/256] START C=0.1, gamma=1, kernel=rbf................................
[CV 5/5; 1/256] END .C=0.1, gamma=1, kernel=rbf;, score=0.539 total time=   0.0s
[CV 1/5; 2/256] START C=0.1, gamma=1, kernel=linear.............................
[CV 1/5; 2/256] END C=0.1, gamma=1, kernel=li

In [7]:
param_grid = {'C': [500,1500, 750,1000, 5000,2000,3000],
              'kernel': ['poly',],
              'degree': [1,2,3,4,5,6,7,8,9,10],}

poly_grid = GridSearchCV(SVC(), param_grid,verbose=10)

# fitting the model for grid search
poly_grid.fit(X_train, y_train)
print("\n \n")
print(poly_grid.best_params_)
print(poly_grid.best_estimator_)

Fitting 5 folds for each of 70 candidates, totalling 350 fits
[CV 1/5; 1/70] START C=500, degree=1, kernel=poly...............................
[CV 1/5; 1/70] END C=500, degree=1, kernel=poly;, score=0.855 total time=   0.0s
[CV 2/5; 1/70] START C=500, degree=1, kernel=poly...............................
[CV 2/5; 1/70] END C=500, degree=1, kernel=poly;, score=0.855 total time=   0.0s
[CV 3/5; 1/70] START C=500, degree=1, kernel=poly...............................
[CV 3/5; 1/70] END C=500, degree=1, kernel=poly;, score=0.816 total time=   0.0s
[CV 4/5; 1/70] START C=500, degree=1, kernel=poly...............................
[CV 4/5; 1/70] END C=500, degree=1, kernel=poly;, score=0.895 total time=   0.0s
[CV 5/5; 1/70] START C=500, degree=1, kernel=poly...............................
[CV 5/5; 1/70] END C=500, degree=1, kernel=poly;, score=0.868 total time=   0.0s
[CV 1/5; 2/70] START C=500, degree=2, kernel=poly...............................
[CV 1/5; 2/70] END C=500, degree=2, kernel=poly

In [8]:
#score
print("score of the rbf and linear kernel model: ", rbf_linear_grid.score(X_test,y_test))

score of the rbf and linear kernel model:  0.8658536585365854


In [9]:
#score
print("score of the polynomial kernel model: ", poly_grid.score(X_test,y_test))

score of the polynomial kernel model:  0.8536585365853658


In [10]:
print("rbf and linear kernel grid")
rbf_linear_grid_predictions = rbf_linear_grid.predict(X_test)
# print classification report
print(classification_report(y_test, rbf_linear_grid_predictions))

rbf and linear kernel grid
              precision    recall  f1-score   support

           0       0.82      0.91      0.86        74
           1       0.91      0.83      0.87        90

    accuracy                           0.87       164
   macro avg       0.87      0.87      0.87       164
weighted avg       0.87      0.87      0.87       164



In [11]:
print("polynomial kernel grid")
rbf_linear_grid_predictions = rbf_linear_grid.predict(X_test)

# print classification report
print(classification_report(y_test, rbf_linear_grid_predictions))

polynomial kernel grid
              precision    recall  f1-score   support

           0       0.82      0.91      0.86        74
           1       0.91      0.83      0.87        90

    accuracy                           0.87       164
   macro avg       0.87      0.87      0.87       164
weighted avg       0.87      0.87      0.87       164

