In [1]:
import csv

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score, make_scorer, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import matplotlib as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from scipy.stats import uniform
from scipy import stats

In [2]:
file_path = './data./Data Set for Chapter - Sheet1.csv'
df = pd.read_csv(file_path)
df.dropna(inplace=True)

In [3]:
df

Unnamed: 0,S.no,Age,Intimate Partners,"Protection Usage (0: Never, 1: Sometimes, 2: Always)",Symptoms,Location,Education,Testing,STD Status
0,1,45.0,4,0,0,2,1,1,1
1,2,43.0,4,0,0,2,1,1,1
2,3,39.0,5,0,0,3,1,1,1
3,4,35.0,5,2,0,3,1,0,1
4,5,32.0,1,1,1,2,1,0,0
...,...,...,...,...,...,...,...,...,...
540,541,36.0,2,0,1,2,0,1,1
541,542,45.0,4,0,0,2,0,1,1
542,543,43.0,4,0,0,3,0,0,0
543,544,39.0,5,0,0,3,0,0,0


for protection usage:
0: never
1: sometimes
2: always

for STD testing history:
0: no
1: yes


In [4]:

x = df.loc[:,'Age':'Testing']
y = df['STD Status']

scaler = StandardScaler()
x = scaler.fit_transform(x)
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=41)


In [5]:
param_grid = {'C': [0.1,0.001,0.01,1,5, 10, 50,100, 1000],
              'gamma': [1, 0.1, 0.01,0.001,10,50,100],
              'kernel': ['rbf','linear'],
              }

rbf_linear_grid = GridSearchCV(SVC(), param_grid,verbose=10)

# fitting the model for grid search
rbf_linear_grid.fit(X_train, y_train)
print("\n \n")
print(rbf_linear_grid.best_params_)


Fitting 5 folds for each of 126 candidates, totalling 630 fits
[CV 1/5; 1/126] START C=0.1, gamma=1, kernel=rbf................................
[CV 1/5; 1/126] END .C=0.1, gamma=1, kernel=rbf;, score=0.793 total time=   0.0s
[CV 2/5; 1/126] START C=0.1, gamma=1, kernel=rbf................................
[CV 2/5; 1/126] END .C=0.1, gamma=1, kernel=rbf;, score=0.713 total time=   0.0s
[CV 3/5; 1/126] START C=0.1, gamma=1, kernel=rbf................................
[CV 3/5; 1/126] END .C=0.1, gamma=1, kernel=rbf;, score=0.759 total time=   0.0s
[CV 4/5; 1/126] START C=0.1, gamma=1, kernel=rbf................................
[CV 4/5; 1/126] END .C=0.1, gamma=1, kernel=rbf;, score=0.770 total time=   0.0s
[CV 5/5; 1/126] START C=0.1, gamma=1, kernel=rbf................................
[CV 5/5; 1/126] END .C=0.1, gamma=1, kernel=rbf;, score=0.713 total time=   0.0s
[CV 1/5; 2/126] START C=0.1, gamma=1, kernel=linear.............................
[CV 1/5; 2/126] END C=0.1, gamma=1, kernel=lin

In [6]:
param_grid = {'C': [0.1, 0.001, 0.01, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01,0.001, 0.0001],
              'kernel': ['poly',],
              'degree': [1,2,3,4,5],}

poly_grid = GridSearchCV(SVC(), param_grid,verbose=10)

# fitting the model for grid search
poly_grid.fit(X_train, y_train)
print("\n \n")
print(poly_grid.best_params_)
print(poly_grid.best_estimator_)

Fitting 5 folds for each of 175 candidates, totalling 875 fits
[CV 1/5; 1/175] START C=0.1, degree=1, gamma=1, kernel=poly.....................
[CV 1/5; 1/175] END C=0.1, degree=1, gamma=1, kernel=poly;, score=0.862 total time=   0.0s
[CV 2/5; 1/175] START C=0.1, degree=1, gamma=1, kernel=poly.....................
[CV 2/5; 1/175] END C=0.1, degree=1, gamma=1, kernel=poly;, score=0.851 total time=   0.0s
[CV 3/5; 1/175] START C=0.1, degree=1, gamma=1, kernel=poly.....................
[CV 3/5; 1/175] END C=0.1, degree=1, gamma=1, kernel=poly;, score=0.828 total time=   0.0s
[CV 4/5; 1/175] START C=0.1, degree=1, gamma=1, kernel=poly.....................
[CV 4/5; 1/175] END C=0.1, degree=1, gamma=1, kernel=poly;, score=0.862 total time=   0.0s
[CV 5/5; 1/175] START C=0.1, degree=1, gamma=1, kernel=poly.....................
[CV 5/5; 1/175] END C=0.1, degree=1, gamma=1, kernel=poly;, score=0.874 total time=   0.0s
[CV 1/5; 2/175] START C=0.1, degree=1, gamma=0.1, kernel=poly................

In [7]:
'''auc = make_scorer(roc_auc_score)
rand_params = {'C': [0.1, 0.001, 0.01, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01,0.001, 0.0001,10,100],
             
              'kernel': ['poly',],
              'degree': [1,2,3,4,5]
               }

rand_search = RandomizedSearchCV(SVC(), param_distributions = rand_params, n_iter = 30)
rand_search.fit(X_train, y_train)
print(rand_search.best_params_)

'''

"auc = make_scorer(roc_auc_score)\nrand_params = {'C': [0.1, 0.001, 0.01, 1, 10, 100, 1000],\n              'gamma': [1, 0.1, 0.01,0.001, 0.0001,10,100],\n             \n              'kernel': ['poly',],\n              'degree': [1,2,3,4,5]\n               }\n\nrand_search = RandomizedSearchCV(SVC(), param_distributions = rand_params, n_iter = 30)\nrand_search.fit(X_train, y_train)\nprint(rand_search.best_params_)\n\n"

In [None]:
'''polynomial_grid_predictions = poly_grid.predict(X_test)
rand_search_predictions = rand_search.predict(X_test)
rbf_linear_grid_predictions = rbf_linear_grid.predict(X_test)'''

In [None]:
'''rbf_linear_grid_accuracy = accuracy_score(y_test, rbf_linear_grid_predictions)
rand_search_accuracy = accuracy_score(y_test, rand_search_predictions)
polynomial_grid_accuracy = accuracy_score(y_test, polynomial_grid_predictions)

if ((polynomial_grid_accuracy > rbf_linear_grid_accuracy)):
    if (polynomial_grid_accuracy > rand_search_accuracy):
        print("Polynomial Grid Search: ")
        conf_matrix = confusion_matrix(y_test, polynomial_grid_predictions)
        report = classification_report(y_test, polynomial_grid_predictions)
        print(report)
        print(conf_matrix)
    else:
        print("Random Search: ")
        conf_matrix = confusion_matrix(y_test, rand_search_predictions)
        report = classification_report(y_test, rand_search_predictions)
        print(report)
        print(conf_matrix)
if (rbf_linear_grid_accuracy >= polynomial_grid_accuracy):
    if (rbf_linear_grid_accuracy >= rand_search_accuracy):
        print("RBF and Linear Grid Search: ")
        conf_matrix = confusion_matrix(y_test, rbf_linear_grid_predictions)
        report = classification_report(y_test, rbf_linear_grid_predictions)
        print(report)
        print(conf_matrix)
    else:
        print("Random Search: ")
        conf_matrix = confusion_matrix(y_test, rand_search_predictions)
        report = classification_report(y_test, rand_search_predictions)
        print(report)
        print(conf_matrix)
   '''

In [9]:
#polynomial_grid_predictions = poly_grid.predict(X_test)
poly_grid_predictions = poly_grid.predict(X_test)
poly_grid_accuracy = accuracy_score(y_test, poly_grid_predictions)
rbf_linear_grid_predictions = rbf_linear_grid.predict(X_test)
rbf_linear_grid_accuracy = accuracy_score(y_test, rbf_linear_grid_predictions)
#polynomial_grid_accuracy = accuracy_score(y_test, polynomial_grid_predictions)

In [10]:
if poly_grid_accuracy > rbf_linear_grid_accuracy:
    print("Random Search: ")
    conf_matrix = confusion_matrix(y_test, poly_grid_predictions)
    report = classification_report(y_test, poly_grid_predictions)
    print(report)
    print(conf_matrix)
else:
    print("RBF and Linear Grid Search: ")
    conf_matrix = confusion_matrix(y_test, rbf_linear_grid_predictions)
    report = classification_report(y_test, rbf_linear_grid_predictions)
    print(report)
    print(conf_matrix)

RBF and Linear Grid Search: 
              precision    recall  f1-score   support

           0       0.78      0.94      0.85        48
           1       0.94      0.79      0.86        61

    accuracy                           0.85       109
   macro avg       0.86      0.86      0.85       109
weighted avg       0.87      0.85      0.85       109

[[45  3]
 [13 48]]
