# Performance Tuning the LogisticRegression Model

## 1. Import the necessary libraries

In [11]:
%matplotlib inline
from IPython.display import display, clear_output, Image
from sklearn import neighbors
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, auc, classification_report, confusion_matrix, make_scorer, precision_recall_curve, precision_score, precision_score, recall_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelBinarizer
from sklearn.svm import SVC
from sklearn.tree import export_graphviz
import graphviz
import ipywidgets as widgets
import joblib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydot
import time
plt.style.use("ggplot")
print("The libraries were successfully loaded!")

The libraries were successfully loaded!


## 2. Import the data

In [2]:
# Import Dataset
df = pd.read_csv('data/cleansed-healthcare-dataset-stroke-data.csv',delimiter=',',header='infer')
# Display top rows of the dataset
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,1,67.0,0,1,1,2,1,228.69,36.6,1,1
1,1,80.0,0,1,1,2,0,105.92,32.5,2,1
2,0,49.0,0,0,1,2,1,171.23,34.4,3,1
3,0,79.0,1,0,1,3,0,174.12,24.0,2,1
4,1,81.0,0,0,1,2,1,186.21,29.0,1,1


## 3. Split the data into a training and testing set

In [3]:
# Separate features and labels
features = ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
       'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',
       'smoking_status']
label = 'stroke'
X, y = df[features].values, df[label].values

# Split data 70%-30% into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)


print('Training cases: %d\nTest cases: %d' % (X_train.size, X_test.size))

Training cases: 34360
Test cases: 14730


## 4. Define the Grid Search Function to test

In [4]:
def grid_search_wrapper(refit_score='precision_score'):
    """
    fits a GridSearchCV classifier using refit_score for optimization
    prints classifier performance metrics
    """
    skf = StratifiedKFold(n_splits=10)
    
    #  Define grid search
    grid_search = GridSearchCV(clf, param_grid, scoring=scorers, refit=refit_score,
                           cv=skf, return_train_score=True, n_jobs=-1)
    
    # Train the models 
    grid_search.fit(X_train.values, y_train.values)

    # Test the models
    y_pred = grid_search.predict(X_test.values)
    
    # Print the best hyperparameters
    print('Best params for {}'.format(refit_score))
    print(grid_search.best_params_)

    # confusion matrix on the test data.
    print('\nConfusion matrix of model optimized for {} on the test data:'.format(refit_score))
    print(pd.DataFrame(confusion_matrix(y_test, y_pred),
                 columns=['pred_neg', 'pred_pos'], index=['neg', 'pos']))
    return grid_search

## 5. Logistic Regression Optimisation

In [5]:
# Create model
reg = 0.01
clf = LogisticRegression(random_state=0)

# Define models hyperparameters
param_grid = {
    'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'dual' : [True, False],
    'tol' : [0.00001, 0.0001, 0.001, 0.01, 0.1],
    'C' : [1, 10, 100, 1000, 10000],
    'fit_intercept' :[True, False],
    'intercept_scaling' : [0.05, 0.075, 0.1, 0.125, 0.15],
    'class_weight' : ['balanced'],
    'solver' : ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'],
    'max_iter' : [75, 100, 125, 150, 175, 200],
    'multi_class': ['auto', 'ovr', 'multinomial'],
    'warm_start' : [True, False]
}

# Define scores to test
scorers = {
    'precision_score': make_scorer(precision_score),
    'recall_score': make_scorer(recall_score),
    'accuracy_score': make_scorer(accuracy_score)
}

In [12]:
# Perform grid search to identify best hyperparameters for recall_score
print("Round 1")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 1


  return f(*args, **kwargs)


Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.05, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-05, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47


In [13]:
# Define new hyperparameters
param_grid = {
    'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'dual' : [True, False],
    'tol' : [0.00001, 0.0001, 0.001, 0.01, 0.1],
    'C' : [90, 100, 110],
    'fit_intercept' :[True, False],
    'intercept_scaling' : [0.025, 0.05, 0.055],
    'class_weight' : ['balanced'],
    'solver' : ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'],
    'max_iter' : [160, 175, 180],
    'multi_class': ['auto', 'ovr', 'multinomial'],
    'warm_start' : [True, False]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 2")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 2


  return f(*args, **kwargs)


Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.025, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-05, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47


In [14]:
# Define new hyperparameters
param_grid = {
    'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'dual' : [True, False],
    'tol' : [0.0000001, 0.000001, 0.00001, 0.0001],
    'C' : [99, 100, 101],
    'fit_intercept' :[True, False],
    'intercept_scaling' : [0.01, 0.015, 0.02, 0.025, 0.03],
    'class_weight' : ['balanced'],
    'solver' : ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'],
    'max_iter' : [174, 175, 176],
    'multi_class': ['auto', 'ovr', 'multinomial'],
    'warm_start' : [True, False]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 3")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 3


  return f(*args, **kwargs)


Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.01, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-07, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47




In [15]:
# Define new hyperparameters
param_grid = {
    'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'dual' : [True, False],
    'tol' : [0.000000001, 0.00000001, 0.0000001],
    'C' : [99, 100, 101],
    'fit_intercept' :[True, False],
    'intercept_scaling' : [0.001, 0.0075, 0.01],
    'class_weight' : ['balanced'],
    'solver' : ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'],
    'max_iter' : [174, 175, 176],
    'multi_class': ['auto', 'ovr', 'multinomial'],
    'warm_start' : [True, False]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 4")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 4


  return f(*args, **kwargs)


Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.001, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-09, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47


In [16]:
# Define new hyperparameters
param_grid = {
    'penalty' : ['l2'],
    'dual' : [False],
    'tol' : [0.000000001, 0.0000000001, 0.00000000001],
    'C' : [100],
    'fit_intercept' :[True],
    'intercept_scaling' : [0.001,0.0001,0.00001],
    'class_weight' : ['balanced'],
    'solver' : ['saga'],
    'max_iter' : [175],
    'multi_class': ['multinomial'],
    'warm_start' : [True]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 5")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 5


  return f(*args, **kwargs)


Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.001, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-09, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47


In [17]:
# Define new hyperparameters

param_grid = {
    'penalty' : ['l2'],
    'dual' : [False],
    'tol' : [0.000000001, 0.0000000011, 0.0000000009],
    'C' : [100],
    'fit_intercept' :[True],
    'intercept_scaling' : [0.001,0.0011,0.0009],
    'class_weight' : ['balanced'],
    'solver' : ['saga'],
    'max_iter' : [175],
    'multi_class': ['multinomial'],
    'warm_start' : [True]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 6")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 6
Best params for recall_score
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 0.001, 'max_iter': 175, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'saga', 'tol': 1e-09, 'warm_start': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       929       489
pos         8        47


  return f(*args, **kwargs)


## 6. Random Forest Optimisation

In [18]:
# Create model
clf = RandomForestClassifier(random_state=0)

# Define models hyperparameters
param_grid = {
    'bootstrap': [False, True],
    'min_samples_split': [2, 3, 4, 5], 
    'min_samples_leaf': [1, 2, 3, 4, 5],
    'n_estimators' : [75, 100, 125, 150, 175],
    'max_depth': [10, 15, 20, 25, 30],
    'max_features': [8, 9, 10, 11]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 1")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 1


  self.best_estimator_.fit(X, y, **fit_params)


Best params for recall_score
{'bootstrap': False, 'max_depth': 20, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 75}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1345        73
pos        46         9


In [19]:
# Define new hyperparameters
param_grid = {
    'bootstrap': [False, True],
    'min_samples_split': [1, 2, 3], 
    'min_samples_leaf': [1, 2],
    'n_estimators' : [70, 75, 80],
    'max_depth': [19, 20, 21],
    'max_features': [9, 10, 11]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 2")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 2


 0.06938154 0.06738858 0.06892857        nan        nan        nan
 0.10871906 0.10871906 0.11053724 0.10871906 0.10871906 0.11053724
        nan        nan        nan 0.07805736 0.08235994 0.07610994
 0.07997899 0.07976254 0.07997899        nan        nan        nan
 0.08805556 0.09305556 0.09305556 0.08805556 0.09305556 0.09305556
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.0700887  0.07330128 0.07307692
 0.06915043 0.06772949 0.07002747        nan        nan        nan
 0.09760795 0.09760795 0.10134921 0.09760795 0.09760795 0.10134921
        nan        nan        nan 0.08412879 0.08759804 0.07732207
 0.07331232 0.07309587 0.07331232        nan        nan        nan
 0.08583333 0.09083333 0.09261905 0.08583333 0.09083333 0.09261905
        nan        nan        nan        nan        nan       

Best params for recall_score
{'bootstrap': False, 'max_depth': 20, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 75}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1345        73
pos        46         9


In [20]:
# Define new hyperparameters
param_grid = {
    'bootstrap': [False, True],
    'min_samples_split': [1, 2, 3], 
    'min_samples_leaf': [1, 2],
    'n_estimators' : [74, 75, 76],
    'max_depth': [19, 20, 21],
    'max_features': [9, 10, 11]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 3")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 3


 0.06738858 0.06738858 0.07002747        nan        nan        nan
 0.11064214 0.10871906 0.10871906 0.11064214 0.10871906 0.10871906
        nan        nan        nan 0.08319328 0.08235994 0.07767857
 0.07976254 0.07976254 0.07997899        nan        nan        nan
 0.09305556 0.09305556 0.09305556 0.09305556 0.09305556 0.09305556
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.0700887  0.07330128 0.0700887
 0.06863858 0.06772949 0.06911838        nan        nan        nan
 0.09953102 0.09760795 0.10841991 0.09953102 0.09760795 0.10841991
        nan        nan        nan 0.08797683 0.08759804 0.08329545
 0.07309587 0.07309587 0.07331232        nan        nan        nan
 0.09083333 0.09083333 0.09083333 0.09083333 0.09083333 0.09083333
        nan        nan        nan        nan        nan        

Best params for recall_score
{'bootstrap': False, 'max_depth': 20, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 74}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1346        72
pos        46         9


In [21]:
# Define new hyperparameters

param_grid = {
    'bootstrap': [False, True],
    'min_samples_split': [1, 2, 3], 
    'min_samples_leaf': [1, 2],
    'n_estimators' : [72, 73, 74],
    'max_depth': [19, 20, 21],
    'max_features': [9, 10, 11]
}

print("Round 4")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 4


 0.07002747 0.06647949 0.06738858        nan        nan        nan
 0.11064214 0.11064214 0.11064214 0.11064214 0.11064214 0.11064214
        nan        nan        nan 0.08319328 0.08319328 0.08319328
 0.07976254 0.07976254 0.07976254        nan        nan        nan
 0.09305556 0.09305556 0.09305556 0.09305556 0.09305556 0.09305556
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.0700887  0.06880665 0.0700887
 0.07002747 0.06786838 0.06863858        nan        nan        nan
 0.10841991 0.10841991 0.09953102 0.10841991 0.10841991 0.09953102
        nan        nan        nan 0.08843137 0.08759804 0.08797683
 0.07309587 0.07309587 0.07309587        nan        nan        nan
 0.09083333 0.09083333 0.09083333 0.09083333 0.09083333 0.09083333
        nan        nan        nan        nan        nan        

Best params for recall_score
{'bootstrap': False, 'max_depth': 20, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 72}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1346        72
pos        46         9


In [22]:
# Define new hyperparameters
param_grid = {
    'bootstrap': [False, True],
    'min_samples_split': [1, 2, 3], 
    'min_samples_leaf': [1, 2],
    'n_estimators' : [70, 71, 72],
    'max_depth': [19, 20, 21],
    'max_features': [9, 10, 11]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 5")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')


Round 5


 0.06938154 0.06813154 0.07002747        nan        nan        nan
 0.10871906 0.11064214 0.11064214 0.10871906 0.11064214 0.11064214
        nan        nan        nan 0.07805736 0.08235994 0.08319328
 0.07997899 0.07976254 0.07976254        nan        nan        nan
 0.08805556 0.08805556 0.09305556 0.08805556 0.08805556 0.09305556
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.0700887  0.0700887  0.0700887
 0.06915043 0.06915043 0.07002747        nan        nan        nan
 0.09760795 0.10841991 0.10841991 0.09760795 0.10841991 0.10841991
        nan        nan        nan 0.08412879 0.08843137 0.08843137
 0.07331232 0.07309587 0.07309587        nan        nan        nan
 0.08583333 0.08583333 0.09083333 0.08583333 0.08583333 0.09083333
        nan        nan        nan        nan        nan        

Best params for recall_score
{'bootstrap': False, 'max_depth': 20, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 71}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1345        73
pos        46         9


## 7. KNN Optimisation

In [23]:
# Create model
clf = neighbors.KNeighborsClassifier()

# Define models hyperparameters
param_grid = {
    'n_neighbors': [1, 5, 10],
    'weights': ['uniform', 'distance'], 
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size' : [10, 30, 50],
    'p': [1, 2],
    'n_jobs': [1, 5, 10]
}

print("Round 1")
# Perform grid search to identify best hyperparameters for recall_score
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 1


  return self._fit(X, y)


Best params for recall_score
{'algorithm': 'auto', 'leaf_size': 10, 'n_jobs': 1, 'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1361        57
pos        47         8


In [24]:
# Define new hyperparameters
param_grid = {
    'n_neighbors': [1, 2, 3],
    'weights': ['uniform', 'distance'], 
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size' : [5, 10, 15],
    'p': [1, 2, 3],
    'n_jobs': [1, 2, 3]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 2")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 2


  return self._fit(X, y)


Best params for recall_score
{'algorithm': 'auto', 'leaf_size': 5, 'n_jobs': 1, 'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1361        57
pos        47         8


In [25]:
# Define new hyperparameters
param_grid = {
    'n_neighbors': [1, 2],
    'weights': ['uniform'], 
    'algorithm': ['auto'],
    'leaf_size' : [4, 5, 6],
    'p': [1, 2, 3],
    'n_jobs': [1, 2]
}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 3")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 3
Best params for recall_score
{'algorithm': 'auto', 'leaf_size': 4, 'n_jobs': 1, 'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1361        57
pos        47         8


  return self._fit(X, y)


In [26]:
# Define new hyperparameters
param_grid = {
    'n_neighbors': [1, 2],
    'weights': ['uniform'], 
    'algorithm': ['auto'],
    'leaf_size' : [1, 2, 3, 4],
    'p': [1, 2, 3],
    'n_jobs': [1, 2]
}

print("Round 4")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 4
Best params for recall_score
{'algorithm': 'auto', 'leaf_size': 1, 'n_jobs': 1, 'n_neighbors': 1, 'p': 2, 'weights': 'uniform'}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg      1361        57
pos        47         8


  return self._fit(X, y)


## 8. SVC Optimisation

In [6]:
# Create model
clf = SVC(random_state=0, probability=True)

# Define models hyperparameters
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 
    'degree': [1, 3, 5],
    'gamma' : ['scale', 'auto'],
    'coef0': [0, 1],
    'shrinking': [True, False],
    'tol': [0.00001, 0.001, 0.1],
    'cache_size': [100, 200, 300],
    'verbose': [True, False],
    'max_iter': [1, 3, 5],
    'decision_function_shape': ['ovo', 'ovr'],
    'break_ties': [True, False]
}

print("Round 1")
# Perform grid search to identify best hyperparameters for recall_score
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 1


  return f(*args, **kwargs)


[LibSVM]Best params for recall_score
{'C': 0.1, 'break_ties': True, 'cache_size': 100, 'coef0': 0, 'decision_function_shape': 'ovr', 'degree': 1, 'gamma': 'auto', 'kernel': 'sigmoid', 'max_iter': 1, 'shrinking': True, 'tol': 1e-05, 'verbose': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg         0      1418
pos         0        55


In [7]:
# Define new hyperparameters
param_grid = {'C': [0.01, 0.1, 0.5],
 'kernel': ['sigmoid'],
 'degree': [0, 1, 2],
 'gamma': ['auto'],
 'coef0': [0],
 'shrinking': [True],
 'tol': [1.00000005, 1.0000005, 1.000005],
 'cache_size': [50, 100, 150],
 'verbose': [True],
 'max_iter': [0, 1, 2],
 'decision_function_shape': ['ovr'],
 'break_ties': [True]}

# Perform grid search to identify best hyperparameters for recall_score
print("Round 2")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 2
[LibSVM]Best params for recall_score
{'C': 0.01, 'break_ties': True, 'cache_size': 50, 'coef0': 0, 'decision_function_shape': 'ovr', 'degree': 0, 'gamma': 'auto', 'kernel': 'sigmoid', 'max_iter': 0, 'shrinking': True, 'tol': 1.00000005, 'verbose': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg         0      1418
pos         0        55


  return f(*args, **kwargs)


In [8]:
# Define models hyperparameters
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 
    'degree': [1, 3, 5],
    'gamma' : ['scale', 'auto'],
    'coef0': [0, 1],
    'shrinking': [True, False],
    'tol': [0.00001, 0.001, 0.1],
    'cache_size': [100, 200, 300],
    'verbose': [True, False],
    'max_iter': [1, 3, 5],
    'decision_function_shape': ['ovo', 'ovr'],
    'break_ties': [True, False]
}

# Perform grid search to identify best hyperparameters for precision_score
print("Round 3")
grid_search_clf = grid_search_wrapper(refit_score='precision_score')

Round 3


  return f(*args, **kwargs)


[LibSVM]Best params for precision_score
{'C': 0.1, 'break_ties': True, 'cache_size': 100, 'coef0': 0, 'decision_function_shape': 'ovr', 'degree': 5, 'gamma': 'auto', 'kernel': 'poly', 'max_iter': 5, 'shrinking': True, 'tol': 1e-05, 'verbose': True}

Confusion matrix of model optimized for precision_score on the test data:
     pred_neg  pred_pos
neg      1352        66
pos        42        13


In [9]:
# Define models hyperparameters
param_grid = {
    'C': [0.001, 0.01, 0.1],
    'kernel': ['poly'], 
    'degree': [5, 10, 15],
    'gamma' : ['auto'],
    'coef0': [0],
    'shrinking': [True],
    'tol': [0.0000001, 0.000001, 0.00001],
    'cache_size': [50, 100, 150],
    'verbose': [True],
    'max_iter': [5, 10, 15],
    'decision_function_shape': ['ovr'],
    'break_ties': [True]
}

# Perform grid search to identify best hyperparameters for precision_score
print("Round 4")
grid_search_clf = grid_search_wrapper(refit_score='precision_score')

Round 4
[LibSVM]Best params for precision_score
{'C': 0.001, 'break_ties': True, 'cache_size': 50, 'coef0': 0, 'decision_function_shape': 'ovr', 'degree': 5, 'gamma': 'auto', 'kernel': 'poly', 'max_iter': 5, 'shrinking': True, 'tol': 1e-07, 'verbose': True}

Confusion matrix of model optimized for precision_score on the test data:
     pred_neg  pred_pos
neg      1352        66
pos        42        13


  return f(*args, **kwargs)


In [10]:
 # Define models hyperparameters
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear'], 
    'degree': [1, 3, 5],
    'gamma' : ['scale', 'auto'],
    'coef0': [0, 1],
    'shrinking': [True, False],
    'tol': [0.00001, 0.001, 0.1],
    'cache_size': [100, 200, 300],
    'verbose': [True, False],
    'max_iter': [1, 3, 5],
    'decision_function_shape': ['ovo', 'ovr'],
    'break_ties': [True, False]
}

print("Round 5")
grid_search_clf = grid_search_wrapper(refit_score='recall_score')

Round 5
[LibSVM]Best params for recall_score
{'C': 0.1, 'break_ties': True, 'cache_size': 100, 'coef0': 0, 'decision_function_shape': 'ovr', 'degree': 1, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': 3, 'shrinking': True, 'tol': 1e-05, 'verbose': True}

Confusion matrix of model optimized for recall_score on the test data:
     pred_neg  pred_pos
neg       633       785
pos         1        54


  return f(*args, **kwargs)
