<div style="padding:30px 0px;">
    <h1 align="center" style="padding:50px">Tuning Models for Precision</h1>
    <p align="center" style="font-size:small;">Seth Pruitt<br>spruitt@norstal.com<br>www.github.com/faradical</p>
</div>

## Importing Dependencies

In [39]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, precision_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report

## Preparing our Data

In [40]:
# Reading in our data
df = pd.read_csv(os.path.join("Resources", "diabetes.csv"))
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [41]:
# Preparing our target
target = df["Outcome"]
target_names = ["negative", "positive"]

# Remove target from X data
data = df.drop("Outcome", axis=1)
feature_names = data.columns

# Split the data into traing and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)
X_train.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
357,13,129,0,30,0,39.9,0.569,44
73,4,129,86,20,270,35.1,0.231,23
352,3,61,82,28,0,34.4,0.243,46
497,2,81,72,15,76,30.1,0.547,25
145,0,102,75,23,0,0.0,0.572,21


In [42]:
# Fitting the scaler to our X train data
scaler = StandardScaler()
scaler.fit(X_train)

# Scaling our X data with the fitted scaler
X_trainScaled = scaler.transform(X_train)
X_testScaled = scaler.transform(X_test)

# Print the Scaled X_train data as a dataframe for our viewing pleasure (It is originally a nupy array)
pd.DataFrame(X_trainScaled)

Unnamed: 0,0,1,2,3,4,5,6,7
0,2.803468,0.259779,-3.780779,0.616770,-0.692052,1.039740,0.296085,0.963521
1,0.078327,0.259779,0.897245,-0.032106,1.633077,0.409454,-0.700876,-0.862956
2,-0.224467,-1.858253,0.679662,0.486995,-0.692052,0.317537,-0.665480,1.137471
3,-0.527260,-1.235302,0.135706,-0.356544,-0.037571,-0.247095,0.231195,-0.689006
4,-1.132847,-0.581204,0.298893,0.162557,-0.692052,-4.199517,0.304934,-1.036906
...,...,...,...,...,...,...,...,...
571,0.381120,0.571254,-0.299459,0.941208,0.513571,-0.444059,-0.169950,-0.602031
572,-0.830054,-0.768089,2.855487,-1.329858,-0.692052,-1.258179,-0.771666,-0.515056
573,1.895088,-0.612352,0.897245,1.070984,-0.692052,1.788206,1.968502,0.441670
574,-1.132847,0.633549,-3.780779,-1.329858,-0.692052,1.368015,-0.777565,-0.341105


## Preparing the SVC Model with GridSearchCV

[SVC Documentation](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)

In [43]:
# Create a dictionary of hyperparameters to search over
param_grid = {
    'gamma': [0.0001, 0.0005, 0.001, 0.005],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': list(np.arange(0.01, 1, 0.01))
}

# Create a custom scoring function based on precision
scorer = make_scorer(precision_score)

# Create a GridSearchCV object with precision scoring
grid_search = GridSearchCV(SVC(), param_grid, scoring=scorer)

## Training the model

In [44]:
# Fit the GridSearchCV object to the data
import warnings
with warnings.catch_warnings(): # Ignoring zero division warnings
    warnings.filterwarnings("ignore")
    grid_search.fit(X_trainScaled, y_train)

## Evaluating the results

In [45]:
# Print the best hyperparameters and precision score
print("Best parameters:", grid_search.best_params_)
print("Best precision score:", grid_search.best_score_)

Best parameters: {'C': 0.45, 'gamma': 0.005, 'kernel': 'rbf'}
Best precision score: 0.8260784313725489


In [46]:
# Printing the classification report
predictions = grid_search.predict(X_testScaled)
print(classification_report(y_test, predictions, target_names=target_names))

              precision    recall  f1-score   support

    negative       0.76      0.91      0.83       123
    positive       0.75      0.48      0.58        69

    accuracy                           0.76       192
   macro avg       0.75      0.69      0.71       192
weighted avg       0.75      0.76      0.74       192

