In [1]:
# Step 1: Dependencies

import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Step 2: Get the data

df = pd.read_csv('resources/diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Step 3: Assign the X (features and data) and y (target and outcome)

target = df['Outcome']
target_names = ['negative', 'positive']

In [4]:
data = df.drop("Outcome", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [5]:
# Step 4: employ train_test_split to get training and testing data

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [8]:
# Step 5: Create the SVM Model

from sklearn.svm import SVC

model = SVC(kernel='linear')
model

SVC(kernel='linear')

In [10]:
# Step 6: Create the GridSearch parameters and grid

from sklearn.model_selection import GridSearchCV

# Remember that these parameters can be changed, if its not as 'strong' as you would like, come
# back here to change the parameters
param_grid = {'C': [1, 5, 10],
             'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [11]:
# Step 7: Fit the training data to the hypertuned model 

grid.fit(X_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1, gamma=0.0001, score=0.793, total=   4.8s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.8s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.757, total=   4.6s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    9.4s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.809, total=   1.8s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.722, total=   1.7s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.765, total=   2.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.793, total=   6.1s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.757, total=   5.0s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.809, total=   1.5s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.722, total=   1.6s
[CV] C=1, gamma=0.001 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:  9.6min finished


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             verbose=3)

In [12]:
# Step 8: print out the gridsearch training details

print(grid.best_params_)
print(grid.best_score_)

{'C': 1, 'gamma': 0.0001}
0.7690554722638681


In [13]:
# Step 9: Make predictions using the hypertuned model

predictions = grid.predict(X_test)

In [15]:
# Step 10: Get the classification report

from sklearn.metrics import classification_report

print(classification_report(y_test, predictions, target_names = target_names))

              precision    recall  f1-score   support

    negative       0.79      0.78      0.79       123
    positive       0.62      0.64      0.63        69

    accuracy                           0.73       192
   macro avg       0.71      0.71      0.71       192
weighted avg       0.73      0.73      0.73       192



<strong>Predictions</strong>
* Out of all claims that the results are negative, the model was 79% correct  
* Out of all claims that the results are positive, the model was 62% correct  

<strong>Recall</strong>
* Out of all the negative results, the model captured 78%
* Out of all the positive results, the model captured 64%  

<strong>F1_score</strong>
* the model was overall 79% accurate when predicting negative results
* the model was overall 63% accurate when predicting positive results

<strong>Overall:</strong>
* The hypertuned model was 73% 'accurate'