**SVM**

In [1]:
import numpy as np
import pandas as pd 
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error, r2_score, roc_auc_score, roc_curve, classification_report
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the prepared DataFrame (presumably written by an earlier notebook — confirm).
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df=pd.read_pickle("saved_df.pkl")

In [3]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
df.shape

(707, 9)

In [5]:
# Features are every column except the label; the label is the 0/1 "Outcome" column.
X=df.drop(["Outcome"], axis=1)
y=df["Outcome"]
# The train/test split is performed once, in the SVM section below (test_size=0.30).
# An 80/20 split that used to sit here was overwritten before anything used it,
# and every reported metric is based on the 70/30 split.

**SVM (unscaled data)**

In [6]:
# Separate the label from the features, then hold out 30% of the rows for testing.
# stratify=y keeps the class proportions the same in both partitions;
# random_state pins the shuffle so the split is reproducible.
y = df["Outcome"]
X = df.drop(columns=["Outcome"])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

In [7]:
# Baseline: default SVC trained on the features exactly as loaded (no scaling yet).
svm_model = SVC()
svm_model.fit(X_train, y_train);  # trailing ';' keeps the cell output empty

In [8]:
svm_model

SVC()

In [9]:
# Predict the hold-out labels and tabulate them against the true labels.
y_pred = svm_model.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[132,   9],
       [ 37,  35]], dtype=int64)

In [10]:
# Per-class precision / recall / F1 for the predictions above.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.78      0.94      0.85       141
           1       0.80      0.49      0.60        72

    accuracy                           0.78       213
   macro avg       0.79      0.71      0.73       213
weighted avg       0.79      0.78      0.77       213



**SVM (with scaled data)**

In [11]:
# Standardize the features. The scaler is fitted on the training rows only and then
# applied to both partitions, so no test-set statistics reach the model.
# NOTE: X_train / X_test are overwritten with the scaled arrays from here on.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
# Same default SVC as before, now trained on the standardized features.
svm_model = SVC()
svm_model.fit(X_train, y_train);  # trailing ';' keeps the cell output empty

In [13]:
# Predict on the (scaled) hold-out set and compare against the true labels.
y_pred = svm_model.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[124,  17],
       [ 26,  46]], dtype=int64)

In [14]:
# Per-class precision / recall / F1 for the scaled-data model.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       141
           1       0.73      0.64      0.68        72

    accuracy                           0.80       213
   macro avg       0.78      0.76      0.77       213
weighted avg       0.79      0.80      0.79       213



**model tuning**

In [15]:
svm = SVC()

In [16]:
# Search space for the first tuning pass: only the kernel type is varied.
svm_params = dict(kernel=["linear", "rbf", "poly", "sigmoid"])

In [31]:
# Exhaustive 5-fold cross-validated search over the grid held in svm_params.
# n_jobs=-1 runs the fits in parallel; verbose=2 prints progress per batch.
svm_cv_model = GridSearchCV(
    estimator=svm,
    param_grid=svm_params,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
svm_cv_model.fit(X_train, y_train);  # trailing ';' suppresses the estimator repr

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    4.0s finished


In [18]:
svm_cv_model.best_params_

{'kernel': 'rbf'}

In [24]:
# try another grid search (over C and gamma)

In [19]:
svm = SVC()

In [21]:
# Search space for the second tuning pass: regularization strength C and kernel
# coefficient gamma.
# gamma must be strictly positive (or "scale"/"auto"). The former 0 entry is not a
# usable value: depending on the scikit-learn version the fit is rejected or the
# kernel degenerates, and with warnings silenced those candidates were wasted
# without notice. 0.0001 continues the decade sweep and keeps the grid at 30 candidates.
svm_params = {
    "C": [0.1, 0.5, 1, 10, 20, 50],
    "gamma": [1, 0.1, 0.01, 0.001, 0.0001],
}

In [32]:
# Re-run the 5-fold cross-validated search, this time over the grid currently
# held in svm_params. n_jobs=-1 parallelizes the fits; verbose=2 prints progress.
svm_cv_model = GridSearchCV(
    estimator=svm,
    param_grid=svm_params,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
svm_cv_model.fit(X_train, y_train);  # trailing ';' suppresses the estimator repr

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    0.3s finished


In [23]:
svm_cv_model.best_params_

{'C': 1, 'gamma': 0.01}

In [35]:
# Refit using the winning hyper-parameters straight from the grid search above,
# rather than re-typing them by hand, so this model cannot drift from the search
# result if the grid or the data changes.
svm_tuned_2 = SVC(**svm_cv_model.best_params_).fit(X_train, y_train)

In [36]:
# Evaluate the tuned model on the hold-out set.
y_pred = svm_tuned_2.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[129,  12],
       [ 32,  40]], dtype=int64)

In [37]:
# Per-class precision / recall / F1 for the tuned model.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.80      0.91      0.85       141
           1       0.77      0.56      0.65        72

    accuracy                           0.79       213
   macro avg       0.79      0.74      0.75       213
weighted avg       0.79      0.79      0.78       213



In [33]:
# third experiment: SVC with gamma="auto" (no grid search here)

In [27]:
# Estimator with gamma="auto" (scikit-learn documents this setting as 1 / n_features).
svm = SVC(gamma="auto")

In [28]:
# Fit the gamma="auto" estimator built in the previous cell. This line used to fit
# a fresh default SVC(), so `svm` was never used and the section merely reproduced
# the scaled-data baseline numbers.
svm_model = svm.fit(X_train, y_train)

In [29]:
# Evaluate the model fitted in the previous cell on the hold-out set.
y_pred = svm_model.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[124,  17],
       [ 26,  46]], dtype=int64)

In [30]:
# Per-class precision / recall / F1 for the predictions above.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.83      0.88      0.85       141
           1       0.73      0.64      0.68        72

    accuracy                           0.80       213
   macro avg       0.78      0.76      0.77       213
weighted avg       0.79      0.80      0.79       213

