In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn import svm

In [None]:
# old performances: Decision Tree ---> 0.649   Random Forest-----> 0.86

In [3]:
data = pd.read_csv("diabetes.csv")
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [4]:
feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']
x = data[feature_cols]
y = data.Outcome

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3 ,random_state=1)

In [6]:
svcf = svm.SVC(kernel="linear", probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[133  13]
 [ 37  48]]
Accuracy: 0.7835497835497836
Auc Score: 0.7378323932312651
Auc Score Prob: 0.865511684125705
Recall 0.5647058823529412
Precission: 0.7868852459016393
F1 Score: 0.6575342465753424


In [7]:
svcf = svm.SVC(kernel="linear", C=0.1, probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[133  13]
 [ 36  49]]
Accuracy: 0.7878787878787878
Auc Score: 0.7437147461724416
Auc Score Prob: 0.8642224012892828
Recall 0.5764705882352941
Precission: 0.7903225806451613
F1 Score: 0.6666666666666666


In [8]:
svcf = svm.SVC(kernel="linear", C=10, probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[133  13]
 [ 36  49]]
Accuracy: 0.7878787878787878
Auc Score: 0.7437147461724416
Auc Score Prob: 0.8662369057211925
Recall 0.5764705882352941
Precission: 0.7903225806451613
F1 Score: 0.6666666666666666


In [9]:
svcf = svm.SVC(kernel="poly", probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[140   6]
 [ 43  42]]
Accuracy: 0.7878787878787878
Auc Score: 0.7265108783239324
Auc Score Prob: 0.8586623690572119
Recall 0.49411764705882355
Precission: 0.875
F1 Score: 0.631578947368421


In [10]:
svcf = svm.SVC(kernel="poly", degree=2, probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[137   9]
 [ 41  44]]
Accuracy: 0.7835497835497836
Auc Score: 0.7280016116035455
Auc Score Prob: 0.8507252215954875
Recall 0.5176470588235295
Precission: 0.8301886792452831
F1 Score: 0.6376811594202898


In [11]:
svcf = svm.SVC(kernel="linear", degree=5, probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[133  13]
 [ 37  48]]
Accuracy: 0.7835497835497836
Auc Score: 0.7378323932312651
Auc Score Prob: 0.865511684125705
Recall 0.5647058823529412
Precission: 0.7868852459016393
F1 Score: 0.6575342465753424


In [12]:
svcf = svm.SVC(kernel="rbf", probability=True)
svcf.fit(x_train, y_train)
y_pred = svcf.predict(x_test)
y_pred_prob = svcf.predict_proba(x_test)[:,1]
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Auc Score:", roc_auc_score(y_test, y_pred))
print("Auc Score Prob:", roc_auc_score(y_test, y_pred_prob))
print("Recall", recall_score(y_test, y_pred))
print("Precission:", precision_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

[[138   8]
 [ 46  39]]
Accuracy: 0.7662337662337663
Auc Score: 0.7020145044319097
Auc Score Prob: 0.8527800161160354
Recall 0.4588235294117647
Precission: 0.8297872340425532
F1 Score: 0.5909090909090908
