In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
import seaborn

In [2]:
dataset = pd.read_csv('diabetes.csv')

In [217]:
x = dataset[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']] 
y = dataset['Outcome']

In [218]:
from sklearn.feature_selection import SelectKBest, chi2

In [219]:
select_feature = SelectKBest(score_func = chi2,k=6)

In [220]:
select_feature.fit(x,y)

SelectKBest(k=6, score_func=<function chi2 at 0x00000214FFA509D0>)

In [221]:
score_col = pd.DataFrame(select_feature.scores_,columns = ['score_value'])

In [222]:
score_col

Unnamed: 0,score_value
0,111.519691
1,1411.887041
2,17.605373
3,53.10804
4,2175.565273
5,127.669343
6,5.392682
7,181.303689


In [223]:
feature_col  = pd.DataFrame(x.columns)

In [224]:
top_feature = pd.concat([feature_col, score_col], axis=1)

In [225]:
top_feature

Unnamed: 0,0,score_value
0,Pregnancies,111.519691
1,Glucose,1411.887041
2,BloodPressure,17.605373
3,SkinThickness,53.10804
4,Insulin,2175.565273
5,BMI,127.669343
6,DiabetesPedigreeFunction,5.392682
7,Age,181.303689


In [226]:
# Show the six rows with the highest chi-squared score, best first.
top_feature.nlargest(n=6, columns='score_value')

Unnamed: 0,0,score_value
4,Insulin,2175.565273
1,Glucose,1411.887041
7,Age,181.303689
5,BMI,127.669343
0,Pregnancies,111.519691
3,SkinThickness,53.10804


In [227]:
dataset.drop('BloodPressure',
  axis='columns', inplace=True)

In [228]:
dataset

Unnamed: 0,Pregnancies,Glucose,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,35,0,33.6,0.627,50,1
1,1,85,29,0,26.6,0.351,31,0
2,8,183,0,0,23.3,0.672,32,1
3,1,89,23,94,28.1,0.167,21,0
4,0,137,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...
763,10,101,48,180,32.9,0.171,63,0
764,2,122,27,0,36.8,0.340,27,0
765,5,121,23,112,26.2,0.245,30,0
766,1,126,0,0,30.1,0.349,47,1


In [229]:
dataset.drop('DiabetesPedigreeFunction',
  axis='columns', inplace=True)

In [230]:
x = dataset[['Pregnancies','Glucose','SkinThickness','Insulin','BMI','Age']] 
y = dataset['Outcome']

# MinMax scaling

In [231]:
from sklearn.preprocessing import MinMaxScaler

In [232]:
mms = MinMaxScaler(feature_range = (0, 1))

In [233]:
x_min = mms.fit_transform(x)

# Standardization

In [234]:
from sklearn.preprocessing import StandardScaler

In [235]:
standard = StandardScaler()

In [236]:
x_stand = standard.fit_transform(x) 

In [237]:
from sklearn.model_selection import train_test_split

In [238]:
# Hold out 30% of the min-max-scaled rows for testing; random_state=1 fixes the shuffle.
xtrain, xtest, ytrain, ytest = train_test_split(
    x_min,
    y,
    test_size=0.30,
    random_state=1,
)

In [239]:
from sklearn.svm import SVC

In [240]:
model = SVC()

In [241]:
model.fit(xtrain,ytrain)

SVC()

In [242]:
# Display the predicted labels for the held-out rows (the result is not stored here).
model.predict(X=xtest)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0], dtype=int64)

In [244]:
# Mean accuracy of the fitted classifier on the held-out split.
model.score(X=xtest, y=ytest)

0.7922077922077922

In [205]:
from sklearn.metrics import accuracy_score

In [206]:
predict = model.predict(xtest)

In [207]:
accuracy_score(ytest,predict)

0.7922077922077922

In [208]:
from sklearn.metrics import confusion_matrix

In [209]:
# Counts of true class (rows) versus predicted class (columns) on the held-out split.
confusion_matrix(y_true=ytest, y_pred=predict)

array([[133,  13],
       [ 35,  50]], dtype=int64)

In [210]:
from sklearn.metrics import classification_report

In [211]:
# Per-class precision / recall / F1 for the min-max-scaled 70/30 experiment.
# print() is needed because the report is a pre-formatted multi-line string.
print(classification_report(y_true=ytest, y_pred=predict))

              precision    recall  f1-score   support

           0       0.79      0.91      0.85       146
           1       0.79      0.59      0.68        85

    accuracy                           0.79       231
   macro avg       0.79      0.75      0.76       231
weighted avg       0.79      0.79      0.78       231



In [212]:
# Second experiment: standardized features with 25% of the rows held out.
x1train, x1test, y1train, y1test = train_test_split(
    x_stand,
    y,
    test_size=0.25,
    random_state=1,
)

In [213]:
# Refit the shared `model` object on the standardized 75% training split; this
# replaces whatever the estimator learned from the previous fit. A single call is
# enough: fitting again on the same inputs would only repeat the work.
model.fit(x1train,y1train)

SVC()

In [179]:
predict = model.predict(x1test)

In [180]:
accuracy_score(y1test,predict)

0.7760416666666666

In [181]:
# Per-class precision / recall / F1 for the standardized 75/25 experiment.
print(classification_report(y_true=y1test, y_pred=predict))

              precision    recall  f1-score   support

           0       0.79      0.88      0.83       123
           1       0.73      0.59      0.66        69

    accuracy                           0.78       192
   macro avg       0.76      0.74      0.74       192
weighted avg       0.77      0.78      0.77       192



In [186]:
x2train,x2test,y2train,y2test = train_test_split(x_stand,y,test_size = .40,random_state = 1)

In [187]:
model.fit(x2train,y2train)

SVC()

In [188]:
predict = model.predict(x2test)

In [189]:
accuracy_score(y2test,predict)

0.7532467532467533

In [190]:
# Per-class precision / recall / F1 for the standardized 60/40 experiment.
print(classification_report(y_true=y2test, y_pred=predict))

              precision    recall  f1-score   support

           0       0.77      0.88      0.82       199
           1       0.70      0.52      0.60       109

    accuracy                           0.75       308
   macro avg       0.74      0.70      0.71       308
weighted avg       0.75      0.75      0.74       308



In [192]:
x3train,x3test,y3train,y3test = train_test_split(x_stand,y,test_size = .20,random_state = 1)

In [193]:
model.fit(x3train,y3train)

SVC()

In [194]:
predict = model.predict(x3test)

In [195]:
accuracy_score(y3test,predict)

0.7727272727272727

In [196]:
# Per-class precision / recall / F1 for the standardized 80/20 experiment.
print(classification_report(y_true=y3test, y_pred=predict))

              precision    recall  f1-score   support

           0       0.78      0.90      0.84        99
           1       0.75      0.55      0.63        55

    accuracy                           0.77       154
   macro avg       0.77      0.72      0.73       154
weighted avg       0.77      0.77      0.76       154

