In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import seaborn

In [3]:
dataset = pd.read_csv('diabetes.csv')

In [4]:
x = dataset[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']] 
y = dataset['Outcome']

In [5]:
from sklearn.feature_selection import SelectKBest, chi2

In [6]:
select_feature = SelectKBest(score_func = chi2,k=6)

In [7]:
select_feature.fit(x,y)

SelectKBest(k=6, score_func=<function chi2 at 0x000002093DB78CA0>)

In [8]:
score_col = pd.DataFrame(select_feature.scores_,columns = ['score_value'])

In [9]:
score_col

Unnamed: 0,score_value
0,111.519691
1,1411.887041
2,17.605373
3,53.10804
4,2175.565273
5,127.669343
6,5.392682
7,181.303689


In [10]:
feature_col  = pd.DataFrame(x.columns)

In [11]:
top_feature = pd.concat([feature_col, score_col], axis=1)

In [12]:
top_feature

Unnamed: 0,0,score_value
0,Pregnancies,111.519691
1,Glucose,1411.887041
2,BloodPressure,17.605373
3,SkinThickness,53.10804
4,Insulin,2175.565273
5,BMI,127.669343
6,DiabetesPedigreeFunction,5.392682
7,Age,181.303689


In [13]:
top_feature.nlargest(6,'score_value')

Unnamed: 0,0,score_value
4,Insulin,2175.565273
1,Glucose,1411.887041
7,Age,181.303689
5,BMI,127.669343
0,Pregnancies,111.519691
3,SkinThickness,53.10804


In [14]:
dataset.drop('BloodPressure',
  axis='columns', inplace=True)

In [15]:
dataset

Unnamed: 0,Pregnancies,Glucose,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,35,0,33.6,0.627,50,1
1,1,85,29,0,26.6,0.351,31,0
2,8,183,0,0,23.3,0.672,32,1
3,1,89,23,94,28.1,0.167,21,0
4,0,137,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...
763,10,101,48,180,32.9,0.171,63,0
764,2,122,27,0,36.8,0.340,27,0
765,5,121,23,112,26.2,0.245,30,0
766,1,126,0,0,30.1,0.349,47,1


In [16]:
dataset.drop('DiabetesPedigreeFunction',
  axis='columns', inplace=True)

In [17]:
x = dataset[['Pregnancies','Glucose','SkinThickness','Insulin','BMI','Age']] 
y = dataset['Outcome']

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size = .30,random_state = 1)

In [20]:
from sklearn.svm import SVC

In [21]:

model = SVC()

In [22]:
model.fit(xtrain,ytrain)

SVC()

In [23]:
model.predict(xtest)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0], dtype=int64)

In [24]:
model.score(xtest,ytest)

0.7705627705627706

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
predict = model.predict(xtest)

In [27]:
accuracy_score(ytest,predict)

0.7705627705627706

In [28]:
from sklearn.metrics import confusion_matrix

In [29]:
confusion_matrix(ytest,predict)

array([[138,   8],
       [ 45,  40]], dtype=int64)

In [30]:
from sklearn.metrics import classification_report

In [31]:
print(classification_report(ytest,predict))

              precision    recall  f1-score   support

           0       0.75      0.95      0.84       146
           1       0.83      0.47      0.60        85

    accuracy                           0.77       231
   macro avg       0.79      0.71      0.72       231
weighted avg       0.78      0.77      0.75       231



In [32]:
x1train,x1test,y1train,y1test = train_test_split(x,y,test_size = .25,random_state = 1)

In [33]:
model.fit(x1train,y1train)

SVC()

In [34]:
model.fit(x1train,y1train)

SVC()

In [35]:
predict = model.predict(x1test)

In [36]:
accuracy_score(y1test,predict)

0.7760416666666666

In [37]:
print(classification_report(y1test,predict))

              precision    recall  f1-score   support

           0       0.77      0.93      0.84       123
           1       0.80      0.51      0.62        69

    accuracy                           0.78       192
   macro avg       0.78      0.72      0.73       192
weighted avg       0.78      0.78      0.76       192



In [38]:
x2train,x2test,y2train,y2test = train_test_split(x,y,test_size = .40,random_state = 1)

In [39]:
model.fit(x2train,y2train)

SVC()

In [40]:
predict = model.predict(x2test)

In [41]:
accuracy_score(y2test,predict)

0.75

In [42]:
print(classification_report(y2test,predict))

              precision    recall  f1-score   support

           0       0.75      0.92      0.83       199
           1       0.76      0.43      0.55       109

    accuracy                           0.75       308
   macro avg       0.75      0.68      0.69       308
weighted avg       0.75      0.75      0.73       308



In [43]:
x3train,x3test,y3train,y3test = train_test_split(x,y,test_size = .20,random_state = 1)

In [44]:
model.fit(x3train,y3train)

SVC()

In [45]:
predict = model.predict(x3test)

In [46]:
accuracy_score(y3test,predict)

0.7857142857142857

In [47]:
print(classification_report(y3test,predict))

              precision    recall  f1-score   support

           0       0.77      0.95      0.85        99
           1       0.84      0.49      0.62        55

    accuracy                           0.79       154
   macro avg       0.81      0.72      0.74       154
weighted avg       0.80      0.79      0.77       154

