In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import seaborn

In [2]:
dataset = pd.read_csv('diabetes.csv')

In [3]:
x = dataset[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']] 
y = dataset['Outcome']

In [4]:
from sklearn.feature_selection import SelectKBest, chi2

In [5]:
select_feature = SelectKBest(score_func = chi2,k=6)

In [6]:
select_feature.fit(x,y)

SelectKBest(k=6, score_func=<function chi2 at 0x000001C7B420EAF0>)

In [7]:
score_col = pd.DataFrame(select_feature.scores_,columns = ['score_value'])

In [8]:
score_col

Unnamed: 0,score_value
0,111.519691
1,1411.887041
2,17.605373
3,53.10804
4,2175.565273
5,127.669343
6,5.392682
7,181.303689


In [9]:
feature_col  = pd.DataFrame(x.columns)

In [10]:
top_feature = pd.concat([feature_col, score_col], axis=1)

In [11]:
top_feature

Unnamed: 0,0,score_value
0,Pregnancies,111.519691
1,Glucose,1411.887041
2,BloodPressure,17.605373
3,SkinThickness,53.10804
4,Insulin,2175.565273
5,BMI,127.669343
6,DiabetesPedigreeFunction,5.392682
7,Age,181.303689


In [13]:
top_feature.nlargest(5,'score_value')

Unnamed: 0,0,score_value
4,Insulin,2175.565273
1,Glucose,1411.887041
7,Age,181.303689
5,BMI,127.669343
0,Pregnancies,111.519691


In [14]:
dataset.drop('BloodPressure',
  axis='columns', inplace=True)

In [15]:
dataset.drop('DiabetesPedigreeFunction',
  axis='columns', inplace=True)

In [16]:
dataset.drop('SkinThickness',
  axis='columns', inplace=True)

In [17]:
x = dataset[['Pregnancies','Glucose','Insulin','BMI','Age']] 
y = dataset['Outcome']

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size = .30,random_state = 1)

In [20]:
from sklearn.naive_bayes import MultinomialNB 

In [21]:
reg = MultinomialNB()

In [22]:
reg.fit(xtrain,ytrain)

MultinomialNB()

In [23]:
reg.predict(xtest)

array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1], dtype=int64)

In [24]:
reg.score(xtest,ytest)

0.5411255411255411

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
predict = reg.predict(xtest)

In [27]:
accuracy_score(ytest,predict)

0.5411255411255411

In [28]:
from sklearn.metrics import classification_report

In [29]:
print(classification_report(ytest,predict))

              precision    recall  f1-score   support

           0       0.64      0.61      0.63       146
           1       0.39      0.42      0.40        85

    accuracy                           0.54       231
   macro avg       0.52      0.52      0.52       231
weighted avg       0.55      0.54      0.54       231



In [30]:
x1train,x1test,y1train,y1test = train_test_split(x,y,test_size = .25,random_state = 1)

In [31]:
reg.fit(x1train,y1train)

MultinomialNB()

In [32]:
predict = reg.predict(x1test)

In [33]:
accuracy_score(y1test,predict)

0.5364583333333334

In [34]:
print(classification_report(y1test,predict))

              precision    recall  f1-score   support

           0       0.65      0.59      0.62       123
           1       0.38      0.43      0.40        69

    accuracy                           0.54       192
   macro avg       0.51      0.51      0.51       192
weighted avg       0.55      0.54      0.54       192



In [35]:
x2train,x2test,y2train,y2test = train_test_split(x,y,test_size = .40,random_state = 1)

In [36]:
predict = reg.predict(x2test)

In [37]:
accuracy_score(y2test,predict)

0.5324675324675324

In [39]:
print(classification_report(y2test,predict))

              precision    recall  f1-score   support

           0       0.65      0.60      0.62       199
           1       0.36      0.41      0.38       109

    accuracy                           0.53       308
   macro avg       0.51      0.51      0.50       308
weighted avg       0.55      0.53      0.54       308



In [40]:
x3train,x3test,y3train,y3test = train_test_split(x,y,test_size = .20,random_state = 1)

In [41]:
predict = reg.predict(x3test)

In [42]:
accuracy_score(y3test,predict)

0.5194805194805194

In [43]:
print(classification_report(y3test,predict))

              precision    recall  f1-score   support

           0       0.64      0.58      0.61        99
           1       0.35      0.42      0.38        55

    accuracy                           0.52       154
   macro avg       0.50      0.50      0.49       154
weighted avg       0.54      0.52      0.53       154

