In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
import seaborn

In [7]:
dataset = pd.read_csv('diabetes.csv')

In [3]:
x = dataset[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']] 
y = dataset['Outcome']

In [120]:
from sklearn.feature_selection import mutual_info_classif

In [121]:
mutual_info = mutual_info_classif(x, y)

In [122]:
mutual_info

array([0.04647074, 0.11354051, 0.00630194, 0.06820315, 0.08859411,
       0.06511412])

In [123]:
from sklearn.feature_selection import SelectKBest

In [124]:
select_feature = SelectKBest(mutual_info_classif,k=6)

In [126]:
select_feature.fit(x,y)

SelectKBest(k=6,
            score_func=<function mutual_info_classif at 0x000001E76BC0F700>)

In [127]:
score_col = pd.DataFrame(select_feature.scores_,columns = ['score_value'])

In [128]:
score_col

Unnamed: 0,score_value
0,0.031507
1,0.127673
2,0.040437
3,0.034565
4,0.097429
5,0.053465


In [129]:
feature_col  = pd.DataFrame(x.columns)

In [130]:
top_feature = pd.concat([feature_col, score_col], axis=1)

In [131]:
top_feature.nlargest(6,'score_value')

Unnamed: 0,0,score_value
1,Glucose,0.127673
4,BMI,0.097429
5,Age,0.053465
2,SkinThickness,0.040437
3,Insulin,0.034565
0,Pregnancies,0.031507


In [133]:
dataset

Unnamed: 0,Pregnancies,Glucose,SkinThickness,Insulin,BMI,Age,Outcome
0,6,148,35,0,33.6,50,1
1,1,85,29,0,26.6,31,0
2,8,183,0,0,23.3,32,1
3,1,89,23,94,28.1,21,0
4,0,137,35,168,43.1,33,1
...,...,...,...,...,...,...,...
763,10,101,48,180,32.9,63,0
764,2,122,27,0,36.8,27,0
765,5,121,23,112,26.2,30,0
766,1,126,0,0,30.1,47,1


In [134]:
x = dataset[['Pregnancies','Glucose','SkinThickness','Insulin','BMI','Age']] 
y = dataset['Outcome']

# Feature Scalling

In [135]:
from sklearn.preprocessing import MinMaxScaler

In [136]:
mms = MinMaxScaler(feature_range = (0, 1))

In [137]:
x_min = mms.fit_transform(x)

In [138]:
from sklearn.preprocessing import StandardScaler

In [139]:
standard = StandardScaler()

In [162]:
x_stand = standard.fit_transform(x)

In [163]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
xtrain,xtest,ytrain,ytest = train_test_split(x_stand,y,test_size = .30,random_state = 1)

In [164]:
from sklearn.svm import SVC

In [165]:
model = SVC()

In [166]:
model.fit(xtrain,ytrain)

SVC()

In [167]:
model.predict(xtest)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0], dtype=int64)

In [168]:
model.score(xtest,ytest)

0.7878787878787878

In [169]:
from sklearn.metrics import accuracy_score

In [170]:
predict = model.predict(xtest)

In [171]:
accuracy_score(ytest,predict)

0.7878787878787878

In [172]:
from sklearn.metrics import confusion_matrix

In [173]:
confusion_matrix(ytest,predict)

array([[132,  14],
       [ 35,  50]], dtype=int64)

In [174]:
from sklearn.metrics import classification_report

In [175]:
print(classification_report(ytest,predict))

              precision    recall  f1-score   support

           0       0.79      0.90      0.84       146
           1       0.78      0.59      0.67        85

    accuracy                           0.79       231
   macro avg       0.79      0.75      0.76       231
weighted avg       0.79      0.79      0.78       231



In [176]:
x1train,x1test,y1train,y1test = train_test_split(x_stand,y,test_size = .25,random_state = 1)

In [177]:
model.fit(x1train,y1train)

SVC()

In [178]:
model.fit(x1train,y1train)

SVC()

In [179]:
predict = model.predict(x1test)

In [180]:
accuracy_score(y1test,predict)

0.7760416666666666

In [181]:
print(classification_report(y1test,predict))

              precision    recall  f1-score   support

           0       0.79      0.88      0.83       123
           1       0.73      0.59      0.66        69

    accuracy                           0.78       192
   macro avg       0.76      0.74      0.74       192
weighted avg       0.77      0.78      0.77       192



In [182]:
x2train,x2test,y2train,y2test = train_test_split(x_stand,y,test_size = .40,random_state = 1)

In [183]:
model.fit(x2train,y2train)

SVC()

In [184]:
predict = model.predict(x2test)

In [185]:
accuracy_score(y2test,predict)

0.7532467532467533