In [1]:
import pandas as pd
import numpy as np 

In [2]:
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


### All function checks for zero in array if zero is found it returns false 

In [4]:
df['Pregnancies'].unique().all()

False

In [5]:
df['Glucose'].unique().all()

False

In [6]:
df['BloodPressure'].unique().all()

False

In [7]:
df['SkinThickness'].unique().all()

False

In [8]:
df['Insulin'].unique().all()

False

In [9]:
df['BMI'].unique().all()

False

In [10]:
df['DiabetesPedigreeFunction'].unique().all()

True

In [11]:
df['Age'].unique().all()

True

In [12]:
df['Outcome'].unique()

array([1, 0])

### From the above observation we have found that all the columns have zero values which can't be zero except DiabetesPedigreeFunction(no zero found) , pregnancies(zero means no pregnancy yet) , outcome(target class is zero which is fine ),Age, now we are going to replace all the meaning less zeroes with the mean of the respective column

In [20]:
df['Glucose']

0      148
1       85
2      183
3       89
4      137
      ... 
763    101
764    122
765    121
766    126
767     93
Name: Glucose, Length: 768, dtype: int64

In [22]:
df['Glucose'].mean()

120.89453125

In [25]:
df['Glucose'].replace(0,df['Glucose'].mean(),inplace=True)

In [26]:
df['BloodPressure'].replace(0,df['BloodPressure'].mean(),inplace=True)

In [27]:
df['SkinThickness'].replace(0,df['SkinThickness'].mean(),inplace=True)

In [28]:
df['Insulin'].replace(0,df['Insulin'].mean(),inplace=True)

In [29]:
df['BMI'].replace(0,df['BMI'].mean(),inplace=True)

### scaling the values using sandard scaler 

In [46]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [43]:
x_train,x_test,y_train,y_test=train_test_split(df.drop('Outcome',axis='columns'),df['Outcome'],test_size=0.2,random_state=42)

In [44]:
len(x_train),len(y_train),len(x_test),len(y_test)

(614, 614, 154, 154)

In [45]:
scaler=StandardScaler()
x_train_scaled=scaler.fit_transform(x_train)
x_test_scaled=scaler.transform(x_test)

In [47]:
lr=LogisticRegression()

In [49]:
lr.fit(x_train_scaled,y_train)

In [50]:
lr.score(x_test_scaled,y_test)

0.7662337662337663

### Now we are going to try grid search cv 

In [59]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [54]:
grid={'penalty':['l1','l2','elasticnet'],
     'C':[1.0,2.0,3.0],
     'solver':['lbfgs','liblinear','newton-cg','newton-cholesky','sag','saga'],
     'max_iter':[100,200,300]}

In [56]:
gscv=GridSearchCV(lr,grid,scoring='accuracy',cv=5,verbose=1,return_train_score=True)

In [60]:
gscv.fit(x_train_scaled,y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits


In [61]:
gscv.best_params_

{'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'}

In [62]:
gscv.best_score_

0.7703985072637611

In [68]:
pd.DataFrame(gscv.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_max_iter,param_penalty,param_solver,params,split0_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.001655,0.001666,0.000000,0.000000,1.0,100,l1,lbfgs,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l1', '...",,...,,,73,,,,,,,
1,0.001685,0.000074,0.000589,0.000036,1.0,100,l1,liblinear,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l1', '...",0.739837,...,0.767133,0.032138,64,0.782077,0.761711,0.784114,0.782077,0.776423,0.77728,0.008196
2,0.000602,0.000011,0.000000,0.000000,1.0,100,l1,newton-cg,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l1', '...",,...,,,73,,,,,,,
3,0.000600,0.000013,0.000000,0.000000,1.0,100,l1,newton-cholesky,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l1', '...",,...,,,73,,,,,,,
4,0.000586,0.000001,0.000000,0.000000,1.0,100,l1,sag,"{'C': 1.0, 'max_iter': 100, 'penalty': 'l1', '...",,...,,,73,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,0.000592,0.000008,0.000000,0.000000,3.0,300,elasticnet,liblinear,"{'C': 3.0, 'max_iter': 300, 'penalty': 'elasti...",,...,,,73,,,,,,,
158,0.000588,0.000007,0.000000,0.000000,3.0,300,elasticnet,newton-cg,"{'C': 3.0, 'max_iter': 300, 'penalty': 'elasti...",,...,,,73,,,,,,,
159,0.000590,0.000011,0.000000,0.000000,3.0,300,elasticnet,newton-cholesky,"{'C': 3.0, 'max_iter': 300, 'penalty': 'elasti...",,...,,,73,,,,,,,
160,0.000583,0.000006,0.000000,0.000000,3.0,300,elasticnet,sag,"{'C': 3.0, 'max_iter': 300, 'penalty': 'elasti...",,...,,,73,,,,,,,


In [64]:
import pickle 

In [70]:
pickle.dump(gscv,open('logistic_model.pkl','wb'))
pickle.dump(scaler,open('scaler.pkl','wb'))

In [2]:
pip install flask

Collecting flask
  Downloading flask-3.0.0-py3-none-any.whl (99 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.7/99.7 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting blinker>=1.6.2
  Downloading blinker-1.6.2-py3-none-any.whl (13 kB)
Collecting Werkzeug>=3.0.0
  Downloading werkzeug-3.0.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.6/226.6 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
Collecting itsdangerous>=2.1.2
  Downloading itsdangerous-2.1.2-py3-none-any.whl (15 kB)
Installing collected packages: Werkzeug, itsdangerous, blinker, flask
  Attempting uninstall: blinker
    Found existing installation: blinker 1.5
    Uninstalling blinker-1.5:
      Successfully uninstalled blinker-1.5
Successfully installed Werkzeug-3.0.0 blinker-1.6.2 flask-3.0.0 itsdangerous-2.1.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
from flask import Flask,render_template,request 
import pickle 
import sklearn
import pandas as pd

scaler=pickle.load(open('scaler.pkl','rb'))
model=pickle.load(open('logistic_model.pkl','rb'))

app=Flask(__name__)

@app.route('/')
def homepage():
    return render_template('index.html')

@app.route('/predict',methods=['POST','GET'])
def predict_datapoint():
    result='' # given this here other wise it gives local and global variable error 'mentioned before assignment'
    if request.method=='POST':
        pregnancies=float(request.form.get('Pregnancies'))
        glucose=float(request.form.get('Glucose'))
        bloodpressure=float(request.form.get('BloodPressure'))     
        skinthickness=float(request.form.get('SkinThickness'))
        insulin=float(request.form.get('Insulin'))
        bmi=float(request.form.get('BMI'))
        diabetespedigreefunction=float(request.form.get('DiabetesPedigreeFunction'))
        Age=float(request.form.get('Age'))
        input_params=pd.DataFrame([pregnancies,glucose,bloodpressure,skinthickness,insulin,bmi,diabetespedigreefunction,Age]).T
        scaler_params=scaler.transform(input_params)
        pred=model.predict(scaler_params)
        if pred[0]==0:
            result='Not a diabetic'
        elif pred[0]==1:
            result='Diabetic'
    return render_template('home.html',result=result)
if __name__=='__main__':
    app.run(host='0.0.0.0')
        
                      
                      

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.18.0.10:5000
Press CTRL+C to quit
172.18.0.45 - - [06/Oct/2023 20:50:17] "GET /predict HTTP/1.1" 200 -
172.18.0.45 - - [06/Oct/2023 20:50:47] "POST /predict HTTP/1.1" 200 -
