Project Overview:

The aim of this project is to develop a predictive model for diabetes diagnosis using logistic regression.
The dataset used for this project contains various health metrics and personal information about patients,
which are used to predict whether an individual has diabetes.

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Read the raw dataset from the CSV file in the working directory.
df = pd.read_csv('diabetes2.csv')

# Preview the first 5 rows (the default for head()) as plain text.
print(df.head(5))

# One-hot encode any non-numeric columns so every feature is numeric/boolean.
# drop_first=True drops the first level of each encoded column, so a
# k-level categorical column becomes k-1 indicator columns.
df_enc = pd.get_dummies(data=df, drop_first=True)

In [None]:
# Create the classifier. max_iter is raised well above scikit-learn's
# default of 100; presumably so the solver converges on the unscaled
# features -- TODO confirm (scaling the features would be the alternative).
model = LogisticRegression(max_iter=5000)

# Separate the features from the 'Outcome' target, then hold out 30% of the
# rows for testing. random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_enc.drop(columns='Outcome'),
    df_enc['Outcome'],
    test_size=0.3,
    random_state=0,
)

# Fit the model on the TRAINING split only; the test split stays unseen.
model.fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Evaluate on the test split. accuracy_score's signature is
# (y_true, y_pred): ground truth first, predictions second. Accuracy itself
# is symmetric, but keeping the documented order avoids silent errors if
# this call is later swapped for an asymmetric metric (precision, recall).
accuracy = accuracy_score(y_test, y_pred)

# Print the full-precision value, then a rounded, labelled version.
print(accuracy)
print(f'Model Accuracy: {accuracy:.2f}')

In [None]:
# Build a one-row frame describing a single hypothetical patient.
# The frame is created from one record (a dict of scalars) whose keys are
# the feature column names.
# NOTE(review): the columns are assumed to match the features the model was
# fitted on, in the same order -- verify against the training frame.
example_input = pd.DataFrame([{
    'Pregnancies': 6,
    'Glucose': 148,
    'BloodPressure': 72,
    'SkinThickness': 35,
    'Insulin': 0,
    'BMI': 33.6,
    'DiabetesPedigreeFunction': 0.627,
    'Age': 50,
}])

# Predicted class label for the example patient.
example_prediction = model.predict(example_input)

# predict_proba returns one column per class; keep only the second column
# (index 1), presumably the probability of Outcome == 1 -- confirm against
# model.classes_.
class_probabilities = model.predict_proba(example_input)
example_prediction_prob = class_probabilities[:, 1]

print(example_prediction)
print(example_prediction_prob)
