In [1]:
pip install streamlit pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install dash plotly pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os

import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import plotly.express as px
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

In [4]:
# Path to the diabetes CSV. The default is the original author's local path;
# set the DIABETES_CSV environment variable to point at the file elsewhere
# instead of editing this cell.
DATA_PATH = os.environ.get(
    "DIABETES_CSV",
    "C:/Users/ankuv/Desktop/DAB/Semester 4/DAB 304/DIABETIES - PROJECT/diabetes.csv",
)

# Load the data
df = pd.read_csv(DATA_PATH)

# Fail early with a readable message if the target column is missing
# (df.drop below would raise the same exception type, but less clearly).
if 'Outcome' not in df.columns:
    raise KeyError(
        f"Expected an 'Outcome' column in {DATA_PATH!r}; found columns {list(df.columns)}"
    )

# Prepare the data for modeling: every column except 'Outcome' is a feature,
# 'Outcome' is the target.
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Split the data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training split only and then
# reused for the test split (and later for user-entered values in the dashboard).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Random Forest model on the scaled training features
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Create the Dash app
app = dash.Dash(__name__)


In [5]:
def _half_width_graph(graph_id):
    """Wrap a ``dcc.Graph`` with the given id in a 50%-wide inline-block Div."""
    return html.Div(
        [dcc.Graph(id=graph_id)],
        style={'width': '50%', 'display': 'inline-block'},
    )


def _labelled_number_input(label_text, input_id):
    """Return the ``[Label, Input]`` pair for one numeric form field (default value 0)."""
    return [
        html.Label(label_text),
        dcc.Input(id=input_id, type='number', value=0),
    ]


# (label, component id) for each prediction field, in the order the form displays them.
_PREDICTION_FIELDS = [
    ("Pregnancies", 'pregnancies-input'),
    ("Glucose", 'glucose-input'),
    ("Blood Pressure", 'blood-pressure-input'),
    ("Skin Thickness", 'skin-thickness-input'),
    ("Insulin", 'insulin-input'),
    ("BMI", 'bmi-input'),
    ("Diabetes Pedigree Function", 'dpf-input'),
    ("Age", 'age-input'),
]

# Flatten the per-field pairs into one alternating Label/Input child list.
_form_children = []
for _label_text, _input_id in _PREDICTION_FIELDS:
    _form_children.extend(_labelled_number_input(_label_text, _input_id))

# Page structure: title, two rows of two graphs each, then the prediction form.
app.layout = html.Div([
    html.H1("Diabetes Prediction Dashboard"),

    html.Div([
        _half_width_graph('age-bmi-scatter'),
        _half_width_graph('glucose-insulin-scatter'),
    ]),

    html.Div([
        _half_width_graph('outcome-distribution'),
        _half_width_graph('feature-importance'),
    ]),

    html.Div([
        html.H3("Predict Diabetes"),
        html.Div(_form_children),
        html.Button('Predict', id='predict-button', n_clicks=0),
        html.Div(id='prediction-output'),
    ]),
])

In [6]:
@app.callback(
    Output('age-bmi-scatter', 'figure'),
    Input('age-bmi-scatter', 'relayoutData')
)
def update_age_bmi_scatter(relayoutData):
    """Build the Age vs BMI scatter plot, coloured by ``Outcome``.

    The graph's own ``relayoutData`` is the only input, so this callback runs
    once on page load (with ``relayoutData`` of ``None``) and again on every
    zoom/pan/resize event. The plot does not depend on those events, so the
    figure is built only on the initial call; later calls leave the existing
    figure untouched so the user's zoom and pan are not reset.

    Args:
        relayoutData: Relayout event payload from the graph, or ``None`` on
            the initial call.

    Returns:
        A Plotly figure on the initial call, ``dash.no_update`` afterwards.
    """
    if relayoutData is not None:
        # Returning a fresh figure here would discard the user's current view.
        return dash.no_update
    return px.scatter(df, x='Age', y='BMI', color='Outcome', title='Age vs BMI')

@app.callback(
    Output('glucose-insulin-scatter', 'figure'),
    Input('glucose-insulin-scatter', 'relayoutData')
)
def update_glucose_insulin_scatter(relayoutData):
    """Build the Glucose vs Insulin scatter plot, coloured by ``Outcome``.

    The graph's own ``relayoutData`` is the only input, so this callback runs
    once on page load (with ``relayoutData`` of ``None``) and again on every
    zoom/pan/resize event. The figure is built only on the initial call;
    later calls leave the existing figure untouched so the user's zoom and
    pan are not reset.

    Args:
        relayoutData: Relayout event payload from the graph, or ``None`` on
            the initial call.

    Returns:
        A Plotly figure on the initial call, ``dash.no_update`` afterwards.
    """
    if relayoutData is not None:
        # Returning a fresh figure here would discard the user's current view.
        return dash.no_update
    return px.scatter(df, x='Glucose', y='Insulin', color='Outcome', title='Glucose vs Insulin')

@app.callback(
    Output('outcome-distribution', 'figure'),
    Input('outcome-distribution', 'relayoutData')
)
def update_outcome_distribution(relayoutData):
    """Build the histogram of the ``Outcome`` column.

    The graph's own ``relayoutData`` is the only input, so this callback runs
    once on page load (with ``relayoutData`` of ``None``) and again on every
    zoom/pan/resize event. The figure is built only on the initial call;
    later calls leave the existing figure untouched so the user's view is
    not reset.

    Args:
        relayoutData: Relayout event payload from the graph, or ``None`` on
            the initial call.

    Returns:
        A Plotly figure on the initial call, ``dash.no_update`` afterwards.
    """
    if relayoutData is not None:
        # Returning a fresh figure here would discard the user's current view.
        return dash.no_update
    return px.histogram(df, x='Outcome', title='Outcome Distribution')

@app.callback(
    Output('feature-importance', 'figure'),
    Input('feature-importance', 'relayoutData')
)
def update_feature_importance(relayoutData):
    """Build a bar chart of the random forest's feature importances.

    Pairs ``rf_model.feature_importances_`` with the feature names in
    ``X.columns`` and plots them sorted from most to least important.

    The graph's own ``relayoutData`` is the only input, so this callback runs
    once on page load (with ``relayoutData`` of ``None``) and again on every
    zoom/pan/resize event. The figure is built only on the initial call;
    later calls leave the existing figure untouched so the user's view is
    not reset.

    Args:
        relayoutData: Relayout event payload from the graph, or ``None`` on
            the initial call.

    Returns:
        A Plotly figure on the initial call, ``dash.no_update`` afterwards.
    """
    if relayoutData is not None:
        # Returning a fresh figure here would discard the user's current view.
        return dash.no_update

    feature_importance = pd.DataFrame(
        {'feature': X.columns, 'importance': rf_model.feature_importances_}
    ).sort_values('importance', ascending=False)
    return px.bar(feature_importance, x='feature', y='importance', title='Feature Importance')

@app.callback(
    Output('prediction-output', 'children'),
    Input('predict-button', 'n_clicks'),
    # The field values are State, not Input: they are read when the button is
    # clicked but editing them does not trigger a prediction on its own.
    State('pregnancies-input', 'value'),
    State('glucose-input', 'value'),
    State('blood-pressure-input', 'value'),
    State('skin-thickness-input', 'value'),
    State('insulin-input', 'value'),
    State('bmi-input', 'value'),
    State('dpf-input', 'value'),
    State('age-input', 'value')
)
def predict_diabetes(n_clicks, pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age):
    """Predict diabetes for the values entered in the form.

    Runs when the Predict button is clicked. The entered values are scaled
    with the fitted ``scaler`` and passed to ``rf_model``.

    Args:
        n_clicks: Number of times the Predict button has been clicked.
        pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi,
        dpf, age: Current values of the numeric form fields; a field is
            ``None`` when it is empty or does not hold a valid number.

    Returns:
        An empty string before the first click, a prompt if any field is
        empty, otherwise a sentence stating the predicted class and the
        model's probability for that class.
    """
    # Nothing to show until the button has actually been pressed.
    if not n_clicks:
        return ""

    values = [pregnancies, glucose, blood_pressure, skin_thickness, insulin, bmi, dpf, age]

    # A cleared numeric field arrives as None, which the scaler cannot handle.
    if any(value is None for value in values):
        return "Please enter a valid number in every field before predicting."

    # Use the training feature names so the scaler (fitted on a DataFrame) gets
    # matching columns instead of warning about missing feature names.
    # Assumes X's columns are in the same order as the arguments above — TODO confirm.
    input_data = pd.DataFrame([values], columns=X.columns)
    input_data_scaled = scaler.transform(input_data)

    prediction = rf_model.predict(input_data_scaled)
    # Second column of predict_proba is read as the probability of class 1.
    probability = rf_model.predict_proba(input_data_scaled)[0][1]

    if prediction[0] == 1:
        return f"The model predicts that the patient has diabetes with a probability of {probability:.2f}"
    return f"The model predicts that the patient does not have diabetes with a probability of {1-probability:.2f}"

# Run the app
if __name__ == '__main__':
    # ``run_server`` is the legacy entry point and is no longer available in
    # recent Dash releases; prefer ``app.run`` and only fall back to
    # ``run_server`` on older versions that lack it. The ``or`` short-circuits,
    # so ``run_server`` is never looked up when ``run`` exists.
    _run_app = getattr(app, 'run', None) or app.run_server
    _run_app(debug=True)