<a href="https://colab.research.google.com/github/deepthi-naresh/diseaseriskprediction/blob/main/ModelDevelopmentandapp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
# Read the diabetes dataset from the Colab working directory.
df = pd.read_csv('/content/diabetesforproject.csv')

# Separate the label column from the predictors.
y = df["Outcome"]                 # Target variable (0: No Diabetes, 1: Diabetes)
X = df.drop(columns=["Outcome"])  # Every remaining column is used as a feature

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training partition only, then apply
# the same fitted transform to both partitions so no test-set information
# leaks into the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Candidate classifiers to compare. The random forest is seeded (same seed as
# the train/test split) so its accuracy is reproducible across runs.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC()
}

# Fit every candidate on the standardized training features and report its
# accuracy on the standardized test features.
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    score = model.score(X_test_scaled, y_test)
    print(f"{name} Test Accuracy: {score:.2f}")

Logistic Regression Test Accuracy: 0.71
Random Forest Test Accuracy: 0.75
SVM Test Accuracy: 0.75


In [9]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    roc_auc_score
)

def evaluate_model(model, X_test, y_test):
    """Print a confusion matrix, classification report and ROC-AUC for a fitted classifier.

    Parameters
    ----------
    model
        A fitted classifier exposing ``predict`` and either ``predict_proba``
        or ``decision_function``.
    X_test
        Feature rows to score; must be in the same representation (scaled or
        unscaled) that ``model`` was trained on.
    y_test
        True labels for ``X_test`` (0/1 in this notebook).

    Returns
    -------
    None
        All results are printed.
    """
    y_pred = model.predict(X_test)

    # ROC-AUC needs a continuous score for the positive class. Not every
    # classifier offers predict_proba (e.g. SVC() with its default
    # probability=False), so fall back to the decision function.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X_test)[:, 1]
    else:
        y_score = model.decision_function(X_test)

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print(f"ROC-AUC Score: {roc_auc_score(y_test, y_score):.2f}")

# Baseline random forest, trained on the unscaled features (tree-based models
# do not need feature scaling). Seeded so the reported metrics are
# reproducible from run to run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
evaluate_model(rf_model, X_test, y_test)

Confusion Matrix:
[[82 18]
 [24 30]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.82      0.80       100
           1       0.62      0.56      0.59        54

    accuracy                           0.73       154
   macro avg       0.70      0.69      0.69       154
weighted avg       0.72      0.73      0.72       154

ROC-AUC Score: 0.80


In [10]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the random forest: 2 x 3 x 2 = 12 combinations,
# each scored with 5-fold cross-validation on the training partition.
param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [10, 20, None],
    "min_samples_split": [2, 5]
}

# Seed the estimator so that the selected parameters and the cross-validated
# score do not change between runs because of forest randomness.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring="accuracy"
)
grid_search.fit(X_train, y_train)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Accuracy: {grid_search.best_score_:.2f}")

Best Parameters: {'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 200}
Best Accuracy: 0.78


In [12]:
# grid_search was built with the default refit=True, so best_estimator_ has
# already been refit on all of X_train with the winning parameters. Fitting
# it again is redundant and, for an unseeded forest, would replace the
# selected model with a different random one.
best_model = grid_search.best_estimator_
evaluate_model(best_model, X_test, y_test)

Confusion Matrix:
[[84 16]
 [21 33]]

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.84      0.82       100
           1       0.67      0.61      0.64        54

    accuracy                           0.76       154
   macro avg       0.74      0.73      0.73       154
weighted avg       0.76      0.76      0.76       154

ROC-AUC Score: 0.82


In [14]:
import joblib

# Serialize the tuned classifier under the file name the Streamlit app loads.
joblib.dump(value=best_model, filename="diabetes_risk_model.pkl")

# Serialize the fitted scaler as well. best_model was trained on the unscaled
# features, so the scaler is only needed for models fit on the scaled data.
joblib.dump(value=scaler, filename="scaler.pkl")

['scaler.pkl']

In [29]:
!pip install streamlit pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Downloading pyngrok-7.2.5-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.5


In [30]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib

# Load model (replace with your model path)
model = joblib.load("diabetes_risk_model.pkl")

# App UI
st.title("Disease Risk Prediction 🩺")
st.markdown("---")

# Input form
with st.form("input_form"):
    pregnancies = st.number_input("Pregnancies", 0, 20, 1)
    glucose = st.number_input("Glucose", 0, 300, 100)
    blood_pressure = st.number_input("Blood Pressure", 0, 200, 70)
    skin_thickness = st.number_input("Skin Thickness", 0, 100, 20)
    insulin = st.number_input("Insulin", 0, 1000, 80)
    bmi = st.number_input("BMI", 0.0, 70.0, 25.0)
    diabetes_pedigree = st.number_input("Diabetes Pedigree", 0.0, 3.0, 0.5)
    age = st.number_input("Age", 0, 120, 30)

    submitted = st.form_submit_button("Predict")

if submitted:
    input_data = pd.DataFrame([[pregnancies, glucose, blood_pressure, skin_thickness,
                              insulin, bmi, diabetes_pedigree, age]],
                            columns=["Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
                                     "Insulin", "BMI", "DiabetesPedigreeFunction", "Age"])
    prediction = model.predict(input_data)[0]
    risk = "High Risk 🚨" if prediction == 1 else "Low Risk ✅"
    st.success(f"## Result: *{risk}*")


Writing app.py


In [35]:
!pip install pyngrok
!ngrok authtoken 2wfMNnc5cPxGP60VlbLEkjBweCE_3BJAs5eWnLzavFHSvePg4

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok

# Terminate old tunnels
ngrok.kill()

# Start a tunnel with TLS binding
public_url = ngrok.connect(addr='8501', proto='http', bind_tls=True)  # Fix: bind_tls=True instead of bind tis-tree
print("Streamlit App URL:", public_url)

# Run Streamlit with corrected flags
!streamlit run app.py --server.port 8501 --server.headless=true  # Fix: headless=true instead of headles=true

Streamlit App URL: NgrokTunnel: "https://a0ba-34-74-39-248.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.74.39.248:8501[0m
[0m
