In [1]:
# installing libraries
!pip install -q scikit-learn pandas numpy

In [44]:
# importing required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import json

In [48]:
# loading cancer dataset
from sklearn.datasets import load_breast_cancer

cancer_data = load_breast_cancer()
df_cancer = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)
df_cancer["target"] = cancer_data.target

X_cancer = df_cancer.drop(columns=["target"])
y_cancer = df_cancer["target"]

# spliting dataset into training and testing set
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_cancer, y_cancer, test_size=0.2, random_state=42)

# training model
cancer_model = RandomForestClassifier(random_state=42)
cancer_model.fit(X_train_c, y_train_c)

In [49]:
# loading diabetes dataset
url_diabetes = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns_diabetes = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin",
                    "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]

df_diabetes = pd.read_csv(url_diabetes, header=None, names=columns_diabetes)

X_diabetes = df_diabetes.drop(columns=["Outcome"])
y_diabetes = df_diabetes["Outcome"]

# spliting dataset into training and testing set
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(X_diabetes, y_diabetes, test_size=0.2, random_state=42)

# training model
diabetes_model = RandomForestClassifier(random_state=42)
diabetes_model.fit(X_train_d, y_train_d)

In [50]:
# loading heart disease dataset
url_heart = "https://raw.githubusercontent.com/sharmaroshan/Heart-UCI-Dataset/refs/heads/master/heart.csv"
df_heart = pd.read_csv(url_heart)

X_heart = df_heart.drop(columns=["target"])
y_heart = df_heart["target"]

# spliting dataset into training and testing set
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(X_heart, y_heart, test_size=0.2, random_state=42)

# training model
heart_model = RandomForestClassifier(random_state=42)
heart_model.fit(X_train_h, y_train_h)

In [51]:
# pridicts heart risk
def predict_heart_risk(heart_input):
    input_df = pd.DataFrame([heart_input], columns=X_heart.columns)
    heart_prob = heart_model.predict_proba(input_df)[0][1] * 100
    return round(heart_prob, 2)


In [59]:
def predict_all_from_user_input(user_input):
    # Diabetes model input
    diabetes_input = [
        user_input["pregnancies"],
        user_input["glucose"],
        user_input["blood_pressure"],
        user_input["skin_thickness"],
        user_input["insulin"],
        user_input["bmi"],
        user_input["diabetes_pedigree_function"],
        user_input["age"]
    ]

    # Heart disease model input
    heart_input = [
        user_input["age"],
        1 if user_input["gender"].lower() == "male" else 0,
        user_input["chest_pain_type"],
        user_input["resting_bp"],
        user_input["cholesterol"],
        user_input["fasting_blood_sugar"],
        user_input["rest_ecg"],
        user_input["max_heart_rate"],
        user_input["exercise_induced_angina"],
        user_input["oldpeak"],
        user_input["slope"],
        user_input["num_major_vessels"],
        user_input["thal"]
    ]

    cancer_input = X_cancer.iloc[0].tolist()

    # Running predictions
    diabetes_prob = diabetes_model.predict_proba(pd.DataFrame([diabetes_input], columns=X_diabetes.columns))[0][1] * 100
    cancer_prob = cancer_model.predict_proba(pd.DataFrame([cancer_input], columns=X_cancer.columns))[0][1] * 100
    heart_prob = predict_heart_risk(heart_input)

    return json.dumps({
        "diabetes_risk": round(diabetes_prob, 2),
        "cancer_risk": round(cancer_prob, 2),
        "heart_disease_risk": round(heart_prob, 2)
    }, indent=2)


In [60]:
user_input = {
    "age": 50,
    "gender": "male",
    "pregnancies": 0,
    "glucose": 150,
    "blood_pressure": 85,
    "skin_thickness": 20,
    "insulin": 90,
    "bmi": 27.5,
    "diabetes_pedigree_function": 0.6,
    "cholesterol": 250,
    "resting_bp": 130,
    "chest_pain_type": 0,
    "fasting_blood_sugar": 0,
    "rest_ecg": 1,
    "max_heart_rate": 140,
    "exercise_induced_angina": 0,
    "oldpeak": 1.2,
    "slope": 1,
    "num_major_vessels": 0,
    "thal": 2
}

print(predict_all_from_user_input(user_input))


{
  "diabetes_risk": 63.0,
  "cancer_risk": 4.0,
  "heart_disease_risk": 70.0
}


# Testing Models

In [61]:
from sklearn.metrics import classification_report, accuracy_score

# Cancer model evaluation
print("Cancer Model Performance:")
y_pred_cancer = cancer_model.predict(X_test_c)
print(classification_report(y_test_c, y_pred_cancer))
print("Accuracy:", round(accuracy_score(y_test_c, y_pred_cancer) * 100, 2), "%\n")

# Diabetes model evaluation
print("Diabetes Model Performance:")
y_pred_diabetes = diabetes_model.predict(X_test_d)
print(classification_report(y_test_d, y_pred_diabetes))
print("Accuracy:", round(accuracy_score(y_test_d, y_pred_diabetes) * 100, 2), "%\n")

# Heart Disease model evaluation
print("Heart Disease Model Performance:")
y_pred_heart = heart_model.predict(X_test_h)
print(classification_report(y_test_h, y_pred_heart))
print("Accuracy:", round(accuracy_score(y_test_h, y_pred_heart) * 100, 2), "%")


Cancer Model Performance:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Accuracy: 96.49 %

Diabetes Model Performance:
              precision    recall  f1-score   support

           0       0.79      0.78      0.78        99
           1       0.61      0.62      0.61        55

    accuracy                           0.72       154
   macro avg       0.70      0.70      0.70       154
weighted avg       0.72      0.72      0.72       154

Accuracy: 72.08 %

Heart Disease Model Performance:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83        29
           1       0.84      0.84      0.84        32

    accuracy                           0.84        61
   