In [22]:
pip install pandas scikit-learn xgboost joblib




In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import joblib

In [24]:
# Absolute, machine-specific location of the raw CSV; change it when running
# on another machine.
DATASET_PATH = "C:\\Users\\VARSHA\\Downloads\\TANCAM DATASET.csv"
df = pd.read_csv(DATASET_PATH)

In [25]:
# Columns whose values are converted to integer codes before modelling.
categorical_cols = ['Sex', 'Blood Group', 'Smoking', 'Alcohol Consumption',
                    'Family History', 'Diet', 'Obesity']

# 'S No' and 'NAME' are removed so they never end up among the features.
# NOTE: this cell reassigns `df`, so running it a second time raises KeyError
# because the two columns are already gone.
df = df.drop(columns=['S No', 'NAME'])

# Fit one LabelEncoder per categorical column on the column's string form and
# keep it by column name so the same mapping can be reused at prediction time.
label_encoders = {
    column: LabelEncoder().fit(df[column].astype(str))
    for column in categorical_cols
}

# Replace each categorical column with its integer codes.
for column, encoder in label_encoders.items():
    df[column] = encoder.transform(df[column].astype(str))

In [26]:
def split_bp(bp):
    """Split a 'systolic/diastolic' blood-pressure string into two numeric fields.

    Args:
        bp: A reading such as ``'120/80'``. Anything that is not a string with
            exactly two integer parts separated by ``/`` is treated as missing.

    Returns:
        pd.Series with keys ``'Systolic_BP'`` and ``'Diastolic_BP'``. Both hold
        integers when parsing succeeds and NaN when it does not.
    """
    try:
        systolic, diastolic = bp.split('/')
        return pd.Series({'Systolic_BP': int(systolic), 'Diastolic_BP': int(diastolic)})
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: bp is not a str (e.g. None or a float NaN).
        # ValueError: wrong number of parts, or a part is not an integer.
        # NaN (rather than None) keeps the resulting columns numeric, so a
        # later numeric-only median fill can fill them.
        return pd.Series({'Systolic_BP': float('nan'), 'Diastolic_BP': float('nan')})

# Expand the combined 'Blood Pressure' text column into two numeric columns
# and swap them in for the original column.
bp_split = df['Blood Pressure'].apply(split_bp)
df = pd.concat([df.drop(columns=['Blood Pressure']), bp_split], axis=1)

# Fill remaining gaps in numeric columns with that column's median.
column_medians = df.median(numeric_only=True)
df = df.fillna(column_medians)

# Everything except the two target columns is used as a model input.
target_cols = ['Diabetes', 'Heart Attack Risk']
feature_cols = df.columns.difference(target_cols)

X = df[feature_cols]
y_diabetes = df['Diabetes']
y_heart_risk = df['Heart Attack Risk']

In [27]:
# Standardise every feature column. The scaler is fitted on all rows of X,
# i.e. before the train/test splits performed in the following cells.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [28]:
# Hold out 20% of the rows (fixed seed) to evaluate the diabetes classifier.
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
    X_scaled,
    y_diabetes,
    random_state=42,
    test_size=0.2,
)

diabetes_model = XGBClassifier(eval_metric='logloss')
diabetes_model.fit(X_train_d, y_train_d)

In [29]:
# Hold out 20% of the rows (fixed seed) to evaluate the heart-attack-risk classifier.
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(
    X_scaled,
    y_heart_risk,
    random_state=42,
    test_size=0.2,
)

heart_model = XGBClassifier(eval_metric='logloss')
heart_model.fit(X_train_h, y_train_h)

In [30]:
# Per-class precision/recall/F1 of the diabetes model on its held-out rows.
print("Diabetes Model Report:")
diabetes_test_pred = diabetes_model.predict(X_test_d)
diabetes_report = classification_report(y_test_d, diabetes_test_pred)
print(diabetes_report)

Diabetes Model Report:
              precision    recall  f1-score   support

           0       0.37      0.18      0.25       119
           1       0.67      0.84      0.74       232

    accuracy                           0.62       351
   macro avg       0.52      0.51      0.49       351
weighted avg       0.56      0.62      0.57       351



In [31]:
# Per-class precision/recall/F1 of the heart-attack-risk model on its held-out rows.
print("\nHeart Attack Risk Model Report:")
heart_test_pred = heart_model.predict(X_test_h)
heart_report = classification_report(y_test_h, heart_test_pred)
print(heart_report)


Heart Attack Risk Model Report:
              precision    recall  f1-score   support

           0       0.45      0.42      0.44       159
           1       0.55      0.58      0.56       192

    accuracy                           0.51       351
   macro avg       0.50      0.50      0.50       351
weighted avg       0.50      0.51      0.51       351



In [32]:
# Persist both trained classifiers together with the fitted scaler and the
# per-column label encoders. The file names are relative, so the files are
# written to the current working directory; predict_patient_status() reloads
# them by the same names.
joblib.dump(diabetes_model, 'diabetes_model.pkl')
joblib.dump(heart_model, 'heart_attack_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')

['label_encoders.pkl']

In [33]:
def predict_patient_status(patient_data_dict):
    """Predict diabetes and heart-attack risk for one patient from saved artifacts.

    Reloads the two models, the scaler and the label encoders from the
    ``.pkl`` files in the current working directory, applies the same
    preprocessing used for training, and scores the record.

    Args:
        patient_data_dict: Mapping of column name to raw value for a single
            patient. Blood pressure may be given either as separate
            ``'Systolic_BP'`` / ``'Diastolic_BP'`` entries or as one
            ``'Blood Pressure'`` string such as ``'120/80'``.

    Returns:
        dict with boolean entries ``"Diabetes"`` and ``"Heart Attack Risk"``.

    Raises:
        ValueError: If ``'Blood Pressure'`` has to be parsed and is not of the
            form ``'<int>/<int>'``, or a categorical value was never seen by
            its encoder.
        KeyError: If a column the scaler was fitted on is missing.
    """
    # NOTE(security): joblib.load unpickles the files; only load artifacts
    # that were written by a trusted source.
    diabetes_model = joblib.load('diabetes_model.pkl')
    heart_model = joblib.load('heart_attack_model.pkl')
    scaler = joblib.load('scaler.pkl')
    label_encoders = joblib.load('label_encoders.pkl')

    df_new = pd.DataFrame([patient_data_dict])

    # The encoders were fitted on the string form of each column, so the
    # incoming value is cast to str before it is transformed.
    for col, le in label_encoders.items():
        if col in df_new:
            df_new[col] = le.transform([str(df_new[col].iloc[0])])

    # The training features hold blood pressure as two numeric columns. Derive
    # them from a combined 'Blood Pressure' string when the caller did not
    # supply them directly; explicitly supplied values are left untouched.
    bp_parts = ('Systolic_BP', 'Diastolic_BP')
    if 'Blood Pressure' in df_new.columns and not all(part in df_new.columns for part in bp_parts):
        raw_bp = str(df_new['Blood Pressure'].iloc[0])
        try:
            systolic, diastolic = (int(piece) for piece in raw_bp.split('/'))
        except ValueError as exc:
            raise ValueError(
                f"Blood Pressure must look like '120/80', got {raw_bp!r}"
            ) from exc
        for part, value in zip(bp_parts, (systolic, diastolic)):
            if part not in df_new.columns:
                df_new[part] = value

    # Select and order the columns exactly as the scaler saw them when fitted.
    df_new = df_new[scaler.feature_names_in_]

    X_new_scaled = scaler.transform(df_new)

    diabetes_pred = diabetes_model.predict(X_new_scaled)[0]
    heart_pred = heart_model.predict(X_new_scaled)[0]

    return {
        "Diabetes": bool(diabetes_pred),
        "Heart Attack Risk": bool(heart_pred)
    }

In [34]:
def get_user_input():
    """Interactively collect one patient's details from standard input.

    Prints a heading, then asks for each field in a fixed order and converts
    the reply with the field's converter (``int``, ``float`` or plain text).

    Returns:
        dict mapping each field name to the converted reply, in prompt order.

    Raises:
        ValueError: If a numeric field receives text that ``int`` / ``float``
            cannot parse.
    """
    # (dictionary key, prompt shown to the user, converter applied to the reply)
    field_specs = [
        ('Age', "Age: ", int),
        ('Sex', "Sex (Male/Female): ", str),
        ('Height', "Height (in cm): ", float),
        ('Weight', "Weight (in kg): ", float),
        ('Blood Group', "Blood Group (e.g., A+, O-): ", str),
        ('Body Temperature', "Body Temperature (F): ", float),
        ('Physical Activity Days Per Week', "Physical Activity Days Per Week: ", int),
        ('BMI', "BMI: ", float),
        ('Heart Rate', "Heart Rate (bpm): ", int),
        ('SpO2', "SpO2 (%): ", int),
        ('Blood Pressure', "Blood Pressure (e.g., 120/80): ", str),
        ('Cholesterol', "Cholesterol (mg/dL): ", float),
        ('Blood Glucose', "Blood Glucose (mg/dL): ", float),
        ('Sweat Biomarkers', "Sweat Biomarkers: ", float),
        ('Stress Level', "Stress Level (1-10): ", int),
        ('Smoking', "Smoking (Yes/No): ", str),
        ('Alcohol Consumption', "Alcohol Consumption (Yes/No): ", str),
        ('Family History', "Family History (Yes/No): ", str),
        ('Diet', "Diet (Average/Healthy/Unhealthy): ", str),
        ('Sleep Hours Per Day', "Sleep Hours Per Day: ", float),
        ('Obesity', "Obesity (Yes/No): ", str),
        ('Triglycerides', "Triglycerides (mg/dL): ", float),
    ]

    print("Please enter the following patient details:")

    patient = {}
    for key, prompt, convert in field_specs:
        patient[key] = convert(input(prompt))
    return patient


In [35]:
def preprocess_and_predict(patient):
    """Encode, scale and score one patient record, printing both predictions.

    Uses the notebook-level objects ``categorical_cols``, ``label_encoders``,
    ``feature_cols``, ``scaler``, ``diabetes_model`` and ``heart_model``, so
    the training cells must have been run first.

    Args:
        patient: dict shaped like the return value of ``get_user_input()``:
            raw values for every feature plus a ``'Blood Pressure'`` string of
            the form ``'<systolic>/<diastolic>'``.

    Raises:
        ValueError: If a categorical value is not one of the values its
            encoder was fitted on, or the blood pressure is not two integers
            separated by ``/``.
    """
    patient_df = pd.DataFrame([patient])

    # Encode categoricals with the encoders fitted during training. Check the
    # values first so the error names the column and lists what is accepted.
    for col in categorical_cols:
        le = label_encoders[col]
        values = patient_df[col].astype(str)
        unknown = sorted(set(values) - set(le.classes_))
        if unknown:
            raise ValueError(
                f"Unrecognised value(s) {unknown} for '{col}'; "
                f"expected one of {le.classes_.tolist()}"
            )
        patient_df[col] = le.transform(values)

    # Replace the combined blood-pressure string with the two numeric columns
    # the models were trained on.
    raw_bp = str(patient_df['Blood Pressure'].iloc[0])
    try:
        systolic, diastolic = (int(piece) for piece in raw_bp.split('/'))
    except ValueError as exc:
        raise ValueError(
            f"Blood Pressure must look like '120/80', got {raw_bp!r}"
        ) from exc
    patient_df = patient_df.drop(columns=['Blood Pressure'])
    patient_df['Systolic_BP'] = systolic
    patient_df['Diastolic_BP'] = diastolic

    # Select the training feature columns in their training order, then scale.
    patient_X = patient_df[feature_cols]
    patient_X_scaled = scaler.transform(patient_X)

    diabetes_pred = diabetes_model.predict(patient_X_scaled)[0]
    heart_pred = heart_model.predict(patient_X_scaled)[0]

    print("\n=== Prediction Results ===")
    print("Diabetes: ", "Yes" if diabetes_pred else "No")
    print("Heart Attack Risk: ", "High Risk" if heart_pred else "Low Risk")

In [38]:
# Collect one patient record interactively, then print the diabetes and
# heart-attack-risk predictions for it.
user_patient = get_user_input()
preprocess_and_predict(user_patient)

Please enter the following patient details:


Age:  58
Sex (Male/Female):  Male
Height (in cm):  158
Weight (in kg):  70
Blood Group (e.g., A+, O-):  O+
Body Temperature (F):  38.6
Physical Activity Days Per Week:  0
BMI:  28.04
Heart Rate (bpm):  120
SpO2 (%):  97
Blood Pressure (e.g., 120/80):  160/120
Cholesterol (mg/dL):  280
Blood Glucose (mg/dL):  289
Sweat Biomarkers:  4.2
Stress Level (1-10):  10
Smoking (Yes/No):  1
Alcohol Consumption (Yes/No):  1
Family History (Yes/No):  1
Diet (Average/Healthy/Unhealthy):  Unhealthy
Sleep Hours Per Day:  5
Obesity (Yes/No):  1
Triglycerides (mg/dL):  260



=== Prediction Results ===
Diabetes:  Yes
Heart Attack Risk:  High Risk


In [39]:
from sklearn.metrics import accuracy_score

In [40]:
# Score each model on its own held-out split and report overall accuracy.
y_pred_d = diabetes_model.predict(X_test_d)
y_pred_h = heart_model.predict(X_test_h)

diabetes_accuracy = accuracy_score(y_test_d, y_pred_d)
heart_accuracy = accuracy_score(y_test_h, y_pred_h)

print(f"\nDiabetes Model Accuracy: {diabetes_accuracy * 100:.2f}%")
print(f"Heart Attack Risk Model Accuracy: {heart_accuracy * 100:.2f}%")


Diabetes Model Accuracy: 61.54%
Heart Attack Risk Model Accuracy: 50.71%
