In [2]:
import numpy as np
import pandas as pd
# Load the dataset
df = pd.read_csv(r"C:\Users\bhara\Documents\GitHub\smart-health-monitoring-ai-iot\health_upgraded_0_15.csv", encoding='cp1252')

In [3]:
df.head()  # Display the first few rows of the dataframe

Unnamed: 0,name,gender,location,age,heart_rate,spo2,temperature,ecg,eeg,health_label,hr_s,spo2_s,temp_s,ecg_s,health_level_0_15
0,Subhajit Banerjee,Male,Kharagpur,56,94,99,98.8,2.08,1.14,0,0,0,0,2,2
1,Pradip Majumdar,Female,Asansol,69,101,97,98.9,0.76,1.05,1,1,0,0,1,5
2,Sourav Bhattacharya,Female,Bardhaman,46,133,93,96.7,1.73,0.23,2,2,1,1,1,14
3,Rahul Chatterjee,Female,Durgapur,32,129,93,97.3,1.36,0.9,2,2,1,0,0,11
4,Debasis Mondal,Male,Siliguri,60,78,87,99.9,1.22,0.91,2,0,2,1,0,8


In [4]:
df.info()  # Display information about the dataframe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   name               12000 non-null  object 
 1   gender             12000 non-null  object 
 2   location           12000 non-null  object 
 3   age                12000 non-null  int64  
 4   heart_rate         12000 non-null  int64  
 5   spo2               12000 non-null  int64  
 6   temperature        12000 non-null  float64
 7   ecg                12000 non-null  float64
 8   eeg                12000 non-null  float64
 9   health_label       12000 non-null  int64  
 10  hr_s               12000 non-null  int64  
 11  spo2_s             12000 non-null  int64  
 12  temp_s             12000 non-null  int64  
 13  ecg_s              12000 non-null  int64  
 14  health_level_0_15  12000 non-null  int64  
dtypes: float64(3), int64(9), object(3)
memory usage: 1.4+ MB


In [5]:
df.isnull().sum()  # Check for missing values in the dataframe

name                 0
gender               0
location             0
age                  0
heart_rate           0
spo2                 0
temperature          0
ecg                  0
eeg                  0
health_label         0
hr_s                 0
spo2_s               0
temp_s               0
ecg_s                0
health_level_0_15    0
dtype: int64

In [6]:
df.dropna(inplace=True)  # Drop rows with missing values

In [7]:
df.drop("name", axis=1, inplace=True)  # Drop the 'name' column as it's not needed for modeling
df.drop("eeg" , axis=1, inplace=True)  # Drop the 'eeg' column as it's not needed for modeling

In [8]:
df.info()  # Display information about the dataframe after dropping columns
df.describe()  # Display statistical summary of the dataframe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             12000 non-null  object 
 1   location           12000 non-null  object 
 2   age                12000 non-null  int64  
 3   heart_rate         12000 non-null  int64  
 4   spo2               12000 non-null  int64  
 5   temperature        12000 non-null  float64
 6   ecg                12000 non-null  float64
 7   health_label       12000 non-null  int64  
 8   hr_s               12000 non-null  int64  
 9   spo2_s             12000 non-null  int64  
 10  temp_s             12000 non-null  int64  
 11  ecg_s              12000 non-null  int64  
 12  health_level_0_15  12000 non-null  int64  
dtypes: float64(2), int64(9), object(2)
memory usage: 1.2+ MB


Unnamed: 0,age,heart_rate,spo2,temperature,ecg,health_label,hr_s,spo2_s,temp_s,ecg_s,health_level_0_15
count,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0
mean,48.700583,97.106667,91.998583,98.604525,1.499332,1.313083,0.744333,0.999583,0.46125,0.6415,7.472667
std,17.879394,24.550455,4.32049,1.204591,0.400879,0.743937,0.800534,0.817805,0.541931,0.668966,4.132272
min,18.0,55.0,85.0,94.3,-0.01,0.0,0.0,0.0,0.0,0.0,0.0
25%,33.0,75.0,88.0,97.8,1.23,1.0,0.0,0.0,0.0,0.0,4.0
50%,49.0,97.0,92.0,98.6,1.5,1.0,1.0,1.0,0.0,1.0,7.0
75%,64.0,119.0,96.0,99.4,1.77,2.0,1.0,2.0,1.0,1.0,11.0
max,79.0,139.0,99.0,103.3,2.93,2.0,2.0,2.0,2.0,2.0,15.0


In [9]:
df.shape  # Display the shape of the cleaned dataframe

(12000, 13)

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# Set the style for better looking plots
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

In [29]:
# Map health labels to meaningful names
health_label_names = {
    0:"Optimal", 1:"Normal+", 2:"Minor", 3:"Mild",
    4:"Risk1", 5:"Cardiac1", 6:"Resp1", 7:"Multi1",
    8:"High1", 9:"Cardiac2", 10:"OxyTemp2", 11:"High2",
    12:"Serious", 13:"Danger", 14:"Critical", 15:"Emergency"
}

df['health_status'] = df['health_level_0_15'].map(health_label_names)

# Create a figure with 3 subplots
fig = plt.figure(figsize=(18, 6))

<Figure size 1800x600 with 0 Axes>

In [21]:
# Features
X = df[["age", "heart_rate", "spo2", "temperature", "ecg"]]

# Target
le = LabelEncoder()
y = le.fit_transform(df["health_level_0_15"])

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model
rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)

# Prediction
y_pred = rf.predict(x_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9895833333333334
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       111
           1       0.99      1.00      0.99        99
           2       1.00      1.00      1.00        97
           3       1.00      1.00      1.00       195
           4       0.98      1.00      0.99       171
           5       0.99      0.97      0.98       136
           6       0.99      1.00      0.99       230
           7       0.98      1.00      0.99       204
           8       0.99      0.98      0.98       204
           9       0.98      0.99      0.99       192
          10       0.98      0.98      0.98       176
          11       0.99      0.98      0.98       136
          12       0.99      0.97      0.98       102
          13       0.98      0.99      0.98        96
          14       1.00      0.98      0.99        84
          15       1.00      0.99      1.00       167

    accuracy                           0.99      24

In [25]:
df.rename(columns={"health status & improvement": "health_status"}, inplace=True)

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y = le.fit_transform(df["health_level_0_15"])


In [26]:
# Scale numeric features (X is already defined in the notebook)
from sklearn.preprocessing import StandardScaler

# Ensure X exists (in case cells were run out of order)
if 'X' not in globals():
    X = df[["age", "heart_rate", "spo2", "temperature", "ecg", "eeg"]]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split using the scaled features
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

classifiers = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support Vector Machine": SVC(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

for name, clf in classifiers.items():
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    print(f"\n{name} Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(classification_report(y_test, y_pred))


Random Forest Accuracy: 0.99
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       111
           1       0.99      1.00      0.99        99
           2       1.00      1.00      1.00        97
           3       1.00      1.00      1.00       195
           4       0.98      1.00      0.99       171
           5       0.99      0.97      0.98       136
           6       0.99      1.00      0.99       230
           7       0.98      1.00      0.99       204
           8       0.99      0.98      0.98       204
           9       0.98      0.99      0.99       192
          10       0.98      0.97      0.98       176
          11       0.99      0.98      0.98       136
          12       0.98      0.97      0.98       102
          13       0.98      0.99      0.98        96
          14       0.99      0.94      0.96        84
          15       0.99      0.99      0.99       167

    accuracy                           0.99      2

In [27]:
results = []
for name, clf in classifiers.items():
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    results.append((name, acc))
print("\nClassifier Performance Comparison:")
for name, acc in results:
    print(f"{name}: {acc:.2f}")    


Classifier Performance Comparison:
Random Forest: 0.99
Logistic Regression: 0.29
Support Vector Machine: 0.68
Naive Bayes: 0.33
Decision Tree: 0.99
K-Nearest Neighbors: 0.67


In [31]:
import pickle
# Save the model and scaler
with open(r'C:\Users\bhara\Documents\GitHub\smart-health-monitoring-ai-iot\model.pkl', 'wb') as f:
    pickle.dump(rf, f)
with open(r'C:\Users\bhara\Documents\GitHub\smart-health-monitoring-ai-iot\scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
print('Saved model to health_model.pkl and scaler to scaler.pkl')

Saved model to health_model.pkl and scaler to scaler.pkl


In [32]:
print("Model saved as health_model.pkl")

Model saved as health_model.pkl


In [33]:
# Use the scaled numpy x_test (features used during training) — not the DataFrame X_test that contains 'smoker'
print("Predictions for first 5 test samples:", rf.predict(x_test[:5]))

# Predict for a single test sample (index 10) — x_test is a 2D array with the same features used for training
sample = x_test[10].reshape(1, -1)  # shape (1, 6)
print("Prediction for sample index 10:", rf.predict(sample)[0])

# Actual label from y_test (numpy array)
print("Actual class for sample index 10:", y_test[10])

Predictions for first 5 test samples: [12 12 12 12 12]
Prediction for sample index 10: 12
Actual class for sample index 10: 10




In [35]:
import pickle
# Load the model and scaler
with open(r'C:\Users\bhara\Documents\GitHub\smart-health-monitoring-ai-iot\model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
with open(r'C:\Users\bhara\Documents\GitHub\smart-health-monitoring-ai-iot\scaler.pkl', 'rb') as f:
    loaded_scaler = pickle.load(f)
print('Model and scaler loaded')
# Example prediction using loaded objects (x_test is already scaled)
sample = x_test[0].reshape(1, -1)  # shape (1, n_features)
pred = loaded_model.predict(sample)[0]
print('Prediction for first test sample:', pred)
print('Actual for first test sample:', y_test[0])

Model and scaler loaded
Prediction for first test sample: 12
Actual for first test sample: 15




In [None]:
# def predict_using_loaded_model(age, heart_rate, spo2, temperature, ecg):
#     # Create DataFrame
#     input_df = pd.DataFrame(
#         [[age, heart_rate, spo2, temperature, ecg]],
#         columns=["age", "heart_rate", "spo2", "temperature", "ecg"]
#     )

#     # Scale input
#     input_scaled = loaded_scaler.transform(input_df)

#     # Predict
#     prediction = loaded_model.predict(input_scaled)[0]

#     # Bigger & more informative prediction
#     if prediction == 0:
#         return {
#             "Health Status": "NORMAL",
#             "Risk Level": "Low",
#             "Analysis": (
#                 "All vital signs are within clinically acceptable ranges. "
#                 "Heart rate, oxygen saturation, body temperature, ECG, and EEG "
#                 "signals indicate a stable physiological condition."
#             ),
#             "Recommendation": (
#                 "Maintain a healthy lifestyle, balanced diet, regular exercise, "
#                 "and periodic health monitoring."
#             )
#         }

#     elif prediction == 1:
#         return {
#             "Health Status": "WARNING",
#             "Risk Level": "Moderate",
#             "Analysis": (
#                 "Some vital parameters show slight deviation from normal values. "
#                 "This may indicate early stress, fatigue, mild cardiovascular, "
#                 "respiratory, or neurological irregularities."
#             ),
#             "Recommendation": (
#                 "Regular monitoring is advised. Reduce stress, ensure adequate rest, "
#                 "stay hydrated, and consult a healthcare professional if symptoms persist."
#             )
#         }

#     elif prediction == 2:
#         return {
#             "Health Status": "CRITICAL",
#             "Risk Level": "High",
#             "Analysis": (
#                 "Vital signs indicate a potentially serious medical condition. "
#                 "Abnormal heart rate, SpO₂, temperature, ECG, or EEG patterns suggest "
#                 "high physiological risk requiring urgent attention."
#             ),
#             "Recommendation": (
#                 "Immediate medical consultation or emergency care is strongly recommended. "
#                 "Continuous monitoring and clinical evaluation are necessary."
#             )
#         }
#     return {"Error": "Invalid prediction value"}

In [None]:
def predict_using_loaded_model(age, heart_rate, spo2, temperature, ecg):

    # Create DataFrame
    input_df = pd.DataFrame(
        [[age, heart_rate, spo2, temperature, ecg]],
        columns=["age", "heart_rate", "spo2", "temperature", "ecg"]
    )

    # Scale
    input_scaled = loaded_scaler.transform(input_df)

    # Predict
    prediction = int(loaded_model.predict(input_scaled)[0])

    # ---- Explanation Map ----
    explain_map = {

        0: ("Health Status": "NORMAL",
            "Risk Level": "Low",
            "Analysis": "All vital signs are within clinically acceptable ranges. Signals indicate stable physiology.",
            "Recommendation": "Maintain healthy lifestyle and routine monitoring."),

        1: ("Health Status": "NEAR NORMAL",
            "Risk Level": "Low",
            "Analysis": "One parameter slightly deviates but overall condition is stable.",
            "Recommendation": "Recheck readings and continue observation."),

        2: ("Health Status": "MINOR VARIATION",
            "Risk Level": "Low",
            "Analysis": "Single mild abnormality detected.",
            "Recommendation": "Rest and re-measure."),
        3: ("Health Status": "MILD IMBALANCE",
            "Risk Level": "Low–Moderate",
            "Analysis": "Two mild deviations observed.",
            "Recommendation": "Monitor for trend."),

        4: ("Health Status": "MILD RISK",
            "Risk Level": "Moderate",
            "Analysis": "Combination of mild and moderate abnormal signals.",
            "Recommendation": "Lifestyle correction advised."),
        5: ("Health Status": "CARDIAC LOAD SUSPECT",
            "Risk Level": "Moderate",
            "Analysis": "Heart rate or ECG strain pattern.",
            "Recommendation": "Avoid exertion and monitor ECG."),

        6: ("Health Status": "RESPIRATORY WATCH",
            "Risk Level": "Moderate",
            "Analysis": "SpO2 slightly reduced.",
            "Recommendation": "Check breathing and oxygen."),
        7: ("Health Status": "MULTI-SIGNAL STRESS",
            "Risk Level": "Moderate",
            "Analysis": "Multiple signals mildly abnormal.",
            "Recommendation": "Continuous monitoring needed."),

        8: ("Health Status": "HIGH RISK SIGNAL",
            "Risk Level": "High",
            "Analysis": "One critical parameter detected.",
            "Recommendation": "Immediate retest required."),
        9: ("Health Status": "CARDIAC WARNING",
            "Risk Level": "High",
            "Analysis": "Critical ECG/heart rhythm abnormality.",
            "Recommendation": "Cardiac evaluation recommended."),

        10: ("Health Status": "OXYGEN/FEVER ALERT",
            "Risk Level": "High",
            "Analysis": "Critical SpO2 or temperature.",
            "Recommendation": "Check oxygen and infection signs."),
        11: ("Health Status": "ESCALATING INSTABILITY",
            "Risk Level": "High",
            "Analysis": "Critical + mild abnormal combination.",
            "Recommendation": "Consult doctor."),

        12: ("Health Status": "SERIOUS CONDITION",
            "Risk Level": "Very High",
            "Analysis": "Two critical signals present.",
            "Recommendation": "Urgent clinical review."),
        13: ("Health Status": "CARDIO-RESPIRATORY DANGER",
            "Risk Level": "Very High",
            "Analysis": "Heart and oxygen systems abnormal.",
            "Recommendation": "Emergency assessment."),

        14: ("Health Status": "MULTI-SYSTEM RISK",
            "Risk Level": "Critical",
            "Analysis": "Three signals critical.",
            "Recommendation": "Hospital care advised."),
        15: ("Health Status": "MEDICAL EMERGENCY",
            "Risk Level": "Critical",
            "Analysis": "Most vitals critical.",
            "Recommendation": "Immediate emergency intervention.")
    }

    status, risk, analysis, rec = explain_map.get(
        prediction,
        ("Unknown", "Unknown", "Model output not mapped.", "Manual review needed.")
    )

    return {
        "Predicted Class": prediction,
        "Health Status": status,
        "Risk Level": risk,
        "Analysis": analysis,
        "Recommendation": rec
    }


In [40]:
# Example usage
predicted_class2 = predict_using_loaded_model(45, 80, 98, 36.5, 0.5)
print("Predicted class for input sample:", predicted_class2)

Predicted class for input sample: {'Predicted Class': 12, 'Health Status': 'Serious Condition', 'Risk Level': 'Very High', 'Analysis': 'Two critical signals present.', 'Recommendation': 'Urgent clinical review.'}


