In [51]:
# -*- coding: utf-8 -*-
"""
PPG-BP Prediction & Explainability
"""

# Install Required Libraries

!pip -q install numpy pandas scikit-learn matplotlib seaborn xgboost shap tensorflow gdown joblib

# 1. Upload & Extract Dataset
import os, zipfile
from google.colab import files
import pandas as pd

# Upload the PPG-BP zip manually
print(" Upload 'PPGBPDatabase.zip")
uploaded = files.upload()  # select the zip file

zip_path = list(uploaded.keys())[0]
DATA_DIR = "ppg_bp_data"

if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(DATA_DIR)
    print("Data extracted to", DATA_DIR)
else:
    print("Data already extracted.")

# Load Excel dataset
file_path = os.path.join(DATA_DIR, "Data File", "PPG-BP dataset.xlsx")
df = pd.read_excel(file_path, skiprows=1)  # skip metadata row

# Rename columns
df.columns = [
    "Num", "Subject_ID", "Sex", "Age", "Height", "Weight",
    "Systolic_BP", "Diastolic_BP", "Heart_Rate", "BMI",
    "Hypertension", "Diabetes", "Cerebral_Infarction", "Cerebrovascular_Disease"
]

# Convert numeric columns
numeric_cols = ["Age", "Height", "Weight", "Systolic_BP", "Diastolic_BP", "Heart_Rate", "BMI"]
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")

print("Dataset loaded. Shape:", df.shape)
df.head()

# 2. Preprocessing

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Encode categorical column
df["Sex"] = df["Sex"].map({"Male": 0, "Female": 1})

# Features & target
feature_cols = ["Age", "Height", "Weight", "Heart_Rate", "BMI", "Sex"]
X = df[feature_cols]
y = df["Systolic_BP"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 3. Random Forest Model

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

rf_sbp = RandomForestRegressor(n_estimators=200, random_state=42)
rf_sbp.fit(X_train, y_train)
y_pred = rf_sbp.predict(X_test)

# 4. Evaluation
def evaluate(y_true, y_pred, name="Model"):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))  # fixed
    r2 = r2_score(y_true, y_pred)
    print(f"{name} → MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")

print("Random Forest Performance (SBP):")
evaluate(y_test, y_pred, "Random Forest SBP")

# 5. SHAP
import shap
import matplotlib.pyplot as plt

explainer = shap.TreeExplainer(rf_sbp)
shap_values = explainer.shap_values(X_test)
# 6. Save Plots
import os
os.makedirs("plots", exist_ok=True)

# Scatter plot: True vs Predicted SBP
plt.figure(figsize=(6,6))
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
plt.xlabel("True SBP")
plt.ylabel("Predicted SBP")
plt.title("True vs Predicted Systolic BP")
plt.savefig("plots/sbp_scatter.png")
plt.close()

# Feature importance
importances = rf_sbp.feature_importances_
plt.figure(figsize=(8,5))
plt.barh(feature_cols, importances)
plt.xlabel("Importance")
plt.title("Feature Importance (Random Forest)")
plt.savefig("plots/feature_importance.png")
plt.close()

# SHAP summary
shap.summary_plot(shap_values, X_test, feature_names=feature_cols, show=False)
plt.savefig("plots/shap_summary.png", bbox_inches="tight")
plt.close()

print("All plots saved in 'plots/' folder.")

# 7. Download Plots Folder (Colab)
!zip -r plots.zip plots
from google.colab import files
files.download("plots.zip")


 Upload 'PPGBPDatabase.zip


Saving PPGBPDatabase.zip to PPGBPDatabase (4).zip
Data already extracted.
Dataset loaded. Shape: (219, 14)
Random Forest Performance (SBP):
Random Forest SBP → MAE: 13.00, RMSE: 16.16, R²: 0.24
All plots saved in 'plots/' folder.
updating: plots/ (stored 0%)
updating: plots/feature_importance.png (deflated 22%)
updating: plots/sbp_scatter.png (deflated 9%)
updating: plots/shap_summary.png (deflated 6%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>