# ✅ Step 1: Generate the Correct Synthetic Dataset
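This step generates a synthetic dataset that will:

- Keep patient data and PHC data in separate tables
- End up with numerical values only (the raw categorical features created here are encoded in Step 2)
- Be one-hot encoded and feature-scaled properly in the next step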



In [None]:
import numpy as np
import pandas as pd

# Seed NumPy's global generator so the synthetic tables are reproducible
np.random.seed(42)

# Dataset sizes
num_patients = 5000
num_phcs = 10

# Rural areas surrounding Chennai that patient locations are sampled from
rural_areas = [
    "Thiruvallur",
    "Kanchipuram",
    "Chengalpattu",
    "Red Hills",
    "Gummidipoondi",
    "Arakkonam",
    "Sriperumbudur",
    "Tiruttani",
    "Madurantakam",
    "Uthiramerur",
]

# Category vocabularies for the sampled patient fields
gender_options = ["Male", "Female"]
symptom_options = ["Chest Pain", "Fever", "Breathing Difficulty", "Fatigue", "Headache"]
resource_options = ["ICU", "Ventilator", "General Bed", "Oxygen Supply"]
time_of_day_options = ["Morning", "Afternoon", "Night"]

# --- Patient table (healthcare worker input) ---
# The columns are sampled one at a time, in this exact order, because every
# draw advances the shared seeded random stream.
patient_columns = {}
patient_columns["age"] = np.random.randint(18, 85, num_patients)
patient_columns["gender"] = np.random.choice(gender_options, num_patients)
patient_columns["bp_systolic"] = np.random.randint(90, 180, num_patients)
patient_columns["bp_diastolic"] = np.random.randint(60, 120, num_patients)
patient_columns["heart_rate"] = np.random.randint(50, 140, num_patients)
patient_columns["oxygen_saturation"] = np.round(np.random.uniform(85, 100, num_patients), 2)
patient_columns["temperature"] = np.round(np.random.uniform(35.5, 40.0, num_patients), 2)
patient_columns["symptoms"] = np.random.choice(symptom_options, num_patients)
patient_columns["required_resources"] = np.random.choice(resource_options, num_patients)
patient_columns["location"] = np.random.choice(rural_areas, num_patients)
patient_columns["time_of_day"] = np.random.choice(time_of_day_options, num_patients)
patients = pd.DataFrame(patient_columns)

# --- PHC table (updated in real time) ---
phc_names = [
    "Stanley Medical College Hospital",
    "Rajiv Gandhi Government General Hospital",
    "Kilpauk Medical College Hospital",
    "Government Royapettah Hospital",
    "Government Hospital, Saidapet",
    "ESI Hospital, Ayanavaram",
    "Government Kasturba Gandhi Hospital",
    "Government Siddha Medical College Hospital",
    "Institute of Child Health",
    "Government Peripheral Hospital, Tondiarpet",
]
phc_columns = {"phc_name": phc_names}
phc_columns["available_beds"] = np.random.randint(0, 20, num_phcs)
phc_columns["phc_workload"] = np.round(np.random.uniform(0, 1, num_phcs), 2)
phc_columns["phc_distance_km"] = np.round(np.random.uniform(1, 50, num_phcs), 2)
phcs = pd.DataFrame(phc_columns)

# Persist both tables as CSV (without the index) for the preprocessing step
for frame, csv_path in ((patients, "synthetic_patients.csv"), (phcs, "dynamic_phc_database.csv")):
    frame.to_csv(csv_path, index=False)

print("✅ Datasets Created! Ready for Preprocessing.")


✅ Datasets Created! Ready for Preprocessing.


# ✅ Step 2: Preprocess & Normalize Data Properly
This ensures:

Categorical variables are one-hot encoded
Numerical values are scaled consistently
No feature mismatches occur later

In [None]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import joblib

# Load the raw synthetic tables written by Step 1
patients = pd.read_csv("synthetic_patients.csv")
phcs = pd.read_csv("dynamic_phc_database.csv")

# One-hot encode categorical variables (first level of each is dropped).
# dtype=float keeps the dummy columns numeric: pandas >= 2.0 emits bool
# dummies by default, which makes `patients` a mixed bool/float frame whose
# `.values` is an object array that TensorFlow cannot convert to a tensor.
categorical_cols = ["gender", "symptoms", "required_resources", "location", "time_of_day"]
patients = pd.get_dummies(patients, columns=categorical_cols, drop_first=True, dtype=float)

# Min-max scale the numerical patient features to [0, 1]
numerical_cols_patients = ["age", "bp_systolic", "bp_diastolic", "heart_rate", "oxygen_saturation", "temperature"]
scaler_patients = MinMaxScaler()
patients[numerical_cols_patients] = scaler_patients.fit_transform(patients[numerical_cols_patients])

# Min-max scale the numerical PHC features to [0, 1].
# Note that `phcs` is overwritten in place, so later cells see scaled values
# (e.g. phc_distance_km is no longer in kilometres).
numerical_cols_phcs = ["available_beds", "phc_workload", "phc_distance_km"]
scaler_phcs = MinMaxScaler()
phcs[numerical_cols_phcs] = scaler_phcs.fit_transform(phcs[numerical_cols_phcs])

# Save the fitted scalers so inference can apply the identical transformation
joblib.dump(scaler_patients, "scaler_patients.pkl")
joblib.dump(scaler_phcs, "scaler_phcs.pkl")

print("✅ Scalers trained & saved successfully!")
print("✅ Preprocessing Complete! No extra features or mismatches.")


✅ Scalers trained & saved successfully!
✅ Preprocessing Complete! No extra features or mismatches.


# ✅ Step 3: Train the Deep Learning Model
This step:

- Ensures the training data has the correct features
- Avoids categorical encoding errors
- Uses properly scaled features

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

import joblib

# Record the final column order after preprocessing and save it, so the
# inference cell can rebuild exactly the same feature layout.
expected_columns = list(patients.columns)
joblib.dump(expected_columns, "expected_columns.pkl")

print("✅ Column order saved successfully!")

# Numeric views of both tables. The explicit float32 cast also handles any
# bool one-hot columns that pandas may have produced.
patient_features = patients.astype(np.float32).to_numpy()
phc_features = phcs[numerical_cols_phcs].astype(np.float32).to_numpy()

# Referral suitability score, one value per PHC (it does not depend on the
# patient). All three PHC columns were min-max scaled to [0, 1] during
# preprocessing, so every term uses the scaled value directly:
#   more free beds -> higher, lower workload -> higher, shorter distance -> higher.
# The earlier `(50 - distance) / 50` form assumed raw kilometres; on scaled
# input it stayed within [0.98, 1.0] and contributed nothing to the target.
phc_scores = (
    (phcs["available_beds"] * 0.4)
    + ((1 - phcs["phc_workload"]) * 0.3)
    + ((1 - phcs["phc_distance_km"]) * 0.3)
).to_numpy(dtype=np.float32)

# Build one row per (patient, PHC) pair, patient-major: each patient row is
# repeated once per PHC while the PHC block is tiled once per patient. This is
# the same row order as a nested "for patient: for phc:" loop, without the
# slow per-row iteration.
n_patients = len(patient_features)
n_phcs = len(phc_features)
X_train = np.hstack((
    np.repeat(patient_features, n_phcs, axis=0),
    np.tile(phc_features, (n_patients, 1)),
))
y_train = np.tile(phc_scores, n_patients)

print("✅ X_train and y_train are now correctly formatted!")


✅ Column order saved successfully!
✅ X_train and y_train are now correctly formatted!


In [None]:
# Build, train and save the referral-score regressor
from tensorflow.keras.layers import Input

# Input width is taken from the training matrix (patient + PHC features)
n_features = X_train.shape[1]

# Fully connected stack; the sigmoid head keeps predictions within (0, 1)
layer_stack = [
    Input(shape=(n_features,)),  # Explicit input layer
    Dense(128, activation="relu"),
    Dropout(0.2),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid"),
]
model = Sequential(layer_stack)

# Optimise mean absolute error with Adam; MAE is also reported as a metric
model.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=["mae"],
)
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Persist the trained model in HDF5 format for the prediction step
model.save("referral_model.h5")
print("✅ Model Training Complete!")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
✅ Model Training Complete!


  saving_api.save_model(


# CONVERSION TO TENSORFLOWJS

In [None]:
# Install the TensorFlow.js converter package (imported as `tfjs` in the conversion cell).
# NOTE(review): the version is not pinned here — consider pinning for reproducibility.
!pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-4.22.0-py3-none-any.whl.metadata (3.2 kB)
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.0 kB)
Collecting packaging~=23.1 (from tensorflowjs)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting wurlitzer (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading wurlitzer-3.1.1-py3-none-any.whl.metadata (2.5 kB)
Collecting ydf (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading ydf-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow<3,>=2.13.0->tensorflowjs)
  Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Downloading tensorflowjs-4.22.0-py3-none-any.whl (89 kB)
[2K   [90

In [None]:
# Pin TensorFlow to version 2.15.0.
# NOTE(review): TensorFlow was already imported earlier in this notebook, so the
# kernel presumably has to be restarted for this version to take effect — confirm.
!pip install tensorflow==2.15.0



In [None]:
# Pin tensorflow_decision_forests to version 1.8.1.
# NOTE(review): presumably chosen to match the pinned TensorFlow 2.15.0 — confirm
# against the TF-DF compatibility table.
!pip3 install tensorflow_decision_forests==1.8.1



In [None]:
import tensorflowjs as tfjs

# Export the in-memory Keras model as TensorFlow.js artifacts into this directory
tfjs_export_dir = "tfjs_model"
tfjs.converters.save_keras_model(model, tfjs_export_dir)

# ✅ Step 4: Predict PHC Rankings for a Given Patient (Fixed Issues)
This step resolves the earlier issues:

- Extra features (no more mismatches like 29 vs 54)
- Missing categorical encoding errors
- Input that did not match the training data exactly

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib

# Load the trained referral model
model = tf.keras.models.load_model("referral_model.h5")

# Load the scalers fitted during preprocessing
scaler_patients = joblib.load("scaler_patients.pkl")
scaler_phcs = joblib.load("scaler_phcs.pkl")

# Load the feature column order recorded at training time
expected_columns = joblib.load("expected_columns.pkl")

# Load the raw datasets
patients = pd.read_csv("synthetic_patients.csv")
phcs = pd.read_csv("dynamic_phc_database.csv")

# One-hot encode categorical patient features the same way as in training.
# dtype=float avoids bool dummy columns (the pandas >= 2.0 default), which
# would make the patient row an object-dtype array.
categorical_cols = ["gender", "symptoms", "required_resources", "location", "time_of_day"]
patients = pd.get_dummies(patients, columns=categorical_cols, drop_first=True, dtype=float)

# Align with the training layout: columns missing here are added as 0 and the
# result follows the saved column order exactly.
patients = patients.reindex(columns=expected_columns, fill_value=0)

# Normalize numerical patient data with the training-time scaler
numerical_cols_patients = ["age", "bp_systolic", "bp_diastolic", "heart_rate", "oxygen_saturation", "temperature"]
patients[numerical_cols_patients] = scaler_patients.transform(patients[numerical_cols_patients])

# Normalize PHC numerical data with the training-time scaler
numerical_cols_phcs = ["available_beds", "phc_workload", "phc_distance_km"]
phcs[numerical_cols_phcs] = scaler_phcs.transform(phcs[numerical_cols_phcs])

# Select a new patient (example: the first patient)
new_patient = patients.iloc[[0]]

# Duplicate the patient features once per PHC. The explicit float32 cast
# guarantees a numeric array; an object-dtype array here is what raised
# "Failed to convert a NumPy array to a Tensor (Unsupported object type float)".
patient_row = new_patient.astype(np.float32).to_numpy()
new_patient_input = np.tile(patient_row, (len(phcs), 1))

# Append the numeric PHC features to every copy of the patient row
phcs_numeric = phcs[numerical_cols_phcs]  # PHC side contributes numerical data only
phc_input = np.hstack((new_patient_input, phcs_numeric.astype(np.float32).to_numpy()))

# Predict one referral score per PHC
phcs["suitability_score"] = model.predict(phc_input).flatten()

# Rank PHCs from most to least suitable
ranked_phcs = phcs.sort_values(by="suitability_score", ascending=False)

# Display ranked PHCs
print("\n🏥 **Ranked PHCs Based on Referral Suitability Score** 🏥")
print(ranked_phcs[["phc_name", "suitability_score"]])

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).