# proj_01 — Unified Healthcare + Supply Chain Decisioning
This notebook loads the trained models from the supply chain and healthcare tracks, and demonstrates a simple policy: given a hospital, medicine, lane, and carrier, decide whether to **order now** or **wait**, considering both inventory risk and delivery risk.

In [1]:

# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ML imports
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
                             confusion_matrix, ConfusionMatrixDisplay, mean_squared_error, r2_score)
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

import joblib
import os

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

import pathlib

# Get the project root (the folder where streamlit_app.py and notebooks live)
PROJECT_DIR = pathlib.Path(__file__).resolve().parents[1] if "__file__" in globals() else pathlib.Path(os.getcwd()) / "proj_01"

ASSETS = PROJECT_DIR / "proj_01_assets"
ASSETS.mkdir(parents=True, exist_ok=True)

import itertools

In [None]:

# --- Standardized paths for repo-friendly saves ---
from pathlib import Path
import os

# Resolve project root whether running from repo root or notebooks/
ROOT = Path.cwd()
if ROOT.name == "notebooks":
    ROOT = ROOT.parent

DATA = ROOT / "proj_01" / "data" if (ROOT / "proj_01").exists() else ROOT / "data"
MODELS = ROOT / "proj_01" / "models" if (ROOT / "proj_01").exists() else ROOT / "models"

DATA.mkdir(parents=True, exist_ok=True)
MODELS.mkdir(parents=True, exist_ok=True)

print(f"Saving CSVs to: {DATA}")
print(f"Saving models to: {MODELS}")


In [2]:

# Matplotlib settings (default colors, single-plot rule honored in guidance)
plt.rcParams["figure.figsize"] = (8,5)
plt.rcParams["axes.grid"] = True


## 1) Load Assets

In [4]:

from pathlib import Path

# assets_path = Path("proj_01_assets")
assets_path = ASSETS
sc_shipments = pd.read_csv(assets_path / "supply_chain_shipments.csv")
hc_df = pd.read_csv(assets_path / "healthcare_demand.csv")
carrier_lane_stats = pd.read_csv(assets_path / "carrier_lane_stats.csv")

sc_on_time_model = joblib.load(assets_path / "supply_chain_on_time_model.pkl")
sc_cost_model = joblib.load(assets_path / "supply_chain_cost_model.pkl")
hc_demand_model = joblib.load(assets_path / "healthcare_demand_model.pkl")
hc_shortage_model = joblib.load(assets_path / "healthcare_shortage_model.pkl")

sc_shipments.head(3), hc_df.head(3), carrier_lane_stats.head(3)


FileNotFoundError: [Errno 2] No such file or directory: 'proj_01_assets/supply_chain_shipments.csv'

## 2) Policy Function

In [None]:

def recommend_order(hospital:str, medicine:str, month:int,
                    origin:str, destination:str, carrier:str, service_level:str,
                    current_inventory:int, lead_time_days:int, stops:int=1, weight:float=1200.0, distance:float=None):
    """
    Combine healthcare demand/shortage predictions with supply chain on-time probability and cost.
    Returns a dict with recommendation and details.
    """
    # 1) Predict demand for (hospital, region inferred from hc_df, medicine, month)
    region = hc_df.loc[hc_df['Hospital']==hospital, 'Region'].iloc[0] if hospital in hc_df['Hospital'].unique() else "MW"
    x_reg = pd.DataFrame([{
        "Hospital": hospital, "Region": region, "Medicine": medicine,
        "Month": month, "Inventory": current_inventory, "LeadTimeDays": lead_time_days
    }])
    demand_pred = float(hc_demand_model.predict(x_reg)[0])

    # 2) Shortage risk given current inventory
    x_cls = x_reg.copy()
    shortage_prob = float(hc_shortage_model.predict_proba(x_cls)[0,1])
    shortage_flag = int(shortage_prob >= 0.5)

    # 3) Supply chain on-time probability (need a shipment feature row)
    if distance is None:
        # estimate using average lane distance from historical data (fallback if not present)
        lane = sc_shipments[(sc_shipments['Origin']==origin) & (sc_shipments['Destination']==destination)]
        if len(lane) >= 5:
            distance = float(lane['Distance'].mean())
        else:
            distance = 800.0
    x_sc = pd.DataFrame([{
        "Origin": origin, "Destination": destination, "Carrier": carrier, "ServiceLevel": service_level,
        "Distance": distance, "Weight": weight, "Stops": stops
    }])
    on_time_prob = float(sc_on_time_model.predict_proba(x_sc)[0,1])

    # 4) Cost estimate
    cost_est = float(sc_cost_model.predict(x_sc)[0])

    # 5) Simple decision rule
    # - If shortage probability high (>=0.5) OR inventory < predicted demand for lead time buffer, consider ordering.
    # - If on-time prob is low (<0.7), recommend ordering earlier or choosing a more reliable carrier.
    daily_need = demand_pred / 30.0
    buffer_need = daily_need * lead_time_days
    inventory_gap = current_inventory - buffer_need

    order_now = (shortage_prob >= 0.5) or (inventory_gap < 0)
    risk_flag = on_time_prob < 0.7

    recommendation = "ORDER NOW" if order_now else "OK TO WAIT"
    logistics_note = "Lane/Carrier risk is HIGH (consider expedited or different carrier)" if risk_flag else "Lane/Carrier risk acceptable"

    return {
        "hospital": hospital, "medicine": medicine, "month": month,
        "predicted_monthly_demand": round(demand_pred,2),
        "shortage_probability": round(shortage_prob,3),
        "on_time_probability": round(on_time_prob,3),
        "estimated_cost": round(cost_est,2),
        "inventory_buffer_need": round(buffer_need,2),
        "current_inventory": current_inventory,
        "inventory_gap": round(inventory_gap,2),
        "recommendation": recommendation,
        "logistics_note": logistics_note
    }

# Example run
example = recommend_order(
    hospital=hc_df['Hospital'].iloc[0],
    medicine="FluVax",
    month=11,
    origin="DAL", destination="NYC", carrier="AlphaTrans", service_level="Expedited",
    current_inventory=120, lead_time_days=7
)
example


## 3) Sensitivity: Compare Carriers & Lanes

In [None]:

# Evaluate the same order across carriers for the chosen lane
def compare_carriers(hospital, medicine, month, origin, destination, service_level, current_inventory, lead_time_days):
    rows = []
    for c in carrier_lane_stats['Carrier'].unique():
        rows.append(recommend_order(hospital, medicine, month, origin, destination, c, service_level,
                                    current_inventory, lead_time_days))
    return pd.DataFrame(rows).sort_values("on_time_probability", ascending=False)

sample_compare = compare_carriers(
    hospital=hc_df['Hospital'].iloc[0], medicine="FluVax", month=11,
    origin="DAL", destination="NYC", service_level="Standard",
    current_inventory=150, lead_time_days=7
)
sample_compare
