In [24]:
from chronos import BaseChronosPipeline, Chronos2Pipeline
import pandas as pd
import os
from dotenv import load_dotenv
import requests
from utils import upload
import dropbox
from pandas.tseries.frequencies import to_offset
import holidays

load_dotenv()

True

In [25]:
import torch

# Index of the GPU this notebook was originally pinned to. Other machines may
# expose fewer devices (or none), so the index is validated before it is used
# instead of letting torch.cuda.set_device raise on an invalid ordinal.
PREFERRED_CUDA_DEVICE = 6

if torch.cuda.is_available():
    if torch.cuda.device_count() > PREFERRED_CUDA_DEVICE:
        torch.cuda.set_device(PREFERRED_CUDA_DEVICE)
    # else: keep torch's current default CUDA device
    CHRONOS_DEVICE_MAP = "cuda"
else:
    # GPU is recommended for faster inference, but CPU is also supported.
    CHRONOS_DEVICE_MAP = "cpu"

# Load the Chronos-2 pipeline
pipeline: Chronos2Pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-2",
    device_map=CHRONOS_DEVICE_MAP,
)

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: c0cc1015-8b63-44fa-b4f5-01974624bea7)')' thrown while requesting HEAD https://huggingface.co/amazon/chronos-2/resolve/main/config.json
Retrying in 1s [Retry 1/5].


In [26]:
def regularize_hourly(g: pd.DataFrame) -> pd.DataFrame:
    """
    Put one series on a gap-free hourly grid.

    The group is sorted by TS_COL and reindexed onto every hour between its
    first and last timestamp. The id column is rewritten as a constant, and
    each column listed in TARGETS is coerced to numeric and then forward- and
    back-filled. Non-target columns keep NaN on the inserted rows.

    The series id is read from ID_COL when the column is present; otherwise it
    comes from ``g.name``, which is how groupby.apply exposes the group key
    when called with include_groups=False.

    Returns the regularized frame with TS_COL restored as a regular column.
    """
    if ID_COL in g.columns:
        series_id = g[ID_COL].iloc[0]
    else:
        series_id = g.name

    ordered = g.sort_values(TS_COL)
    stamps = ordered[TS_COL]
    hourly_index = pd.date_range(stamps.min(), stamps.max(), freq="h")

    hourly = ordered.set_index(TS_COL).reindex(hourly_index)
    hourly.index.name = TS_COL

    # The id is constant within a group, so it can simply be broadcast back.
    hourly[ID_COL] = series_id

    present_targets = [name for name in TARGETS if name in hourly.columns]
    for name in present_targets:
        numeric = pd.to_numeric(hourly[name], errors="coerce")
        # ffill covers interior gaps; bfill covers any leading NaNs.
        hourly[name] = numeric.ffill().bfill()

    return hourly.reset_index()

def add_holiday_flags(
    df: pd.DataFrame,
    ts_col: str = "ds",
    local_tz: str = "America/Montreal",
    observed: bool = True,
    include_names: bool = False,
) -> pd.DataFrame:
    """
    Return a copy of ``df`` with holiday indicator columns added.

    Added columns (string-valued, "yes" / "no" — not booleans):
      • is_qc_holiday       — date is in holidays.Canada(subdiv="QC")
      • is_jewish_holiday   — date is in holidays.Israel()
    When ``include_names`` is True, also:
      • qc_holiday_name     — holiday name, or "no" when not a holiday
      • jewish_holiday_name — holiday name, or "no" when not a holiday

    Parameters:
      df            — input frame; it is not modified.
      ts_col        — name of the timestamp column (parsed with errors="coerce").
      local_tz      — timezone used to pick the calendar date for tz-aware
                      timestamps; naive timestamps are used as-is.
      observed      — forwarded to the holiday calendars.
      include_names — whether to add the two *_name columns.

    Raises:
      ValueError — if no timestamp in ``ts_col`` can be parsed.

    Notes:
      • Holiday checks are date-based (00:00–24:00 local calendar date),
        not sundown-to-sundown observance.
      • Rows whose timestamp is NaT get "no" in every added column.
    """
    out = df.copy()

    # 1) Parse to datetime
    out[ts_col] = pd.to_datetime(out[ts_col], errors="coerce")
    stamps = out[ts_col]

    # 2) Calendar date used for the holiday lookup
    #    - tz-aware: convert to local_tz first, then take the date
    #    - naive: assume values already are local wall-clock times
    if getattr(stamps.dt, "tz", None) is not None:
        calendar_dates = stamps.dt.tz_convert(local_tz).dt.date
    else:
        calendar_dates = stamps.dt.date

    # 3) Distinct valid dates. An hourly frame repeats each date ~24 times, so
    #    every lookup below is done once per date rather than once per row.
    distinct_dates = [d for d in dict.fromkeys(calendar_dates) if pd.notna(d)]
    if not distinct_dates:
        raise ValueError("No valid datetimes found to extract holiday years.")
    first_year = min(d.year for d in distinct_dates)
    last_year = max(d.year for d in distinct_dates)
    years = list(range(first_year, last_year + 1))

    # 4) Construct holiday calendars
    qc_holidays = holidays.Canada(subdiv="QC", years=years, observed=observed)
    il_holidays = holidays.Israel(years=years, observed=observed)

    # 5) Flag membership. NaT is not a key in the per-date tables, so the
    #    .get default yields "no" for unparseable timestamps.
    qc_flag = {d: ("yes" if d in qc_holidays else "no") for d in distinct_dates}
    il_flag = {d: ("yes" if d in il_holidays else "no") for d in distinct_dates}
    out["is_qc_holiday"] = [qc_flag.get(d, "no") for d in calendar_dates]
    out["is_jewish_holiday"] = [il_flag.get(d, "no") for d in calendar_dates]

    if include_names:
        qc_name = {d: qc_holidays.get(d, "no") for d in distinct_dates}
        il_name = {d: il_holidays.get(d, "no") for d in distinct_dates}
        out["qc_holiday_name"] = [qc_name.get(d, "no") for d in calendar_dates]
        out["jewish_holiday_name"] = [il_name.get(d, "no") for d in calendar_dates]

    return out

# Lookup from a shift's short code to the shift category used as a staffing
# covariate. Codes missing from this table map to NaN when used with Series.map.
shift_types_dict = {
    "W1": "flow",
    "X1": "pod",
    "X3": "pod",
    "X4": "vertical",
    "X2": "vertical",
    "WOC1": "oncall",
    "WOC2": "oncall",
    "WOC3": "oncall",
    "X5": "pod",
    "W3": "overlap",
    "Y1": "pod",
    "Y3": "pod",
    "Y4": "vertical",
    "Y2": "vertical",
    "Y5": "pod",
    "Z1": "night",
    "Z2": "night",
    "D1": "pod",
    "R1": "pod",
    "P1": "vertical",
    "D2": "vertical",
    "OC1": "oncall",
    "OC2": "oncall",
    "V1": "flow",
    "A1": "pod",
    "G1": "vertical",
    "E1": "pod",
    "R2": "pod",
    "A2": "pod",
    "P2": "vertical",
    "E2": "vertical",
    "N1": "night",
    "N2": "night",
    "L2": "overlap",
    "L4": "overlap",
    "H1": "teaching",
    "B1": "vertical",
    "L1": "overlap",
    "W5": "overlap",
    "L6": "overlap",
    "B2": "vertical",
}

In [27]:
# Load hourly data from the shared CSV, parse the timestamp column, and tag
# every row with the single series id used throughout the notebook.
_hourly_csv_url = 'https://www.dropbox.com/scl/fi/s83jig4zews1xz7vhezui/allDataWithCalculatedColumns.csv?rlkey=9mm4zwaugxyj2r4ooyd39y4nl&raw=1'
df = pd.read_csv(_hourly_csv_url)
df['ds'] = pd.to_datetime(df['ds'], errors="coerce")  # unparseable values become NaT
df['id'] = 'jgh'
df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM,total_tbs,vert_tbs,pod_tbs,overflow,id
44819,2026-02-16 10:00:00,7,34,8,30,15,64,2,8,0,...,0,0,0,12,6,10,6,4,3,jgh
44820,2026-02-16 11:00:00,13,47,11,41,24,88,2,10,1,...,0,0,0,11,5,15,12,3,2,jgh
44821,2026-02-16 12:00:00,16,63,5,46,21,109,2,12,0,...,0,0,1,11,5,24,19,5,8,jgh
44822,2026-02-16 13:00:00,12,75,6,52,18,127,6,18,0,...,0,0,1,11,4,24,18,6,8,jgh
44823,2026-02-16 14:00:00,16,91,12,64,28,155,3,21,0,...,0,1,4,11,4,36,27,9,9,jgh


In [28]:
# Load shift data; shift boundaries are rounded to the nearest hour so they
# line up with the hourly grid of the main data.
_shifts_csv_url = 'https://www.dropbox.com/scl/fi/yeyr2a7pj6nry8i2q3m0c/all_shifts.csv?rlkey=q1su2h8fqxfnlu7t1l2qe1w0q&raw=1'
all_shifts_df = pd.read_csv(_shifts_csv_url)
for boundary_col in ('shift_start', 'shift_end'):
    all_shifts_df[boundary_col] = pd.to_datetime(all_shifts_df[boundary_col]).dt.round('h')
# Short codes absent from shift_types_dict end up as NaN in shift_type.
all_shifts_df['shift_type'] = all_shifts_df['shift_short_name'].map(shift_types_dict)
all_shifts_df.tail()

Unnamed: 0,scheduled_shift_id,group_id,user_id,employee_id,npi,first_name,last_name,facility_id,facility_ext_id,facility_name,...,shift_start,shift_end,shift_hours,work_start,work_end,work_hours,count_as_shift,is_night,is_weekend,shift_type
29944,88846,1,18,,,Michael,Engo,1,,Jewish General Hospital,...,2026-02-23 16:00:00,2026-02-24 00:00:00,8.0,2/23/2026 16:00,2/24/2026 00:00,8.0,1,0,0,vertical
29945,88844,1,35,,,Jonathan,Simons,1,,Jewish General Hospital,...,2026-02-23 16:00:00,2026-02-24 00:00:00,8.0,2/23/2026 16:00,2/24/2026 00:00,8.0,1,0,0,pod
29946,88847,1,14,,,Paul,Brisebois,1,,Jewish General Hospital,...,2026-02-23 18:00:00,2026-02-24 02:00:00,8.0,2/23/2026 18:00,2/24/2026 02:00,8.0,1,0,0,vertical
29947,88208,1,24,,,Alexis,Haligua,1,,Jewish General Hospital,...,2026-02-24 00:00:00,2026-02-24 08:00:00,8.08,2/23/2026 23:45,2/24/2026 07:50,8.08,1,1,0,night
29948,88209,1,25,,,Devin,Hopkins,1,,Jewish General Hospital,...,2026-02-24 00:00:00,2026-02-24 08:00:00,8.08,2/23/2026 23:45,2/24/2026 07:50,8.08,1,1,0,night


In [29]:
# Create hourly rows: one record per (shift, hour worked).
# Each shift covers the half-open interval [shift_start, shift_end); a shift
# whose rounded start equals its rounded end therefore contributes no rows.
_shift_fields = ['shift_start', 'shift_end', 'first_name', 'last_name', 'shift_type', 'shift_short_name']
expanded_rows = [
    {
        'ds': hour,
        'user': first_name + last_name,
        'shift_type': shift_type,
        'shift_short_name': short_name,
    }
    for start, end, first_name, last_name, shift_type, short_name
    in all_shifts_df[_shift_fields].itertuples(index=False, name=None)
    for hour in pd.date_range(start, end, freq='h', inclusive='left')
]

expanded_df = pd.DataFrame(expanded_rows)

expanded_df.tail()

Unnamed: 0,ds,user,shift_type,shift_short_name
272789,2026-02-24 03:00:00,DevinHopkins,night,N2
272790,2026-02-24 04:00:00,DevinHopkins,night,N2
272791,2026-02-24 05:00:00,DevinHopkins,night,N2
272792,2026-02-24 06:00:00,DevinHopkins,night,N2
272793,2026-02-24 07:00:00,DevinHopkins,night,N2


In [30]:
# Pivot to a wide table: one row per hour, one column per user (the
# concatenated first+last name), each cell holding that user's shift type.
# aggfunc='first' resolves duplicate (hour, user) pairs by keeping the first
# value; hours where a user has no shift are filled with 'NotWorking'.
hourly_shifts_by_user_df = (
    expanded_df
    .pivot_table(index='ds', columns='user', values='shift_type', aggfunc='first')
    .fillna('NotWorking')
)
hourly_shifts_by_user_df.tail()

user,AlanAzuelos,AlexGuttman,AlexanderHart,AlexandreDostaler,AlexisHaligua,AmelieBellemare,ArzuChaudhry,BernardUnger,DahliaGuttman,DanielMankarios,...,PaulBrisebois,PhilipStasiak,RafaelAroutiunian,SaraAhronheim,ShuoPeng,StephenRosenthal,TanLe,ThuHangTran,WayneChoi,WillGrad
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2026-02-24 03:00:00,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking
2026-02-24 04:00:00,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking
2026-02-24 05:00:00,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking
2026-02-24 06:00:00,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking
2026-02-24 07:00:00,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking


In [68]:
# Inspect the column names of the hourly frame (these drive TARGETS below).
df.columns.tolist()

['ds',
 'INFLOW_STRETCHER',
 'Infl_Stretcher_cum',
 'INFLOW_AMBULATORY',
 'Infl_Ambulatory_cum',
 'Inflow_Total',
 'Inflow_Cum_Total',
 'INFLOW_AMBULANCES',
 'Infl_Ambulances_cum',
 'FLS',
 'CUM_ADMREQ',
 'CUM_BA1',
 'WAITINGADM',
 'TTStr',
 'TRG_HALLWAY1',
 'TRG_HALLWAY_TBS',
 'reoriented_cum',
 'reoriented_cum_MD',
 'QTRACK1',
 'RESUS',
 'Pod_T',
 'POD_GREEN',
 'POD_GREEN_TBS',
 'POD_YELLOW',
 'POD_YELLOW_TBS',
 'POD_ORANGE',
 'POD_ORANGE_TBS',
 'POD_CONS_MORE2H',
 'POD_IMCONS_MORE4H',
 'POD_XRAY_MORE2H',
 'POD_CT_MORE2H',
 'POST_POD1',
 'VERTSTRET',
 'RAZ_TBS',
 'RAZ_LAZYBOY',
 'RAZ_WAITINGREZ',
 'AMBVERT1',
 'AMBVERTTBS',
 'QTrack_TBS',
 'Garage_TBS',
 'RAZ_CONS_MORE2H',
 'RAZ_IMCONS_MORE4H',
 'RAZ_XRAY_MORE2H',
 'RAZ_CT_MORE2H1',
 'PSYCH1',
 'PSYCH_WAITINGADM',
 'total_tbs',
 'vert_tbs',
 'pod_tbs',
 'overflow',
 'id']

In [69]:
ID_COL = "id"
TS_COL = "ds"
# Targets are all columns in df except ds (timestamp) and id.
# (A narrower alternative used earlier: ['total_tbs', 'Inflow_Total', 'overflow'].)
TARGETS = [col for col in df.columns.tolist() if col != TS_COL and col != ID_COL]

df = df.copy()
df[TS_COL] = pd.to_datetime(df[TS_COL], errors="coerce")
df = df.dropna(subset=[TS_COL])

# Snap to exact hours (lowercase 'h' to avoid FutureWarning)
df[TS_COL] = df[TS_COL].dt.floor("h")

# Sort + dedupe: when several rows floor to the same hour, keep the last one
df = df.sort_values([ID_COL, TS_COL]).drop_duplicates(
    [ID_COL, TS_COL], keep="last")

# Call apply with include_groups=False if supported; else fall back
gb = df.groupby(ID_COL, group_keys=False)
try:
    df = gb.apply(regularize_hourly, include_groups=False)
except TypeError:
    # older pandas without include_groups
    df = gb.apply(regularize_hourly)

# Assert every series is truly hourly (accept 'h' and 'H').
for series_id in df[ID_COL].unique():
    g = df[df[ID_COL] == series_id].sort_values(TS_COL)

    # Direct gap check first, so irregular spacing is reported with the
    # offending timestamps. The mask is built on g's own index (the first row's
    # NaT diff is excluded via notna) so it aligns with g in .loc; the earlier
    # form dropped that row from the mask and could not be aligned.
    diffs = g[TS_COL].diff()
    irregular = diffs.notna() & (diffs != pd.Timedelta(hours=1))
    if irregular.any():
        bad = g.loc[irregular, TS_COL].head(10).tolist()
        raise ValueError(f"Non-1h gaps remain around: {bad}")

    freq = pd.infer_freq(g[TS_COL])
    if not freq:
        raise ValueError("No inferable frequency after regularization.")
    if to_offset(freq).name.lower() != "h":
        raise ValueError(f"Expected hourly frequency after regularization, got {freq!r}")

In [70]:
# Baseline run: forecast the next 24 hours of every target from history alone.
# df holds only the id, the timestamp and the target columns at this point, so
# no extra covariates are passed and no future_df is supplied.
print('Predicting basic forecast')
basic_forecast = pipeline.predict_df(
    df,
    target=TARGETS,
    id_column=ID_COL,
    timestamp_column=TS_COL,
    prediction_length=24,
)

basic_forecast.head()

Predicting basic forecast


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
0,jgh,2026-02-16 15:00:00,INFLOW_STRETCHER,10.516043,6.961608,8.179802,9.034977,9.772823,10.516043,11.253127,12.076197,13.141186,14.739576
1,jgh,2026-02-16 16:00:00,INFLOW_STRETCHER,8.557309,5.25518,6.336973,7.140799,7.854489,8.557309,9.298118,10.084665,11.143421,12.610612
2,jgh,2026-02-16 17:00:00,INFLOW_STRETCHER,7.060792,3.988647,4.975925,5.734811,6.405937,7.060792,7.761248,8.507812,9.470943,10.789852
3,jgh,2026-02-16 18:00:00,INFLOW_STRETCHER,5.779593,2.992641,3.830048,4.506807,5.141966,5.779593,6.444592,7.147476,8.040215,9.325649
4,jgh,2026-02-16 19:00:00,INFLOW_STRETCHER,5.41987,2.679791,3.505436,4.171715,4.795451,5.41987,6.082952,6.778265,7.653898,8.946305


In [72]:
# Count forecast rows per target (one row per target per predicted hour).
basic_forecast.target_name.value_counts()

target_name
INFLOW_STRETCHER       24
Infl_Stretcher_cum     24
INFLOW_AMBULATORY      24
Infl_Ambulatory_cum    24
Inflow_Total           24
Inflow_Cum_Total       24
INFLOW_AMBULANCES      24
Infl_Ambulances_cum    24
FLS                    24
CUM_ADMREQ             24
CUM_BA1                24
WAITINGADM             24
TTStr                  24
TRG_HALLWAY1           24
TRG_HALLWAY_TBS        24
reoriented_cum         24
reoriented_cum_MD      24
QTRACK1                24
RESUS                  24
Pod_T                  24
POD_GREEN              24
POD_GREEN_TBS          24
POD_YELLOW             24
POD_YELLOW_TBS         24
POD_ORANGE             24
POD_ORANGE_TBS         24
POD_CONS_MORE2H        24
POD_IMCONS_MORE4H      24
POD_XRAY_MORE2H        24
POD_CT_MORE2H          24
POST_POD1              24
VERTSTRET              24
RAZ_TBS                24
RAZ_LAZYBOY            24
RAZ_WAITINGREZ         24
AMBVERT1               24
AMBVERTTBS             24
QTrack_TBS             24


In [73]:
# History with holiday covariates attached.
df_with_holidays = add_holiday_flags(df, ts_col='ds', include_names=True)

# Future rows: the hours after the last observed timestamp, taken from the
# shift pivot (it extends past the end of the hourly data). .loc + .assign
# builds an independent frame, so adding 'id' does not write into a filtered
# slice (which triggers SettingWithCopyWarning).
_shift_hours = hourly_shifts_by_user_df.reset_index()
future_df = _shift_hours.loc[_shift_hours['ds'] > df['ds'].max()].assign(id='jgh')
future_df = add_holiday_flags(future_df, ts_col='ds', include_names=True)

# add_holiday_flags is idempotent, so the flagged frame is reused instead of
# being recomputed; the name is kept for any later reference.
future_df_with_added_holidays = future_df.copy()

# Keep only the columns that also exist in the history frame (drops the
# per-user staffing columns, leaving ds, id and the holiday columns).
common_columns = [col for col in future_df_with_added_holidays.columns if col in df_with_holidays.columns]
future_df_with_holidays = future_df_with_added_holidays[common_columns]

# Predict
print('Predicting forecast with holidays')  
forecast_with_holidays = pipeline.predict_df(
    df_with_holidays,
    prediction_length=24,
    # NOTE(review): head(24) assumes the future rows are sorted and contiguous
    # hourly — confirm the shift table has no uncovered hours.
    future_df=future_df_with_holidays.head(24),
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)
forecast_with_holidays.tail()

Predicting forecast with holidays


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
1171,jgh,2026-02-17 10:00:00,overflow,6.493987,0.854991,2.521731,3.911976,5.216731,6.493987,7.800459,9.132782,10.787828,13.189905
1172,jgh,2026-02-17 11:00:00,overflow,7.396637,1.352684,3.241659,4.740939,6.094834,7.396637,8.740278,10.122043,11.859529,14.43067
1173,jgh,2026-02-17 12:00:00,overflow,8.416927,1.841601,3.920985,5.567081,7.026976,8.416927,9.844477,11.319663,13.167906,15.863704
1174,jgh,2026-02-17 13:00:00,overflow,9.316895,2.298245,4.637772,6.400423,7.898335,9.316895,10.79712,12.343758,14.286209,17.09301
1175,jgh,2026-02-17 14:00:00,overflow,10.029085,2.68153,4.987454,6.83025,8.466238,10.029085,11.654455,13.304937,15.319302,18.200638


In [74]:
# History joined with the per-user shift table on the hour. This is an inner
# join, so only hours present in both frames are kept.
df_with_staffing = df.merge(hourly_shifts_by_user_df, on='ds')

# Future staffing rows: hours after the last observed timestamp. .loc + .assign
# produces an independent frame instead of assigning a column on a filtered
# slice (which triggers SettingWithCopyWarning).
_staffing_hours = hourly_shifts_by_user_df.reset_index()
future_df_with_staffing = (
    _staffing_hours
    .loc[_staffing_hours['ds'] > df['ds'].max()]
    .assign(id='jgh')
)

print('Predicting forecast with staffing')
forecast_with_staffing = pipeline.predict_df(
    df_with_staffing,
    prediction_length=24,
    future_df=future_df_with_staffing.head(24),
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)

forecast_with_staffing.tail()

Predicting forecast with staffing


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
1171,jgh,2026-02-17 10:00:00,overflow,7.19829,1.383995,3.080765,4.50001,5.851507,7.19829,8.572074,9.995213,11.763536,14.321934
1172,jgh,2026-02-17 11:00:00,overflow,7.795527,1.687378,3.544498,5.042312,6.431162,7.795527,9.238455,10.802376,12.768647,15.543228
1173,jgh,2026-02-17 12:00:00,overflow,8.681901,1.684578,3.857572,5.670423,7.242128,8.681901,10.253183,11.940659,14.08419,17.113176
1174,jgh,2026-02-17 13:00:00,overflow,9.444818,2.060488,4.37566,6.264819,7.925147,9.444818,11.056158,12.819777,15.082014,18.233097
1175,jgh,2026-02-17 14:00:00,overflow,10.097653,2.190212,4.465285,6.499026,8.347623,10.097653,11.862719,13.642231,15.849106,18.916136


In [75]:
# Hourly weather table; it also contains rows after the end of the hourly data,
# which serve as the known-future covariates.
weather_df = pd.read_csv('https://www.dropbox.com/scl/fi/gmhwwld9z9yychg4r0yuk/weather.csv?rlkey=66c78m90aviamr0x0uu72pfr8&raw=1')
weather_df.ds = pd.to_datetime(weather_df.ds, errors="coerce")

# First 24 weather rows after the last observed timestamp, tagged with the
# series id. .assign returns a new frame, so no column is written into a
# filtered slice (which triggers SettingWithCopyWarning).
future_weather_df = (
    weather_df
    .loc[weather_df.ds > df.ds.max()]
    .head(24)
    .assign(id='jgh')
)

print('Predicting forecast with weather')
# Predict
forecast_with_weather = pipeline.predict_df(
    # history joined with weather on the hour (inner join)
    df.merge(weather_df, on='ds'),
    prediction_length=24,
    future_df=future_weather_df,
    quantile_levels=[0.5],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)

Predicting forecast with weather


In [76]:
# Show the last weather rows to see how far past the hourly data they extend.
weather_df.tail()

Unnamed: 0,ds,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,wind_speed_10m,wind_direction_10m,wind_gusts_10m,precipitation_probability
45096,2026-02-23 00:00:00,-4.811,93.0,-5.768622,-9.16297,0.0,0.0,0.0,0.16,3.0,1017.099976,988.330322,100.0,10.0,100.0,100.0,10.182337,44.999897,28.799999,13.0
45097,2026-02-23 01:00:00,-4.861,90.0,-6.248207,-9.584201,0.0,0.0,0.0,0.16,3.0,1016.299988,987.547668,100.0,14.0,100.0,100.0,12.387348,54.462238,34.200001,15.0
45098,2026-02-23 02:00:00,-4.511,86.0,-6.497601,-9.803611,0.0,0.0,0.0,0.16,3.0,1015.5,986.807007,100.0,19.0,100.0,100.0,16.119801,66.297348,40.68,15.0
45099,2026-02-23 03:00:00,-4.361,83.0,-6.81355,-10.074653,0.0,0.0,0.0,0.16,3.0,1014.799988,986.142578,100.0,25.0,100.0,100.0,18.792551,69.829269,44.639996,15.0
45100,2026-02-23 04:00:00,-4.711,84.0,-7.000731,-10.453012,0.0,0.0,0.0,0.16,3.0,1014.299988,985.619812,100.0,31.0,100.0,100.0,18.861389,66.370613,44.279999,15.0


In [77]:
# All variables forecast: history carries staffing, holiday and weather columns.
print('Predicting all variables forecast')
_history_with_holidays = add_holiday_flags(df_with_staffing, ts_col='ds', include_names=True)
all_variable_df = _history_with_holidays.merge(weather_df, on='ds')

# Future covariates: staffing joined with weather on both the hour and the id.
# NOTE(review): the holiday columns are present in the history frame but not in
# this future frame — confirm that is intended.
_future_all_variables = future_df_with_staffing.merge(future_weather_df, on=['ds', 'id'])

forecast_all_vars_with_future = pipeline.predict_df(
    all_variable_df,
    future_df=_future_all_variables,
    prediction_length=24,
    quantile_levels=[0.5],
    target=TARGETS,
    id_column=ID_COL,
    timestamp_column=TS_COL,
)
forecast_all_vars_with_future.tail()

Predicting all variables forecast


Unnamed: 0,id,ds,target_name,predictions,0.5
1171,jgh,2026-02-17 10:00:00,overflow,6.183309,6.183309
1172,jgh,2026-02-17 11:00:00,overflow,6.868708,6.868708
1173,jgh,2026-02-17 12:00:00,overflow,7.86443,7.86443
1174,jgh,2026-02-17 13:00:00,overflow,8.62498,8.62498
1175,jgh,2026-02-17 14:00:00,overflow,9.383798,9.383798


In [78]:
# Join the point predictions of every forecast variant into one frame keyed by
# ('ds', 'target_name'), one column per variant.

def _as_prediction_column(forecast: pd.DataFrame, label: str) -> pd.DataFrame:
    """
    Reduce a forecast frame to ['ds', 'target_name', <label>].

    Accepts either a raw forecast (with a 'predictions' column) or a frame this
    helper already reduced (with a <label> column). Because this cell rebinds
    the forecast names to their reduced form, handling both makes it safe to
    re-run; previously a second run raised KeyError on 'predictions'.
    """
    if label in forecast.columns:
        return forecast[['ds', 'target_name', label]]
    return forecast[['ds', 'target_name', 'predictions']].rename(columns={'predictions': label})


basic_forecast = _as_prediction_column(basic_forecast, 'basic_forecast')
forecast_with_holidays = _as_prediction_column(forecast_with_holidays, 'forecast_with_holidays')
forecast_with_staffing = _as_prediction_column(forecast_with_staffing, 'forecast_with_staffing')
forecast_with_weather = _as_prediction_column(forecast_with_weather, 'forecast_with_weather')
forecast_all_vars_with_future = _as_prediction_column(forecast_all_vars_with_future, 'forecast_all_vars_with_future')

# Inner joins: only (ds, target_name) pairs present in every variant survive.
pred_df = basic_forecast
for _variant in (
    forecast_with_holidays,
    forecast_with_staffing,
    forecast_with_weather,
    forecast_all_vars_with_future,
):
    pred_df = pred_df.merge(_variant, on=['ds', 'target_name'])
pred_df.head()

Unnamed: 0,ds,target_name,basic_forecast,forecast_with_holidays,forecast_with_staffing,forecast_with_weather,forecast_all_vars_with_future
0,2026-02-16 15:00:00,INFLOW_STRETCHER,10.516043,10.55586,11.033377,10.644041,10.918982
1,2026-02-16 16:00:00,INFLOW_STRETCHER,8.557309,8.572207,8.947393,8.573193,8.874675
2,2026-02-16 17:00:00,INFLOW_STRETCHER,7.060792,7.074705,7.454864,7.167273,7.411076
3,2026-02-16 18:00:00,INFLOW_STRETCHER,5.779593,5.787514,6.193031,5.919795,6.102533
4,2026-02-16 19:00:00,INFLOW_STRETCHER,5.41987,5.417531,5.980149,5.57759,5.844005


In [79]:
# Average % difference of each forecast variant relative to the basic forecast,
# per target_name, averaged over all forecast hours (one output row per target).
_variant_by_diff_column = {
    '%diff_holidays': 'forecast_with_holidays',
    '%diff_staffing': 'forecast_with_staffing',
    '%diff_weather': 'forecast_with_weather',
    '%diff_all_vars_with_future': 'forecast_all_vars_with_future',
}

comparison_df = pred_df.copy()
_baseline = comparison_df['basic_forecast']
# A baseline of exactly 0 would give ±inf and poison the per-target mean; turn
# those denominators into NaN so the hour is skipped by mean() instead.
_safe_baseline = _baseline.where(_baseline != 0)
for _diff_column, _variant_column in _variant_by_diff_column.items():
    comparison_df[_diff_column] = (comparison_df[_variant_column] - _baseline) / _safe_baseline * 100

comparison_df = (
    comparison_df
    .groupby('target_name')[list(_variant_by_diff_column)]
    .mean()
    .reset_index()
)
comparison_df.to_csv('forecast_variable_effects.csv', index=False)
# Display last, so the preview is actually rendered as the cell output.
comparison_df.head()

In [46]:
# Expected value and lower/upper band per target and hour; the columns follow
# the pattern <target>_yhat, <target>_yhat_lower, <target>_yhat_upper.
_anomaly_ranges_csv_url = 'https://www.dropbox.com/scl/fi/fjz0am427gw35sz7l994m/anomaly_detection_ranges.csv?rlkey=lib9w0jz2zei5n566jv76o7ol&raw=1'
anomaly_detection_ranges_df = pd.read_csv(_anomaly_ranges_csv_url)
anomaly_detection_ranges_df['ds'] = pd.to_datetime(anomaly_detection_ranges_df['ds'], errors="coerce")
anomaly_detection_ranges_df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER_yhat,INFLOW_STRETCHER_yhat_lower,INFLOW_STRETCHER_yhat_upper,Infl_Stretcher_cum_yhat,Infl_Stretcher_cum_yhat_lower,Infl_Stretcher_cum_yhat_upper,INFLOW_AMBULATORY_yhat,INFLOW_AMBULATORY_yhat_lower,INFLOW_AMBULATORY_yhat_upper,...,total_tbs_yhat_upper,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper
331,2026-02-17 10:00:00,8.595712,3.554119,13.517478,47.723544,13.078714,82.473518,8.852669,4.014644,13.442297,...,37.956685,16.99528,2.559104,32.85998,3.707365,-1.113128,8.724998,12.28253,4.211947,20.290898
332,2026-02-17 11:00:00,9.687093,4.539101,14.101765,60.644536,24.332223,96.551032,9.295951,4.693106,14.05675,...,39.226278,18.479611,3.080171,32.43485,4.451797,-0.519434,9.610966,12.775318,4.478152,20.883682
333,2026-02-17 12:00:00,10.284545,5.329003,15.11436,68.020177,30.113809,106.32008,8.912751,4.274332,13.676663,...,42.830903,20.955425,5.945452,35.394854,5.33668,0.503265,10.277198,13.486158,5.76339,21.540793
334,2026-02-17 13:00:00,10.425223,5.926774,15.152485,72.147128,32.129208,106.426742,8.28394,3.335691,12.904704,...,47.741766,24.373396,9.681965,38.839372,6.233579,1.234512,11.036601,14.339756,6.060332,22.118518
335,2026-02-17 14:00:00,10.167426,4.909505,15.298731,79.373912,41.276287,118.178607,7.814351,3.365244,12.515837,...,51.090844,27.70049,12.294372,42.764633,6.954979,1.849109,12.033232,15.129058,6.429588,22.820212


In [None]:
def _colour_for(row, forecast_col, expected_col, anomaly_col):
    """
    Pick the display colour for one forecast row.

    #D13438 when the row is flagged as an anomaly, #FFB900 when the forecast is
    above the expected value, #107C10 otherwise.
    """
    if row[anomaly_col] == 'yes':
        return '#D13438'
    if row[forecast_col] > row[expected_col]:
        return '#FFB900'
    return '#107C10'


# Build one wide table keyed by 'ds': for every target, the all-variables
# forecast, its expected value and band, an anomaly flag and a colour.
targets = pred_df['target_name'].unique().tolist()
output_df = pd.DataFrame()
for target in targets:
    forecast_col = target + '_forecast'
    expected_col = target + '_yhat'
    lower_col = target + '_yhat_lower'
    upper_col = target + '_yhat_upper'
    anomaly_col = target + '_anomaly'
    colour_col = target + '_colour'

    target_forecast = (
        pred_df.loc[pred_df['target_name'] == target, ['ds', 'forecast_all_vars_with_future']]
        .rename(columns={'forecast_all_vars_with_future': forecast_col})
    )
    target_ranges = anomaly_detection_ranges_df[['ds', expected_col, lower_col, upper_col]]
    target_df = target_forecast.merge(target_ranges, on=['ds'], how='left')

    # An anomaly is a forecast strictly outside the [lower, upper] band.
    below_band = target_df[forecast_col] < target_df[lower_col]
    above_band = target_df[forecast_col] > target_df[upper_col]
    target_df[anomaly_col] = (below_band | above_band).map({True: 'yes', False: 'no'})

    target_df[colour_col] = target_df.apply(
        _colour_for, axis=1, args=(forecast_col, expected_col, anomaly_col)
    )

    if output_df.empty:
        output_df = target_df
    else:
        output_df = output_df.merge(target_df, on='ds', how='outer')

output_df.head()

output_df.to_csv('ED_Hourly_Forecasts_Anomalies_v1.0.csv', index=False)
    

Unnamed: 0,ds,INFLOW_STRETCHER_forecast,INFLOW_STRETCHER_yhat,INFLOW_STRETCHER_yhat_lower,INFLOW_STRETCHER_yhat_upper,INFLOW_STRETCHER_anomaly,INFLOW_STRETCHER_colour,Infl_Stretcher_cum_forecast,Infl_Stretcher_cum_yhat,Infl_Stretcher_cum_yhat_lower,...,pod_tbs_yhat_lower,pod_tbs_yhat_upper,pod_tbs_anomaly,pod_tbs_colour,overflow_forecast,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper,overflow_anomaly,overflow_colour
0,2026-02-16 15:00:00,10.918982,9.458918,4.468796,14.345647,no,#FFB900,101.539413,88.916756,49.788927,...,1.856135,12.153844,no,#FFB900,9.471125,14.360292,6.371663,23.163043,no,#107C10
1,2026-02-16 16:00:00,8.874675,8.669144,3.581215,13.780748,no,#FFB900,108.976799,102.816752,62.397997,...,2.315553,12.439591,no,#FFB900,8.916575,14.469101,6.641635,22.710516,no,#107C10
2,2026-02-16 17:00:00,7.411076,7.864754,2.730241,12.718816,no,#107C10,116.228668,110.376389,74.673301,...,2.071671,11.590717,no,#107C10,7.985712,14.102024,6.239101,21.751197,no,#107C10
3,2026-02-16 18:00:00,6.102533,7.264579,2.211959,12.231943,no,#107C10,122.371773,110.89874,77.90026,...,1.600534,11.515328,no,#107C10,7.184168,13.434902,5.453645,21.708162,no,#107C10
4,2026-02-16 19:00:00,5.844005,6.950492,2.102628,11.563216,no,#107C10,125.699539,111.978559,75.601749,...,0.963935,10.975109,no,#107C10,6.492012,12.707949,5.019035,21.185448,no,#107C10


In [48]:
# NOTE(review): joined_df is not assigned in any cell visible here; this cell
# presumably relies on leftover kernel state and would raise NameError after
# Restart & Run All — confirm, then define it explicitly or delete the cell.
joined_df

Unnamed: 0,ds,target_name,basic_forecast,forecast_with_holidays,forecast_with_staffing,forecast_with_weather,forecast_all_vars_with_future,INFLOW_STRETCHER_yhat,INFLOW_STRETCHER_yhat_lower,INFLOW_STRETCHER_yhat_upper,...,total_tbs_yhat_upper,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper
0,2026-02-16 15:00:00,total_tbs,37.628384,37.695625,37.563896,38.022236,38.057693,9.458918,4.468796,14.345647,...,53.815014,29.376201,15.367441,43.401781,7.193510,1.856135,12.153844,14.360292,6.371663,23.163043
1,2026-02-16 16:00:00,total_tbs,42.068497,42.213303,41.657764,42.866474,42.227623,8.669144,3.581215,13.780748,...,52.236339,29.544375,14.502433,44.198538,7.205262,2.315553,12.439591,14.469101,6.641635,22.710516
2,2026-02-16 17:00:00,total_tbs,35.643852,35.746296,36.205032,36.480484,36.711891,7.864754,2.730241,12.718816,...,50.777934,28.191093,13.573461,42.566534,6.878355,2.071671,11.590717,14.102024,6.239101,21.751197
3,2026-02-16 18:00:00,total_tbs,30.739222,30.869289,32.788193,31.750641,32.941677,7.264579,2.211959,12.231943,...,49.794723,26.283048,12.062690,40.969106,6.413268,1.600534,11.515328,13.434902,5.453645,21.708162
4,2026-02-16 19:00:00,total_tbs,26.363464,26.584129,27.536945,27.800011,28.464209,6.950492,2.102628,11.563216,...,46.572718,24.503960,9.274812,39.498724,6.023024,0.963935,10.975109,12.707949,5.019035,21.185448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,2026-02-17 10:00:00,overflow,6.341452,6.304688,6.562127,5.287560,5.664505,8.595712,3.554119,13.517478,...,37.956685,16.995280,2.559104,32.859980,3.707365,-1.113128,8.724998,12.282530,4.211947,20.290898
68,2026-02-17 11:00:00,overflow,7.168682,7.100276,7.069561,6.040672,6.253234,9.687093,4.539101,14.101765,...,39.226278,18.479611,3.080171,32.434850,4.451797,-0.519434,9.610966,12.775318,4.478152,20.883682
69,2026-02-17 12:00:00,overflow,8.209742,8.122235,7.834362,7.109168,7.194704,10.284545,5.329003,15.114360,...,42.830903,20.955425,5.945452,35.394854,5.336680,0.503265,10.277198,13.486158,5.763390,21.540793
70,2026-02-17 13:00:00,overflow,9.161466,9.044331,8.628501,7.961152,7.949181,10.425223,5.926774,15.152485,...,47.741766,24.373396,9.681965,38.839372,6.233579,1.234512,11.036601,14.339756,6.060332,22.118518
