In [1]:
from chronos import BaseChronosPipeline, Chronos2Pipeline
import pandas as pd
import os
from dotenv import load_dotenv
import requests
from utils import upload
import dropbox
from pandas.tseries.frequencies import to_offset
import holidays
import torch

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
try:
    chronos_gpu_or_cpu = os.getenv("CHRONOS_GPU_OR_CPU", "gpu").lower()
    if chronos_gpu_or_cpu not in ["gpu", "cpu"]:
        raise ValueError("Invalid value for CHRONOS_GPU_OR_CPU. Must be 'gpu' or 'cpu'.")

    if chronos_gpu_or_cpu == "gpu":


        torch.cuda.set_device(int(os.getenv("CHRONOS_GPU_DEVICE", 0)))

        # Load the Chronos-2 pipeline
        # GPU recommended for faster inference, but CPU is also supported
        pipeline: Chronos2Pipeline = BaseChronosPipeline.from_pretrained(
            "amazon/chronos-2",
            device_map="cuda"
            # device_map="cpu"
    )
    else:
        
        # Load the Chronos-2 pipeline
        # GPU recommended for faster inference, but CPU is also supported
        pipeline: Chronos2Pipeline = BaseChronosPipeline.from_pretrained(
            "amazon/chronos-2",
            device_map="cpu"
    )
except Exception as e:
    print(f"Error loading Chronos-2 pipeline: {e}")
    
    pipeline: Chronos2Pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-2",
    device_map="cpu")
    raise


In [3]:
def regularize_hourly(g: pd.DataFrame) -> pd.DataFrame:
    """
    Reindex each group's timestamps to strict hourly and fill gaps.
    Works whether the grouping column is present or omitted (include_groups=False).
    """
    # The group key (id) is available as g.name; if ID_COL exists, prefer it.
    sid = g[ID_COL].iloc[0] if ID_COL in g.columns else g.name

    g = g.sort_values(TS_COL)
    full_idx = pd.date_range(g[TS_COL].min(), g[TS_COL].max(), freq="h")
    g = g.set_index(TS_COL).reindex(full_idx)
    g.index.name = TS_COL

    # restore id (constant for the whole group)
    g[ID_COL] = sid

    # numeric + fill for targets
    for col in TARGETS:
        if col in g.columns:
            g[col] = pd.to_numeric(g[col], errors="coerce").ffill().bfill()
    return g.reset_index()

def add_holiday_flags(
    df: pd.DataFrame,
    ts_col: str = "ds",
    local_tz: str = "America/Montreal",
    observed: bool = True,
    include_names: bool = False,
) -> pd.DataFrame:
    """
    Adds boolean columns:
      • is_qc_holiday       — Québec public holiday (CA-QC)
      • is_jewish_holiday   — Israeli public/Jewish holiday (IL)
    Optionally adds:
      • qc_holiday_name
      • jewish_holiday_name

    Notes:
      • Holiday checks are date-based (00:00–24:00 local calendar date),
        not sundown-to-sundown observance.
      • NaT timestamps are ignored gracefully.
    """
    out = df.copy()

    # 1) Parse to datetime
    out[ts_col] = pd.to_datetime(out[ts_col], errors="coerce")

    # 2) Get the calendar DATE to use for holiday lookup
    #    - If tz-aware: convert to Montreal then take .date
    #    - If naive: assume values already represent local Montreal wall-clock; just take .date
    if getattr(out[ts_col].dt, "tz", None) is not None:
        dates_for_calendar = out[ts_col].dt.tz_convert(local_tz).dt.date
    else:
        dates_for_calendar = out[ts_col].dt.date

    # 3) Build a SAFE integer year range for the holiday objects
    years_series = pd.Series(dates_for_calendar)
    years_series = years_series.dropna().map(lambda d: int(pd.Timestamp(d).year))
    if years_series.empty:
        raise ValueError("No valid datetimes found to extract holiday years.")
    years = list(range(int(years_series.min()), int(years_series.max()) + 1))

    # 4) Construct holiday calendars
    qc_holidays = holidays.Canada(subdiv="QC", years=years, observed=observed)
    il_holidays = holidays.Israel(years=years, observed=observed)

   # 5) Flag membership
    out["is_qc_holiday"] = [ ("yes" if d in qc_holidays else "no") if pd.notna(pd.Timestamp(d)) else "no"
                             for d in dates_for_calendar ]
    out["is_jewish_holiday"] = [ ("yes" if d in il_holidays else "no") if pd.notna(pd.Timestamp(d)) else "no"
                                 for d in dates_for_calendar ]

    if include_names:
        out["qc_holiday_name"] = [ qc_holidays.get(d, "no") if pd.notna(pd.Timestamp(d)) else "no"
                                   for d in dates_for_calendar ]
        out["jewish_holiday_name"] = [ il_holidays.get(d, "no") if pd.notna(pd.Timestamp(d)) else "no"
                                       for d in dates_for_calendar ]

    return out

shift_types_dict = {'W1':'flow',
 'X1':'pod',
 'X3':'pod',
 'X4':'vertical',
 'X2':'vertical',
 'WOC1':'oncall',
 'WOC2':'oncall',
 'WOC3':'oncall',
 'X5':'pod',
 'W3':'overlap',
 'Y1':'pod',
 'Y3':'pod',
 'Y4':'vertical',
 'Y2':'vertical',
 'Y5':'pod',
 'Z1':'night',
 'Z2':'night',
 'D1':'pod',
 'R1':'pod',
 'P1':'vertical',
 'D2':'vertical',
 'OC1':'oncall',
 'OC2':'oncall',
 'V1':'flow',
 'A1':'pod',
 'G1':'vertical',
 'E1':'pod',
 'R2':'pod',
 'A2':'pod',
 'P2':'vertical',
 'E2':'vertical',
 'N1':'night',
 'N2':'night',
 'L2':'overlap',
 'L4':'overlap',
 'H1':'teaching',
 'B1':'vertical',
 'L1':'overlap',
 'W5':'overlap',
 'L6':'overlap',
 'B2':'vertical'}

In [4]:
# Load hourly data
df = pd.read_csv(
    'https://www.dropbox.com/scl/fi/s83jig4zews1xz7vhezui/allDataWithCalculatedColumns.csv?rlkey=9mm4zwaugxyj2r4ooyd39y4nl&raw=1')
df.ds = pd.to_datetime(df.ds, errors="coerce")
df['id'] = 'jgh'

hourly_report_df = df.copy()
df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM,total_tbs,vert_tbs,pod_tbs,overflow,id
44910,2026-02-20 05:00:00,2,13,1,11,3,24,0,7,0,...,0,0,0,15,10,9,7,2,10,jgh
44911,2026-02-20 06:00:00,4,17,1,12,5,29,1,8,0,...,0,0,0,15,10,11,10,1,12,jgh
44912,2026-02-20 07:00:00,3,20,3,15,6,35,1,9,1,...,0,0,0,15,10,11,10,1,12,jgh
44913,2026-02-20 08:00:00,9,29,3,18,12,47,2,11,1,...,0,0,0,15,10,13,12,1,12,jgh
44914,2026-02-20 09:00:00,8,37,6,24,14,61,3,14,0,...,0,0,0,15,10,20,15,5,13,jgh


In [5]:
# Load shift data
all_shifts_df = pd.read_csv('https://www.dropbox.com/scl/fi/yeyr2a7pj6nry8i2q3m0c/all_shifts.csv?rlkey=q1su2h8fqxfnlu7t1l2qe1w0q&raw=1')
all_shifts_df['shift_start'] = pd.to_datetime(all_shifts_df['shift_start']).dt.round('h')
all_shifts_df['shift_end'] = pd.to_datetime(all_shifts_df['shift_end']).dt.round('h')
all_shifts_df['shift_type'] = all_shifts_df['shift_short_name'].map(shift_types_dict)
all_shifts_df.tail()

Unnamed: 0,scheduled_shift_id,group_id,user_id,employee_id,npi,first_name,last_name,facility_id,facility_ext_id,facility_name,...,shift_start,shift_end,shift_hours,work_start,work_end,work_hours,count_as_shift,is_night,is_weekend,shift_type
30013,88897,1,66,,,Wayne,Choi,1,,Jewish General Hospital,...,2026-02-27 16:00:00,2026-02-28 00:00:00,8.0,2/27/2026 16:00,2/28/2026 00:00,8.0,1,0,0,vertical
30014,88898,1,39,,,ThuHang,Tran,1,,Jewish General Hospital,...,2026-02-27 16:00:00,2026-02-28 00:00:00,8.0,2/27/2026 16:00,2/28/2026 00:00,8.0,1,0,0,vertical
30015,88223,1,19,,,Katya,Ghannoum,1,,Jewish General Hospital,...,2026-02-27 16:00:00,2026-02-28 00:00:00,8.0,2/27/2026 16:00,2/28/2026 00:00,8.0,1,0,0,pod
30016,88899,1,61,,,Alexander,Hart,1,,Jewish General Hospital,...,2026-02-28 00:00:00,2026-02-28 08:00:00,8.08,2/27/2026 23:45,2/28/2026 07:50,8.08,1,1,1,night
30017,88900,1,27,,,Tan,Le,1,,Jewish General Hospital,...,2026-02-28 00:00:00,2026-02-28 08:00:00,8.08,2/27/2026 23:45,2/28/2026 07:50,8.08,1,1,1,night


In [6]:
# Create hourly rows
# We'll use a list comprehension to generate the range for each row
expanded_rows = []
for _, row in all_shifts_df.iterrows():
    # Create range. inclusive='left' means [start, end)
    # If start == end (e.g. 0 length shift after rounding), it will be empty, which is correct
    hours = pd.date_range(row['shift_start'], row['shift_end'], freq='h', inclusive='left')
    for h in hours:
        expanded_rows.append({
            'ds': h,
            'user': row['first_name']+row['last_name'],
            'shift_type': row['shift_type'],
            'shift_short_name': row['shift_short_name']
        })

expanded_df = pd.DataFrame(expanded_rows)

expanded_df.tail()

Unnamed: 0,ds,user,shift_type,shift_short_name
273393,2026-02-28 03:00:00,TanLe,night,Z2
273394,2026-02-28 04:00:00,TanLe,night,Z2
273395,2026-02-28 05:00:00,TanLe,night,Z2
273396,2026-02-28 06:00:00,TanLe,night,Z2
273397,2026-02-28 07:00:00,TanLe,night,Z2


In [7]:
# Pivot
# index=timestamp, columns=user_id, values=shift_type
hourly_shifts_by_user_df = expanded_df.pivot_table(
    index='ds', 
    columns='user', 
    values='shift_type', 
    aggfunc='first' # In case of duplicates, take the first
)

# Fill NaNs
hourly_shifts_by_user_df = hourly_shifts_by_user_df.fillna('NotWorking')
hourly_shifts_by_user_df.tail()

user,AlanAzuelos,AlexGuttman,AlexanderHart,AlexandreDostaler,AlexisHaligua,AmelieBellemare,ArzuChaudhry,BernardUnger,DahliaGuttman,DanielMankarios,...,PaulBrisebois,PhilipStasiak,RafaelAroutiunian,SaraAhronheim,ShuoPeng,StephenRosenthal,TanLe,ThuHangTran,WayneChoi,WillGrad
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2026-02-28 03:00:00,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking
2026-02-28 04:00:00,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking
2026-02-28 05:00:00,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking
2026-02-28 06:00:00,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking
2026-02-28 07:00:00,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,...,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,NotWorking,night,NotWorking,NotWorking,NotWorking


In [8]:
df.columns.tolist()

['ds',
 'INFLOW_STRETCHER',
 'Infl_Stretcher_cum',
 'INFLOW_AMBULATORY',
 'Infl_Ambulatory_cum',
 'Inflow_Total',
 'Inflow_Cum_Total',
 'INFLOW_AMBULANCES',
 'Infl_Ambulances_cum',
 'FLS',
 'CUM_ADMREQ',
 'CUM_BA1',
 'WAITINGADM',
 'TTStr',
 'TRG_HALLWAY1',
 'TRG_HALLWAY_TBS',
 'reoriented_cum',
 'reoriented_cum_MD',
 'QTRACK1',
 'RESUS',
 'Pod_T',
 'POD_GREEN',
 'POD_GREEN_TBS',
 'POD_YELLOW',
 'POD_YELLOW_TBS',
 'POD_ORANGE',
 'POD_ORANGE_TBS',
 'POD_CONS_MORE2H',
 'POD_IMCONS_MORE4H',
 'POD_XRAY_MORE2H',
 'POD_CT_MORE2H',
 'POST_POD1',
 'VERTSTRET',
 'RAZ_TBS',
 'RAZ_LAZYBOY',
 'RAZ_WAITINGREZ',
 'AMBVERT1',
 'AMBVERTTBS',
 'QTrack_TBS',
 'Garage_TBS',
 'RAZ_CONS_MORE2H',
 'RAZ_IMCONS_MORE4H',
 'RAZ_XRAY_MORE2H',
 'RAZ_CT_MORE2H1',
 'PSYCH1',
 'PSYCH_WAITINGADM',
 'total_tbs',
 'vert_tbs',
 'pod_tbs',
 'overflow',
 'id']

In [9]:
ID_COL = "id"
TS_COL = "ds"
# TARGETS = ['total_tbs', 'Inflow_Total', 'overflow']
# Targets are all columns in df except ds (timestamp) and id
TARGETS = [col for col in df.columns.tolist() if col != TS_COL and col != ID_COL]

df = df.copy()
df[TS_COL] = pd.to_datetime(df[TS_COL], errors="coerce")
df = df.dropna(subset=[TS_COL])

# Snap to exact hours (lowercase 'h' to avoid FutureWarning)
df[TS_COL] = df[TS_COL].dt.floor("h")

# Sort + dedupe
df = df.sort_values([ID_COL, TS_COL]).drop_duplicates(
    [ID_COL, TS_COL], keep="last")





# Call apply with include_groups=False if supported; else fall back
gb = df.groupby(ID_COL, group_keys=False)
try:
    df = gb.apply(regularize_hourly, include_groups=False)
except TypeError:
    # older pandas without include_groups
    df = gb.apply(regularize_hourly)

# Assert truly hourly (accept 'h' and 'H')
g = df[df[ID_COL] == "jgh"].sort_values(TS_COL)
freq = pd.infer_freq(g[TS_COL])
if not freq:
    raise ValueError("No inferable frequency after regularization.")
if to_offset(freq).name.lower() != "h":
    # extra check independent of infer_freq
    diffs = g[TS_COL].diff().dropna()
    bad = g.loc[diffs != pd.Timedelta(hours=1), TS_COL].head(10).tolist()
    raise ValueError(f"Non-1h gaps remain around: {bad}")

In [10]:
# Predict
print('Predicting basic forecast')
basic_forecast = pipeline.predict_df(
    df,
    prediction_length=24,
    # future_df = future_df.head(24),
    # quantile_levels=[0.1, 0.5, 0.9],
    # quantile_levels=[0.5],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)

basic_forecast.head()

Predicting basic forecast


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
0,jgh,2026-02-20 10:00:00,INFLOW_STRETCHER,9.259979,6.022861,7.113624,7.903517,8.597184,9.259979,9.93662,10.66971,11.582595,12.881701
1,jgh,2026-02-20 11:00:00,INFLOW_STRETCHER,10.477552,7.01764,8.266611,9.152569,9.842384,10.477552,11.11451,11.823692,12.774025,14.126563
2,jgh,2026-02-20 12:00:00,INFLOW_STRETCHER,10.948558,7.238555,8.561578,9.493872,10.252857,10.948558,11.64356,12.407932,13.389277,14.667284
3,jgh,2026-02-20 13:00:00,INFLOW_STRETCHER,11.217377,7.506251,8.834149,9.730947,10.501405,11.217377,11.983734,12.713386,13.650673,14.93862
4,jgh,2026-02-20 14:00:00,INFLOW_STRETCHER,11.070419,7.160436,8.554928,9.540588,10.360786,11.070419,11.801764,12.579137,13.60103,14.940167


In [11]:
basic_forecast.target_name.value_counts()

target_name
INFLOW_STRETCHER       24
Infl_Stretcher_cum     24
INFLOW_AMBULATORY      24
Infl_Ambulatory_cum    24
Inflow_Total           24
Inflow_Cum_Total       24
INFLOW_AMBULANCES      24
Infl_Ambulances_cum    24
FLS                    24
CUM_ADMREQ             24
CUM_BA1                24
WAITINGADM             24
TTStr                  24
TRG_HALLWAY1           24
TRG_HALLWAY_TBS        24
reoriented_cum         24
reoriented_cum_MD      24
QTRACK1                24
RESUS                  24
Pod_T                  24
POD_GREEN              24
POD_GREEN_TBS          24
POD_YELLOW             24
POD_YELLOW_TBS         24
POD_ORANGE             24
POD_ORANGE_TBS         24
POD_CONS_MORE2H        24
POD_IMCONS_MORE4H      24
POD_XRAY_MORE2H        24
POD_CT_MORE2H          24
POST_POD1              24
VERTSTRET              24
RAZ_TBS                24
RAZ_LAZYBOY            24
RAZ_WAITINGREZ         24
AMBVERT1               24
AMBVERTTBS             24
QTrack_TBS             24


In [12]:
# basic_forecast


df_with_holidays = add_holiday_flags(df, ts_col='ds', include_names=True)

#create a dataframe with the next 24 hours timestamps hourly as column 'ds', with column 'id' jgh
future_df = hourly_shifts_by_user_df.reset_index()[hourly_shifts_by_user_df.reset_index()['ds'] > df['ds'].max()]
future_df['id'] = 'jgh'
future_df = add_holiday_flags(future_df, ts_col='ds', include_names=True)

# First, add holiday flags to future_df
future_df_with_added_holidays = add_holiday_flags(future_df, ts_col='ds', include_names=True)

# Then, select only the columns from future_df_with_added_holidays that are also in df_with_holidays
common_columns = [col for col in future_df_with_added_holidays.columns if col in df_with_holidays.columns]
future_df_with_holidays = future_df_with_added_holidays[common_columns]

# Predict
print('Predicting forecast with holidays')  
forecast_with_holidays = pipeline.predict_df(
    df_with_holidays,
    prediction_length=24,
    future_df = future_df_with_holidays.head(24),
    # quantile_levels=[0.1, 0.5, 0.9],
    # quantile_levels=[0.5],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)
forecast_with_holidays.tail()

Predicting forecast with holidays


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
1171,jgh,2026-02-21 05:00:00,overflow,9.173792,5.619145,6.772789,7.613613,8.37956,9.173792,10.06718,11.077979,12.434795,14.530298
1172,jgh,2026-02-21 06:00:00,overflow,9.519608,5.713805,6.94772,7.853425,8.672404,9.519608,10.469816,11.54313,12.961407,15.139805
1173,jgh,2026-02-21 07:00:00,overflow,9.818166,5.672829,7.006298,7.997849,8.89683,9.818166,10.842305,11.992191,13.484562,15.739632
1174,jgh,2026-02-21 08:00:00,overflow,9.879219,5.567202,6.934124,7.964867,8.905487,9.879219,10.974312,12.214241,13.837315,16.203167
1175,jgh,2026-02-21 09:00:00,overflow,10.153063,5.680309,7.065538,8.121206,9.113364,10.153063,11.312147,12.598232,14.302902,16.874451


In [13]:
df_with_staffing = df.merge(hourly_shifts_by_user_df, on='ds')
future_df_with_staffing = hourly_shifts_by_user_df.reset_index()[hourly_shifts_by_user_df.reset_index()['ds'] > df['ds'].max()]
future_df_with_staffing['id'] = 'jgh'

print('Predicting forecast with staffing')
forecast_with_staffing = pipeline.predict_df(
    df_with_staffing,
    prediction_length=24,
    future_df = future_df_with_staffing.head(24),
    # quantile_levels=[0.1, 0.5, 0.9],
    # quantile_levels=[0.5],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)

forecast_with_staffing.tail()

Predicting forecast with staffing


Unnamed: 0,id,ds,target_name,predictions,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
1171,jgh,2026-02-21 05:00:00,overflow,9.933822,5.590402,6.974916,8.011818,8.962352,9.933822,11.011072,12.162826,13.634123,15.783558
1172,jgh,2026-02-21 06:00:00,overflow,10.016403,5.591339,6.967269,8.027653,9.013744,10.016403,11.107307,12.277895,13.806357,16.161951
1173,jgh,2026-02-21 07:00:00,overflow,10.461024,5.59163,7.103473,8.27339,9.359097,10.461024,11.675286,12.970524,14.595724,16.949081
1174,jgh,2026-02-21 08:00:00,overflow,10.274912,5.252452,6.793679,8.003257,9.125336,10.274912,11.558183,12.953187,14.707758,17.184189
1175,jgh,2026-02-21 09:00:00,overflow,10.16598,5.080356,6.610891,7.82049,8.960226,10.16598,11.512136,12.937328,14.718136,17.299271


In [14]:
weather_df = pd.read_csv('https://www.dropbox.com/scl/fi/gmhwwld9z9yychg4r0yuk/weather.csv?rlkey=66c78m90aviamr0x0uu72pfr8&raw=1')
weather_df.ds = pd.to_datetime(weather_df.ds, errors="coerce")


future_weather_df = weather_df[weather_df.ds > df.ds.max()].head(24)
future_weather_df['id']='jgh'

print('Predicting forecast with weather')
# Predict
forecast_with_weather = pipeline.predict_df(
    #join df with weather_df on ds
    df.merge(weather_df, on='ds'),
    prediction_length=24,
    #weather_df where ds is greater than the max of df.ds.max()
    future_df = future_weather_df,
    # future_df = future_df.head(24),
    # quantile_levels=[0.1, 0.5, 0.9],
    quantile_levels=[0.5],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)

Predicting forecast with weather


In [15]:
weather_df.tail()

Unnamed: 0,ds,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,wind_speed_10m,wind_direction_10m,wind_gusts_10m,precipitation_probability
45192,2026-02-27 00:00:00,-5.511,86.0,-7.481047,-10.484428,0.0,0.0,0.0,0.42,3.0,1012.900024,984.175415,100.0,100.0,2.0,0.0,13.276144,282.528809,36.360001,17.0
45193,2026-02-27 01:00:00,-7.361,87.0,-9.153062,-12.215097,0.0,0.0,0.0,0.42,3.0,1013.799988,984.853088,99.0,99.0,1.0,0.0,11.440978,282.724365,30.239998,15.0
45194,2026-02-27 02:00:00,-9.660999,88.0,-11.275159,-14.298566,0.0,0.0,0.0,0.42,3.0,1014.5,985.284485,98.0,98.0,1.0,0.0,8.825508,281.76825,21.24,15.0
45195,2026-02-27 03:00:00,-11.910999,89.0,-13.355215,-16.420048,0.0,0.0,0.0,0.42,3.0,1015.200012,985.717102,95.0,94.0,0.0,0.0,6.989935,281.888641,13.679999,15.0
45196,2026-02-27 04:00:00,-14.061,88.0,-15.615069,-18.494802,0.0,0.0,0.0,0.42,3.0,1015.900024,986.156311,88.0,87.0,0.0,0.0,5.588703,284.931458,9.0,15.0


In [16]:
# All variables forecast
print('Predicting all variables forecast')
all_variable_df = add_holiday_flags(df_with_staffing, ts_col='ds', include_names=True).merge(weather_df, on='ds')

forecast_all_vars_with_future = pipeline.predict_df(
    all_variable_df,
    prediction_length=24,
    #future_df should be future_df_with_staffing merged with future_weather_df on 'ds' and 'id'
    future_df = future_df_with_staffing.merge(future_weather_df, on=['ds', 'id']),
    # quantile_levels=[0.1, 0.5, 0.9],
    quantile_levels=[0.2,0.5,0.8],
    id_column=ID_COL,
    timestamp_column=TS_COL,
    target=TARGETS,
)   
forecast_all_vars_with_future.head()

Predicting all variables forecast


Unnamed: 0,id,ds,target_name,predictions,0.2,0.5,0.8
0,jgh,2026-02-20 10:00:00,INFLOW_STRETCHER,9.942927,7.69179,9.942927,12.291724
1,jgh,2026-02-20 11:00:00,INFLOW_STRETCHER,10.98838,8.656234,10.98838,13.29756
2,jgh,2026-02-20 12:00:00,INFLOW_STRETCHER,11.429466,9.037399,11.429466,13.710134
3,jgh,2026-02-20 13:00:00,INFLOW_STRETCHER,11.527231,9.116427,11.527231,13.914976
4,jgh,2026-02-20 14:00:00,INFLOW_STRETCHER,11.150646,8.586011,11.150646,13.710413


In [17]:
#join the predictions columns of basic_forecast, forecast_with_holidays, forecast_with_staffing, forecast_with_weather, forecast_all_vars_without_future, forecast_all_vars_with_future on the 'ds' column
basic_forecast = basic_forecast[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'basic_forecast'})
forecast_with_holidays = forecast_with_holidays[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'forecast_with_holidays'})
forecast_with_staffing = forecast_with_staffing[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'forecast_with_staffing'})
forecast_with_weather = forecast_with_weather[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'forecast_with_weather'})
# forecast_all_vars_without_future = forecast_all_vars_without_future[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'forecast_all_vars_without_future'})
forecast_all_vars_with_future_to_merge = forecast_all_vars_with_future[['ds', 'target_name', 'predictions']].rename(columns={'predictions':'forecast_all_vars_with_future'})

pred_df = basic_forecast.merge(forecast_with_holidays, on=['ds', 'target_name']).merge(forecast_with_staffing, on=['ds', 'target_name']).merge(forecast_with_weather, on=['ds', 'target_name']).merge(forecast_all_vars_with_future_to_merge, on=['ds', 'target_name'])
pred_df.head()

Unnamed: 0,ds,target_name,basic_forecast,forecast_with_holidays,forecast_with_staffing,forecast_with_weather,forecast_all_vars_with_future
0,2026-02-20 10:00:00,INFLOW_STRETCHER,9.259979,9.234007,10.217232,9.478157,9.942927
1,2026-02-20 11:00:00,INFLOW_STRETCHER,10.477552,10.477982,11.229727,10.718143,10.98838
2,2026-02-20 12:00:00,INFLOW_STRETCHER,10.948558,10.932492,11.621191,11.134229,11.429466
3,2026-02-20 13:00:00,INFLOW_STRETCHER,11.217377,11.177794,11.77276,11.308363,11.527231
4,2026-02-20 14:00:00,INFLOW_STRETCHER,11.070419,11.023916,11.438702,10.999893,11.150646


In [28]:
# Create a new dataframe with the average % difference of each forecast compared to the basic forecast, for each target_name averaged over all ds
# The output dataframe should only have 1 row per target_name, and columns for the average % difference of each forecast compared to the basic forecast
comparison_df = pred_df.copy()
comparison_df['%diff_holidays'] = (comparison_df['forecast_with_holidays'] - comparison_df['basic_forecast']) / comparison_df['basic_forecast'] * 100
comparison_df['%diff_staffing'] = (comparison_df['forecast_with_staffing'] - comparison_df['basic_forecast']) / comparison_df['basic_forecast'] * 100
comparison_df['%diff_weather'] = (comparison_df['forecast_with_weather'] - comparison_df['basic_forecast']) / comparison_df['basic_forecast'] * 100
comparison_df['%diff_all_vars_with_future'] = (comparison_df['forecast_all_vars_with_future'] - comparison_df['basic_forecast']) / comparison_df['basic_forecast'] * 100
comparison_df.to_csv('forecast_variable_effects_hourly.csv', index=False)

comparison_df_mean = comparison_df.groupby('target_name')[['%diff_holidays', '%diff_staffing', '%diff_weather', '%diff_all_vars_with_future']].mean().reset_index()
# comparison_df_mean.head()
comparison_df_mean.to_csv('forecast_variable_effects.csv', index=False)

In [19]:
anomaly_detection_ranges_df = pd.read_csv('https://www.dropbox.com/scl/fi/fjz0am427gw35sz7l994m/anomaly_detection_ranges.csv?rlkey=lib9w0jz2zei5n566jv76o7ol&raw=1')
anomaly_detection_ranges_df.ds = pd.to_datetime(anomaly_detection_ranges_df.ds, errors="coerce")
anomaly_detection_ranges_df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER_yhat,INFLOW_STRETCHER_yhat_lower,INFLOW_STRETCHER_yhat_upper,Infl_Stretcher_cum_yhat,Infl_Stretcher_cum_yhat_lower,Infl_Stretcher_cum_yhat_upper,INFLOW_AMBULATORY_yhat,INFLOW_AMBULATORY_yhat_lower,INFLOW_AMBULATORY_yhat_upper,...,total_tbs_yhat_upper,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper
331,2026-02-21 18:00:00,6.301807,1.103358,11.217936,102.632832,67.984845,138.1263,5.117596,0.151011,10.019865,...,46.921093,25.323647,9.975307,39.813873,5.79805,0.796935,10.630575,10.689771,2.956672,19.18952
332,2026-02-21 19:00:00,5.941592,1.448541,10.627467,102.932254,67.870207,139.554813,4.585238,0.078139,9.674305,...,45.40465,23.235929,7.361531,36.070625,5.332239,0.128474,10.586323,9.804666,1.837117,17.274983
333,2026-02-21 20:00:00,5.738804,0.872201,10.598413,112.025845,71.889668,149.298087,4.232222,-0.687425,9.215222,...,43.533104,21.381158,6.559838,35.870611,5.05685,0.288353,10.19906,9.051571,0.932291,16.803409
334,2026-02-21 21:00:00,5.431705,0.736415,10.182918,128.355677,93.43523,163.837459,3.87169,-1.215276,8.54141,...,41.625619,19.623135,3.946191,34.434043,4.931963,-0.363304,10.129298,8.490576,0.248556,16.385808
335,2026-02-21 22:00:00,4.833551,0.247119,9.66827,138.525124,103.245362,177.479407,3.292923,-1.541375,8.196968,...,38.747837,18.096434,3.05543,33.128349,4.808311,-0.125537,10.122132,8.078795,0.223009,16.277714


In [20]:
recent_df = df.tail(24).copy()
targets = recent_df.columns.tolist()
#remove ds and id from targets
targets = [t for t in targets if t not in ['ds', 'id']]

# Merge recent_df with anomaly_detection_ranges_df on 'ds' to align the data
recent_df = recent_df.merge(anomaly_detection_ranges_df, on='ds', how='left')

for target in targets:
    recent_df[target+'_hist_anomaly'] = ((recent_df[target] < recent_df[target+'_yhat_lower']) | (recent_df[target] > recent_df[target+'_yhat_upper'])).map({True: 'yes', False: 'no'})
    recent_df[target+'_hist_colour'] = recent_df.apply(lambda row: '#D13438' if row[target+'_hist_anomaly'] == 'yes' else ('#FFB900' if row[target] > row[target+'_yhat'] else ('#107C10' if row[target] < row[target+'_yhat'] else '#000000')), axis=1)

#remove all the columns containing yhat, yhat_lower, yhat_upper from recent_df
recent_df = recent_df[[col for col in recent_df.columns if not any(sub in col for sub in ['yhat', 'yhat_lower', 'yhat_upper'])]]
recent_df.columns.tolist()

  recent_df[target+'_hist_anomaly'] = ((recent_df[target] < recent_df[target+'_yhat_lower']) | (recent_df[target] > recent_df[target+'_yhat_upper'])).map({True: 'yes', False: 'no'})
  recent_df[target+'_hist_colour'] = recent_df.apply(lambda row: '#D13438' if row[target+'_hist_anomaly'] == 'yes' else ('#FFB900' if row[target] > row[target+'_yhat'] else ('#107C10' if row[target] < row[target+'_yhat'] else '#000000')), axis=1)


['ds',
 'INFLOW_STRETCHER',
 'Infl_Stretcher_cum',
 'INFLOW_AMBULATORY',
 'Infl_Ambulatory_cum',
 'Inflow_Total',
 'Inflow_Cum_Total',
 'INFLOW_AMBULANCES',
 'Infl_Ambulances_cum',
 'FLS',
 'CUM_ADMREQ',
 'CUM_BA1',
 'WAITINGADM',
 'TTStr',
 'TRG_HALLWAY1',
 'TRG_HALLWAY_TBS',
 'reoriented_cum',
 'reoriented_cum_MD',
 'QTRACK1',
 'RESUS',
 'Pod_T',
 'POD_GREEN',
 'POD_GREEN_TBS',
 'POD_YELLOW',
 'POD_YELLOW_TBS',
 'POD_ORANGE',
 'POD_ORANGE_TBS',
 'POD_CONS_MORE2H',
 'POD_IMCONS_MORE4H',
 'POD_XRAY_MORE2H',
 'POD_CT_MORE2H',
 'POST_POD1',
 'VERTSTRET',
 'RAZ_TBS',
 'RAZ_LAZYBOY',
 'RAZ_WAITINGREZ',
 'AMBVERT1',
 'AMBVERTTBS',
 'QTrack_TBS',
 'Garage_TBS',
 'RAZ_CONS_MORE2H',
 'RAZ_IMCONS_MORE4H',
 'RAZ_XRAY_MORE2H',
 'RAZ_CT_MORE2H1',
 'PSYCH1',
 'PSYCH_WAITINGADM',
 'total_tbs',
 'vert_tbs',
 'pod_tbs',
 'overflow',
 'id',
 'INFLOW_STRETCHER_hist_anomaly',
 'INFLOW_STRETCHER_hist_colour',
 'Infl_Stretcher_cum_hist_anomaly',
 'Infl_Stretcher_cum_hist_colour',
 'INFLOW_AMBULATORY_hist_a

In [21]:
targets = pred_df['target_name'].unique().tolist()
output_df = pd.DataFrame()
for target in targets:
    target_df = forecast_all_vars_with_future[forecast_all_vars_with_future['target_name'] == target][['ds', 'predictions', '0.2', '0.8']].rename(columns={'predictions': target+'_forecast', '0.2': target+'_forecast_lower', '0.8': target+'_forecast_upper'})
    # target_df = pred_df[pred_df['target_name'] == target][['ds', 'forecast_all_vars_with_future']].rename(columns={'forecast_all_vars_with_future': target+'_forecast'})
    target_df = target_df.merge(anomaly_detection_ranges_df[['ds', target+'_yhat', target+'_yhat_lower', target+'_yhat_upper']], on=['ds'], how='left')
    target_df[target+'_anomaly'] = ((target_df[target+'_forecast'] < target_df[target+'_yhat_lower']) | (target_df[target+'_forecast'] > target_df[target+'_yhat_upper'])).map({True: 'yes', False: 'no'})
    #assign a colour based on how the value compares to the yhat and the yhat_lower and yhat_upper. If it's an anomaly, colour is #D13438. If it's between yhat and yhat_upper, colour is #FFB900. If it's between yhat_lower and yhat, colour is #107C10. 
    target_df[target+'_colour'] = target_df.apply(lambda row: '#D13438' if row[target+'_anomaly'] == 'yes' else ('#FFB900' if row[target+'_forecast'] > row[target+'_yhat'] else '#107C10'), axis=1)
    #remove all the columns containing yhat, yhat_lower, yhat_upper from recent_df
    target_df = target_df[[col for col in target_df.columns if not any(sub in col for sub in ['yhat', 'yhat_lower', 'yhat_upper'])]]
    if output_df.empty:
        output_df = target_df
    else:
        output_df = output_df.merge(target_df, on='ds', how='outer')

output_df = output_df.merge(recent_df, on='ds', how='outer')

#Merge output_df with anomaly_detection_ranges_df on 'ds' to align the data, but only keep the rows where ds is in output_df
output_df = output_df.merge(anomaly_detection_ranges_df, on='ds', how='inner')

today_mtl = pd.Timestamp.now(tz="America/Montreal").normalize().tz_localize(None)
output_df['ds_date'] = output_df['ds'].dt.date
# Create new column where yesterday is -1, today is 0, tomorrow is 1, etc.
output_df['day_offset'] = (output_df['ds_date'] - today_mtl.date()).apply(lambda x: x.days)
output_df = output_df.drop(columns=['ds_date'])

output_df.to_csv('ED_Hourly_Forecasts_Anomalies_v1.0.csv', index=False)

output_df.head()
    

Unnamed: 0,ds,INFLOW_STRETCHER_forecast,INFLOW_STRETCHER_forecast_lower,INFLOW_STRETCHER_forecast_upper,INFLOW_STRETCHER_anomaly,INFLOW_STRETCHER_colour,Infl_Stretcher_cum_forecast,Infl_Stretcher_cum_forecast_lower,Infl_Stretcher_cum_forecast_upper,Infl_Stretcher_cum_anomaly,...,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper,day_offset
0,2026-02-19 10:00:00,,,,,,,,,,...,14.949018,0.516989,29.773802,3.907546,-0.990457,8.94066,11.401044,3.317386,19.31688,-1
1,2026-02-19 11:00:00,,,,,,,,,,...,16.386871,1.885582,30.860193,4.652472,-0.169762,9.772839,11.872931,3.422834,20.423222,-1
2,2026-02-19 12:00:00,,,,,,,,,,...,18.823786,4.448782,33.960537,5.538141,0.532,10.903928,12.566954,4.267964,20.568006,-1
3,2026-02-19 13:00:00,,,,,,,,,,...,22.211895,8.608084,36.946444,6.434409,1.957628,11.141959,13.407126,6.048382,21.180993,-1
4,2026-02-19 14:00:00,,,,,,,,,,...,25.517523,11.218617,39.618544,7.152353,2.128322,12.03592,14.185435,6.201688,21.914063,-1


In [22]:
output_df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER_forecast,INFLOW_STRETCHER_forecast_lower,INFLOW_STRETCHER_forecast_upper,INFLOW_STRETCHER_anomaly,INFLOW_STRETCHER_colour,Infl_Stretcher_cum_forecast,Infl_Stretcher_cum_forecast_lower,Infl_Stretcher_cum_forecast_upper,Infl_Stretcher_cum_anomaly,...,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper,day_offset
43,2026-02-21 05:00:00,2.454376,1.229041,3.986084,no,#FFB900,13.649837,10.218807,17.578045,no,...,13.059208,-1.7296,29.029552,2.15934,-2.865674,7.127641,8.257291,0.54058,15.965049,1
44,2026-02-21 06:00:00,2.551854,1.332386,4.177228,no,#FFB900,15.117493,11.729889,19.057751,no,...,12.063197,-2.810197,26.987932,2.100804,-2.668805,7.010137,8.396147,0.482594,15.784412,1
45,2026-02-21 07:00:00,3.081324,1.588449,4.971077,no,#107C10,17.047314,13.507004,20.674072,no,...,12.816111,-1.271639,26.335055,2.189605,-2.891863,7.417071,8.566103,0.852934,16.58366,1
46,2026-02-21 08:00:00,4.438673,2.531039,6.733964,no,#107C10,21.663429,17.59325,26.719681,no,...,14.490466,0.267204,28.592306,2.41013,-2.362745,7.416423,8.730531,0.669386,16.824893,1
47,2026-02-21 09:00:00,5.298821,3.264493,7.773528,no,#107C10,27.705975,23.237453,33.195232,no,...,16.057423,1.213836,30.734179,2.775338,-2.333382,7.857081,8.898098,1.218379,17.808147,1


In [23]:
output_df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER_forecast,INFLOW_STRETCHER_forecast_lower,INFLOW_STRETCHER_forecast_upper,INFLOW_STRETCHER_anomaly,INFLOW_STRETCHER_colour,Infl_Stretcher_cum_forecast,Infl_Stretcher_cum_forecast_lower,Infl_Stretcher_cum_forecast_upper,Infl_Stretcher_cum_anomaly,...,vert_tbs_yhat,vert_tbs_yhat_lower,vert_tbs_yhat_upper,pod_tbs_yhat,pod_tbs_yhat_lower,pod_tbs_yhat_upper,overflow_yhat,overflow_yhat_lower,overflow_yhat_upper,day_offset
43,2026-02-21 05:00:00,2.454376,1.229041,3.986084,no,#FFB900,13.649837,10.218807,17.578045,no,...,13.059208,-1.7296,29.029552,2.15934,-2.865674,7.127641,8.257291,0.54058,15.965049,1
44,2026-02-21 06:00:00,2.551854,1.332386,4.177228,no,#FFB900,15.117493,11.729889,19.057751,no,...,12.063197,-2.810197,26.987932,2.100804,-2.668805,7.010137,8.396147,0.482594,15.784412,1
45,2026-02-21 07:00:00,3.081324,1.588449,4.971077,no,#107C10,17.047314,13.507004,20.674072,no,...,12.816111,-1.271639,26.335055,2.189605,-2.891863,7.417071,8.566103,0.852934,16.58366,1
46,2026-02-21 08:00:00,4.438673,2.531039,6.733964,no,#107C10,21.663429,17.59325,26.719681,no,...,14.490466,0.267204,28.592306,2.41013,-2.362745,7.416423,8.730531,0.669386,16.824893,1
47,2026-02-21 09:00:00,5.298821,3.264493,7.773528,no,#107C10,27.705975,23.237453,33.195232,no,...,16.057423,1.213836,30.734179,2.775338,-2.333382,7.857081,8.898098,1.218379,17.808147,1


In [None]:
dropbox_app_key = os.environ.get("DROPBOX_APP_KEY")
dropbox_app_secret = os.environ.get("DROPBOX_APP_SECRET")
dropbox_refresh_token = os.environ.get("DROPBOX_REFRESH_TOKEN")

# exchange the authorization code for an access token:
token_url = "https://api.dropboxapi.com/oauth2/token"
params = {
    "grant_type": "refresh_token",
    "refresh_token": dropbox_refresh_token,
    "client_id": dropbox_app_key,
    "client_secret": dropbox_app_secret
}
r = requests.post(token_url, data=params)

dropbox_access_token = r.json()['access_token']

dbx = dropbox.Dropbox(dropbox_access_token)

upload(dbx, 'chronos_forecast.csv', '', '',
            'chronos_forecast.csv', overwrite=True)
upload(dbx, 'ED_Hourly_Forecasts_Anomalies_v1.0.csv', '', '',
            'ED_Hourly_Forecasts_Anomalies_v1.0.csv', overwrite=True)
upload(dbx, 'forecast_variable_effects.csv', '', '',
            'forecast_variable_effects.csv', overwrite=True)
upload(dbx, 'forecast_variable_effects_hourly.csv', '', '',
            'forecast_variable_effects_hourly.csv', overwrite=True)

uploaded as b'chronos_forecast.csv'
uploaded as b'ED_Hourly_Forecasts_Anomalies_v1.0.csv'
uploaded as b'forecast_variable_effects.csv'


FileMetadata(client_modified=datetime.datetime(2026, 2, 16, 20, 46, 58), content_hash='e67091084ccd8e1bdd5edfb96548e0e89d3c74f2d3983d49269226560633e641', export_info=NOT_SET, file_lock_info=NOT_SET, has_explicit_shared_members=NOT_SET, id='id:oNSmVCFixyQAAAAAAABxQQ', is_downloadable=True, media_info=NOT_SET, name='forecast_variable_effects.csv', parent_shared_folder_id=NOT_SET, path_display='/forecast_variable_effects.csv', path_lower='/forecast_variable_effects.csv', preview_url=NOT_SET, property_groups=NOT_SET, rev='64b424eaa26997a19c0a3', server_modified=datetime.datetime(2026, 2, 20, 14, 33, 39), sharing_info=NOT_SET, size=2710, symlink_info=NOT_SET)