In [32]:
import os
import pandas as pd
from dotenv import load_dotenv
import dropbox
import requests
from utils import upload
import datetime
from zoneinfo import ZoneInfo

# Populate os.environ from the project-level .env file; later cells read
# their API credentials from the environment.
load_dotenv('../.env')

# Capture a single timezone-aware "now" in UTC, then express that same
# instant as Montreal wall-clock time for use in the prompts.
now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
now_local = now_utc.astimezone(tz=ZoneInfo("America/Montreal"))

def call_llm(system_prompt, user_prompt, print_reasoning=True):
    """Send a system/user prompt pair to the chat-completions endpoint and print the reply.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_prompt: Text sent as the ``user`` message.
        print_reasoning: When true, and the response message carries a
            non-empty ``reasoning`` string, print it followed by a separator
            before the answer.

    Returns:
        None. The answer (and optionally the reasoning) is written to stdout.

    Raises:
        KeyError: If ``GROQ_API_KEY`` is not set in the environment.
        requests.HTTPError: If the endpoint answers with an error status.
        RuntimeError: If the response has no usable
            ``choices[0].message.content`` string.
    """
    LLM_ENDPOINT = "https://api.groq.com/openai/v1/chat/completions"
    LLM_API_KEY = os.environ["GROQ_API_KEY"]
    LLM_MODEL = "openai/gpt-oss-120b"

    REQ_TIMEOUT = 300  # seconds
    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # "temperature": 0.5,
        "max_tokens": 5000,
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {LLM_API_KEY}",
    }

    resp = requests.post(LLM_ENDPOINT, headers=headers, json=payload, timeout=REQ_TIMEOUT)
    resp.raise_for_status()
    resp_json = resp.json()

    # Only the answer text is mandatory. A missing or null `content` is
    # reported as a malformed response rather than as a bare KeyError.
    try:
        message = resp_json["choices"][0]["message"]
        text = message["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        raise RuntimeError(f"Unexpected LLM response format: {resp_json}") from e

    # `reasoning` is optional: its absence must not invalidate a good answer,
    # least of all when the caller did not ask to see it.
    reasoning = message.get("reasoning")
    reasoning = reasoning.strip() if isinstance(reasoning, str) else ""

    if print_reasoning and reasoning:
        print(reasoning)
        print("\n\n--------------------------\n\n")
    print(text)

# """You are an AI Emergency Department Operations assistant generating an Emergency Department (ED) Flow Report.
#             Describe yesterday's total inflow compared to what was expected, as well as a very brief comment about how busy or not the last week has been.
#             Also describe how many patients are waiting to be seen compared to what is expected.
#             Your target audience is a layperson with no understanding of statistics and modelling."""

# (f"Current date/time: {now_local} \n\n"
#             f"Recent daily inflow: \n{daily_inflow_markdown_table}\n\n"
#             f"Total patients waiting to see a doctor (TBS): \n {total_tbs_markdown_table}, ")

In [33]:
# Data dictionary for the ED dashboard columns. The descriptions are sent to
# the LLM verbatim (via `ed_metrics_string`), so they must read cleanly.
ed_metrics = {
    'INFLOW_STRETCHER': 'Hourly number of stretcher (non-ambulatory) patient arrivals to triage.',
    'Infl_Stretcher_cum': 'Cumulative stretcher inflow count since midnight.',
    'INFLOW_AMBULATORY': 'Hourly number of ambulatory (walk-in) patient arrivals to triage.',
    'Infl_Ambulatory_cum': 'Cumulative ambulatory inflow count since midnight.',
    'Inflow_Total': 'Total hourly patient arrivals (stretcher + ambulatory).',
    'Inflow_Cum_Total': 'Cumulative total inflow since midnight.',
    'INFLOW_AMBULANCES': 'Hourly number of arrivals brought by ambulance.',
    'Infl_Ambulances_cum': 'Cumulative ambulance arrivals since midnight.',
    'FLS': 'Number of patients presenting with flu-like symptoms.',
    'CUM_ADMREQ': 'Cumulative number of patients with an admission request.',
    'CUM_BA1': 'Cumulative number of admitted patients who have had a ward bed assigned and will therefore soon be transferred to a ward and leave the emergency department.',
    'WAITINGADM': 'Number of admitted patients currently waiting for an inpatient bed.',
    'TTStr': 'Total number of patients occupying a stretcher.',
    'TRG_HALLWAY1': 'Number of patients in the Prepod area awaiting a pod stretcher to be assigned. This is an overflow area for patients recently arriving.',
    'TRG_HALLWAY_TBS': 'Number of Prepod patients waiting to be seen',
    'reoriented_cum': 'Cumulative number of reoriented (redirected) patients since midnight.',
    'reoriented_cum_MD': 'Do not use this metric.',
    'QTRACK1': 'Number of patients currently in QTrack (fast-track).',
    'RESUS': 'Number of patients currently in resuscitation rooms.',
    'Pod_T': 'Total number of patients in all pods (treatment areas).',
    'POD_GREEN': 'Number of patients in Green Pod.',
    'POD_GREEN_TBS': 'Number of Green Pod patients waiting to be seen.',
    'POD_YELLOW': 'Number of patients in Yellow Pod.',
    'POD_YELLOW_TBS': 'Number of Yellow Pod patients waiting to be seen.',
    'POD_ORANGE': 'Number of patients in Orange Pod.',
    'POD_ORANGE_TBS': 'Number of Orange Pod patients waiting to be seen.',
    'POD_CONS_MORE2H': 'Number of pod patients waiting more than 2 hours for a consult.',
    'POD_IMCONS_MORE4H': 'Number of pod patients waiting more than 4 hours for an internal medicine consult.',
    'POD_XRAY_MORE2H': 'Number of pod patients waiting more than 2 hours for X-ray.',
    'POD_CT_MORE2H': 'Number of pod patients waiting more than 2 hours for CT scan.',
    'POST_POD1': 'Number of patients in overflow stretcher areas that we call Family Rooms.',
    'VERTSTRET': 'Number of patients in vertical stretcher zone.',
    'RAZ_TBS': 'Number of RAZ (rapid assessment zone) patients waiting to be seen.',
    'RAZ_LAZYBOY': 'Number of patients in RAZ sitting/observation chairs.',
    'RAZ_WAITINGREZ': 'Number of RAZ patients waiting for results of investigations.',
    'AMBVERT1': 'Number of ambulatory patients.',
    'AMBVERTTBS': 'Number of ambulatory patients waiting to be seen.',
    'QTrack_TBS': 'Number of QTrack patients waiting to be seen.',
    'Garage_TBS': 'Number of patients in overflow/garage area waiting to be seen. We no longer use this metric.',
    'RAZ_CONS_MORE2H': 'Number of RAZ patients waiting more than 2 hours for a consult.',
    'RAZ_IMCONS_MORE4H': 'Number of RAZ patients waiting more than 4 hours for an internal medicine consult.',
    'RAZ_XRAY_MORE2H': 'Number of RAZ patients waiting more than 2 hours for X-ray.',
    'RAZ_CT_MORE2H1': 'Number of RAZ patients waiting more than 2 hours for CT scan.',
    'PSYCH1': 'Number of patients in the psychiatric zone.',
    'PSYCH_WAITINGADM': 'Number of psychiatric patients awaiting admission.',
    'total_tbs': 'Total number of patients currently to be seen across all zones.',
    'vert_tbs': 'Number of vertical (RAZ lazyboy/stretcher and ambulatory) patients currently to be seen.',
    'pod_tbs': 'Number of stretcher (pod) patients currently to be seen.',
    'overflow': 'Number of patients in overflow areas (garage, Prepod, and Postpod/Family Rooms).'
}

# One "**name**: description" line per metric, in dictionary order.
ed_metrics_string = "\n".join(f"**{k}**: {v}" for k, v in ed_metrics.items())



In [34]:
# --- Load today's observations and the expected (forecast) ranges -----------
current_df = pd.read_csv('https://www.dropbox.com/scl/fi/4zmm1bpwtrhrqtvce8aqq/current.csv?rlkey=1wnfrlal5jefwkx4fg7jxl20o&raw=1')
current_df['ds'] = pd.to_datetime(current_df['ds'])

anomaly_df = pd.read_csv('https://www.dropbox.com/scl/fi/fjz0am427gw35sz7l994m/anomaly_detection_ranges.csv?rlkey=lib9w0jz2zei5n566jv76o7ol&raw=1')
anomaly_df['ds'] = pd.to_datetime(anomaly_df['ds'])

# --- Flag hours whose inflow falls outside the expected band ----------------
target_column = 'Inflow_Total'
report_columns = [
    'ds',
    target_column,
    target_column + '_yhat',
    target_column + '_yhat_lower',
    target_column + '_yhat_upper',
    'anomaly',
]

merged_df = pd.merge(current_df, anomaly_df, on='ds', how='left')
merged_df['anomaly'] = (
    (merged_df[target_column] < merged_df[target_column + '_yhat_lower'])
    | (merged_df[target_column] > merged_df[target_column + '_yhat_upper'])
)

# --- Keep only today's rows (Montreal calendar day) -------------------------
montreal_tz = 'America/Montreal'
today_montreal = pd.Timestamp.now(tz=montreal_tz).normalize()

# `ds` is tz-naive and is interpreted as Montreal wall-clock time. With the
# default arguments tz_localize raises on the repeated/skipped hour of a DST
# change, which hourly data hits twice a year; mark such hours as NaT instead.
merged_df['ds_montreal'] = merged_df['ds'].dt.tz_localize(
    montreal_tz, ambiguous='NaT', nonexistent='NaT'
)

# Select "today" by comparing naive calendar dates so that rows in a
# DST-ambiguous hour are not lost from the report.
is_today = merged_df['ds'].dt.normalize() == today_montreal.tz_localize(None)
merged_df_today = merged_df[is_today]
output_df = merged_df_today[report_columns]

output_df.to_csv('hourly-inflow-today.csv', index=False)

# display as markdown table
print(output_df.to_markdown(index=False))

# --- Publish the CSV to Dropbox ---------------------------------------------
dropbox_app_key = os.environ.get("DROPBOX_APP_KEY")
dropbox_app_secret = os.environ.get("DROPBOX_APP_SECRET")
dropbox_refresh_token = os.environ.get("DROPBOX_REFRESH_TOKEN")

missing_credentials = [
    env_name
    for env_name, env_value in (
        ("DROPBOX_APP_KEY", dropbox_app_key),
        ("DROPBOX_APP_SECRET", dropbox_app_secret),
        ("DROPBOX_REFRESH_TOKEN", dropbox_refresh_token),
    )
    if not env_value
]
if missing_credentials:
    raise RuntimeError(f"Missing Dropbox credentials in environment: {', '.join(missing_credentials)}")

# Exchange the long-lived refresh token for a short-lived access token.
token_url = "https://api.dropboxapi.com/oauth2/token"
params = {
    "grant_type": "refresh_token",
    "refresh_token": dropbox_refresh_token,
    "client_id": dropbox_app_key,
    "client_secret": dropbox_app_secret
}
r = requests.post(token_url, data=params, timeout=30)
# Fail with the HTTP error itself rather than a KeyError on 'access_token'.
r.raise_for_status()

dropbox_access_token = r.json()['access_token']

dbx = dropbox.Dropbox(dropbox_access_token)

upload(dbx, 'hourly-inflow-today.csv', '', '',
        'hourly-inflow-today.csv', overwrite=True)

| ds                  |   Inflow_Total |   Inflow_Total_yhat |   Inflow_Total_yhat_lower |   Inflow_Total_yhat_upper | anomaly   |
|:--------------------|---------------:|--------------------:|--------------------------:|--------------------------:|:----------|
| 2025-10-27 19:00:00 |             11 |            12.7233  |                  6.45451  |                  18.9958  | False     |
| 2025-10-27 18:00:00 |             14 |            13.5117  |                  6.94015  |                  20.0328  | False     |
| 2025-10-27 17:00:00 |             15 |            14.7019  |                  8.43458  |                  21.038   | False     |
| 2025-10-27 16:00:00 |              9 |            16.0063  |                  9.5953   |                  22.5404  | True      |
| 2025-10-27 15:00:00 |             19 |            17.0992  |                 10.7087   |                  24.0048  | False     |
| 2025-10-27 14:00:00 |             21 |            17.9149  |                 11.2

FileMetadata(client_modified=datetime.datetime(2025, 10, 27, 23, 47, 35), content_hash='d145d6f1bd1a8c8b8b925da6a9ec65fc40f0df1adc7ffc6c02903c7be93d4d43', export_info=NOT_SET, file_lock_info=NOT_SET, has_explicit_shared_members=NOT_SET, id='id:oNSmVCFixyQAAAAAAABT0Q', is_downloadable=True, media_info=NOT_SET, name='hourly-inflow-today.csv', parent_shared_folder_id=NOT_SET, path_display='/hourly-inflow-today.csv', path_lower='/hourly-inflow-today.csv', preview_url=NOT_SET, property_groups=NOT_SET, rev='6422c862078d57a19c0a3', server_modified=datetime.datetime(2025, 10, 27, 23, 47, 36), sharing_info=NOT_SET, size=1775, symlink_info=NOT_SET)

In [35]:
# Full metric history; `ds` is parsed into pandas timestamps.
allDataWithCalculatedColumns_df = pd.read_csv(
    'https://www.dropbox.com/scl/fi/s83jig4zews1xz7vhezui/allDataWithCalculatedColumns.csv?rlkey=9mm4zwaugxyj2r4ooyd39y4nl&raw=1'
)
allDataWithCalculatedColumns_df['ds'] = pd.to_datetime(allDataWithCalculatedColumns_df['ds'])

# Short alias, plus the last row of the file (as a one-row frame) and its timestamp.
df = allDataWithCalculatedColumns_df
current = df.tail(1)
last_timestamp = df['ds'].iloc[-1]

# Expected value and lower/upper bounds per metric, keyed by `ds`.
anomaly_ranges = pd.read_csv(
    'https://www.dropbox.com/scl/fi/fjz0am427gw35sz7l994m/anomaly_detection_ranges.csv?rlkey=lib9w0jz2zei5n566jv76o7ol&raw=1'
)
anomaly_ranges['ds'] = pd.to_datetime(anomaly_ranges['ds'])

def prepare_metric_table(target_metric):
    """Render one metric's latest value beside its expected range as markdown.

    Left-joins the module-level ``current`` frame with ``anomaly_ranges`` on
    ``ds`` and keeps ``ds``, the metric itself, and its ``_yhat``,
    ``_yhat_lower`` and ``_yhat_upper`` columns.

    Args:
        target_metric: Column name of the metric to report.

    Returns:
        The markdown table text from ``DataFrame.to_markdown(index=False)``.
    """
    range_suffixes = ('_yhat', '_yhat_lower', '_yhat_upper')
    wanted_columns = ['ds', target_metric] + [target_metric + suffix for suffix in range_suffixes]

    current_with_ranges = current.merge(anomaly_ranges, on='ds', how='left')
    return current_with_ranges[wanted_columns].to_markdown(index=False)

# Metrics to report, in prompt order.
# (A shorter alternative: ['total_tbs', 'pod_tbs', 'vert_tbs', 'overflow'].)
metric_names = [
    'INFLOW_STRETCHER', 'Infl_Stretcher_cum', 'INFLOW_AMBULATORY',
    'Infl_Ambulatory_cum', 'Inflow_Total', 'Inflow_Cum_Total',
    'INFLOW_AMBULANCES', 'Infl_Ambulances_cum', 'FLS', 'CUM_ADMREQ',
    'CUM_BA1', 'WAITINGADM', 'TTStr', 'TRG_HALLWAY1', 'TRG_HALLWAY_TBS',
    'reoriented_cum', 'reoriented_cum_MD', 'QTRACK1', 'RESUS', 'Pod_T',
    'POD_GREEN', 'POD_GREEN_TBS', 'POD_YELLOW', 'POD_YELLOW_TBS',
    'POD_ORANGE', 'POD_ORANGE_TBS', 'POD_CONS_MORE2H', 'POD_IMCONS_MORE4H',
    'POD_XRAY_MORE2H', 'POD_CT_MORE2H', 'POST_POD1', 'VERTSTRET', 'RAZ_TBS',
    'RAZ_LAZYBOY', 'RAZ_WAITINGREZ', 'AMBVERT1', 'AMBVERTTBS', 'QTrack_TBS',
    'Garage_TBS', 'RAZ_CONS_MORE2H', 'RAZ_IMCONS_MORE4H', 'RAZ_XRAY_MORE2H',
    'RAZ_CT_MORE2H1', 'PSYCH1', 'PSYCH_WAITINGADM', 'total_tbs', 'vert_tbs',
    'pod_tbs', 'overflow',
]

# One markdown table per metric, separated by a blank line.
tables = [prepare_metric_table(metric_name) for metric_name in metric_names]

tables_string = '\n\n'.join(tables)
print(tables_string)

| ds                  |   INFLOW_STRETCHER |   INFLOW_STRETCHER_yhat |   INFLOW_STRETCHER_yhat_lower |   INFLOW_STRETCHER_yhat_upper |
|:--------------------|-------------------:|------------------------:|------------------------------:|------------------------------:|
| 2025-10-27 19:00:00 |                  8 |                 6.89271 |                       2.05722 |                       11.5545 |

| ds                  |   Infl_Stretcher_cum |   Infl_Stretcher_cum_yhat |   Infl_Stretcher_cum_yhat_lower |   Infl_Stretcher_cum_yhat_upper |
|:--------------------|---------------------:|--------------------------:|--------------------------------:|--------------------------------:|
| 2025-10-27 19:00:00 |                  147 |                     110.6 |                          75.644 |                         145.601 |

| ds                  |   INFLOW_AMBULATORY |   INFLOW_AMBULATORY_yhat |   INFLOW_AMBULATORY_yhat_lower |   INFLOW_AMBULATORY_yhat_upper |
|:--------------------|--

In [None]:
# Brief (2-3 bullet) flow report.
# The instructions are joined with a single space: the previous implicit
# string concatenation ran several sentences together ("modelling.Summarize").
call_llm(
    system_prompt=" ".join([
        "You are an AI Emergency Department Operations assistant generating an Emergency Department (ED) Flow Report.",
        "Summarize today's hourly inflow compared to what was expected.",
        "Also describe how many patients are waiting to be seen compared to what is expected. Refer to this as TBS (Total patients waiting to be seen).",
        "Your target audience is a layperson with no understanding of statistics and modelling.",
        "Summarize the data given in the table with key insights, no markdown tables in the output.",
        "Focus on anomalies and significant deviations from expected values, don't spend time on normal data points.",
        "Keep the response concise and to the point, minimal explanations, short bullets.",
        "Use simple language that a non-expert can understand.",
        "Don't refer to specific numbers unless they are particularly noteworthy.",
        "Don't refer to the model or prediction intervals, just focus on whether inflow was higher or lower than expected.",
        "Limit your response to 2 or 3 bullet points.",
    ]),
    user_prompt=(f"Current date/time: {now_local} \n\n"
                f"Today's hourly inflow: \n{output_df.to_markdown(index=False)}\n\n"
                f"List of metrics and their descriptions: \n {ed_metrics_string}\n\n"
                f"Metrics with anomaly ranges: \n {tables_string}, "
                )
)

We need to produce 2-3 bullet points, focusing on anomalies and significant deviations. Look at hourly inflow vs expected. All anomaly column false. But we need to note where inflow higher or lower than expected notably. Check each hour: compare Inflow_Total to yhat.

14:00: actual 20 vs expected 17.9 -> slightly higher but within range (lower 11.7 upper 24.5). Not notable? maybe moderate high.

13:00: 20 vs 18.6 slightly higher.

12:00: 23 vs 19.0 higher, approaching upper 25.6; maybe notable.

11:00: 20 vs 18.7 slight higher.

10:00: 18 vs 17.1 slight higher.

9:00: 19 vs 13.9 higher, near upper 19.9; could be notable.

8:00: 4 vs 9.8 lower than expected; quite lower.

7:00: 6 vs 6.1 about equal.

6:00: 5 vs 3.6 higher but within.

5:00: 6 vs 2.8 higher.

4:00: 3 vs 3.0 equal.

3:00:5 vs 3.6 higher.

2:00:3 vs 4.1 lower.

1:00:6 vs 4.7 higher.

0:00:1 vs 5.9 lower.

So biggest deviations: 8 AM low (4 vs 9.8), 9 AM high (19 vs 13.9), 12 PM high (23 vs 19), maybe overall TBS (total pat

In [36]:
# Markdown flow report that also highlights other out-of-range metrics.
# The instructions are joined with a single space: the previous implicit
# string concatenation ran several sentences together ("ranges.Your target").
call_llm(
    system_prompt=" ".join([
        "You are an AI Emergency Department Operations assistant generating an Emergency Department (ED) Flow Report.",
        "Summarize today's hourly inflow compared to what was expected.",
        "Also describe how many patients are waiting to be seen compared to what is expected. Refer to this as TBS (Total patients waiting to be seen).",
        "Also highlight any other notable metrics that are outside anomaly ranges.",
        "Your target audience is a layperson with no understanding of statistics and modelling.",
        "Summarize the data given in the table with key insights, no markdown tables in the output.",
        "Focus on anomalies and significant deviations from expected values, don't spend time on normal data points.",
        "Keep the response concise and to the point, minimal explanations, short bullets.",
        "Use simple language that a non-expert can understand.",
        "Don't refer to specific numbers unless they are particularly noteworthy.",
        "Don't refer to the model or prediction intervals, just focus on the actual metric number and a qualitative description of how it fits in the expected range.",
        # "Limit your response to 5 highlights.",
        "Use markdown formatting for the output.",
    ]),
    user_prompt=(f"Current date/time: {now_local} \n\n"
                f"Today's hourly inflow: \n{output_df.to_markdown(index=False)}\n\n"
                f"List of metrics and their descriptions: \n {ed_metrics_string}\n\n"
                f"Metrics with anomaly ranges: \n {tables_string}, "
                ), print_reasoning=False
)



**ED Flow Snapshot – 27 Oct 2025 (19 hrs)**  

- **Hourly arrivals:**  
  - Most hours matched expectations.  
  - **16:00 hr** saw a **sharp dip** (only 9 arrivals vs. the usual ~16). This is the only hour flagged as an out‑lier.  

- **Patients waiting to be seen (TBS):**  
  - **Total waiting now:** **≈24** patients. This sits comfortably inside the normal range (15‑48) but is on the lower‑mid side, indicating the department is not overloaded at the moment.  

- **Key areas showing unusual activity:**  
  - **Resuscitation rooms:** 7 patients – higher than the typical 4‑6 range.  
  - **X‑ray wait in pods:** 9 patients – well above the normal 0‑5 range, suggesting a bottleneck for imaging.  
  - **CT‑scan wait in pods:** 7 patients – slightly above the usual 0‑6, also pointing to imaging delays.  
  - **Rapid‑Assessment consults:** 0 patients waiting – lower than the expected 1‑12, indicating very fast turnover for consults in that zone.  
  - **Overflow (garage/Pre‑pod/Family Rooms