In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import json
import pickle
import pandas as pd

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

In [2]:
with open("../data/models/season_models.pkl", "rb") as f:
    models = pickle.load(f)

In [3]:
import pandas as pd

df = pd.read_csv("../data/raw/ems_2025_merged.csv")
print(df['datetime'].min(), df['datetime'].max())

2024-09-01 00:00:11 2025-08-31 23:59:55


In [14]:
hourly_df = pd.read_csv("../data/raw/hourly_location_avg.csv")
monthly_df = pd.read_csv("../data/raw/monthly_location_avg.csv")

# summaries system prompt
borough_avg = hourly_df.groupby('borough')['response_time_min'].mean().round(2).to_dict()
monthly_summary = monthly_df.groupby('month')['avg_response_min'].mean().round(2).to_dict()

#total calls by borough
total_calls = hourly_df.groupby('borough')['incident_count'].sum().to_dict()

#total overall calls
total_calls_overall = hourly_df['incident_count'].sum()

In [17]:
# delayed average response time
avg_response = hourly_df['response_time_min'].mean()
delayed_threshold = avg_response * 1.5  
#50% above average = delayed

# Borough delay stats
borough_stats = df.groupby('borough')['incident_response_sec'].apply(lambda x: (x > delayed_threshold * 60).sum()).to_dict()

''' defined for model '''''


total_delayed = sum(borough_stats.values())


total_incidents = len(df.dropna(subset=['incident_response_sec']))
delayed_pct = round((total_delayed / total_incidents) * 100, 2)

In [18]:
# time questions
fastest = round(df['incident_response_sec'].min() / 60, 2)
slowest = round(df['incident_response_sec'].max() / 60, 2)
average = round(df['incident_response_sec'].mean() / 60, 2)
median = round(df['incident_response_sec'].median() / 60, 2)

# By borough
borough_fastest = (df.groupby('borough')['incident_response_sec'].min() / 60).round(2).to_dict()
borough_slowest = (df.groupby('borough')['incident_response_sec'].max() / 60).round(2).to_dict()
borough_average = (df.groupby('borough')['incident_response_sec'].mean() / 60).round(2).to_dict()

In [19]:
rf_preds = pd.read_csv("../data/raw/hourly_rf_predictions.csv")
print(rf_preds.shape)
print(rf_preds.head())
print(rf_preds.columns.tolist())

(8759, 2)
         datetime_hour  rf_pred_response_sec
0  2024-09-01 00:00:00            699.621980
1  2024-09-01 01:00:00            817.548291
2  2024-09-01 02:00:00            817.265180
3  2024-09-01 03:00:00            696.186797
4  2024-09-01 04:00:00           1010.834855
['datetime_hour', 'rf_pred_response_sec']


In [None]:
rf_preds = pd.read_csv("../data/raw/hourly_rf_predictions.csv")
rf_preds['rf_pred_response_min'] = round(rf_preds['rf_pred_response_sec'] / 60, 2)

# Summarization for model
hourly_pattern = rf_preds.groupby(rf_preds['datetime_hour'].str[11:13])['rf_pred_response_min'].mean().round(2).to_dict()

# overall stats for model predictions
rf_avg = round(rf_preds['rf_pred_response_min'].mean(), 2)
rf_fastest_hour = min(hourly_pattern, key=hourly_pattern.get)
rf_slowest_hour = max(hourly_pattern, key=hourly_pattern.get)

In [41]:
df2 = pd.read_csv(r"..\utku folder\ALL_BOROUGHS_SARIMA_72H.csv")
df2.shape
df2.columns

Index(['datetime_hour', 'actual_calls', 'predicted_calls', 'dataset_type',
       'borough'],
      dtype='object')

In [42]:
df2

Unnamed: 0,datetime_hour,actual_calls,predicted_calls,dataset_type,borough
0,2025-04-01 12:00:00,59.0,56.178399,test,BRONX
1,2025-04-01 13:00:00,57.0,55.915609,test,BRONX
2,2025-04-01 14:00:00,57.0,57.164498,test,BRONX
3,2025-04-01 15:00:00,78.0,55.674593,test,BRONX
4,2025-04-01 16:00:00,56.0,54.674777,test,BRONX
...,...,...,...,...,...
18655,2025-04-04 07:00:00,,5.898401,forecast,STATEN ISLAND / RICHMOND
18656,2025-04-04 08:00:00,,7.621247,forecast,STATEN ISLAND / RICHMOND
18657,2025-04-04 09:00:00,,9.022261,forecast,STATEN ISLAND / RICHMOND
18658,2025-04-04 10:00:00,,9.714381,forecast,STATEN ISLAND / RICHMOND


In [58]:
time = df2['datetime_hour'].value_counts()
print(time)

datetime_hour
2025-04-01 12:00:00    10
2025-04-01 13:00:00    10
2025-04-01 14:00:00    10
2025-04-01 15:00:00    10
2025-04-01 16:00:00    10
                       ..
2025-08-31 19:00:00     5
2025-08-31 20:00:00     5
2025-08-31 21:00:00     5
2025-08-31 22:00:00     5
2025-08-31 23:00:00     5
Name: count, Length: 3660, dtype: int64


In [49]:
test_df = df2[df2["dataset_type"] == "test"]
forecast_df = df2[df2["dataset_type"] == "forecast"]

# summarize forecast by borough
forecast_summary = forecast_df.groupby("borough")["predicted_calls"].sum().to_string()
#  summarize forecast by hour
forecast_context = forecast_df[["datetime_hour", "borough", "predicted_calls"]].to_string(index=False)


So now GPT handles both:

"Predict response time for a CARD call in Brooklyn"  aymans model



"How many incidents are expected in the next 6 hours?" utku SARIMA model

In [22]:
def predict_response_time(season, borough, dispatch_area, initial_type,
                           hour, day, month, is_weekend, is_holiday,
                           is_rush_hour, initial_severity, zipcode,
                           temperature, precipitation, windspeed, weathercode,
                           closest_station_manhattan_miles,
                           special_events=0, standby=0, held=0):  # defaults to 0
    season = season.lower()
    model = models[season]
    features = model.feature_names_in_
    row = {f: 0 for f in features}
    row["closest_station_manhattan_miles"] = closest_station_manhattan_miles
    row["is_weekend"] = is_weekend
    row["hour"] = hour
    row["is_holiday"] = is_holiday
    row["is_rush_hour_1"] = is_rush_hour
    row["initial_severity"] = initial_severity
    row["zipcode"] = zipcode
    row["precipitation"] = precipitation
    row["windspeed"] = windspeed
    row["weathercode"] = weathercode
    row["day"] = day
    row["month"] = month
    row["temperture"]= temperature
    row["held_Y"] = held
    row["standby_Y"] = standby
    row["special_events_Y"] = special_events
    
    
    #one hot encode the categorical variables
    

    borough_key = f"borough_{borough.upper()}"
    if borough_key in row:
        row[borough_key] = 1
    
    dispatch_key = f"dispatch_area_{dispatch_area.upper()}"
    if dispatch_key in row:
        row[dispatch_key] = 1
    
    type_key = f"initial_type_{initial_type.upper()}"
    if type_key in row:
        row[type_key] = 1
    
    df = pd.DataFrame([row])
    # predict response time in seconds
    
    
    
    prediction_seconds = model.predict(df)[0]
    prediction_minutes = round(float(prediction_seconds) / 60, 2)
    return prediction_minutes

 System prompt = values, knowledge, and personality


 Tools/functions = ability to take actions


 Conversation history = memory within a session


 Data feeding = training dat

# "type": "function"          # tells the API this tool is a callable function
# "name": "predict_response_time"   # the function name the model will call
# "description": "..."        # explains to the model WHEN to use this tool
# "parameters": { ... }       # defines what inputs the function needs

In [26]:
'''''  the menu '''

tools = [
    {
        "type": "function",
        "function": {
            "name": "predict_response_time",
            "description": "Predicts NYC 911 EMS response time in minutes based on call details and conditions.",
            "parameters": {
                "type": "object",
                "properties": {
                    "season":       {"type": "string", "enum": ["winter", "spring", "summer", "fall"]},
                    "borough":      {"type": "string", "description": "BROOKLYN, MANHATTAN, QUEENS, RICHMOND / STATEN ISLAND"},
                    "dispatch_area":{"type": "string", "description": "e.g. M1, K3, Q2, B4"},
                    "initial_type": {"type": "string", "description": "Call type code e.g. CARD, TRAUMA, EDP, SICK"},
                    "hour":         {"type": "integer", "description": "Hour of day 0-23"},
                    "day":          {"type": "integer", "description": "Day of month"},
                    "month":        {"type": "integer", "description": "Month 1-12"},
                    "is_weekend":   {"type": "integer", "enum": [0, 1]},
                    "is_holiday":   {"type": "integer", "enum": [0, 1]},
                    "is_rush_hour": {"type": "integer", "enum": [0, 1]},
                    "initial_severity": {"type": "integer", "description": "Severity level"},
                    "zipcode":      {"type": "integer"},
                    "temperature":  {"type": "number", "description": "Temperature in Fahrenheit"},
                    "precipitation":{"type": "number"},
                    "windspeed":    {"type": "number"},
                    "weathercode":  {"type": "integer"},
                    "closest_station_manhattan_miles": {"type": "number"}
                },
                "required": ["season", "borough", "dispatch_area", "initial_type",
                             "hour", "day", "month", "is_weekend", "is_holiday",
                             "is_rush_hour", "initial_severity", "zipcode",
                             "temperature", "precipitation", "windspeed",
                             "weathercode", "closest_station_manhattan_miles"]
            }
        }
    }]

In [59]:
system_prompt = f"""

You are a helpful assistant that answers questions about NYC 911 EMS response times.
Explain things in simple, everyday language that anyone can understand.
Avoid technical terms, jargon, or data science language.
Keep your answers clear, friendly, and conversational.
The predictions are based on NYC EMS data from 2024-09-01 to 2025-08-31,
so your insights should reflect patterns and trends from that time period.
Only answer questions related to NYC 911 EMS response times. If the user asks 
about anything else, politely let them know you can only help with that topic.
Start the conversation with: "Hi! I can help you with NYC 911 EMS response times. What would you like to know?"

The call volume forecast is only available from April 4th to April 8th 2025.
If the user asks about dates outside this range, let them know predictions 
are only available for that window.

Here is the total predicted 911 call volume per borough over the next 72 hours:
{forecast_summary}

Here is the full hour-by-hour breakdown:
{forecast_context}


You have access to a trained ensemble of 4 seasonal machine learning models (winter, spring, summer, fall) 
built with scikit-learn that predict EMS response times in seconds, which you will always convert 
and present to the user in minutes based on:
- Location: borough, dispatch area, zipcode, distance from Manhattan
- Time: hour, day, month, rush hour, weekend, holiday
- Call type: initial_type codes (e.g. CARD, TRAUMA, EDP, SICK)
- Severity: initial_severity
- Weather: temperature, precipitation, windspeed, weathercode
- Operational flags: special events, standby, held

When making predictions:
- Always state the result in minutes AND provide context (e.g. fast/average/slow relative to typical ranges)
- Mention which seasonal model was used and why it matters

When explaining results to a technical audience:
- Reference feature importance, model behavior, and seasonal differences where relevant
- Be specific about what factors are likely driving the prediction

When generating reports or summaries:
- Switch to clear, concise language accessible to any audience
- Highlight actionable insights

For questions outside the model such as hospital locations, NYC EMS facts, or geography, 
use your own training knowledge to answer as accurately as possible.

Always ask for missing inputs conversationally — start with season, borough, call type, and hour, 
then gather remaining details naturally. Never dump a list of 10 questions at once.

When asked general questions like 'which season is best', run the prediction function 
across all 4 seasons using typical average values and compare the results automatically 
without asking the user for input.

You also have access to the following real data summaries:

Average response time by borough (in minutes):
{borough_avg}

Average response time by month:
{monthly_summary}

Use this data when answering questions about trends, borough comparisons, 
or monthly patterns. Always present numbers in minutes.

Total 911 calls by borough (Sep 2024 - Aug 2025):
{total_calls}

Total calls overall: {total_calls_overall}

Delay analysis (a call is considered delayed if response time exceeds {round(delayed_threshold, 1)} minutes):
- Total delayed calls: {total_delayed}
- Percentage of delayed calls: {delayed_pct}%
- Delayed calls by borough: {borough_stats}

Key statistics from the actual dataset (in minutes):
- Fastest response time: {fastest} minutes
- Slowest response time: {slowest} minutes  
- Average response time: {average} minutes
- Median response time: {median} minutes

By borough:
- Fastest per borough: {borough_fastest}
- Slowest per borough: {borough_slowest}
- Average per borough: {borough_average}

Common call type mappings for non-technical users:
- Heart attack / cardiac arrest  CARD
- Broken bone / fracture INJMIN or INJMAJ
- Breathing difficulty  DIFFBR
- Unconscious person UNC
- Car accident  MVA
- Stroke CVA
- Psychiatric emergency EDP
- Stabbing STAB
- Shooting SHOT
- Sick person SICK

When a user describes a situation in plain English, automatically map it to the 
correct call type code without asking them to specify the code.

RF model hourly prediction summary:
- Overall average predicted response time: {rf_avg} minutes
- Fastest hour of day: {rf_fastest_hour}:00 ({hourly_pattern[rf_fastest_hour]} min avg)
- Slowest hour of day: {rf_slowest_hour}:00 ({hourly_pattern[rf_slowest_hour]} min avg)
- Average predicted response by hour: {hourly_pattern}
"""

In [None]:
conversation_history = []

def chat(user_message):
    conversation_history.append({"role": "user", "content": user_message})
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": system_prompt},*conversation_history],
        tools=tools,
        tool_choice="auto")
    message = response.choices[0].message
    ## If chatbot wants to call the model respond with a message like:
    if message.tool_calls:
        conversation_history.append(message)
        
        for tool_call in message.tool_calls:
            args = json.loads(tool_call.function.arguments)
            result = predict_response_time(**args)
            
            conversation_history.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": f"Predicted response time: {result} minutes"})
        
        # GPT explains the result
        follow_up = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": system_prompt}] + conversation_history
        )
        assistant_message = follow_up.choices[0].message.content
    else:
        assistant_message = message.content
    
    conversation_history.append({"role": "assistant", "content": assistant_message})
    return assistant_message


In [61]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Exiting chat.")
        break
    reply = chat(user_input)
    print(f"Bot: {reply}\n")

Bot: I currently only have the predicted call volume information for April 4th to April 8th, 2025. If you want insights or specifics within those dates, feel free to ask! For complete data from the whole month of April, I don't have that available at the moment. Let me know if there's anything else you'd like to know!

Exiting chat.


In [62]:
# predict the amount of calls for diiferent borough 
# r2 square score for times siers sarimma x 
# Time series model — looks at patterns over time (trends, seasonality, cycles)
# great at answering "what will average response times look like next month?"

# i cant add his model to the chatbot because it only looks at time patterns and not the other features, so it wouldnt be a fair comparison to the random forest model which uses all the features.
# doesn't use features like borough or call type — just historical time patterns
# ----    IGNORE    ----
# respond with details on injury severity, weather conditions, and time of day to provide context for the prediction.
# respond with ems times are needed the most during rush hour on weekdays in Manhattan, especially for severe cardiac calls, and that bad weather can further increase response times.
# make a little  filtered data frame i used grouped by == table shows that average response time by hour location. 
# sampled month to month
