In [132]:
import pandas as pd

In [133]:
# Load the raw EMS call export and preview the first rows.
RAW_CALLS_PATH = '../data/Winchester_EMS_calls_RAW.xls'
calls = pd.read_excel(RAW_CALLS_PATH)
calls.head()

Unnamed: 0,IncidentNumber,CallDescription,Longitude,Latitude,CallDateTime
0,16-048239,Sick Person,-78.164122,39.171316,2016-12-01 05:35:23
1,16-048253,Falls,-78.170729,39.16761,2016-12-01 10:03:51
2,16-048320,Falls,-78.159111,39.17552,2016-12-01 16:58:41
3,16-048340,Sick Person,-78.186034,39.138891,2016-12-01 20:42:58
4,16-048354,Sick Person,-78.165961,39.187564,2016-12-01 23:16:32


In [134]:
# IncidentNumber is not needed for training, so it is removed here.
# errors='ignore' keeps this cell re-runnable: `calls` is reassigned in place,
# so on a second run the column is already gone and a plain drop() would raise
# KeyError.
calls = calls.drop(columns=['IncidentNumber'], errors='ignore')
# Make sure CallDateTime is a datetime column (a no-op if it was already
# parsed as one when the file was read).
calls['CallDateTime'] = pd.to_datetime(calls['CallDateTime'])
calls.head()

Unnamed: 0,CallDescription,Longitude,Latitude,CallDateTime
0,Sick Person,-78.164122,39.171316,2016-12-01 05:35:23
1,Falls,-78.170729,39.16761,2016-12-01 10:03:51
2,Falls,-78.159111,39.17552,2016-12-01 16:58:41
3,Sick Person,-78.186034,39.138891,2016-12-01 20:42:58
4,Sick Person,-78.165961,39.187564,2016-12-01 23:16:32


In [135]:
# List every distinct raw call description present in the data
unique_descriptions = calls['CallDescription'].unique()
unique_descriptions

array(['Sick Person', 'Falls', 'Unknown Problem/Person Down',
       'Overdose/Poisoning/Ingestion', 'Chest Pain (Non-Traumatic)',
       'Hemorrhage/Laceration', 'Traumatic Injury',
       'Unconscious/Fainting/Near-Fainting', 'Cardiac Arrest/Death',
       'Z-Other', 'Back Pain (Non-Traumatic)',
       'Traffic/Transportation Incident', 'Heart Problems/AICD',
       'Breathing Problem', 'Pregnancy/Childbirth/Miscarriage',
       'Abdominal Pain/Problems', 'Convulsions/Seizure',
       'Diabetic Problem', 'Stroke/CVA', 'Burns/Explosion',
       'Allergic Reaction/Stings',
       'Carbon Monoxide/Hazmat/Inhalation/CBRN',
       'No Other Appropriate Choice',
       'Industrial Accident/Inaccessible Incident/Other Entrapments (Non-Vehicle)',
       'Assault', 'Headache',
       'Psychiatric Problem/Abnormal Behavior/Suicide Attempt',
       'Stab/Gunshot Wound/Penetrating Trauma', 'Animal Bite',
       'Transfer/Interfacility/Palliative Care', 'Medical Alarm',
       'Choking', 'Eye Pro

In [136]:
# Count how many calls carry each raw description
description_counts = calls['CallDescription'].value_counts()
description_counts

CallDescription
Sick Person                                                                  8556
Breathing Problem                                                            4536
Falls                                                                        4367
Chest Pain (Non-Traumatic)                                                   3397
Unconscious/Fainting/Near-Fainting                                           2809
No Other Appropriate Choice                                                  2385
Z-Other                                                                      1906
Convulsions/Seizure                                                          1699
Hemorrhage/Laceration                                                        1543
Traffic/Transportation Incident                                              1478
Abdominal Pain/Problems                                                      1251
Overdose/Poisoning/Ingestion                                                  942


In [137]:
# Combine some of the call descriptions into more general categories
def combine_call_descriptions(call_type, mapping):
    """Return the category that a raw call description belongs to.

    Parameters
    ----------
    call_type
        The raw call description to classify.
    mapping : dict
        Maps a category name to a collection of raw descriptions that belong
        to it. Categories are checked in the dict's iteration order and the
        first one containing ``call_type`` wins.

    Returns
    -------
    The matching category name, or the string "Other" when no category in
    ``mapping`` contains ``call_type``.
    """
    # `descriptions` rather than `type`, so the builtin `type` is not shadowed.
    for category, descriptions in mapping.items():
        if call_type in descriptions:
            return category
    return "Other"

In [138]:
# Fine-grained grouping: category name -> raw call descriptions it absorbs.
# Raw descriptions that appear in no list (for example 'Z-Other') are the ones
# combine_call_descriptions() reports as "Other".
mapping_specific = {
    "General Medical Issue": [
        "Sick Person",
        "Unknown Problem/Person Down",
        "Medical Alarm",
        "No Other Appropriate Choice",
    ],
    "Cardiac & Circulatory Issues": [
        "Chest Pain (Non-Traumatic)",
        "Cardiac Arrest/Death",
        "Heart Problems/AICD",
        "Stroke/CVA",
    ],
    "Respiratory Issues": [
        "Breathing Problem",
        "Choking",
        "Carbon Monoxide/Hazmat/Inhalation/CBRN",
    ],
    "Neurological Issues": [
        "Convulsions/Seizure",
        "Headache",
    ],
    "Diabetic & Metabolic Issues": [
        "Diabetic Problem",
        "Heat/Cold Exposure",
    ],
    "Pregnancy & Birth-Related": [
        "Pregnancy/Childbirth/Miscarriage",
    ],
    "Falls & Minor Trauma": [
        "Falls",
        "Back Pain (Non-Traumatic)",
        "Traumatic Injury",
    ],
    "Serious Trauma & Wounds": [
        "Stab/Gunshot Wound/Penetrating Trauma",
        "Hemorrhage/Laceration",
        "Burns/Explosion",
    ],
    "Vehicle & Industrial Accidents": [
        "Traffic/Transportation Incident",
        "Industrial Accident/Inaccessible Incident/Other Entrapments (Non-Vehicle)",
        "Electrocution/Lightning",
    ],
    "Assault & Violence": [
        "Assault",
        "Animal Bite",
    ],
    "Drowning & Environmental Exposure": [
        "Drowning/Diving/SCUBA Accident",
    ],
    "Mental Health & Suicide": [
        "Psychiatric Problem/Abnormal Behavior/Suicide Attempt",
    ],
    "Substance-Related Issues": [
        "Overdose/Poisoning/Ingestion",
        "Allergic Reaction/Stings",
    ],
    "Non-Urgent Transport & Checks": [
        "Transfer/Interfacility/Palliative Care",
        "Well Person Check",
        "Standby",
    ],
}

# Coarse grouping: each general category is the in-order concatenation of the
# fine-grained groups named here, so the two mappings cannot drift apart.
general_to_specific = {
    "Medical Emergencies": [
        "General Medical Issue",
        "Cardiac & Circulatory Issues",
        "Respiratory Issues",
        "Neurological Issues",
        "Diabetic & Metabolic Issues",
        "Pregnancy & Birth-Related",
    ],
    "Trauma & Injury": [
        "Falls & Minor Trauma",
        "Serious Trauma & Wounds",
        "Vehicle & Industrial Accidents",
        "Assault & Violence",
        "Drowning & Environmental Exposure",
    ],
    "Behavioral & Psychiatric Issues": [
        "Mental Health & Suicide",
    ],
    "Non-Urgent Calls": [
        "Substance-Related Issues",
        "Non-Urgent Transport & Checks",
    ],
}

mapping_general = {
    general_category: [
        description
        for specific_category in specific_categories
        for description in mapping_specific[specific_category]
    ]
    for general_category, specific_categories in general_to_specific.items()
}

In [139]:
# Recode the raw CallDescription column once per mapping: first with the
# fine-grained categories, then with the coarse ones.
calls_specific_col, calls_general_col = (
    calls['CallDescription'].apply(combine_call_descriptions, mapping=category_mapping)
    for category_mapping in (mapping_specific, mapping_general)
)

In [140]:
# Build one frame per mapping: identical to `calls` except that
# CallDescription holds the recoded category. `calls` itself is not modified.
calls_specific = calls.assign(CallDescription=calls_specific_col)
calls_general = calls.assign(CallDescription=calls_general_col)

In [141]:
# Write the two recoded frames and the un-recoded `calls` frame to CSV,
# leaving the index out of each file.
for frame_to_save, csv_path in (
    (calls_specific, '../data/clean/calls_specific.csv'),
    (calls_general, '../data/clean/calls_general.csv'),
    (calls, '../data/clean/calls_full.csv'),
):
    frame_to_save.to_csv(csv_path, index=False)

FROM HERE DOWN IS SEBASTIAN'S WORK ON CLASSIFYING EACH CALL INTO ITS DISTRICT

In [142]:
import geopandas as gpd

# Turn every call into a point (longitude = x, latitude = y) in EPSG:4326.
# A copy of `calls` is passed on purpose: depending on the pandas/geopandas
# versions in use, building a GeoDataFrame directly from `calls` can attach the
# new `geometry` column to `calls` itself, and `calls` is reused further down
# as a plain table.
gdf_points = gpd.GeoDataFrame(
    calls.copy(),
    geometry=gpd.points_from_xy(calls.Longitude, calls.Latitude),
    crs="EPSG:4326",
)

# District polygons.
# NOTE(review): the join below assumes this layer uses the same CRS as the
# points — confirm, or reproject with districts.to_crs(gdf_points.crs).
districts = gpd.read_file('../data/First_Due_Areas.geojson')

# Left join keeps every call; a call gets the attributes of the district
# polygon it lies within, and calls within no polygon keep missing values in
# the district columns.
classified = gpd.sjoin(gdf_points, districts, how="left", predicate="within")



In [143]:
# Preview the first 100 joined rows
classified.iloc[:100]



Unnamed: 0,CallDescription,Longitude,Latitude,CallDateTime,geometry,index_right,OBJECTID,NAME,STATION_NUM,SHAPESTArea,SHAPESTLength
0,Sick Person,-78.164122,39.171316,2016-12-01 05:35:23,POINT (-78.16412 39.17132),3.0,4.0,SOUTH END,5,4.712386e+07,54023.736232
1,Falls,-78.170729,39.167610,2016-12-01 10:03:51,POINT (-78.17073 39.16761),3.0,4.0,SOUTH END,5,4.712386e+07,54023.736232
2,Falls,-78.159111,39.175520,2016-12-01 16:58:41,POINT (-78.15911 39.17552),0.0,1.0,FRIENDSHIP,1,5.355029e+07,45003.624106
3,Sick Person,-78.186034,39.138891,2016-12-01 20:42:58,POINT (-78.18603 39.13889),2.0,3.0,SHAWNEE,4,9.493346e+07,48784.408074
4,Sick Person,-78.165961,39.187564,2016-12-01 23:16:32,POINT (-78.16596 39.18756),1.0,2.0,ROUSS,2,6.209217e+07,44937.483837
...,...,...,...,...,...,...,...,...,...,...,...
95,Unconscious/Fainting/Near-Fainting,-78.168421,39.175851,2016-12-07 15:52:33,POINT (-78.16842 39.17585),3.0,4.0,SOUTH END,5,4.712386e+07,54023.736232
96,Sick Person,-78.172269,39.186694,2016-12-07 14:28:30,POINT (-78.17227 39.18669),1.0,2.0,ROUSS,2,6.209217e+07,44937.483837
97,Breathing Problem,-78.180535,39.197024,2016-12-08 21:30:47,POINT (-78.18054 39.19702),1.0,2.0,ROUSS,2,6.209217e+07,44937.483837
98,Breathing Problem,-78.172535,39.168882,2016-12-08 18:05:36,POINT (-78.17253 39.16888),3.0,4.0,SOUTH END,5,4.712386e+07,54023.736232


In [144]:
# Keep only what the district-level analysis needs: the call description, its
# timestamp, and the district name (NAME) picked up in the spatial join.
district_columns = ['CallDescription', 'CallDateTime', 'NAME']
calls_by_district = classified[district_columns]
calls_by_district.to_csv('../data/clean/calls_by_district.csv', index=False)


In [145]:
# Derive calendar features from each call's timestamp. The day is split into
# eight 3-hour slots: 00:00–03:00, 03:00–06:00, and so on.
print(type(calls_by_district['CallDateTime'][0]))

# Work on a copy so calls_by_district is left untouched, and re-parse the
# timestamp column so the .dt accessors below are guaranteed to work.
calls_by_district_and_time_segment = calls_by_district.copy()
calls_by_district_and_time_segment['CallDateTime'] = pd.to_datetime(
    calls_by_district_and_time_segment['CallDateTime']
)
call_timestamps = calls_by_district_and_time_segment['CallDateTime']
iso_calendar = call_timestamps.dt.isocalendar()

# 1. Name of the day of the week
calls_by_district_and_time_segment['weekday'] = call_timestamps.dt.day_name()

# 2. ISO week number (1–53) and the ISO year that week belongs to. Both come
#    from the same ISO calendar, so around New Year `year` can differ from the
#    calendar year of the timestamp.
calls_by_district_and_time_segment['week_of_year'] = iso_calendar.week
calls_by_district_and_time_segment['year'] = iso_calendar.year

# 3. 3-hour time segment: hour // 3 yields a slot number 0–7, which is then
#    translated to its label by position in segment_labels.
segment_index = call_timestamps.dt.hour // 3
segment_labels = [
    'MORNING_1',    # 00:00–03:00
    'MORNING_2',    # 03:00–06:00
    'MORNING_3',    # 06:00–09:00
    'MORNING_4',    # 09:00–12:00
    'AFTERNOON_1',  # 12:00–15:00
    'AFTERNOON_2',  # 15:00–18:00
    'AFTERNOON_3',  # 18:00–21:00
    'AFTERNOON_4',  # 21:00–00:00
]
slot_to_label = dict(enumerate(segment_labels))
calls_by_district_and_time_segment['time_segment'] = segment_index.map(slot_to_label)




<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [146]:


# Reduce the frame to the feature columns and export it. CallDateTime is
# dropped here, so from this cell on the frame no longer carries the raw
# timestamp.
# (An earlier variant of this cell exported the same columns without 'year' to
# '../data/clean/calls_by_district_and_time_segment.csv'.)
segment_feature_columns = ['CallDescription', 'NAME', 'weekday', 'week_of_year', 'time_segment', 'year']
calls_by_district_and_time_segment = calls_by_district_and_time_segment[segment_feature_columns]
calls_by_district_and_time_segment.to_csv(
    '../data/clean/calls_by_district_and_time_segment_with_year.csv',
    index=False,
)

In [147]:
import pandas as pd

# Load the weather table and give it a plain calendar-date key derived from
# its DATE column; that key is what the call frames are joined on.
weather = pd.read_csv("../data/clean/weather_clean.csv")
weather['DATE'] = pd.to_datetime(weather['DATE'])
weather['date'] = weather['DATE'].dt.date

# Call frames that get a weather-enriched counterpart, keyed by variable name.
# calls_by_district_and_time_segment is left out: by this point it no longer
# has the CallDateTime column that the date key is built from.
call_frames = {
    'calls_by_district': calls_by_district,
    'calls': calls,
    'calls_general': calls_general,
    'calls_specific': calls_specific,
}
call_df_names = list(call_frames)

# For each frame: add the date key, left-join the weather columns, publish the
# result as `<name>_weather`, and write it to CSV.
# NOTE(review): if weather_clean.csv holds more than one row per date, the left
# merge repeats each call once per matching weather row — confirm it is one
# row per day.
for name in call_df_names:
    print(f"doing {name}")
    call_times = pd.to_datetime(call_frames[name]['CallDateTime'])

    # assign() works on a copy, so the source frame itself is not modified.
    merged_df = (
        call_frames[name]
        .assign(CallDateTime=call_times, date=call_times.dt.date)
        .merge(weather, on='date', how='left')
    )

    # Expose the merged frame as a notebook-level variable, e.g. `calls_weather`.
    new_name = f"{name}_weather"
    globals()[new_name] = merged_df

    merged_df.to_csv(f"../data/clean/{new_name}.csv", index=False)


doing calls_by_district
doing calls
doing calls_general
doing calls_specific
