# Data source - [City of Toronto Open Data: Fire Services Basic Incident Details](https://open.toronto.ca/dataset/fire-services-basic-incident-details/)

In [1]:
import numpy as np
np.__version__

'1.18.1'

In [2]:
import pandas as pd
pd.__version__

'1.0.1'

In [3]:
import matplotlib
matplotlib.__version__

'3.1.3'

In [4]:
import matplotlib.pyplot as plt

In [5]:
%matplotlib inline

In [6]:
import requests
import os
import pprint as pp

In [7]:
# make sure the fire_incidents folder is available next to the notebook
DIRECTORY = 'fire_incidents'
CHECK_FOLDER = os.path.isdir(DIRECTORY)

# report an existing folder, otherwise create it
if CHECK_FOLDER:
    print("Folder exists")
else:
    os.makedirs(DIRECTORY)

Folder exists


In [8]:
# build the absolute path to the fire_incidents directory
working_dir = os.getcwd()
path = os.path.join(working_dir, DIRECTORY)

In [24]:
def find_files_by_extension(root, extension, skip_dirs=('.ipynb_checkpoints', 'open_data')):
    """Collect the paths of all files below ``root`` that have ``extension``.

    Parameters
    ----------
    root : str
        Directory to walk recursively.
    extension : str
        Extension including the leading dot (e.g. ``".csv"``); compared
        case-insensitively against the last extension of each file name.
    skip_dirs : iterable of str
        Exact directory names that are not descended into.

    Returns
    -------
    list of str
        Matching file paths in ``os.walk`` order (empty if nothing matches).
    """
    wanted = extension.lower()
    # exact-name membership; a substring test against a string would also
    # skip unrelated folders such as "data" or "open"
    skipped = set(skip_dirs)
    matches = []
    for dirpath, subdirs, files in os.walk(root):
        # prune in place so os.walk does not descend into the skipped folders
        subdirs[:] = [d for d in subdirs if d not in skipped]
        for name in files:
            # splitext copes with names that have no dot or several dots
            if os.path.splitext(name)[1].lower() == wanted:
                matches.append(os.path.join(dirpath, name))
    return matches


# get the data dictionary (the .xlsx file stored with the incident data)
xlsx_paths = find_files_by_extension(path, ".xlsx")
if not xlsx_paths:
    raise FileNotFoundError(f"No .xlsx data dictionary found under {path}")
# as before, the last match in walk order is the one that is used
data_dict_path = xlsx_paths[-1]

# walk through the fire_incidents directory to find all csv files,
# leaving out the merged incidents + weather file this notebook writes itself
file_paths = sorted(
    csv_path
    for csv_path in find_files_by_extension(path, ".csv")
    if os.path.basename(csv_path) != "2011-2018-TORONTO_FIRE_INCIDENTS_WEATHER.csv"
)
# file_paths, a sorted list, holds the paths to the csv's

In [11]:
# widen the column display so the long descriptions of the data dictionary
# are shown in full; the full option key is used because abbreviated keys
# depend on pattern matching against all pandas option names
pd.set_option('display.max_colwidth', 400)
# load the data dictionary found earlier and display it
df_meta = pd.read_excel(data_dict_path)
df_meta

Unnamed: 0,Column,Description,Data Source
0,Incident Number,TFS incident number,TFS RMS System
1,Initial CAD Event Type,"First event type in CAD system of this incident. \nIn situations where the initial CAD event type is medical OR the final incident type is medical, the field is set to medical",TFS RMS System
2,Initial CAD Event Call Type,"First call type in CAD system of this incident. Call type is a group of event types.\nIn situations where the initial CAD event type is medical OR the final incident type is medical, the field is set to medical",TFS RMS System
3,Final Incident Type,"Final incident type.\nIn situations where the initial CAD event type is medical OR the final incident type is medical, the field is set to medical",TFS RMS System
4,Event Alarm Level,Alarm level of the event,TFS RMS System
5,Call Source,"Source of the call to TFS (e.g., 911 call, Alarm, referal agency, etc.)",TFS RMS System
6,Incident Station Area,TFS Station area where the incident occurred,TFS CAD System
7,Incident Ward,"Ward where the incident occurred, when available",TFS CAD System
8,LATITUDE,Latitude of nearest major or minor intersection in the ward of the incident. \nFor medical calls this data is not provided.,"City Of Toronto Open Data, Intersection File"
9,LONGITUDE,"Longitude of nearest major or minor intersection in the ward of the incident. \nIn situations where the initial CAD event type is medical OR the final incident type is medical, the field is set to the Forward Sortation Area (FSA) where the event occured","City Of Toronto Open Data, Intersection File"


In [12]:
# reset the pandas column-width display option to its default
# (full option key, so the call does not depend on abbreviated-name matching)
pd.reset_option('display.max_colwidth')

In [13]:
# load every csv listed in file_paths into a dictionary of dataframes;
# the 3 timestamp columns are parsed as datetime
# NOTE(review): the key is whatever follows the last backslash of the path,
# so it is the bare file name only for Windows-style paths
timestamp_columns = ["TFS Alarm Time", "TFS Arrival Time", "Last TFS Unit Clear Time"]
df_dict = {
    csv_path.split("\\")[-1]: pd.read_csv(csv_path, parse_dates=timestamp_columns)
    for csv_path in file_paths
}

In [14]:
# show the dictionary keys (one entry per csv that was read in)
df_dict.keys()

dict_keys(['2011_Basic_Incident_Details.csv', '2012_Basic_Incident_Details.csv', '2013_Basic_Incident_Details.csv', '2014_Basic_Incident_Details.csv', '2015_Basic_Incident_Details.csv', '2016_Basic_Incident_Details.csv', '2017_Basic_Incident_Details.csv', '2018_Basic_Incident_Details.csv'])

In [15]:
# prove that all the columns are equivalent:
# compare the column labels of every pair of consecutive files, element by element
loaded_frames = list(df_dict.items())  # same order in which the files were read
for (prev_name, prev_df), (next_name, next_df) in zip(loaded_frames, loaded_frames[1:]):
    # the file names start with the 4-digit year, which is used as the label
    label = "{} and {}: ".format(
        os.path.basename(prev_name)[:4], os.path.basename(next_name)[:4]
    )
    print(label, prev_df.columns == next_df.columns)

2011 and 2012:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2012 and 2013:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2013 and 2014:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2014 and 2015:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2015 and 2016:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2016 and 2017:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]
2017 and 2018:  [ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True]


In [16]:
# merge all dataframes into 1, stacking them in the order they were read;
# the frames are taken straight from the dictionary, so each loaded file
# contributes exactly once and no key has to be rebuilt from its path
df = pd.concat(list(df_dict.values()))
df.shape

(975175, 15)

In [17]:
# add a Datetime column holding the alarm time with its time-of-day set to
# midnight, so it can be matched against the daily Toronto weather data
df["Datetime"] = df["TFS Alarm Time"].dt.normalize()
df["Datetime"]

0        2011-01-01
1        2011-01-01
2        2011-01-01
3        2011-01-01
4        2011-01-01
            ...    
133711   2018-12-31
133712   2018-12-31
133713   2018-12-31
133714   2018-12-31
133715   2018-12-31
Name: Datetime, Length: 975175, dtype: datetime64[ns]

In [18]:
# the weather data is expected in the toronto_weather folder
DIRECTORY2 = 'toronto_weather'
CHECK_FOLDER = os.path.isdir(DIRECTORY2)

# only report the state of the folder; nothing is created here
print("Folder exists" if CHECK_FOLDER else "Make sure to run toronto_weather.ipynb first")

Folder exists


In [19]:
# build the absolute path to the toronto_weather directory
current_dir = os.getcwd()
path2 = os.path.join(current_dir, DIRECTORY2)

In [20]:
# load the toronto weather csv, parsing its Datetime column as dates,
# then display the resulting column dtypes
weather_csv = os.path.join(DIRECTORY2, 'average_toronto_weather_stations_2010_2020.csv')
df_weather = pd.read_csv(weather_csv, parse_dates=["Datetime"])
df_weather.dtypes

Datetime     datetime64[ns]
MAX_TEMP            float64
MIN_TEMP            float64
MEAN_TEMP           float64
HDD                 float64
CDD                 float64
RAIN_MM             float64
PRECIP_MM           float64
SNOW_CM             float64
dtype: object

In [21]:
# merge fire incidents and toronto weather data together on the Datetime column;
# how="inner" (the pandas default) keeps only rows whose Datetime is in both frames
df_merge = pd.merge(df, df_weather, how="inner", on="Datetime")
df_merge.columns

Index(['Incident Number', 'Initial CAD Event Type',
       'Initial CAD Event Call Type', 'Final Incident Type',
       'Event Alarm Level', 'Call Source', 'Incident Station Area',
       'Incident Ward', 'LATITUDE', 'Longitude', 'Intersection',
       'TFS Alarm Time', 'TFS Arrival Time', 'Last TFS Unit Clear Time',
       'Persons Rescued', 'Datetime', 'MAX_TEMP', 'MIN_TEMP', 'MEAN_TEMP',
       'HDD', 'CDD', 'RAIN_MM', 'PRECIP_MM', 'SNOW_CM'],
      dtype='object')

In [22]:
# preview the first 5 rows of the merged incidents + weather table
df_merge.head(n=5)

Unnamed: 0,Incident Number,Initial CAD Event Type,Initial CAD Event Call Type,Final Incident Type,Event Alarm Level,Call Source,Incident Station Area,Incident Ward,LATITUDE,Longitude,...,Persons Rescued,Datetime,MAX_TEMP,MIN_TEMP,MEAN_TEMP,HDD,CDD,RAIN_MM,PRECIP_MM,SNOW_CM
0,F11000010,Medical,Medical,89 - Other Medical,1,03 - From Ambulance,342.0,9,43.679099,-79.461761,...,0.0,2011-01-01,11.5,0.9,6.4,11.6,0.0,3.7,8.7,0.0
1,F11000011,Medical,Carbon Monoxide,89 - Other Medical,1,01 - 911,131.0,15,43.726342,-79.396401,...,0.0,2011-01-01,11.5,0.9,6.4,11.6,0.0,3.7,8.7,0.0
2,F11000012,Medical,Medical,89 - Other Medical,1,03 - From Ambulance,324.0,14,43.668548,-79.335324,...,0.0,2011-01-01,11.5,0.9,6.4,11.6,0.0,3.7,8.7,0.0
3,F11000013,FIG - Fire - Grass/Rubbish,Emergency Fire,"03 - NO LOSS OUTDOOR fire (exc: Sus.arson,vand...",1,01 - 911,345.0,9,43.657123,-79.434313,...,0.0,2011-01-01,11.5,0.9,6.4,11.6,0.0,3.7,8.7,0.0
4,F11000014,FAHR - Alarm Highrise Residential,Emergency Fire,"33 - Human - Malicious intent, prank",1,05 - Telephone from Monitoring Agency,142.0,7,43.75984,-79.516182,...,0.0,2011-01-01,11.5,0.9,6.4,11.6,0.0,3.7,8.7,0.0


In [23]:
# save the merged incidents + weather table into the fire_incidents folder
merged_csv_path = os.path.join(DIRECTORY, "2011-2018-TORONTO_FIRE_INCIDENTS_WEATHER.csv")
df_merge.to_csv(merged_csv_path, index=False)