Read the data in from the NYC Open Data API and from a local file. This assumes the notebook is running in the directory that contains the 'Fire Data' directory.

In [1]:
#!/usr/bin/env python

# make sure to install these packages before running:
# pip install pandas
# pip install sodapy
# pip install geopy
# pip install numpy
# pip install seaborn
# pip install matplotlib

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
#You might need to 'pip install geopy' and it doesn't seem to work on Python 3.7
import geopy.distance
from sodapy import Socrata


# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofnewyork.us", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata("data.cityofnewyork.us",
#                  MyAppToken,
#                  username="user@example.com",
#                  password="AFakePassword")

# Request up to 250,000 rows of dataset "tm6d-hbzd". The API answers with JSON,
# which sodapy converts to a Python list of dictionaries.
results = client.get("tm6d-hbzd", limit = 250000)#limit=300000000)

# Convert to pandas DataFrame (one row per returned record)
results_df = pd.DataFrame.from_records(results)

# Pull the firehouse location records (dataset "hc8x-tcnd", capped at 10,000 rows)
# and keep only the rows that have no missing values in any column.
firehouse_info = client.get("hc8x-tcnd", limit=10000)
firehouse_df = pd.DataFrame.from_records(firehouse_info).dropna()

#Get the firebox location information
# The path uses a forward slash so it resolves on Windows, macOS and Linux alike;
# a backslash separator is only understood as a directory separator on Windows.
# Column names are supplied explicitly via `names`.
firebox_locs = pd.read_csv("Fire Data/Fire Boxes.csv", names=['LONG','LAT','fire_box','nearest_intersection'])



In [2]:
# Preview the first five rows of the raw incident records
results_df.head()

Unnamed: 0,action_taken1_desc,action_taken2_desc,action_taken3_desc,aes_presence_desc,arrival_date_time,borough_desc,co_detector_present_desc,detector_presence_desc,fire_box,fire_origin_below_grade_flag,...,incident_date_time,incident_type_desc,last_unit_cleared_date_time,property_use_desc,standpipe_sys_present_flag,story_fire_origin_count,street_highway,total_incident_duration,units_onscene,zip_code
0,"42 - HazMat detection, monitoring, sampling, &...",51 - Ventilate,86 - Investigate,,2018-07-01T00:00:32.000,2 - Bronx,Yes,,2928,,...,2018-06-30T23:55:29.000,"746 - Carbon monoxide detector activation, no CO",2018-07-01T00:22:57.000,429 - Multifamily dwelling,,,UNDERCLIFF AVE,1648,2,10453
1,"00 - Action taken, other",,,,2018-06-30T23:58:59.000,4 - Brooklyn,,,1021,,...,2018-06-30T23:54:44.000,"300 - Rescue, EMS incident, other",2018-07-01T00:09:29.000,UUU - Undetermined,,,RUTLAND RD,885,1,11203
2,"00 - Action taken, other",,,,2018-06-30T23:58:29.000,2 - Bronx,,,2745,,...,2018-06-30T23:54:13.000,"651 - Smoke scare, odor of smoke",2018-07-01T00:07:50.000,UUU - Undetermined,,,PROSPECT AVE,817,3,10456
3,11 - Extinguishment by fire service personnel,51 - Ventilate,,,2018-06-30T23:58:53.000,2 - Bronx,,,2135,,...,2018-06-30T23:54:08.000,"117 - Commercial Compactor fire, confined to r...",2018-07-01T00:19:16.000,429 - Multifamily dwelling,,,BROWN PL,1508,5,10454
4,"00 - Action taken, other",,,,2018-06-30T23:56:50.000,4 - Brooklyn,,,1877,,...,2018-06-30T23:53:33.000,"710 - Malicious, mischievous false call, other",2018-06-30T23:57:11.000,UUU - Undetermined,,,DEFAULT RECORD FOR SF,218,1,11208


In [3]:
# Parse the three timestamp columns into pandas datetime values
for timestamp_col in ('arrival_date_time', 'incident_date_time', 'last_unit_cleared_date_time'):
    results_df[timestamp_col] = pd.to_datetime(results_df[timestamp_col])

In [4]:
#Limit results to calendar year 2018
# Half-open window [2018-01-01, 2019-01-01): an incident stamped exactly at
# midnight on Jan 1 2018 is kept, and anything from 2019 onwards is excluded.
year_start = pd.to_datetime('2018-01-01 00:00:00')
year_end = pd.to_datetime('2019-01-01 00:00:00')
in_2018 = (results_df['incident_date_time'] >= year_start) & (results_df['incident_date_time'] < year_end)
# .copy() yields an independent frame so that later column assignments do not
# operate on a slice of the unfiltered data (avoids SettingWithCopyWarning).
results_df = results_df[in_2018].copy()

In [5]:
#Convert the columns to appropriate types. Probably more to do here.

# Response time in minutes, from incident creation to arrival.
# .dt.total_seconds() keeps missing timestamps as NaN and does not depend on the
# timedelta's internal resolution; casting the timedelta with pd.to_numeric would
# turn NaT into a huge negative integer and bake in a nanosecond assumption.
results_df['response_time'] = (
    results_df['arrival_date_time'] - results_df['incident_date_time']
).dt.total_seconds() / 60

# Columns that arrive from the API as strings but hold numbers
for numeric_col in ('units_onscene', 'total_incident_duration', 'story_fire_origin_count'):
    results_df[numeric_col] = pd.to_numeric(results_df[numeric_col])

# fire_box is used as a text key (a borough letter is prepended later), so force str
results_df['fire_box'] = results_df['fire_box'].astype('str')

#Convert firehouse values
firehouse_df['latitude'] = pd.to_numeric(firehouse_df['latitude'])
firehouse_df['longitude'] = pd.to_numeric(firehouse_df['longitude'])

In [6]:
# List the distinct borough_desc values. These are the strings matched in the next
# step, where a borough letter is prepended to fire_box so that it lines up with
# the fire_box key in the firebox locations dataframe for merging.
results_df.borough_desc.unique()

array(['2 - Bronx', '4 - Brooklyn', '1 - Manhattan', '5 - Queens',
       '3 - Staten Island'], dtype=object)

In [7]:
#Add the borough code to the firebox column
# Each borough description pattern is paired with the letter to prepend.
# NOTE: the letter is prepended to whatever fire_box currently holds, so running
# this cell a second time would add the letter again.
borough_prefixes = {
    '1 - Manhattan': 'M',
    '2 - Bronx': 'X',
    '3 - Staten Island': 'R',
    '4 - Brooklyn': 'B',
    '5 - Queens': 'Q',
}
for borough_pattern, prefix_letter in borough_prefixes.items():
    in_borough = results_df['borough_desc'].str.match(borough_pattern)
    results_df.loc[in_borough, 'fire_box'] = prefix_letter + results_df.fire_box

In [8]:
from geopy import distance
def findNearestFirestation (Lat, Long):
    """Return the distance in miles from (Lat, Long) to the closest firehouse.

    Every (latitude, longitude) pair in the module-level ``firehouse_df`` is
    measured against the given point with ``distance.distance``. The result is
    capped by a starting value of 1000, which is also what is returned when
    ``firehouse_df`` has no rows or no firehouse is closer than that.
    """
    origin = (Lat, Long)
    # Seed the candidates with the 1000-mile starting value, then add one
    # distance per firehouse; the smallest candidate is the answer.
    candidate_miles = [1000]
    candidate_miles.extend(
        distance.distance(origin, station).miles
        for station in zip(firehouse_df.latitude, firehouse_df.longitude)
    )
    return min(candidate_miles)

In [9]:
# Attach firebox coordinates to every incident by joining on the fire_box key
# (default inner join: incidents without a matching firebox row are dropped).
final_df = results_df.merge(firebox_locs, on='fire_box')
# Make sure both coordinate columns are numeric
for coord_col in ('LAT', 'LONG'):
    final_df[coord_col] = pd.to_numeric(final_df[coord_col])

In [10]:
#Get the nearest fire station
#This code takes a long time to run because it needs to calculate the distance for each firebox to all fire stations and then select the minimum.
#final_df['Distance_To_Nearest_Station'] = 0
#i = 0
#for lat, long in zip (final_df.LAT, final_df.LONG): 
#     dist = findNearestFirestation(lat,long)
#     final_df['Distance_To_Nearest_Station'].iloc[i] = dist
#     i = i + 1


Display the final dataframe. 

In [11]:
# Show the last 100 rows of the merged dataframe
final_df.tail(100)

Unnamed: 0,action_taken1_desc,action_taken2_desc,action_taken3_desc,aes_presence_desc,arrival_date_time,borough_desc,co_detector_present_desc,detector_presence_desc,fire_box,fire_origin_below_grade_flag,...,standpipe_sys_present_flag,story_fire_origin_count,street_highway,total_incident_duration,units_onscene,zip_code,response_time,LONG,LAT,nearest_intersection
169364,64 - Shut down system,,,,2018-01-08 23:09:21,4 - Brooklyn,,,B2389,,...,,,PAERDEGAT 6 ST,905.0,6.0,11236,2.466667,-73.908910,40.630020,Paerdegat Avenue N & Paerdegat 6th St
169365,"00 - Action taken, other",,,,2018-01-08 18:32:38,5 - Queens,,,Q4639,,...,,,158 ST,1013.0,1.0,11357,15.450000,-73.803330,40.794350,Powells Cove Blvd && 158th St
169366,64 - Shut down system,,,,2018-01-08 17:45:32,3 - Staten Island,,,R1274,,...,,,STEUBEN ST,541.0,1.0,10305,8.350000,-74.077100,40.599800,Steuben St & Radcliff Rd
169367,64 - Shut down system,,,,2018-01-08 16:52:03,5 - Queens,,,Q9307,,...,,,NORTHERN BLVD,2556.0,1.0,11368,21.733333,-73.845780,40.760530,Northern Blvd && 126th St
169368,64 - Shut down system,,,,2018-01-08 13:28:28,5 - Queens,,,Q9307,,...,,,NORTHERN BLVD,3028.0,1.0,11368,8.616667,-73.845780,40.760530,Northern Blvd && 126th St
169369,45 - Remove hazard,,,,2018-01-02 18:04:26,5 - Queens,,,Q9307,,...,,,NORTHERN BLVD,904.0,2.0,11368,7.666667,-73.845780,40.760530,Northern Blvd && 126th St
169370,64 - Shut down system,,,,2018-01-08 15:01:48,5 - Queens,,,Q3803,,...,,,213 ST,2742.0,1.0,11364,15.383333,-73.766580,40.752030,53rd Ave && 213th St
169371,64 - Shut down system,,,,2018-01-08 14:23:30,5 - Queens,,,Q3803,,...,,,213 ST,258.0,1.0,11364,3.750000,-73.766580,40.752030,53rd Ave && 213th St
169372,64 - Shut down system,,,,2018-01-08 12:25:38,5 - Queens,,,Q3803,,...,,,213 ST,2394.0,1.0,11364,6.200000,-73.766580,40.752030,53rd Ave && 213th St
169373,64 - Shut down system,,,,2018-01-08 13:22:01,5 - Queens,,,Q6385,,...,,,248 ST,1075.0,1.0,11363,5.550000,-73.740140,40.768940,43rd Ave && 248th St


As we can see below, Staten Island has far fewer reported incidents than the other boroughs, which is interesting.

In [32]:
# Group the merged incidents by borough.
# NOTE(review): the next visible cell calls groupby again instead of reusing
# `grouped` — confirm whether this variable is needed further down.
grouped = final_df.groupby('borough_desc')

In [33]:
# Print the number of incidents recorded for each borough
incidents_per_borough = final_df.groupby('borough_desc').size()
for borough_name, incident_count in incidents_per_borough.items():
    print (borough_name , incident_count)

1 - Manhattan 26836
2 - Bronx 45462
3 - Staten Island 5269
4 - Brooklyn 45959
5 - Queens 45938
