In [None]:
# lets start by getting the data
import csv
import math
import matplotlib.pyplot as plt
from collections import defaultdict
from collections import Counter
from datetime import datetime
from operator import itemgetter
# Dataset url:
# https://data.sfgov.org/Public-Safety/Fire-Department-Calls-for-Service/nuek-vuh3

# Flat list of cleaned per-unit call records; filled while the CSV is read.
data = []

# Seconds in one hour; the dispatch windows below are multiples of this.
hour_seconds = 60 * 60

# Dispatch timestamps per station area number, one dict per window length
# (trailing 1 hour, trailing 24 hours, trailing 48 hours).
stationDispatches, station24HrDispatches, station48to24HrDispatches = {}, {}, {}

def second_difference(end,start):
    """Return the whole seconds elapsed from `start` to `end`, or None.

    Both arguments are timestamp strings such as '09/05/2014 03:33:20 AM'.
    None is returned when `end` is empty, when either timestamp falls on
    04/25/2016, or when `end` lies before `start`.
    """
    stamp_format = '%m/%d/%Y %I:%M:%S %p'
    excluded_day = (2016, 4, 25)

    # The start stamp is parsed unconditionally, so a malformed start raises
    # even when there is no end stamp to compare it with.
    began = datetime.strptime(start, stamp_format)
    if len(end) == 0:
        return None
    finished = datetime.strptime(end, stamp_format)

    if (began.year, began.month, began.day) == excluded_day:
        return None
    if (finished.year, finished.month, finished.day) == excluded_day:
        return None

    elapsed = (finished - began).total_seconds()
    if elapsed < 0.0:
        return None
    return int(elapsed)
    
# Read every call record, ordered by dispatch time so that the per-station
# sliding windows below only ever have to drop entries from the front.
with open("../../Fire_Department_Calls_for_Service_2012-2016.csv", "rb") as csvfile:
    reader = csv.DictReader(csvfile,delimiter=",")
    for row in sorted(reader, key=lambda x:datetime.strptime(x["Dispatch DtTm"], '%m/%d/%Y %I:%M:%S %p')):
        CallDateSplit = row["Call Date"].split('/')
        # Re-order MM/DD/YYYY into YYYY-MM-DD.
        CallDate = CallDateSplit[2]+'-'+CallDateSplit[0]+"-"+CallDateSplit[1]
        if CallDateSplit[2] == '2017':
            continue
        # Durations (in seconds) of the three stages of a response; a row is
        # dropped as soon as any stage cannot be measured.
        initiateToEntry = second_difference(row['Entry DtTm'],row['Received DtTm'])
        entryToDispatch = second_difference(row['Dispatch DtTm'],row['Entry DtTm'])
        dispatchToArrival = second_difference(row['On Scene DtTm'],row['Dispatch DtTm'])
        if initiateToEntry is None or entryToDispatch is None or dispatchToArrival is None:
            continue
        ReceivedDtTm = datetime.strptime(row["Received DtTm"], '%m/%d/%Y %I:%M:%S %p') # Date and time of call is received at the 911 Dispatch Center.
        DispatchDtTm = datetime.strptime(row["Dispatch DtTm"], '%m/%d/%Y %I:%M:%S %p') # Date and time the unit was dispatched (this is also the sort key above).
        OnSceneDtTm = datetime.strptime(row["On Scene DtTm"], '%m/%d/%Y %I:%M:%S %p') # Date and time the unit records arriving to the location of the incident
        CallFinalDisposition = row["Call Final Disposition"]
        if row["Station Area"] == '' or row["Station Area"] == 'F3':
            continue
        StationArea = int(row["Station Area"])

        # Sliding windows of this station's dispatch times: append the
        # current dispatch, then drop everything older than the window.
        hourWindow = stationDispatches.setdefault(StationArea, [])
        hourWindow.append(DispatchDtTm)
        while (DispatchDtTm - hourWindow[0]).total_seconds() > hour_seconds:
            hourWindow.pop(0)
        dayWindow = station24HrDispatches.setdefault(StationArea, [])
        dayWindow.append(DispatchDtTm)
        while (DispatchDtTm - dayWindow[0]).total_seconds() > hour_seconds*24:
            dayWindow.pop(0)
        twoDayWindow = station48to24HrDispatches.setdefault(StationArea, [])
        twoDayWindow.append(DispatchDtTm)
        while (DispatchDtTm - twoDayWindow[0]).total_seconds() > hour_seconds*48:
            twoDayWindow.pop(0)

        # -1 marks a missing box or one containing 'AI'.
        Box = -1
        if row['Box'] is not None and len(row['Box']) > 0 and 'AI' not in row['Box']:
            Box = int(row['Box'])
        data.append({
            "CallNumber":row["Call Number"],
            "CallType":row["Call Type"],
            "CallDate":CallDate,
            "ReceivedDtTm":ReceivedDtTm,
            "DispatchDtTm":DispatchDtTm,
            "OnSceneDtTm":OnSceneDtTm,
            "InitiateToEntry":initiateToEntry,
            "EntryToDispatch":entryToDispatch,
            "DispatchToArrival":dispatchToArrival,
            "Box":Box,
            "CallFinalDisposition":CallFinalDisposition,
            "StationArea":StationArea,
            "FinalPriority":int(row["Final Priority"]),
            "CallTypeGroup":row["Call Type Group"], #Call types are divided into four main groups: Fire, Alarm, Potential Life Threatening and Non Life Threatening.,
            "UnitType":row["Unit Type"],
            "UnitId":row["Unit ID"],
            "Battalion":int(row['Battalion'][1:]),
            "NeighborhoodDistrict":row["Neighborhood  District"],
            "Location":row["Location"],
            "StationDispatches":len(hourWindow),
            "Station24HrDispatches":len(dayWindow),
            # Only the dispatches that are at least 24 hours old count here.
            # The age of each entry x is tested individually; testing the
            # oldest entry for every x made the count all-or-nothing.
            "Station48to24HrDispatches":len([x for x in twoDayWindow if
                 (DispatchDtTm - x).total_seconds() >= hour_seconds*24])
        })


In [1]:
import json
import cytoolz.curried as tlz
# Fire station records from the JSON file, grouped by their 'station' value.
firestations = {}
with open('fire_station_data.json') as f:
    firestations = tlz.groupby(lambda record: record['station'], json.load(f))

IOError: [Errno 2] No such file or directory: './data/fire_station_data.json'

In [None]:
# Author: Wayne Dyck
import math
def haversine(origin, destination):
    """Return the great-circle distance in km between two (lat, lon) pairs.

    Both points are given in degrees; the haversine formula is evaluated on
    a sphere with a radius of 6371 km.
    """
    earth_radius_km = 6371
    (lat_a, lon_a), (lat_b, lon_b) = origin, destination

    sin_half_dlat = math.sin(math.radians(lat_b - lat_a) / 2)
    sin_half_dlon = math.sin(math.radians(lon_b - lon_a) / 2)
    cos_lat_a = math.cos(math.radians(lat_a))
    cos_lat_b = math.cos(math.radians(lat_b))

    # Haversine of the central angle between the two points.
    a = sin_half_dlat * sin_half_dlat + cos_lat_a \
        * cos_lat_b * sin_half_dlon * sin_half_dlon
    central_angle = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return earth_radius_km * central_angle

def getlatlonpair(location):
    """Parse a '(lat, lon)' string into a [lat, lon] list of floats."""
    fields = location.strip("()").split(",")
    return [float(fields[0]), float(fields[1])]

In [None]:
# Multiple units can be called out for each call therefore multiple lines can be created for each call.
# We decided to group the data by call number and extract the relevant data.
import cytoolz.curried as tlz
import io
from operator import itemgetter
data_grouped = tlz.groupby(lambda x: x['CallNumber'],data)
for d in data_grouped:
    # The first responder is the unit with the shortest total time from the
    # call being received to arriving on scene. min() keeps the earliest
    # line on ties, like a strict "faster than" comparison would.
    line = min(data_grouped[d],
               key=lambda l: l['InitiateToEntry']+l['EntryToDispatch']+l['DispatchToArrival'])
    lat, lon = getlatlonpair(line['Location'])

    # Station areas 47 and 94 get no distance (presumably they have no entry
    # in firestations -- TODO confirm). Only the distance is skipped: every
    # other field still comes from the first responder, so the record is
    # never left half-filled or mixed with values from a slower unit.
    if line['StationArea'] in [47,94]:
        directDistance = -1
    else:
        station = firestations[str(line['StationArea'])][0]
        directDistance = haversine((lat, lon),
                                   (station['latitude'], station['longitude']))

    data_grouped[d] = {
        'CallType': line['CallType'],
        'CallTypeGroup': line['CallTypeGroup'],
        'CallDate': line['CallDate'],
        'ReceivedHour': line['ReceivedDtTm'].hour,
        'ReceivedYear': line['ReceivedDtTm'].year,
        'frTimeToArrival': line['InitiateToEntry']+line['EntryToDispatch']+line['DispatchToArrival'],
        'frUnitId': line['UnitId'],
        'frUnitType': line['UnitType'],
        'NeighborhoodDistrict': line['NeighborhoodDistrict'],
        'Battalion': line['Battalion'],
        'Lat': lat,
        'Lon': lon,
        'Weekend': False,
        'FinalPriority': line['FinalPriority'],
        'CallFinalDisposition': line['CallFinalDisposition'],
        'StationArea': line['StationArea'],
        'DirectDistance': directDistance,
        # Placeholders: nothing in this cell computes these two counters.
        'NbrhoodDispatches': -1,
        'BattalionDispatches': -1,
        'StationDispatches': line['StationDispatches'],
        'Station24HrDispatches': line['Station24HrDispatches'],
        'Station48to24HrDispatches': line['Station48to24HrDispatches'],
        'Box': line['Box']
    }
# From here on data_grouped is a list of per-call dicts, ordered by date and
# then by neighbourhood.
data_grouped = sorted(data_grouped.values(), key=itemgetter('CallDate','NeighborhoodDistrict'))

In [None]:
## Exporting grouped data
# Dump the per-call records to disk as a single JSON document.
with open('general_data.json','w+') as f:
    json.dump(data_grouped, f)

In [None]:
import cytoolz.curried as tlz
count = 0
for stuff in tlz.filter(lambda x:x['StationArea'] == 1,data_grouped):
    print stuff['StationArea'], stuff['CallDate'],stuff['StationDispatches'],stuff['NbrhoodDispatches'],stuff['BattalionDispatches']
    count = count+ 1
    if count == 10000:
        break

In [None]:
## Exporting late first response
import numpy as np
filter_final = tlz.filter(lambda x:x['FinalPriority'] != 4)
filter_nonlife = tlz.filter(lambda x:x['CallTypeGroup'] != 'Non Life-threatening')
stuff = filter_final(filter_nonlife(data_grouped))
nbrhood_group = tlz.groupby(lambda x:x['NeighborhoodDistrict'])
filter_dec2016 = tlz.filter(lambda x:x['CallDate'].split('-')[0]+x['CallDate'].split('-')[1] == '201612')
filter_late = tlz.filter(lambda x:x['frTimeToArrival'] > 600)
hood_map_data = []
for hood,vals in nbrhood_group(stuff).iteritems():
    if hood == 'None':
        continue
    hood_map_data.append({
        'hood':hood,
        'avg-response':np.mean([x['frTimeToArrival'] for x in vals]),
        'avg-distance':np.mean([x['DirectDistance'] for x in vals]),
        'late-ratio':np.sum([x['frTimeToArrival'] for x in vals if x['frTimeToArrival']>600])/
                np.sum([x['frTimeToArrival'] for x in vals]),
        'verylate-ratio':np.sum([x['frTimeToArrival'] for x in vals if x['frTimeToArrival']>1200])/
                np.sum([x['frTimeToArrival'] for x in vals])
        })

with io.open("hood_map_data.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(hood_map_data)))

latenessess = []
# for row in tlz.filter(lambda x:x['ReceivedYear'] == 2016,data_grouped):
for row in filter_dec2016(filter_late(data_grouped)):
    response = row['frTimeToArrival']
    lateness = 0
    if response < 600:
        continue
    elif response > 1200:
        lateness = 1
    latenessess.append({
        'lat':round(row['Lat'],5),
        'lon':round(row['Lon'],5),
        'howLate':lateness,
        'distance':row['DirectDistance'],
        'station':row['StationArea']
        })
with io.open("late_arrivals_dec2016.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(latenessess)))
latenessess = []
# for row in tlz.filter(lambda x:x['ReceivedYear'] == 2016,data_grouped):
for row in sorted(filter_late(data_grouped),key=itemgetter('frTimeToArrival'),reverse=True)[:200]:
    response = row['frTimeToArrival']
    lateness = 0
    if response < 600:
        continue
    elif response > 1200:
        lateness = 1
    latenessess.append({
        'lat':round(row['Lat'],5),
        'lon':round(row['Lon'],5),
        'howLate':lateness,
        'distance':row['DirectDistance'],
        'station':row['StationArea']
        })
with io.open("late_arrivals_200worst.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(latenessess))) 
#Performance rating suggestions
maxavg = max([x['avg-response'] for x in hood_map_data])
minavg = min([x['avg-response'] for x in hood_map_data])
median = np.median([x['avg-response'] for x in hood_map_data])
print minavg, minavg+(median-minavg)/2,median, median+(maxavg-median)/2,maxavg
print int((minavg+(median-minavg)/2)/60)*60+60,int((median)/60)*60+60,int((median+(maxavg-median)/2)/60)*60+60
maxavg = max([x['avg-distance'] for x in hood_map_data])
minavg = min([x['avg-distance'] for x in hood_map_data])
median = np.median([x['avg-distance'] for x in hood_map_data])
print minavg, minavg+(median-minavg)/2,median, median+(maxavg-median)/2,maxavg
maxavg = max([x['late-ratio'] for x in hood_map_data])
minavg = min([x['late-ratio'] for x in hood_map_data])
median = np.median([x['late-ratio'] for x in hood_map_data])
print minavg, minavg+(median-minavg)/2,median, median+(maxavg-median)/2,maxavg
maxavg = max([x['verylate-ratio'] for x in hood_map_data])
minavg = min([x['verylate-ratio'] for x in hood_map_data])
median = np.median([x['verylate-ratio'] for x in hood_map_data])
print minavg, minavg+(median-minavg)/2,median, median+(maxavg-median)/2,maxavg

In [None]:
# Export late Gone on Arrivals
# Late calls whose final disposition was 'Gone on Arrival', reduced to the
# fields written to the export file.
filter_gone = tlz.filter(lambda x:x['CallFinalDisposition'] == 'Gone on Arrival')
late_goa = [
    {
        'lat':round(row['Lat'],5),
        'lon':round(row['Lon'],5),
        'response':row['frTimeToArrival'],
        'distance':row['DirectDistance'],
        'station':row['StationArea']
    }
    for row in filter_late(filter_gone(data_grouped))
]
with io.open("late_arrivals_GOA.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(late_goa)))

In [None]:
# Export response times
from __future__ import division
# Curried groupers: each takes a sequence of per-call dicts and returns a
# dict of lists keyed by the named field.
ctg_grouping = tlz.groupby(itemgetter('CallTypeGroup'))
year_grouping = tlz.groupby(itemgetter('ReceivedYear'))
nbrhood_group = tlz.groupby(itemgetter('NeighborhoodDistrict'))
hour_group = tlz.groupby(itemgetter('ReceivedHour'))

def hoodgregations(data):
    """Summarise first-response times per neighbourhood.

    `data` is a sequence of per-call dicts. The result is a list with one
    dict per neighbourhood, ordered by neighbourhood name and skipping the
    neighbourhood called 'None'. Each dict holds the mean response time,
    the mean response time for every received-hour that occurs (in
    ascending hour order), the mean direct distance, and the share of all
    response seconds that belongs to calls slower than 600 s and 1200 s.
    """
    summaries = []
    by_hood = nbrhood_group(data)
    for hood in sorted(by_hood):
        if hood == 'None':
            continue
        vals = by_hood[hood]
        responses = [x['frTimeToArrival'] for x in vals]
        total_response = np.sum(responses)
        by_hour = hour_group(vals)
        hourly_means = [np.mean([x['frTimeToArrival'] for x in by_hour[hour]])
                        for hour in sorted(by_hour)]
        summaries.append({
            'hood':hood,
            'response':{'avg':np.mean(responses),
                        'hourly':hourly_means},
            'distance':{'avg':np.mean([x['DirectDistance'] for x in vals])},
            'late-ratio':np.sum([r for r in responses if r>600])/total_response,
            'verylate-ratio':np.sum([r for r in responses if r>1200])/total_response})
    return summaries
# Neighbourhood summaries for all calls, then per year, then per call type
# group (overall and per year). Calls with an empty call type group are only
# part of the 'combined...' entries.
response_time_groups = {'combinedoverall': hoodgregations(data_grouped)}

calls_by_year = year_grouping(data_grouped)
for year in sorted(calls_by_year):
    response_time_groups['combined'+str(year)] = hoodgregations(calls_by_year[year])

calls_by_ctg = ctg_grouping(data_grouped)
for ctg in sorted(calls_by_ctg):
    if ctg == '':
        continue
    vals = calls_by_ctg[ctg]
    prefix = ctg.replace(' ','')
    response_time_groups[prefix+'overall'] = hoodgregations(vals)
    ctg_calls_by_year = year_grouping(vals)
    for year in sorted(ctg_calls_by_year):
        response_time_groups[prefix+str(year)] = hoodgregations(ctg_calls_by_year[year])

with io.open("response_time_groups.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(response_time_groups)))

In [None]:
# Evaluates the group names currently held in response_time_groups; the result
# is not stored or printed.
response_time_groups.keys()# Export response times
from __future__ import division
# Keeps only calls in the Alarm, Fire and Potentially Life-Threatening groups.
nlt_filter = tlz.filter(lambda x:x['CallTypeGroup'] in ('Alarm','Fire','Potentially Life-Threatening'))
# Curried groupers: each takes a sequence of per-call dicts and returns a
# dict of lists keyed by the named field.
ctg_grouping = tlz.groupby(itemgetter('CallTypeGroup'))
year_grouping = tlz.groupby(itemgetter('ReceivedYear'))
nbrhood_group = tlz.groupby(itemgetter('NeighborhoodDistrict'))
hour_group = tlz.groupby(itemgetter('ReceivedHour'))

def getTheGoods(hood,vals):
    """Build the response-time summary dict for one group of calls.

    `hood` is stored as the label of the summary and `vals` is the list of
    per-call dicts to summarise. The result holds the mean response time,
    the mean response time for every received-hour that occurs (in
    ascending hour order), the mean direct distance, and the share of all
    response seconds that belongs to calls slower than 600 s and 1200 s.
    """
    responses = [x['frTimeToArrival'] for x in vals]
    total_response = np.sum(responses)
    by_hour = hour_group(vals)
    hourly_means = [np.mean([x['frTimeToArrival'] for x in by_hour[hour]])
                    for hour in sorted(by_hour)]
    return {
        'hood':hood,
        'response':{'avg':np.mean(responses),
                    'hourly':hourly_means},
        'distance':{'avg':np.mean([x['DirectDistance'] for x in vals])},
        'late-ratio':np.sum([r for r in responses if r>600])/total_response,
        'verylate-ratio':np.sum([r for r in responses if r>1200])/total_response}

def hoodgregations(data):
    """Return one getTheGoods summary per neighbourhood in `data`.

    Neighbourhoods appear in ascending name order; the neighbourhood called
    'None' is left out.
    """
    by_hood = nbrhood_group(data)
    return [getTheGoods(hood, by_hood[hood])
            for hood in sorted(by_hood)
            if hood != 'None']

# Same export as before, restricted to the calls that pass nlt_filter, plus
# city-wide 'sanfran...' summaries next to the per-neighbourhood ones.
kept_calls = list(nlt_filter(data_grouped))

response_time_groups = {
    'combinedoverall': hoodgregations(kept_calls),
    'sanfranoverall': getTheGoods('San Fransisco', kept_calls),
}

kept_by_year = year_grouping(kept_calls)
for year in sorted(kept_by_year):
    valsj = kept_by_year[year]
    response_time_groups['combined'+str(year)] = hoodgregations(valsj)
    response_time_groups['sanfran'+str(year)] = getTheGoods('San Fransisco',valsj)

kept_by_ctg = ctg_grouping(kept_calls)
for ctg in sorted(kept_by_ctg):
    if ctg == '':
        continue
    vals = kept_by_ctg[ctg]
    prefix = ctg.replace(' ','')
    response_time_groups[prefix+'overall'] = hoodgregations(vals)
    ctg_calls_by_year = year_grouping(vals)
    for year in sorted(ctg_calls_by_year):
        response_time_groups[prefix+str(year)] = hoodgregations(ctg_calls_by_year[year])

with io.open("response_time_groups_nltfree.json","w+",encoding='utf-8') as f:
    f.write(unicode(json.dumps(response_time_groups)))