# Distance From Firestation
This notebook computes the closest fire station for each EAS and adds the distance to the spreadsheet.

In [1]:
import pandas as pd
import os
pd.set_option("display.max_columns", 100)

## Loading Master Dataframe

In [2]:
# Directory that holds the project data. Despite the name, DATA_URL is a
# relative filesystem path, not a URL.
# I renamed the Google Drive Folder to /data/ in my repo - access will change once moving to database
DATA_URL = '../data/' 
# Master dataframe CSV that is loaded at the start of the notebook.
FILE_NAME = 'masterdf_20170920.csv'
# Name of the CSV the dataframe is written back to at the end of the notebook.
NEW_FILE_NAME = 'masterdf_20171026_andirs.csv'

In [3]:
# index_col=0 uses the first CSV column as the row index; low_memory=False
# makes pandas parse the file in one pass rather than in chunks.
df = pd.read_csv(os.path.join(DATA_URL, FILE_NAME), index_col=0, low_memory=False)

In [4]:
df.head()

Unnamed: 0,Incident Date,EAS,Incident_Year,Incident_Cat,Incident_Dummy,Neighborhood,Location_y,Address,Building_Cat,Yr_Property_Built,Num_Bathrooms,Num_Bedrooms,Num_Rooms,Num_Stories,Num_Units,Perc_Ownership,Land_Value,Property_Area,Assessed_Improvement_Val,Tot_Rooms,landval_psqft,count potential fire control,count all complaints,count all complaints not corrected,count potential fire control not corrected,count fire emergency safety,count potential fire cause,count fire emergency safety not corrected,count potential fire cause not corrected
0,2015-06-20,451005.0,2015.0,COOKING FIRE,1.0,SUNSET/PARKSIDE,"(37.7543289339354, -122.480327187833)",1532 NORIEGA ST,COMMERCIAL USE,1989.0,0.0,0.0,11.0,3.0,2.777778,1.0,438434.3,4135.0,262181.666667,11.0,106.030069,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010-11-28,360149.0,2010.0,COOKING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.0,0.0,36.0,3.0,12.0,1.0,1365665.0,9318.0,566375.428571,36.0,146.56203,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0
2,2011-04-26,360149.0,2011.0,COOKING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.0,0.0,36.0,3.0,12.0,1.0,1365665.0,9318.0,566375.428571,36.0,146.56203,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0
3,2006-03-09,360149.0,2006.0,BUILDING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.0,0.0,36.0,3.0,12.0,1.0,1365665.0,9318.0,566375.428571,36.0,146.56203,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0
4,2004-05-28,360149.0,2004.0,OUTDOOR FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.0,0.0,36.0,3.0,12.0,1.0,1365665.0,9318.0,566375.428571,36.0,146.56203,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0


In [5]:
len(df)

195308

## Fire Stations
* Distance of closest fire station to instance

### Getting address of all fire stations in SF
[sf-fire.org](http://sf-fire.org/fire-station-locations) lists all fire stations on their website. A simple webscraper should do the trick to get the addresses. 

In [6]:
import urllib2
from bs4 import BeautifulSoup
URL = "http://sf-fire.org/fire-station-locations"

In [7]:
def get_fire_station_addresses(URL):
    """Scrape the fire station street addresses from the page at ``URL``.

    Every ``<a>`` tag whose ``href`` contains ``'propertymap'`` is treated as
    a station link. The address is the text after the second ``=`` in that
    href, and ``", San Francisco"`` is appended to it.

    Parameters
    ----------
    URL : str
        Address of the page listing the fire stations.

    Returns
    -------
    list of str
        One ``"<street address>, San Francisco"`` entry per matching link,
        in document order.
    """
    page = urllib2.urlopen(URL)
    try:
        soup = BeautifulSoup(page, "lxml")
    finally:
        # The response is fully parsed at this point; release the connection.
        page.close()

    fire_station_adds = []
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href (or with an unrelated target) are not stations.
        if not href or 'propertymap' not in href:
            continue
        parts = href.split("=")
        # The address is the third "="-separated chunk; skip links that do not
        # follow that layout instead of failing the whole scrape.
        if len(parts) > 2:
            fire_station_adds.append(parts[2] + ", San Francisco")
    return fire_station_adds

In [8]:
fire_station_adds = get_fire_station_addresses(URL)

In [9]:
fire_station_adds[:5] #show first 5 addresses

['935 FOLSOM, San Francisco',
 '1340 POWELL, San Francisco',
 '1067 POST, San Francisco',
 '449 MISSION ROCK, San Francisco',
 '1301 TURK, San Francisco']

The site states 45 fire stations (Ranging from 1 to 51 with some numbers left out). Let's double check we got all 45 addresses.

In [10]:
print "There are {} addresses in the list".format(len(fire_station_adds))

There are 45 addresses in the list


### Getting location information for fire stations
For all 45 fire stations in SF, the geolocations are fetched from two services: geopy and Google Maps. Since both services are hit-or-miss, the fetch function iterates repeatedly over every address that has not returned a result yet. Once a result has been returned for an address, that address is removed from the queue. If the queue does not shrink after 5 iterations, the process is stopped.

In [11]:
import time
import random
import requests

def fetch_address_info(address_list, service='google', verbose=True, max_iter=5, timer=False):
    """Geocode every address, retrying the ones that return nothing.

    The addresses are processed in passes. Each pass queries every address
    that has no result yet; addresses that return a non-empty result leave
    the queue. The loop ends when the queue is empty or when too many passes
    started with a queue of unchanged length (see ``max_iter``).

    Parameters
    ----------
    address_list : iterable of str
        Addresses to look up. The input is copied, never modified.
    service : {'google', 'geopy'}
        Backend to query. 'google' calls the Google geocoding endpoint via
        ``requests``; 'geopy' uses geopy's Nominatim geocoder.
    verbose : bool
        Print one status line per lookup.
    max_iter : int
        The fetch gives up once the stall counter (which starts at 1 and is
        incremented whenever a pass starts with as many queued addresses as
        the previous pass) reaches this value.
    timer : bool
        Sleep a random 2-4 seconds after every lookup.

    Returns
    -------
    dict
        Maps each attempted address to the last result obtained for it.
        Addresses that never succeeded map to an empty/falsy result.

    Raises
    ------
    AttributeError
        If ``service`` is neither 'google' nor 'geopy'. This is checked up
        front, before any request is made.
    """
    def _fetch_geopy(address):
        try:
            return geolocator.geocode(address)
        except Exception:
            # Best effort: any geocoder failure counts as "no result yet".
            return []

    def _fetch_google(address):
        # Let requests build the query string so spaces, commas etc. in the
        # address are URL-encoded correctly.
        # NOTE(review): the endpoint may require an API key nowadays - confirm.
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"address": address})
        return response.json().get('results', [])

    if service == 'google':
        fetch = _fetch_google
    elif service == 'geopy':
        # Imported lazily so the google path does not depend on geopy.
        from geopy.geocoders import Nominatim
        # NOTE(review): newer geopy releases appear to require a user_agent
        # argument here - confirm against the installed version.
        geolocator = Nominatim()
        fetch = _fetch_geopy
    else:
        raise AttributeError("You need to specify either 'google' or 'geopy' as service attribute.")

    address_dict = {}
    pending = list(address_list)
    iterations = 1
    len_counter = 1
    len_val = -1

    while pending:
        # A pass that starts with the same queue length as the previous one
        # means the previous pass made no progress.
        if len_val == len(pending):
            len_counter += 1
        len_val = len(pending)
        print("{} addresses in the queue (Iteration {})".format(len_val, iterations))

        # Build the next queue separately: removing items from the list that
        # is being iterated would skip the element after every removal.
        still_pending = []
        for address in pending:
            fetch_verbose_string = "Fetching data for: " + address
            address_dict[address] = fetch(address)
            if address_dict[address]:
                fetch_verbose_string += "\t\t\t ... successful"
            else:
                fetch_verbose_string += "\t\t\t ... not successful, queueing up again"
                still_pending.append(address)
            if verbose:
                print(fetch_verbose_string)
            if timer:
                time.sleep(random.randint(2, 4))
        pending = still_pending

        iterations += 1
        if pending and len_counter > max_iter - 1:
            print("Termination: {} addresses couldn't be found".format(len(pending)))
            return address_dict
    return address_dict

In [12]:
refetch = False # indicates whether location data should be fetched or stored should be used

In [13]:
if refetch:
    geopy_address_dict = fetch_address_info(fire_station_adds, service='geopy', verbose=False)

The Google API seems to return better results when the calls are spaced out with a short delay.

In [14]:
if refetch:
    google_address_dict = fetch_address_info(fire_station_adds, service='google', verbose=False, timer=True)

In [15]:
import pickle

# Either persist the freshly fetched geocoder results, or reload the results
# cached by an earlier run.
if refetch:
    with open('google_address_dict.pickle', 'wb') as cache_file:
        pickle.dump(google_address_dict, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
    with open('geopy_address_dict.pickle', 'wb') as cache_file:
        pickle.dump(geopy_address_dict, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # pickles that were written by this notebook.
    with open('google_address_dict.pickle', 'rb') as cache_file:
        google_address_dict = pickle.load(cache_file)
    with open('geopy_address_dict.pickle', 'rb') as cache_file:
        geopy_address_dict = pickle.load(cache_file)

Geopy has some issues with two of the addresses we ingested. Google does a better job at matching address and geolocation.

In [16]:
def get_lat_long(address_dict):
    """Reduce geocoder payloads to plain ``(lat, lng)`` tuples.

    Parameters
    ----------
    address_dict : dict
        Maps an address to a sequence of results. Only the first result is
        used; it must provide ``['geometry']['location']`` with ``'lat'``
        and ``'lng'`` entries.

    Returns
    -------
    dict
        Maps each address to ``(lat, lng)`` taken from its first result.
    """
    def _first_hit_coordinates(results):
        location = results[0]['geometry']['location']
        return (location['lat'], location['lng'])

    return {
        address: _first_hit_coordinates(results)
        for address, results in address_dict.items()
    }

In [17]:
get_lat_long(google_address_dict) # additional step for google results to get latitude and longitude values

{'100 Hoffman Avenue, San Francisco': (37.7531106, -122.4410957),
 '1000 Ocean Avenue, San Francisco': (37.7232908, -122.4529247),
 '1067 POST, San Francisco': (37.7866445, -122.4193329),
 '109 Oak Street, San Francisco': (37.7749469, -122.4212156),
 '1091 Portola Drive, San Francisco': (37.7400312, -122.4586575),
 '1145 Stanyan Street, San Francisco': (37.7634628, -122.4526316),
 '1290 16th Avenue , San Francisco': (37.7639903, -122.4736446),
 '1295 Shafter Avenue, San Francisco': (37.7275225, -122.385028),
 '1298 Girard Street, San Francisco': (37.7166417, -122.4004505),
 '1301 TURK, San Francisco': (37.7804435, -122.430725),
 '1325 Leavenworth Street , San Francisco': (37.7933971, -122.4165178),
 '1340 POWELL, San Francisco': (37.7970513, -122.4099507),
 '1348 45th Avenue, San Francisco': (37.7614112, -122.5046487),
 '135 SANCHEZ, San Francisco': (37.767088, -122.4307689),
 '1415 Evans Avenue, San Francisco': (37.740742, -122.3853284),
 '1443 GROVE, San Francisco': (37.7754351, -122

In [18]:
# additional step for google results to get latitude and longitude values
# NOTE(review): this rebinds google_address_dict to the converted
# {address: (lat, lng)} mapping, so the cell cannot be run twice in a row:
# get_lat_long would then index into the tuples and fail. Reload the pickled
# results before re-running it.
google_address_dict = get_lat_long(google_address_dict)

### Calculate Haversine Distance
In the next step we'll use a simple distance formula to calculate the distance between the retrieved points. Since we need to compute the distance from every point in the dataset to every fire station, this needs a few moments to complete.

> "The haversine [distance] determines the great-circle distance between two points on a sphere given their longitudes and latitudes." 

This is not the driving distance, which will be computed using one of the api offerings from either one of the big navigation players.

In [19]:
from math import pi,sqrt,sin,cos,atan2

def haversine(pos1, pos2, radius_km=6367):
    """Great-circle distance between two points on a sphere.

    Parameters
    ----------
    pos1, pos2 : sequence
        ``(latitude, longitude)`` in degrees. Each component is passed
        through ``float``, so numeric strings are accepted.
    radius_km : float, optional
        Radius of the sphere. The default of 6367 is the Earth radius in km
        this notebook has always used, so existing results are unchanged.

    Returns
    -------
    float
        Distance along the sphere's surface, in the unit of ``radius_km``.
    """
    lat1, long1 = float(pos1[0]), float(pos1[1])
    lat2, long2 = float(pos2[0]), float(pos2[1])

    degree_to_rad = pi / 180.0

    d_lat = (lat2 - lat1) * degree_to_rad
    d_long = (long2 - long1) * degree_to_rad

    a = (pow(sin(d_lat / 2), 2)
         + cos(lat1 * degree_to_rad) * cos(lat2 * degree_to_rad) * pow(sin(d_long / 2), 2))
    # Rounding can push `a` marginally above 1 for near-antipodal points,
    # which would make sqrt(1 - a) raise a domain error. (A NaN passes
    # through this comparison untouched.)
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius_km * c

In [239]:
from ast import literal_eval
def hav_all(row):
    """Find the fire station closest to one location.

    Parameters
    ----------
    row : str or tuple
        The location, either as the string form of a ``(lat, long)`` tuple
        (as stored in the ``Location_y`` column) or as an already parsed
        tuple/list.

    Returns
    -------
    tuple
        ``(distance, address, latlong)`` of the nearest entry in
        ``google_address_dict``, with ``distance`` as returned by
        ``haversine``. If ``google_address_dict`` is empty the placeholder
        ``(12742.0, "", "")`` is returned.
    """
    # Strings are parsed safely with literal_eval; parsed tuples pass through.
    pos1 = row if isinstance(row, (tuple, list)) else literal_eval(row)

    nearest = None
    for fire_station, latlong in google_address_dict.items():
        distance = haversine(pos1, latlong)
        # No numeric cut-off: the old 12742 km starting value was smaller
        # than the largest possible great-circle distance, so very distant
        # stations could never be selected.
        if nearest is None or distance < nearest[0]:
            nearest = (distance, fire_station, latlong)

    if nearest is None:
        # Keep the historical placeholder for "no station available".
        return (12742.0, "", "")
    return nearest

In [250]:
def get_nearest_fire_station(data):
    """Add the nearest fire station to every row of ``data``.

    ``hav_all`` is applied to the ``Location_y`` column and its
    ``(distance, address, latlong)`` result is spread over three columns:
    ``next_fire_dpt_address``, ``next_fire_dpt_distance`` and
    ``next_fire_dpt_latlong``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Location_y`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the three columns added.
    """
    nearest = data["Location_y"].apply(hav_all)

    # The address column is assigned first so that new columns appear in the
    # order address, distance, latlong.
    data["next_fire_dpt_address"] = nearest.apply(lambda hit: hit[1])
    data["next_fire_dpt_distance"] = nearest.apply(lambda hit: hit[0])
    data["next_fire_dpt_latlong"] = nearest.apply(lambda hit: hit[2])

    return data

In [240]:
# Try hav_all on the first three locations before applying it to more rows.
df["Location_y"][:3].apply(hav_all)

0    (0.859293772626, 2155 18th Avenue, San Francis...
1    (0.552280565555, 2300 FOLSOM, San Francisco, (...
2    (0.552280565555, 2300 FOLSOM, San Francisco, (...
Name: Location_y, dtype: object

In [244]:
df2 = df[:100].copy()

In [245]:
df2["next_fire_dpt_address"] = df2["Location_y"].apply(hav_all)

In [247]:
# The column currently holds hav_all's (distance, address, latlong) tuples;
# element 0 is the distance.
df2["next_fire_dpt_distance"] = df2["next_fire_dpt_address"].apply(lambda hit: hit[0])

In [248]:
# Extract the lat/long first: the second statement replaces the
# (distance, address, latlong) tuples with the bare address string.
df2["next_fire_dpt_latlong"] = df2["next_fire_dpt_address"].apply(lambda hit: hit[2])
df2["next_fire_dpt_address"] = df2["next_fire_dpt_address"].apply(lambda hit: hit[1])

In [249]:
df2

Unnamed: 0,Incident Date,EAS,Incident_Year,Incident_Cat,Incident_Dummy,Neighborhood,Location_y,Address,Building_Cat,Yr_Property_Built,Num_Bathrooms,Num_Bedrooms,Num_Rooms,Num_Stories,Num_Units,Perc_Ownership,Land_Value,Property_Area,Assessed_Improvement_Val,Tot_Rooms,landval_psqft,count potential fire control,count all complaints,count all complaints not corrected,count potential fire control not corrected,count fire emergency safety,count potential fire cause,count fire emergency safety not corrected,count potential fire cause not corrected,distance_next_fire_dpt_hav,next_fire_dpt_address,next_fire_dpt_distance,next_fire_dpt_latlong
0,2015-06-20,451005.0,2015.0,COOKING FIRE,1.0,SUNSET/PARKSIDE,"(37.7543289339354, -122.480327187833)",1532 NORIEGA ST,COMMERCIAL USE,1989.0,0.000000,0.000000,11.000000,3.000000,2.777778,1.000000,4.384343e+05,4135.000000,2.621817e+05,11.000000,106.030069,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.859294,"2155 18th Avenue, San Francisco",0.859294,"(37.7476471, -122.4754049)"
1,2010-11-28,360149.0,2010.0,COOKING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.000000,0.000000,36.000000,3.000000,12.000000,1.000000,1.365665e+06,9318.000000,5.663754e+05,36.000000,146.562030,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.552281,"2300 FOLSOM, San Francisco",0.552281,"(37.7602611, -122.4151762)"
2,2011-04-26,360149.0,2011.0,COOKING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.000000,0.000000,36.000000,3.000000,12.000000,1.000000,1.365665e+06,9318.000000,5.663754e+05,36.000000,146.562030,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.552281,"2300 FOLSOM, San Francisco",0.552281,"(37.7602611, -122.4151762)"
3,2006-03-09,360149.0,2006.0,BUILDING FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.000000,0.000000,36.000000,3.000000,12.000000,1.000000,1.365665e+06,9318.000000,5.663754e+05,36.000000,146.562030,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.552281,"2300 FOLSOM, San Francisco",0.552281,"(37.7602611, -122.4151762)"
4,2004-05-28,360149.0,2004.0,OUTDOOR FIRE,1.0,MISSION,"(37.7645472195468, -122.418358468789)",135 CAPP ST,APARTMENT,1908.0,0.000000,0.000000,36.000000,3.000000,12.000000,1.000000,1.365665e+06,9318.000000,5.663754e+05,36.000000,146.562030,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.552281,"2300 FOLSOM, San Francisco",0.552281,"(37.7602611, -122.4151762)"
5,2003-10-05,279186.0,2003.0,OUTDOOR FIRE,1.0,FINANCIAL DISTRICT/SOUTH BEACH,"(37.7973751986894, -122.399959825377)",639 FRONT ST,COMMERCIAL USE,1906.0,0.000000,0.000000,4.000000,3.000000,1.888889,0.750000,2.225888e+05,10540.000000,1.548943e+05,4.000000,21.118480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240488,"530 Sansome Street, San Francisco",0.240488,"(37.7955293, -122.4013894)"
6,2007-01-17,279186.0,2007.0,VEHICLE FIRE,1.0,FINANCIAL DISTRICT/SOUTH BEACH,"(37.7973751986894, -122.399959825377)",639 FRONT ST,COMMERCIAL USE,1906.0,0.000000,0.000000,4.000000,3.000000,1.888889,0.750000,2.225888e+05,10540.000000,1.548943e+05,4.000000,21.118480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240488,"530 Sansome Street, San Francisco",0.240488,"(37.7955293, -122.4013894)"
7,2004-04-23,279186.0,2004.0,OUTDOOR FIRE,1.0,FINANCIAL DISTRICT/SOUTH BEACH,"(37.7973751986894, -122.399959825377)",639 FRONT ST,COMMERCIAL USE,1906.0,0.000000,0.000000,4.000000,3.000000,1.888889,0.750000,2.225888e+05,10540.000000,1.548943e+05,4.000000,21.118480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240488,"530 Sansome Street, San Francisco",0.240488,"(37.7955293, -122.4013894)"
8,2008-03-18,279186.0,2008.0,TRASH FIRE (INDOOR),1.0,FINANCIAL DISTRICT/SOUTH BEACH,"(37.7973751986894, -122.399959825377)",639 FRONT ST,COMMERCIAL USE,1906.0,0.000000,0.000000,4.000000,3.000000,1.888889,0.750000,2.225888e+05,10540.000000,1.548943e+05,4.000000,21.118480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240488,"530 Sansome Street, San Francisco",0.240488,"(37.7955293, -122.4013894)"
9,2012-10-27,279186.0,2012.0,TRASH FIRE (INDOOR),1.0,FINANCIAL DISTRICT/SOUTH BEACH,"(37.7973751986894, -122.399959825377)",639 FRONT ST,COMMERCIAL USE,1906.0,0.000000,0.000000,4.000000,3.000000,1.888889,0.750000,2.225888e+05,10540.000000,1.548943e+05,4.000000,21.118480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240488,"530 Sansome Street, San Francisco",0.240488,"(37.7955293, -122.4013894)"


In [22]:
# NOTE(review): none of the cells shown in this notebook creates the
# "distance_next_fire_dpt_hav" column on df; presumably it came from a cell
# that was later removed. Confirm before relying on Restart & Run All.
df["distance_next_fire_dpt_hav"].describe()

count    195308.000000
mean          0.648027
std           0.312564
min           0.007126
25%           0.417233
50%           0.630113
75%           0.841236
max           2.471300
Name: distance_next_fire_dpt_hav, dtype: float64

In [47]:
df.to_csv(os.path.join(DATA_URL, NEW_FILE_NAME))

## Driving Distance
To compute the driving distance we'll be using the Google API. Because retrieving the driving distance to all 45 fire stations for every address would be an intense endeavour for the API, we'll use the haversine distance as a proxy to pick the closest station and only request directions to that one.

# Hydrants
* Hydrant count in district
* closest hydrant to instance

# Additional Thoughts
* To make sure we're getting the closest fire station we could compute the 3 smallest haversine distances and request the directions for each of those. But this would triple the number of Google API calls as well.

In [115]:
def hav_all_3(row):
    """Find the three fire stations closest to one location.

    Parameters
    ----------
    row : str or tuple
        The location, either as the string form of a ``(lat, long)`` tuple
        or as an already parsed tuple/list.

    Returns
    -------
    pandas.Series
        Seven values: ``row`` itself, followed by ``distance, address`` for
        the closest, second closest and third closest station. Stations at
        exactly the same distance are all kept (ordered by address). When
        ``google_address_dict`` holds fewer than three stations, the missing
        slots are filled with the placeholder ``12742.0, ""``.
    """
    # Strings are parsed safely with literal_eval; parsed tuples pass through.
    pos1 = row if isinstance(row, (tuple, list)) else literal_eval(row)

    # Rank every station by distance and keep the best three. Sorting avoids
    # the hand-rolled first/second/third bookkeeping, which dropped stations
    # that tied with the current best.
    ranked = sorted(
        (haversine(pos1, latlong), fire_station)
        for fire_station, latlong in google_address_dict.items()
    )[:3]
    ranked += [(12742.0, "")] * (3 - len(ranked))

    values = [row]
    for distance, fire_station in ranked:
        values.extend([distance, fire_station])
    return pd.Series(values)

In [105]:
df2 = df.copy()

In [116]:
df2["Location_y"][:3].apply(hav_all_3)

Unnamed: 0,0,1,2,3,4,5,6
0,"(37.7543289339354, -122.480327187833)",0.859294,"2155 18th Avenue, San Francisco",0.97363,"1935 32nd Avenue, San Francisco",1.223659,"1290 16th Avenue , San Francisco"
1,"(37.7645472195468, -122.418358468789)",0.552281,"2300 FOLSOM, San Francisco",1.126183,"135 SANCHEZ, San Francisco",1.182604,"109 Oak Street, San Francisco"
2,"(37.7645472195468, -122.418358468789)",0.552281,"2300 FOLSOM, San Francisco",1.126183,"135 SANCHEZ, San Francisco",1.182604,"109 Oak Street, San Francisco"


In [45]:
def get_smallest_3(tmplist, addlist):
    """Return the three smallest values together with their addresses.

    Parameters
    ----------
    tmplist : iterable
        Values to rank (e.g. distances).
    addlist : iterable
        Addresses, paired with ``tmplist`` by position.

    Returns
    -------
    tuple
        ``((first, first_add), (second, second_add), (third, third_add))``
        in ascending order of value. Equal values keep their input order.
        If fewer than three pairs are given, the remaining slots are filled
        with the placeholder ``(12742.0, "")``.
    """
    placeholder = (12742.0, "")
    # sorted() is stable, so ties stay in input order. Ranking by sorting
    # also guarantees that every value is demoted correctly and that each
    # value keeps its own address.
    ranked = sorted(zip(tmplist, addlist), key=lambda pair: pair[0])[:3]
    ranked.extend([placeholder] * (3 - len(ranked)))
    return tuple(ranked)

In [42]:
test = [11, 4, 2, 3, 1, 5, 9, 10]
address = fire_station_adds[:len(test)]

In [46]:
get_smallest_3(test, address)

((1, '1301 TURK, San Francisco'),
 (2, '1067 POST, San Francisco'),
 (3, '449 MISSION ROCK, San Francisco'))

In [47]:
address

['935 FOLSOM, San Francisco',
 '1340 POWELL, San Francisco',
 '1067 POST, San Francisco',
 '449 MISSION ROCK, San Francisco',
 '1301 TURK, San Francisco',
 '135 SANCHEZ, San Francisco',
 '2300 FOLSOM, San Francisco',
 '36 BLUXOME, San Francisco']