In [2]:
import os
import sqlite3
import time
from typing import Optional, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from geopy.distance import geodesic
from pandas import DataFrame
from requests import Session

In [None]:
# Root folder of the project data. NOTE: machine-specific absolute path.
WORKING_DIR = "D:/Fire Project/data/"

# NFIRS extract folders, one entry per year from 2020 back to 2015.
# Presumably relative to WORKING_DIR -- TODO confirm against the loading code.
NFIRS_PATHS = [
    "nfirs_fire_hazmat_pdr_2020/NFIRS_FIRES_2020_022322",
    "USFA NFIRS 2019 Hazmat/NFIRS_FIRES_2019_011921",
    "USFA NFIRS 2018 Hazmat/NFIRS_FIRES_2018_110119",
    "USFA NFIRS 2017 Hazmat/NFIRS_FIRES_2017_020719",
    "USFA NFIRS 2016 Hazmat/NFIRS_FIRES_2016_02-05-2018",
    "USFA NFIRS 2015 Hazmat/NFIRS_FIRES_2015_20170215",
]

The goal of this notebook is to link HUD REAC inspection addresses with FEMA NFIRS fire incident addresses.

The function below sends a single address to the U.S. Census Bureau Geocoder API and returns the standardized, geocoded matches for it.

In [144]:
def code_address(address: Tuple[Optional[str], ...], session: "Session", timeout: float = 30.0) -> Optional[dict]:
    """Geocode one address with the U.S. Census Bureau one-line-address geocoder.

    Parameters
    ----------
    address
        ``(street, city, state, zipcode)``. Components may be ``None`` or
        non-strings (e.g. an integer ZIP); each one is converted to text,
        upper-cased and whitespace-normalised, and empty components are
        left out of the query.
    session
        A ``requests.Session`` (or any object with a compatible ``get``
        method) used to send the request.
    timeout
        Seconds to wait for the server; passed to ``session.get`` so a
        stalled request cannot hang a long geocoding run indefinitely.

    Returns
    -------
    dict or None
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when every address component is empty (no request is sent)
        or when the server answers with any other status code.
    """
    GEOCODER_URL = "https://geocoding.geo.census.gov/geocoder/locations/onelineaddress"

    def _clean(component) -> str:
        # None becomes "", and runs of whitespace collapse to a single blank
        # (street strings glued together from several columns often contain
        # repeated spaces).
        return "" if component is None else " ".join(str(component).upper().split())

    street, city, state, zipcode = (_clean(part) for part in address[:4])

    # Same layout as before -- "STREET, CITY, STATE ZIP" -- minus empty parts.
    state_zip = " ".join(part for part in (state, zipcode) if part)
    one_line = ", ".join(part for part in (street, city, state_zip) if part)
    if not one_line:
        return None

    # Let the HTTP client percent-encode the values. Hand-building the query
    # string broke on characters such as '#' or '&' (e.g. "APT #4"), which
    # silently cut off the benchmark/vintage/format parameters.
    params = {
        "address": one_line,
        "benchmark": "Public_AR_Current",
        "vintage": "Current_Current",
        "format": "json",
    }

    response = session.get(GEOCODER_URL, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.json()

    print("Request failed")
    return None

Because the data from NFIRS doesn't have longitude and latitude, we'll add this data using the Census Bureau API so that we can compare the geographic locations of the two sets of addresses.

To get a more granular look, we will use the Census Bureau's geocoding API to get longitude and latitude data for every address in our database that is located in New York State.

We'll start by creating a table to store the latitude and longitude data and connecting it to our existing list of addresses.

In [158]:
# One-time setup (currently disabled): creates the address_geocoded table that
# insert_coordinates() writes to. Each row holds a latitude/longitude pair and
# the INCIDENT_KEY of the incident_address row it belongs to.
# On a database where the table does not exist yet, run this once before
# inserting coordinates; IF NOT EXISTS makes re-running it harmless.
# conn = sqlite3.Connection('fire_data.db')
# cur = conn.cursor()
# cur.execute("""
#     CREATE TABLE IF NOT EXISTS address_geocoded (
#         id INTEGER PRIMARY KEY,
#         latitude REAL,
#         longitude REAL,
#         INCIDENT_KEY TEXT,
#         FOREIGN KEY (INCIDENT_KEY)
#             REFERENCES incident_address(INCIDENT_KEY)
#     )
# """)
# conn.commit()

In [172]:
def insert_coordinates(latitude: float, longitude: float, incident_key: str, conn) -> None:
    """Store one geocoded point in the ``address_geocoded`` table.

    Inserts a ``(latitude, longitude, INCIDENT_KEY)`` row through a cursor
    obtained from ``conn`` and commits immediately, so every call is its own
    transaction.

    Parameters
    ----------
    latitude, longitude
        Coordinates to store.
    incident_key
        Value written to the ``INCIDENT_KEY`` column.
    conn
        Open database connection providing ``cursor()`` and ``commit()``.
    """
    insert_sql = """INSERT INTO address_geocoded (
                            latitude, 
                            longitude, 
                            INCIDENT_KEY) 
                        VALUES (
                            ?, ?, ?
                        )
    """
    values = (latitude, longitude, incident_key)

    cursor = conn.cursor()
    cursor.execute(insert_sql, values)
    conn.commit()

In [173]:
# Open the project database. sqlite3.connect() is the documented way to create
# a connection (rather than instantiating sqlite3.Connection directly).
conn = sqlite3.connect('fire_data.db')
cur = conn.cursor()

# Pull every New York incident address. The street column is assembled from
# its component fields, with NULL parts replaced by '' so one missing field
# does not null out the whole concatenation (this can leave repeated spaces).
# ORDER BY RANDOM() returns the rows in a different random order on each run,
# so an interrupted geocoding pass has covered a random subset of addresses
# -- presumably intentional; TODO confirm.
cur.execute("""
SELECT INCIDENT_KEY,
    COALESCE(NUM_MILE, '') || ' ' ||
    COALESCE(STREET_PRE, '') || ' ' ||
    COALESCE(STREETNAME, '') || ' ' ||
    COALESCE(STREETTYPE, '') || ' ' ||
    COALESCE(STREETSUF, '') || ' ' ||
    COALESCE(APT_NO, '') as street,
    CITY as city,
    STATE as state,
    ZIP5 as zipcode
FROM incident_address
WHERE STATE = 'NY'
ORDER BY RANDOM()
""")
# Each row is (INCIDENT_KEY, street, city, state, zipcode).
ny_addresses = cur.fetchall()

Now we'll go through the addresses returned by the SELECT query one by one and get the coordinates for each from the API.

In [2]:
# Geocoding run (currently disabled). For each row of ny_addresses it looks the
# address up with code_address() and, when the API returns at least one match,
# stores the first match's coordinates with insert_coordinates()
# (coordinates['y'] is passed as latitude, coordinates['x'] as longitude).
# A running count and timestamp are printed every 1000 inserted rows, and the
# HTTP session is closed even if the loop fails part-way.
# session = requests.Session()
# count = 0
# try:
#     for row in ny_addresses:
#         incident_key = row[0]
#         address = row[1:]
#         census_result = code_address(address, session)

#         if census_result:
#             matches = census_result['result']['addressMatches']
#             if matches:
#                 # Keep only the first match
#                 matches = matches[0]
#                 coordinates = matches['coordinates']
#                 insert_coordinates(coordinates['y'], coordinates['x'], incident_key, conn)
#                 count += 1
#                 if count % 1000 == 0:
#                     print(count, time.time())
# finally:
#     session.close()