In [1]:
import requests
from requests import Session
import os
import pandas as pd
from pandas import DataFrame
import sqlite3

## Loading NFIRS incidentaddress.txt and basicincident.txt into a SQLite db file

FEMA provides the NFIRS datasets as multiple CSV files covering multiple years. There are also two different table schemas for each year.

We'll load this data into a local SQLite db file, which will enable us to more easily geocode our addresses, allowing us to compare between the NFIRS and HUD REAC datasets.

For our purposes, we'll only load the "incidentaddress.txt" and "basicincident.txt" files into SQL. These files contain data on the location of the incidents and the nature of the incidents respectively.

## Note: Before running this notebook, place the NFIRS fire incident data files in your working directory, and extract both the main folder and the subfolders.

In [2]:
# Root folder that holds the extracted NFIRS downloads and the SQLite db file.
WORKING_DIRECTORY = 'D:/Fire Project/data/'

# Extracted release folder for each data year, relative to WORKING_DIRECTORY.
# Insertion order (newest first) is the order the files are loaded in.
_NFIRS_DIR_BY_YEAR = {
    2020: 'nfirs_fire_hazmat_pdr_2020/nfirs_fire_hazmat_pdr_2020/NFIRS_FIRES_2020_022322',
    2019: 'usfa_nfirs_2019_hazmat/USFA NFIRS 2019 Hazmat/NFIRS_FIRES_2019_011921',
    2018: 'usfa_nfirs_2018_hazmat/USFA NFIRS 2018 Hazmat/NFIRS_FIRES_2018_110119',
    2017: 'usfa_nfirs_2017_hazmat/USFA NFIRS 2017 Hazmat/NFIRS_FIRES_2017_020719',
    2016: 'usfa_nfirs_2016_hazmat/USFA NFIRS 2016 Hazmat/NFIRS_FIRES_2016_02-05-2018',
    2015: 'usfa_nfirs_2015_hazmat/USFA NFIRS 2015 Hazmat/NFIRS_FIRES_2015_20170215',
    2014: 'usfa_nfirs_2014_hazmat/USFA NFIRS 2014 Hazmat/NFIRS_2014_030216',
    2013: 'usfa_nfirs_2013_hazmat/USFA NFIRS 2013 Hazmat/NFIRS_2013_121514',
}

# Flat list of the folders above; this is what the loading cells iterate over.
NFIRS_PATHS = list(_NFIRS_DIR_BY_YEAR.values())

We'll create a local sqlite3 database file so that we can easily store our data as we add geocodes to the existing addresses.

In [5]:
# Create the two destination tables in fire_data.db. Both statements use
# IF NOT EXISTS, so re-running this cell leaves existing tables untouched.

# Schema for the rows read from the incidentaddress.txt files.
INCIDENT_ADDRESS_DDL = """CREATE TABLE IF NOT EXISTS incident_address (
    ID INTEGER PRIMARY KEY,
    INCIDENT_KEY TEXT,
    STATE TEXT,
    FDID INTEGER,
    INC_DATE INTEGER,
    INC_NO INTEGER,
    EXP_NO INTEGER,
    LOC_TYPE INTEGER,
    NUM_MILE INTEGER,
    STREET_PRE TEXT,
    STREETNAME TEXT,
    STREETTYPE TEXT,
    STREETSUF TEXT,
    APT_NO TEXT,
    CITY TEXT,
    STATE_ID TEXT,
    ZIP5 INTEGER,
    ZIP4 INTEGER,
    X_STREET TEXT
)"""

# Schema for the rows read from the basicincident.txt files.
# NOTE(review): the FOREIGN KEY below points at incident_address.INCIDENT_KEY,
# which is declared as plain TEXT (not PRIMARY KEY / UNIQUE), and no visible
# cell issues `PRAGMA foreign_keys = ON`. The constraint therefore looks
# documentary only; enabling enforcement would presumably need a UNIQUE
# constraint or index on the parent column first — TODO confirm intent.
BASIC_INCIDENT_DDL = """
    CREATE TABLE IF NOT EXISTS basic_incident (
        ID INTEGER PRIMARY KEY,
        STATE TEXT,
        FDID INTEGER,
        INC_DATE TEXT,
        INC_NO INTEGER,
        EXP_NO INTEGER,
        VERSION REAL,
        DEPT_STA TEXT,
        INC_TYPE INTEGER,
        ADD_WILD INTEGER,
        AID TEXT,
        ALARM INTEGER,
        ARRIVAL TEXT,
        INC_CONT TEXT,
        LU_CLEAR REAL,
        SHIFT TEXT,
        ALARMS INTEGER,
        DISTRICT INTEGER,
        ACT_TAK1 REAL,
        ACT_TAK2 INTEGER,
        ACT_TAK3 INTEGER,
        APP_MOD TEXT,
        SUP_APP INTEGER,
        EMS_APP INTEGER,
        OTH_APP INTEGER,
        SUP_PER INTEGER,
        EMS_PER INTEGER,
        OTH_PER INTEGER,
        RESOU_AID TEXT,
        PROP_LOSS REAL,
        CONT_LOSS REAL,
        PROP_VAL REAL,
        CONT_VAL REAL,
        FF_DEATH REAL,
        OTH_DEATH REAL,
        FF_INJ REAL,
        OTH_INJ REAL,
        DET_ALERT TEXT,
        HAZ_REL TEXT,
        MIXED_USE TEXT,
        PROP_USE INTEGER,
        CENSUS INTEGER,
        INCIDENT_KEY TEXT,
        FOREIGN KEY (INCIDENT_KEY) 
            REFERENCES INCIDENT_ADDRESS (INCIDENT_KEY)
    )"""

conn = sqlite3.connect(WORKING_DIRECTORY + 'fire_data.db')
try:
    cur = conn.cursor()

    cur.execute(INCIDENT_ADDRESS_DDL)
    conn.commit()

    cur.execute(BASIC_INCIDENT_DDL)
    conn.commit()
finally:
    # Release the db file even if one of the CREATE statements fails.
    conn.close()

Now we'll load the caret-delimited (`^`) text files, each called incidentaddress.txt, and append them all to the same SQL table.

The datasets from 2013-2018 don't have an INCIDENT_KEY column, so we will construct one out of the other information in the dataset. This format, with five components, is consistent with the INCIDENT_KEY field in 2019-2020.

In [6]:
# Append every year's incidentaddress.txt to the incident_address table,
# keeping only rows whose "CITY,STATE" pair appears in `unique_locs`.
#
# NOTE(review): `unique_locs` is not defined in any cell visible here (the
# execution counts jump from In [2] to In [5]). It must already exist as a
# collection of upper-case "CITY,STATE" strings before this cell runs —
# TODO confirm and restore the defining cell so Restart & Run All works.
# NOTE(review): rows are appended, so re-running this cell duplicates data.
conn = sqlite3.connect(WORKING_DIRECTORY + 'fire_data.db')
try:
    for path in NFIRS_PATHS:
        df = pd.read_csv(WORKING_DIRECTORY + path + '/incidentaddress.txt',
                         sep='^',
                         low_memory=False,
                         # Specify alternative text encoding.
                         encoding='ISO-8859-1')

        # The files from 2018 and earlier (17 columns instead of 18) have no
        # INCIDENT_KEY, so build it from its five components. Testing for the
        # column itself is sturdier than testing the column count.
        if 'INCIDENT_KEY' not in df.columns:
            incident_key = df.loc[:, ['STATE', 'FDID', 'INC_DATE', 'INC_NO', 'EXP_NO']].astype(str)
            df['INCIDENT_KEY'] = incident_key.agg('_'.join, axis=1)

        df['CITY'] = df.CITY.str.upper()
        df['STATE'] = df.STATE.str.upper()

        # CITY and STATE are already upper-cased above; a boolean mask avoids
        # adding a temporary CITYSTATE column only to drop it again.
        in_unique_locs = (df.CITY + ',' + df.STATE).isin(unique_locs)
        df = df[in_unique_locs]

        df.to_sql('incident_address',
                  conn,
                  if_exists='append',
                  index=False)
        conn.commit()
finally:
    # Release the db file even if a read or insert fails part-way through.
    conn.close()

Now, we'll load the data from the 2013-2020 basicincident.txt files, which will have a foreign key "INCIDENT_KEY" that will connect to the column with the same name in incident_address.

In [7]:
# Append every year's basicincident.txt to the basic_incident table.
# NOTE(review): rows are appended, so re-running this cell duplicates data.
conn = sqlite3.connect(WORKING_DIRECTORY + 'fire_data.db')
try:
    for path in NFIRS_PATHS:
        df = pd.read_csv(WORKING_DIRECTORY + path + '/basicincident.txt',
                         sep='^',
                         low_memory=False,
                         # Specify alternative text encoding.
                         encoding='ISO-8859-1')

        # The files from 2018 and earlier (41 columns instead of 42) have no
        # INCIDENT_KEY, so build it from its five components. Testing for the
        # column itself is sturdier than testing the column count.
        if 'INCIDENT_KEY' not in df.columns:
            incident_key = df.loc[:, ['STATE', 'FDID', 'INC_DATE', 'INC_NO', 'EXP_NO']].astype(str)
            df['INCIDENT_KEY'] = incident_key.agg('_'.join, axis=1)

        df.to_sql('basic_incident',
                  conn,
                  if_exists='append',
                  index=False)
        conn.commit()

        print('Finished:', path)
finally:
    # Release the db file even if a read or insert fails part-way through.
    conn.close()

Finished: nfirs_fire_hazmat_pdr_2020/nfirs_fire_hazmat_pdr_2020/NFIRS_FIRES_2020_022322
Finished: usfa_nfirs_2019_hazmat/USFA NFIRS 2019 Hazmat/NFIRS_FIRES_2019_011921
Finished: usfa_nfirs_2018_hazmat/USFA NFIRS 2018 Hazmat/NFIRS_FIRES_2018_110119
Finished: usfa_nfirs_2017_hazmat/USFA NFIRS 2017 Hazmat/NFIRS_FIRES_2017_020719
Finished: usfa_nfirs_2016_hazmat/USFA NFIRS 2016 Hazmat/NFIRS_FIRES_2016_02-05-2018
Finished: usfa_nfirs_2015_hazmat/USFA NFIRS 2015 Hazmat/NFIRS_FIRES_2015_20170215
Finished: usfa_nfirs_2014_hazmat/USFA NFIRS 2014 Hazmat/NFIRS_2014_030216
Finished: usfa_nfirs_2013_hazmat/USFA NFIRS 2013 Hazmat/NFIRS_2013_121514


Because we'll likely be joining on incident key a lot, we'll index those columns.

In [9]:
# Index INCIDENT_KEY on both tables to speed up joins between them.
# IF NOT EXISTS makes the cell safe to re-run; a plain CREATE INDEX raises
# once the index already exists.
# NOTE(review): the connection is left open, as in the original cell, in case
# later cells rely on `conn` / `cur` — close it once it is no longer needed.
conn = sqlite3.connect(WORKING_DIRECTORY + 'fire_data.db')
cur = conn.cursor()

cur.execute('CREATE INDEX IF NOT EXISTS idx_basic_incident_incident_key ON basic_incident (INCIDENT_KEY)')
conn.commit()

cur.execute('CREATE INDEX IF NOT EXISTS idx_incident_address_incident_key ON incident_address (INCIDENT_KEY)')
conn.commit()