In [None]:
import requests
from requests import Session
import os
import pandas as pd
from pandas import DataFrame
import sqlite3
from typing import Tuple
from geopy.distance import geodesic
import matplotlib.pyplot as plt
import seaborn as sns

## Loading NFIRS incidentaddress.txt into SQLite db file

In [None]:
# Root folder that holds the extracted NFIRS releases.
# Set the FIRE_PROJECT_DATA_DIR environment variable to point at your own copy
# of the data; the original location is kept as the fallback so existing
# setups keep working unchanged.
WORKING_DIR = os.environ.get('FIRE_PROJECT_DATA_DIR') or 'D:/Fire Project/data/'
# The loading code builds file paths by concatenating onto WORKING_DIR, so make
# sure it always ends with a path separator.
if not WORKING_DIR.endswith(('/', '\\')):
    WORKING_DIR += '/'

# One release folder per year (2020 down to 2015), relative to WORKING_DIR.
NFIRS_PATHS = ['nfirs_fire_hazmat_pdr_2020/NFIRS_FIRES_2020_022322',
                 'USFA NFIRS 2019 Hazmat/NFIRS_FIRES_2019_011921',
                 'USFA NFIRS 2018 Hazmat/NFIRS_FIRES_2018_110119',
                 'USFA NFIRS 2017 Hazmat/NFIRS_FIRES_2017_020719',
                 'USFA NFIRS 2016 Hazmat/NFIRS_FIRES_2016_02-05-2018',
                 'USFA NFIRS 2015 Hazmat/NFIRS_FIRES_2015_20170215']

We'll create a local sqlite3 database file so that we can easily store our data as we add geocodes to the existing addresses.

In [None]:
# Schema for the combined incident-address table.
# The key column needs an explicit name: written as a bare
# "INTEGER PRIMARY KEY", SQLite takes the word INTEGER as the column *name*,
# which yields an untyped key column that is not a rowid alias and is left
# NULL on insert. Naming it ID makes it a real auto-assigned integer key.
# NOTE: because of IF NOT EXISTS, a fire_data.db that already contains this
# table keeps its existing schema; delete the file (or drop the table) to
# pick up a schema change.
INCIDENT_ADDRESS_DDL = """CREATE TABLE IF NOT EXISTS incident_address (
    ID INTEGER PRIMARY KEY,
    INCIDENT_KEY TEXT,
    STATE TEXT,
    FDID INTEGER,
    INC_DATE INTEGER,
    INC_NO INTEGER,
    EXP_NO INTEGER,
    LOC_TYPE INTEGER,
    NUM_MILE INTEGER,
    STREET_PRE TEXT,
    STREETNAME TEXT,
    STREETTYPE TEXT,
    STREETSUF TEXT,
    APT_NO TEXT,
    CITY TEXT,
    STATE_ID TEXT,
    ZIP5 INTEGER,
    ZIP4 INTEGER,
    X_STREET TEXT
)"""

# Create table for incidentaddresses.
# sqlite3.connect is the documented way to open a connection; the database
# file is created in the current working directory if it does not exist yet.
conn = sqlite3.connect('fire_data.db')
cur = conn.cursor()
cur.execute(INCIDENT_ADDRESS_DDL)
conn.commit()

Now we'll load our delimited text files, each called incidentaddress.txt (one per year, with fields separated by `^`), and append them all to the same SQL table.

The datasets from 2015-2018 don't have an INCIDENT_KEY column, so we will construct one from other fields in the dataset: STATE, FDID, INC_DATE, INC_NO, and EXP_NO, joined with underscores. This five-component format is consistent with the INCIDENT_KEY field in the 2019-2020 data.

In [None]:
# Load every yearly incidentaddress.txt and append it to the incident_address
# table in fire_data.db.
# NOTE: rows are appended (if_exists='append'), so running this cell again
# against the same database inserts the same rows a second time.
conn = sqlite3.connect('fire_data.db')
cur = conn.cursor()

# Append each dataframe to existing table.
for path in NFIRS_PATHS:
    df = pd.read_csv(os.path.join(WORKING_DIR, path, 'incidentaddress.txt'),
                     sep='^',
                     low_memory=False,
                     # Specify alternative text encoding.
                     encoding='ISO-8859-1')

    # Build INCIDENT_KEY only for releases that do not ship one. Checking for
    # the column itself is more robust than relying on the column count.
    if 'INCIDENT_KEY' not in df.columns:
        key_parts = df.loc[:, ['STATE', 'FDID', 'INC_DATE', 'INC_NO', 'EXP_NO']].astype(str)
        # NOTE(review): the parts are stringified after pandas has inferred
        # their dtypes, so any leading zeros in numeric-looking fields would be
        # lost -- confirm the result matches the keys shipped in later releases.
        # Vectorized join of the five parts with '_' (same strings as a
        # row-wise '_'.join, without the per-row Python call).
        df['INCIDENT_KEY'] = key_parts['STATE'].str.cat(
            key_parts[['FDID', 'INC_DATE', 'INC_NO', 'EXP_NO']], sep='_')

    # to_sql matches dataframe columns to table columns by name, so the
    # position of INCIDENT_KEY within the dataframe does not matter.
    df.to_sql('incident_address',
              conn,
              if_exists='append',
              index=False)
    conn.commit()