In [2]:
import os
import re
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.options.display.max_columns = None

In [4]:
# Passed as random_state when sampling rows for display, so the preview is repeatable.
seed: int = 538

# Get basic stats on fire incidents from NFIRS by city

In this notebook, we'll get basic summary stats for our cities based on the NFIRS data. The goal is to calculate:
* Average fire spread (defined by number of other structures impacted)
* Average deaths per fire reported
* Fire incidents per capita

In [5]:
# Directory holding the project's CSV extracts and the SQLite database.
# Defaults to the original local path; set the FIRE_PROJECT_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
WORKING_DIRECTORY = os.environ.get('FIRE_PROJECT_DATA_DIR', 'D:/Fire Project/data/')

# Later cells build file paths by plain string concatenation, so the value must
# end with a path separator.
if not WORKING_DIRECTORY.endswith(('/', '\\')):
    WORKING_DIRECTORY += '/'

We'll load our cleaned REAC data for comparison.

In [6]:
# Cleaned, aggregated REAC extracts: one frame per housing category.
public = pd.read_csv(
    filepath_or_buffer=WORKING_DIRECTORY + 'clean_agg_public.csv',
    sep=',',
)
multi = pd.read_csv(
    filepath_or_buffer=WORKING_DIRECTORY + 'clean_agg_multi.csv',
    sep=',',
)

In [7]:
# Build a "CITY,STATE" join key on both REAC frames.
public['CITYSTATE'] = public['CITY'] + ',' + public['STATE']
multi['CITYSTATE'] = multi['CITY'] + ',' + multi['STATE']

## Calculate average spread

In [8]:
# Open the SQLite database through the module's documented factory function.
conn = sqlite3.connect(WORKING_DIRECTORY + 'fire_data.db')

Let's check the proportion of missing values in the fields we care about.

EXP_NO is the number of other structures/vehicles to which the fire spreads. By calculating the average exposure number by city, we can see which cities are most likely to see fires spread.

* PROP_LOSS and CONT_LOSS are the dollar values of lost property and lost contents (anything inside a burning structure/vehicle). There are also PROP_VAL and CONT_VAL, but these measure the pre-fire value.
* DET_ALERT (Detector alert) indicates whether a fire detector alerted the occupants or not. Options are 1 (yes alerted), 2 (not alerted), U (unknown), and NULL (not reported)

Here is the SQL expression that extracts the year from INC_DATE:
* SUBSTR(ia.INC_DATE, LENGTH(ia.INC_DATE) - 3, 4)

In [9]:
# 4 mins to run
# Per-city aggregates over basic_incident joined to incident_address on INCIDENT_KEY:
# mean EXP_NO, mean ALARMS, mean combined property + contents loss, mean combined
# deaths, mean combined injuries, and the number of joined rows (SUPPORT).
# NOTE(review): in SQL `a + b` is NULL when either operand is NULL, and AVG() skips
# NULLs, so each averaged sum only covers rows where BOTH of its columns are populated.
# Confirm that is intended; wrapping each column in COALESCE(col, 0) would instead
# treat blanks as zero.
spread = pd.read_sql("""
    SELECT ia.CITY || ',' || ia.STATE 
                AS CITYSTATE, 
            AVG(ia.EXP_NO) 
                AS AVG_SPREAD,
            AVG(bi.ALARMS)
                AS AVG_ALARMS,
            AVG(bi.PROP_LOSS + bi.CONT_LOSS)
                AS AVG_MONEY_LOST,
            AVG(bi.OTH_DEATH + bi.FF_DEATH)
                AS AVG_FATALITIES,
            AVG(bi.OTH_INJ + bi.FF_INJ)
                AS AVG_INJURED,
            COUNT(*)
                AS SUPPORT
    FROM basic_incident bi JOIN incident_address ia
        USING (INCIDENT_KEY)
    GROUP BY ia.CITY, ia.STATE
""", conn)
# Preview five rows; random_state=seed keeps the sample the same across runs.
spread.sample(n=5, random_state=seed)

Unnamed: 0,CITYSTATE,AVG_SPREAD,AVG_ALARMS,AVG_MONEY_LOST,AVG_FATALITIES,AVG_INJURED,SUPPORT
5838,"PIERRE,SD",0.002028,0.0,18582.818533,0.0,1.0,493
4420,"MANASSA,CO",0.037975,,7705.405405,,,79
6020,"POWERS,MI",0.0,,3256.756757,,,41
6770,"SHOSHONE,ID",0.0,0.0,0.0,,,85
215,"ARANSAS PASS,TX",0.000779,0.997608,2206.695652,1.0,0.0,1283


## Incident counts, weighted by population

We'll start by downloading the Census population estimates for US incorporated places and minor civil divisions, covering April 1, 2020 to July 1, 2021, from [the Census.gov website](https://www.census.gov/data/tables/time-series/demo/popest/2020s-total-cities-and-towns.html). [Direct link.](https://www2.census.gov/programs-surveys/popest/datasets/2020-2021/cities/totals/sub-est2021_all.csv)

In [275]:
# Load the Census population estimates file, decoding it as ISO-8859-1.
pop_counts = pd.read_csv(WORKING_DIRECTORY + 'sub-est2021_all.csv',
                         sep=',',
                         encoding='ISO-8859-1')
# Seeded like the `spread` preview so the displayed row is the same on every run.
pop_counts.sample(random_state=seed)

Unnamed: 0,SUMLEV,STATE,COUNTY,PLACE,COUSUB,CONCIT,PRIMGEO_FLAG,FUNCSTAT,NAME,STNAME,ESTIMATESBASE2020,POPESTIMATE2020,POPESTIMATE2021
6386,162,13,0,62328,0,0,0,A,Port Wentworth city,Georgia,10846,10877,11746


In [276]:
# Keep only the place name, the state name, and the 2020 estimates-base column.
pop_counts = pop_counts.loc[:, ['NAME', 'STNAME', 'ESTIMATESBASE2020']]
# Seeded like the `spread` preview so the displayed row is the same on every run.
pop_counts.sample(random_state=seed)

Unnamed: 0,NAME,STNAME,ESTIMATESBASE2020
25084,Balance of Hayes township,Kansas,66


In [277]:
# Upper-case the place and state names before any string matching is done on them.
pop_counts['NAME'] = pop_counts['NAME'].str.upper()
pop_counts['STNAME'] = pop_counts['STNAME'].str.upper()

The state names are spelled out in full, so we'll map them to two-letter state codes.

In [192]:
# Upper-cased full state name -> two-letter postal code.
# Includes the District of Columbia: a name missing from this mapping becomes NaN
# when it is used with Series.map, which in turn makes the CITYSTATE key NaN.
state_dict = {
    "ALABAMA": "AL",
    "ALASKA": "AK",
    "ARIZONA": "AZ",
    "ARKANSAS": "AR",
    "CALIFORNIA": "CA",
    "COLORADO": "CO",
    "CONNECTICUT": "CT",
    "DELAWARE": "DE",
    "DISTRICT OF COLUMBIA": "DC",
    "FLORIDA": "FL",
    "GEORGIA": "GA",
    "HAWAII": "HI",
    "IDAHO": "ID",
    "ILLINOIS": "IL",
    "INDIANA": "IN",
    "IOWA": "IA",
    "KANSAS": "KS",
    "KENTUCKY": "KY",
    "LOUISIANA": "LA",
    "MAINE": "ME",
    "MARYLAND": "MD",
    "MASSACHUSETTS": "MA",
    "MICHIGAN": "MI",
    "MINNESOTA": "MN",
    "MISSISSIPPI": "MS",
    "MISSOURI": "MO",
    "MONTANA": "MT",
    "NEBRASKA": "NE",
    "NEVADA": "NV",
    "NEW HAMPSHIRE": "NH",
    "NEW JERSEY": "NJ",
    "NEW MEXICO": "NM",
    "NEW YORK": "NY",
    "NORTH CAROLINA": "NC",
    "NORTH DAKOTA": "ND",
    "OHIO": "OH",
    "OKLAHOMA": "OK",
    "OREGON": "OR",
    "PENNSYLVANIA": "PA",
    "RHODE ISLAND": "RI",
    "SOUTH CAROLINA": "SC",
    "SOUTH DAKOTA": "SD",
    "TENNESSEE": "TN",
    "TEXAS": "TX",
    "UTAH": "UT",
    "VERMONT": "VT",
    "VIRGINIA": "VA",
    "WASHINGTON": "WA",
    "WEST VIRGINIA": "WV",
    "WISCONSIN": "WI",
    "WYOMING": "WY"
}


In [278]:
# Replace each spelled-out state name with its two-letter code.
# Values that are not keys of state_dict come back as NaN, so running this cell a
# second time (when STNAME already holds codes) would blank the column.
pop_counts['STNAME'] = pop_counts['STNAME'].str.upper().map(state_dict)

In [287]:
# Full state names (the dict keys) and their two-letter codes (the dict values).
state_names = state_dict.keys()
state_codes = state_dict.values()

# name_mask: NAME is itself a state name; code_mask: STNAME is a known state code.
name_mask = pop_counts['NAME'].isin(state_names)
code_mask = pop_counts['STNAME'].isin(state_codes)

# Drop rows where both hold, i.e. keep every row that fails at least one of the tests.
pop_counts = pop_counts.loc[~(name_mask & code_mask)]

In [300]:
# Remove rows whose NAME ends with the word COUNTY.
pop_counts = pop_counts.loc[~pop_counts['NAME'].str.endswith('COUNTY')]

When the Census lists parts of an incorporated place (denoted with (PT.)), these parts are also included together as one entry. For example, NYC has counts for five of its parts, and also one row that sums them together. 

In [311]:
# Show every row whose NAME begins with NEW YORK CITY (the total plus its parts).
pop_counts.loc[pop_counts['NAME'].str.startswith('NEW YORK CITY')]

Unnamed: 0,NAME,STNAME,ESTIMATESBASE2020
46597,NEW YORK CITY,NY,8804190
46928,NEW YORK CITY (PT.),NY,1472654
47860,NEW YORK CITY (PT.),NY,2736074
48253,NEW YORK CITY (PT.),NY,1694251
48743,NEW YORK CITY (PT.),NY,2405464
48786,NEW YORK CITY (PT.),NY,495747


We'll drop the parts for this analysis.

In [312]:
# Remove the partial-place rows, which are marked with a trailing "(PT.)".
pop_counts = pop_counts.loc[~pop_counts['NAME'].str.endswith('(PT.)')]

In [314]:
# Strip the legal designation (CITY, BOROUGH, VILLAGE, TOWN, TOWNSHIP) from each name.
#
# Only ONE designation is removed, and only when it is the last word of the name or
# the last word before a trailing parenthetical such as "(BALANCE)". Removing every
# " CITY" / " TOWN" / ... found anywhere, one pattern after another, also deletes
# words that belong to the name itself: "OLD TOWN CITY" would end up as "OLD" rather
# than "OLD TOWN", and "JERSEY VILLAGE CITY" as "JERSEY" rather than "JERSEY VILLAGE".
#
# The lookahead leaves any parenthetical in place, and because the match has to sit
# at the end, a doubled word like "JERSEY CITY CITY" loses only its final "CITY".
patterns = [
    re.compile(
        r'\s(?:TOWNSHIP|BOROUGH|VILLAGE|CITY|TOWN)(?=(?:\s*\([^)]*\))?\s*$)',
        re.IGNORECASE,
    ),
    ]

for pattern in patterns:
    # str(x) keeps non-string values (e.g. NaN) from raising inside re.sub.
    pop_counts['NAME'] = pop_counts['NAME'].apply(lambda x: pattern.sub('', str(x)))

In [316]:
# "NAME,STATE" join key, upper-cased so it lines up with the keys built from the incident data.
pop_counts['CITYSTATE'] = (
    pop_counts['NAME'].str.upper()
    + ','
    + pop_counts['STNAME'].str.upper()
)

In [317]:
# Keep only places with a positive population, so the per-capita division never hits zero.
pop_counts = pop_counts.loc[pop_counts['ESTIMATESBASE2020'] > 0]

In [318]:
# Remove rows that repeat an earlier row in every column, keeping the first occurrence.
pop_counts = pop_counts.loc[~pop_counts.duplicated(keep='first')]

In [16]:
# Number of incident_address rows recorded for each city/state pair.
total_inc_counts = pd.read_sql(
    sql="""
    SELECT CITY || ',' || STATE 
                AS CITYSTATE, 
            COUNT(*) AS INC_COUNT
    FROM incident_address
    GROUP BY CITY, STATE
""",
    con=conn,
)

In [17]:
# Incident counts per city/state pair, broken down by incident type.
inc_type_counts = pd.read_sql(
    sql="""
    SELECT ia.CITY || ',' || ia.STATE 
                AS CITYSTATE, 
            bi.INC_TYPE 
                AS INC_TYPE,
            COUNT(bi.ID) 
                AS INC_COUNT
    FROM basic_incident bi JOIN incident_address ia
        USING (INCIDENT_KEY)
    GROUP BY ia.CITY, ia.STATE, bi.INC_TYPE
""",
    con=conn,
)

In [320]:
# Attach population to each city's incident count; cities absent from either side are dropped.
# NOTE(review): pop_counts is only de-duplicated on whole rows, so a CITYSTATE shared by
# several places would produce one output row per match — confirm keys are unique.
total_inc_pop = total_inc_counts.merge(
    right=pop_counts,
    on='CITYSTATE',
    how='inner',
)

In [322]:
# Incidents per resident: raw incident count divided by the 2020 estimates-base population.
total_inc_pop['INC_COUNT_ADJ'] = (
    total_inc_pop['INC_COUNT'] / total_inc_pop['ESTIMATESBASE2020']
)

In [324]:
# The 25 cities with the fewest incidents per resident, in ascending order.
(
    total_inc_pop[['CITYSTATE', 'INC_COUNT_ADJ']]
    .sort_values('INC_COUNT_ADJ')
    .head(25)
)

Unnamed: 0,CITYSTATE,INC_COUNT_ADJ
5818,"NEW YORK,NY",1e-06
8830,"WHEATFIELD,NY",5.4e-05
5621,"MOUNT WASHINGTON,KY",5.5e-05
1122,"BURRILLVILLE,RI",6.2e-05
1997,"DANIA BEACH,FL",9.4e-05
3387,"HALLANDALE BEACH,FL",9.7e-05
7414,"SHEFFIELD,AL",0.000106
5588,"MOUNT OLIVE,NJ",0.000139
7187,"SALINA,NY",0.00015
559,"BEEVILLE,TX",0.000219


### Get average time from alarm to response

### Total incident count by year and city, adjusted for population

### Average severity of fires in city, as measured by deaths per fire, number of alarms triggered? Other information about property damage?

### Proportion of different types of causes

### Demographic factors related to deaths.