In [285]:
import os
import re
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [286]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [287]:
# Seed value for reproducibility.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
seed = 538

In [288]:
# Directory holding the input CSVs and receiving the merged output.
# Set the FIRE_PROJECT_DATA_DIR environment variable to run the notebook on
# another machine; the original local path remains the default.
WORKING_DIRECTORY = os.environ.get('FIRE_PROJECT_DATA_DIR') or 'D:/Fire Project/data/'

# File paths are built with `WORKING_DIRECTORY + <file name>`, so the value
# must end with a path separator.
if not WORKING_DIRECTORY.endswith(('/', '\\')):
    WORKING_DIRECTORY += '/'

# Names of the input files (relative to WORKING_DIRECTORY) that feed the merge.
PATHS_TO_MERGE = [
    'public_pre2019.csv',
    'multi_pre2019.csv',
    'inc_type_pop111_pre2019.csv',
    'inc_type_pop113_pre2019.csv',
    'total_inc_pop_pre2019.csv',
    'nfirs_stats_pre2019.csv'
]

## Aggregate REAC data

In [289]:
# Read the `public` and `multi` pre-2019 extracts from the data directory.
public, multi = (
    pd.read_csv(WORKING_DIRECTORY + file_name)
    for file_name in ('public_pre2019.csv', 'multi_pre2019.csv')
)

In [290]:
# Build an upper-cased "CITY,STATE" key on both frames so they can be grouped
# and later joined on the same CITYSTATE column as the other tables.
public['CITYSTATE'] = (
    public['CITY'].str.upper() + ',' + public['STATE'].str.upper()
)
multi['CITYSTATE'] = (
    multi['CITY'].str.upper() + ',' + multi['STATE'].str.upper()
)

In [291]:
# Per-CITYSTATE mean of latitude, longitude and inspection score.
# The score column is renamed per source so both can coexist after merging;
# the result is indexed by CITYSTATE.
public_agg = (
    public[['LATITUDE', 'LONGITUDE', 'INSPECTION_SCORE', 'CITYSTATE']]
    .groupby('CITYSTATE')
    .mean()
    .rename(columns={'INSPECTION_SCORE': 'PUBLIC_INSPECTION_SCORE'})
)

multi_agg = (
    multi[['LATITUDE', 'LONGITUDE', 'INSPECTION_SCORE', 'CITYSTATE']]
    .groupby('CITYSTATE')
    .mean()
    .rename(columns={'INSPECTION_SCORE': 'MULTI_INSPECTION_SCORE'})
)

## Drop unnecessary columns from incident count DataFrames

In [292]:
# Incident-type 111 counts: keep only the join key and the renamed count column.
inc_type111 = (
    pd.read_csv(WORKING_DIRECTORY + 'inc_type_pop111_pre2019.csv')
    .rename(columns={'INC_COUNT': '111_COUNT'})
    [['CITYSTATE', '111_COUNT']]
)

In [293]:
# Incident-type 113 counts: keep only the join key and the renamed count column.
inc_type113 = (
    pd.read_csv(WORKING_DIRECTORY + 'inc_type_pop113_pre2019.csv')
    .rename(columns={'INC_COUNT': '113_COUNT'})
    [['CITYSTATE', '113_COUNT']]
)

In [294]:
# Overall incident counts with population; give both columns descriptive names.
total_inc_pop = (
    pd.read_csv(WORKING_DIRECTORY + 'total_inc_pop_pre2019.csv')
    .rename(columns={
        'ESTIMATESBASE2020': 'POPULATION',
        'INC_COUNT': 'TOTAL_INC_COUNT',
    })
)

In [296]:
# Keep only the join key, the total incident count and the population.
total_inc_pop = total_inc_pop[['CITYSTATE', 'TOTAL_INC_COUNT', 'POPULATION']]

We'll merge the counts for incident types 111 (building fire) and 113 (cooking fire) into the overall incident count DataFrame.

In [297]:
# Left-join both per-type counts onto the totals; a CITYSTATE with no row in a
# per-type table gets NaN in that table's count column.
total_inc_pop = (
    total_inc_pop
    .merge(inc_type111, how='left', on='CITYSTATE')
    .merge(inc_type113, how='left', on='CITYSTATE')
)

In [298]:
# Collapse to one row per CITYSTATE by averaging the remaining columns;
# CITYSTATE becomes the index of the result.
total_inc_pop = total_inc_pop.groupby('CITYSTATE').mean()

In [299]:
# Load the NFIRS statistics and drop the 'Unnamed: 0' column
# (presumably an index saved by an earlier to_csv — TODO confirm).
nfirs = (
    pd.read_csv(WORKING_DIRECTORY + 'nfirs_stats_pre2019.csv')
    .drop(columns='Unnamed: 0')
)

## Merge DataFrames into one.

In [300]:
# Attach incident counts and population. total_inc_pop is indexed by CITYSTATE
# after its groupby, so the key matches the index level on the right side.
# The left join keeps every nfirs row; unmatched keys get NaN.
nfirs = pd.merge(nfirs, total_inc_pop, how='left', on='CITYSTATE')

In [301]:
# Attach the averaged inspection data from both sources.
# Both aggregates carry LATITUDE/LONGITUDE, so the second merge disambiguates
# them with pandas' default suffixes: *_x comes from multi_agg, *_y from public_agg.
nfirs = (
    nfirs
    .merge(multi_agg, how='left', on='CITYSTATE')
    .merge(public_agg, how='left', on='CITYSTATE', suffixes=('_x', '_y'))
)

In [302]:
# Spot-check one city to inspect the merged columns.
nfirs.loc[nfirs['CITYSTATE'] == 'KANSAS CITY,KS']

Unnamed: 0,CITY,STATE,CITYSTATE,AVG_SPREAD_ADJ,AVG_ALARMS,AVG_MONEY_LOST,AVG_FATALITIES,AVG_INJURED,SUPPORT,TOTAL_INC_COUNT,POPULATION,111_COUNT,113_COUNT,LATITUDE_x,LONGITUDE_x,MULTI_INSPECTION_SCORE,LATITUDE_y,LONGITUDE_y,PUBLIC_INSPECTION_SCORE
3717,KANSAS CITY,KS,"KANSAS CITY,KS",0.008179,0.962406,13415.562205,0.234043,1.148936,9904,,,,,39.106343,-94.681624,75.697674,39.11481,-94.67389,91.75


In [303]:
# Persist the merged table as a comma-separated file without the row index.
merged_csv_path = WORKING_DIRECTORY + 'citystate_merged.csv'
nfirs.to_csv(merged_csv_path, index=False)