In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import geopandas as gpd
from shapely.geometry import Point

# Never truncate columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = None

# Base path that the CSV inputs are read from and the JSON export is written to.
WORKING_DIRECTORY = '../data/processed/'

# Merge datasets

In [17]:
# File names are relative to WORKING_DIRECTORY.
# Order matters: the merge cells below address the loaded frames by position
# (indices 0-3 are inner-joined, index 4 is left-joined).
PATHS_TO_MERGE = [
    'reac_13-19.csv',            # index 0
    'top_5_incident_count.csv',  # index 1
    'total_incident_count.csv',  # index 2
    'other_nfirs_stats.csv',     # index 3
    'populations_clean.csv',     # index 4
]

Load the DataFrames into a list.

In [18]:
# One DataFrame per entry of PATHS_TO_MERGE, in the same order.
dfs = [pd.read_csv(WORKING_DIRECTORY + filename) for filename in PATHS_TO_MERGE]

Inner-join the REAC and NFIRS DataFrames on `CITYSTATE`, keeping only locations that appear in all four.

In [19]:
# Chain the three inner joins; a CITYSTATE must be present in every frame to survive.
merged_df = (
    dfs[0]
    .merge(dfs[1], on='CITYSTATE', how='inner')
    .merge(dfs[2], on='CITYSTATE', how='inner')
    .merge(dfs[3], on='CITYSTATE', how='inner')
)

Do a left join to add the Census population count to the locations that have one.

In [20]:
# Left join keeps every row; rows with no match in dfs[4] get NaN in its columns.
merged_df = merged_df.merge(dfs[4], on='CITYSTATE', how='left')

Calculate nulls per column.

In [21]:
# Fraction of missing values in each column.
merged_df.isna().sum().div(len(merged_df))

CITYSTATE               0.000000
AVG_SCORE               0.000000
LATITUDE                0.000000
LONGITUDE               0.000000
COUNT_111               0.000000
COUNT_113               0.000000
COUNT_131               0.000000
COUNT_151               0.000000
COUNT_142               0.000000
TOTAL_INCIDENT_COUNT    0.000000
AVG_SPREAD              0.000000
AVG_MONEY_LOST          0.000000
AVG_FATALITIES          0.000000
AVG_INJURIES            0.000000
AVG_ALARMS              0.000000
SUPPORT                 0.000000
POPULATION              0.086273
dtype: float64

# Clean and format merged dataset

In [22]:
def adjust_by_population(count: float, population: float):
    """Return ``count`` divided by ``population``, or a sentinel if unavailable.

    Args:
        count: Numeric count to normalise (the notebook passes the raw
            incident-count columns).
        population: Population to divide by; may be NaN when no population
            figure was joined for the row.

    Returns:
        ``0.0`` when ``count`` is zero (checked first, so this holds even if
        ``population`` is NaN or zero); the string ``'Not available.'`` when
        either argument is NaN or when ``population`` is zero; otherwise the
        float ``count / population``.
    """
    if count == 0:
        return 0.0

    if math.isnan(count) or math.isnan(population):
        return 'Not available.'

    # A zero population makes the rate undefined. Without this guard the
    # function fell off the end and returned None implicitly.
    if population == 0:
        return 'Not available.'

    return count / population

# Raw count columns that each get a population-adjusted `<name>_ADJ` twin.
columns_to_adjust = [
    'COUNT_111', 'COUNT_113', 'COUNT_131', 'COUNT_151', 'COUNT_142',
    'TOTAL_INCIDENT_COUNT',
]

for column in columns_to_adjust:
    # Row-wise because adjust_by_population works on scalars and may return
    # a string sentinel instead of a number.
    merged_df[f'{column}_ADJ'] = merged_df.apply(
        lambda row: adjust_by_population(row[column], row['POPULATION']),
        axis=1,
    )

Drop the columns with raw counts.

In [23]:
# Raw counts to remove; their `_ADJ` counterparts stay in the frame.
COLUMNS_TO_DROP = [
    'COUNT_111', 'COUNT_113', 'COUNT_131', 'COUNT_151', 'COUNT_142',
    'TOTAL_INCIDENT_COUNT',
]
# drop() returns a new frame, so merged_df itself is left intact.
df = merged_df.drop(columns=COLUMNS_TO_DROP)

In [24]:
# CITYSTATE is comma-separated: the first piece becomes CITY, the second STATE.
df['CITY'] = df['CITYSTATE'].str.split(',').str[0]
df['STATE'] = df['CITYSTATE'].str.split(',').str[1]

In [25]:
# The combined key is redundant once CITY and STATE exist as separate columns.
df = df.drop(columns=['CITYSTATE'])

In [26]:
# Spot-check five random rows (unseeded, so the selection varies between runs).
df.sample(n=5)

Unnamed: 0,AVG_SCORE,LATITUDE,LONGITUDE,AVG_SPREAD,AVG_MONEY_LOST,AVG_FATALITIES,AVG_INJURIES,AVG_ALARMS,SUPPORT,POPULATION,COUNT_111_ADJ,COUNT_113_ADJ,COUNT_131_ADJ,COUNT_151_ADJ,COUNT_142_ADJ,TOTAL_INCIDENT_COUNT_ADJ,CITY,STATE
6339,91.666667,41.993003,-73.199431,0.0,3.861004,0.0,0.007722,0.003861,259,1592.0,0.009422,0.054648,0.002513,0.008166,0.000628,0.099874,NORFOLK,CT
899,80.0,40.145905,-74.703372,0.0,432.050857,0.0,0.001658,0.048646,1809,3991.0,0.03984,0.058632,0.021799,0.016537,0.007016,0.19995,BORDENTOWN,NJ
4684,85.0,48.044056,-98.353826,0.0,14159.090909,0.0,0.0,0.0,22,51.0,0.0,0.0,0.019608,0.078431,0.0,0.372549,LAKOTA,ND
5320,96.75,45.950643,-86.2403,0.0,18157.425743,0.0,0.0,0.623762,101,1050.0,0.02,0.002857,0.006667,0.005714,0.000952,0.06381,MANISTIQUE,MI
4634,90.666667,30.206595,-82.646386,0.017487,4206.628464,0.000807,0.004305,0.85795,3717,12478.0,0.018673,0.003686,0.020356,0.031335,0.02268,0.146418,LAKE CITY,FL


Export to JSON.

In [29]:
# orient='records' writes a JSON array containing one object per row.
df.to_json(
    path_or_buf=WORKING_DIRECTORY + 'dashboard.json',
    orient='records',
)

Add fire risk index column

In [31]:
# Spot-check five random rows (unseeded, so the selection varies between runs).
df.sample(n=5)

Unnamed: 0,AVG_SCORE,LATITUDE,LONGITUDE,AVG_SPREAD,AVG_MONEY_LOST,AVG_FATALITIES,AVG_INJURIES,AVG_ALARMS,SUPPORT,POPULATION,COUNT_111_ADJ,COUNT_113_ADJ,COUNT_131_ADJ,COUNT_151_ADJ,COUNT_142_ADJ,TOTAL_INCIDENT_COUNT_ADJ,CITY,STATE
9021,90.75,43.557532,-90.891785,0.0,2689.124668,0.0,0.005305,0.0,377,1752.0,0.030822,0.006279,0.00742,0.006279,0.001142,0.082192,VIROQUA,WI
7715,89.0,44.330621,-93.965019,0.036364,10819.69697,0.0,0.0,0.0,165,,Not available.,Not available.,Not available.,Not available.,Not available.,Not available.,SAINT PETER,MN
3362,77.5,43.682814,-70.444443,0.001081,1284.486486,0.001081,0.007568,0.007568,925,18366.0,0.004247,0.00196,0.00147,0.000817,0.000272,0.01481,GORHAM,ME
3389,44.5,32.530461,-92.718074,0.005848,704.093567,0.005848,0.017544,0.0,171,5128.0,0.00741,0.002535,0.00273,0.00273,0.001365,0.028471,GRAMBLING,LA
7453,96.0,40.978918,-74.121058,0.0,1213.635009,0.0,0.001278,0.985094,2348,25985.0,0.002078,0.003348,0.000808,0.002463,0.000539,0.013739,RIDGEWOOD,NJ
