In [215]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

# Show every column when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None

# Directory (relative to this notebook) that holds the processed input files
# and receives the exported dashboard data.
WORKING_DIRECTORY = '../data/processed/'

# Merge datasets

In [216]:
# File names (relative to WORKING_DIRECTORY) of the datasets to combine.
# Order matters: the merge cells below index the loaded frames by position.
PATHS_TO_MERGE = [
    'reac_13-19.csv',            # index 0
    'top_5_incident_count.csv',  # index 1
    'total_incident_count.csv',  # index 2
    'other_nfirs_stats.csv',     # index 3
    'populations_clean.csv',     # index 4
]

Load the DataFrames into a list.

In [217]:
# One DataFrame per entry of PATHS_TO_MERGE, in the same order.
dfs = [pd.read_csv(WORKING_DIRECTORY + filename) for filename in PATHS_TO_MERGE]

Inner-join the REAC and NFIRS DataFrames on `CITYSTATE`, keeping only locations present in all of them.

In [218]:
# Chain the inner joins: only CITYSTATE values present in all four frames survive.
merged_df = (
    dfs[0]
    .merge(dfs[1], on='CITYSTATE', how='inner')
    .merge(dfs[2], on='CITYSTATE', how='inner')
    .merge(dfs[3], on='CITYSTATE', how='inner')
)

Left-join the Census population counts, so that locations without a population figure are kept (with a null `POPULATION`).

In [219]:
# Left join: rows without a matching CITYSTATE in dfs[4] are kept.
merged_df = merged_df.merge(dfs[4], how='left', on='CITYSTATE')

Calculate the fraction of null values per column.

In [220]:
# Null count per column divided by the number of rows.
merged_df.isna().sum().div(len(merged_df))

CITYSTATE               0.000000
AVG_SCORE               0.000000
LATITUDE                0.000000
LONGITUDE               0.000000
COUNT_111               0.000000
COUNT_113               0.000000
COUNT_131               0.000000
COUNT_151               0.000000
COUNT_142               0.000000
TOTAL_INCIDENT_COUNT    0.000000
AVG_SPREAD              0.000000
AVG_MONEY_LOST          0.000000
AVG_FATALITIES          0.000000
AVG_INJURIES            0.000000
AVG_ALARMS              0.000000
SUPPORT                 0.000000
POPULATION              0.086273
dtype: float64

# Clean and format merged dataset

In [221]:
def adjust_by_population(count: float, population: float):
    """Return ``count`` divided by ``population`` (a per-capita rate).

    Args:
        count: Raw count to normalise.
        population: Population to divide by.

    Returns:
        * ``0.0`` when ``count`` is zero (checked first, so this holds even
          if ``population`` is missing).
        * The string ``'Not available.'`` when either value is NaN, or when
          ``population`` is zero and the rate is therefore undefined.
        * ``count / population`` otherwise.
    """
    if count == 0:
        return 0.0

    # A zero population previously fell through every branch and returned
    # None implicitly; report it with the same sentinel as missing data.
    if math.isnan(count) or math.isnan(population) or population == 0:
        return 'Not available.'

    return count / population

# Raw count columns that get a population-adjusted "<name>_ADJ" companion.
columns_to_adjust = [
    'COUNT_111',
    'COUNT_113',
    'COUNT_131',
    'COUNT_151',
    'COUNT_142',
    'TOTAL_INCIDENT_COUNT',
]

for column in columns_to_adjust:
    # Row-wise apply; the count column is bound through a default argument.
    merged_df[f'{column}_ADJ'] = merged_df.apply(
        lambda row, count_column=column: adjust_by_population(
            row[count_column], row['POPULATION']
        ),
        axis=1,
    )

Drop the columns with raw counts.

In [222]:
# Raw count columns to remove from the merged frame.
COLUMNS_TO_DROP = [
    'COUNT_111',
    'COUNT_113',
    'COUNT_131',
    'COUNT_151',
    'COUNT_142',
    'TOTAL_INCIDENT_COUNT',
]

# drop() returns a new frame; merged_df itself keeps the raw columns.
df = merged_df.drop(columns=COLUMNS_TO_DROP)

In [223]:
# Split CITYSTATE on ',' — the first piece becomes CITY, the second STATE.
df['CITY'] = df['CITYSTATE'].str.split(',').str[0]
df['STATE'] = df['CITYSTATE'].str.split(',').str[1]

In [224]:
# CITYSTATE is now represented by the separate CITY and STATE columns.
df = df.drop(columns=['CITYSTATE'])

In [225]:
# Disabled CSV export, kept for reference:
# df.to_csv(WORKING_DIRECTORY + 'dashboard.csv',
#           sep=',',
#           index=False)

# Write the dashboard dataset as JSON into the processed-data directory.
df.to_json(path_or_buf=WORKING_DIRECTORY + 'dashboard.json')

Add fire risk index column