In [1]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

pd.set_option('display.max_columns', None)

df = pd.read_csv('../data/ph_fires_matched_2.1.csv')

For buildings that had an unconfined fire in 2021, how many also had an unconfined fire in a previous year?

In [2]:
for year in range(2012, 2022):
    df[f'any_111_{year}'] = df[f'inc_type_111_{year}'].apply(lambda x: 1 if x > 0 else 0)
df['any_111_years'] = df[[x for x in df.columns if 'any_111_' in x]].sum(axis=1)

In [3]:
in2021 = df[df.inc_type_111_2021 > 0]

print('% of buildings with a 111 fire in 2021 and a prior year')
len(in2021[in2021.any_111_years > 1]) / len(in2021)

% of buildings with a 111 fire in 2021 and a prior year


0.1859504132231405

In [4]:
cdf = df.copy()
cdf = cdf[cdf.total_dwelling_units > 30]

in2021 = cdf[cdf.inc_type_111_2021 > 0]

print('% of buildings (>30 units) with a 111 fire in 2021 and a prior year')
len(in2021[in2021.any_111_years > 1]) / len(in2021)

% of buildings (>30 units) with a 111 fire in 2021 and a prior year


0.5333333333333333

In [5]:
cdf = df.copy()
cdf = cdf[cdf.total_dwelling_units > 100]

in2021 = cdf[cdf.inc_type_111_2021 > 0]

print('% of buildings (>100 units) with a 111 fire in 2021 and a prior year')
len(in2021[in2021.any_111_years > 1]) / len(in2021)

% of buildings (>100 units) with a 111 fire in 2021 and a prior year


0.5428571428571428

How does that rate compare to what we would expect by chance?

In [6]:
def get_summary_stats_for_type_111(df):
    n_buildings = len(df)

    inc_type_111_annual = []
    for year in range(2012, 2022):
        inc_type_111_annual.append(
            len(df[df[f'inc_type_111_{year}'] > 0])
        )
    # Average number of buildings that have a fire across all years, 
    #     divided by total buildings in sample.
    inc_type_111_annual_rate = np.mean(inc_type_111_annual) / len(df)
    return n_buildings, inc_type_111_annual_rate


def simulate(odds_fire, num_buildings, years=10):
    outcomes = [
        {f'fire_{year}': 0 for year in range(years)}
        for _ in range(num_buildings)
    ]
    for i in range(num_buildings):
        outcomes[i]['building'] = i

    for year in range(years):
        for building in range(num_buildings):
            if np.random.rand() <= odds_fire:
                outcomes[building][f'fire_{year}'] = 1
            else:
                outcomes[building][f'fire_{year}'] = 0

    rand = pd.DataFrame(outcomes)
    rand['total'] = rand[[x for x in rand.columns if 'fire_' in x]].sum(axis=1)
    return rand


def final_year_plus_prev(n_buildings, odds_fire, n_years, n_simulations):
    final_year_plus_prev = []
    for i in range(n_simulations):
        print('Sim:', i)
        rand = simulate(odds_fire, n_buildings)
        
        for year in range(n_years):
            rand[f'any_{year}'] = rand[f'fire_{year}'].apply(lambda x: 1 if x > 0 else 0)
        rand['fire_years'] = rand[[x for x in rand.columns if 'any_' in x]].sum(axis=1)
        
        final_year_fires = rand[rand.fire_9 > 0]
        final_year_plus_prev.append(
            len(final_year_fires[final_year_fires.fire_years > 1]) / len(final_year_fires)
        )
    return np.mean(final_year_plus_prev)


YEARS = 10
SIMS = 100

# For all buildings
n_buildings, odds = get_summary_stats_for_type_111(df)
print(
    '% of all buildings with final year fire with also a previous fire.',
    final_year_plus_prev(n_buildings, odds, YEARS, SIMS)
)


# For buildings with >30 units
UNITS = 30

cdf = df.copy()
cdf = cdf[cdf.total_dwelling_units > UNITS]
n_buildings_over_100, odds_over_100 = get_summary_stats_for_type_111(cdf)

print(
    '% of >30 unit buildings with final year fire with also a previous fire.',
    final_year_plus_prev(n_buildings_over_100, odds_over_100, YEARS, SIMS)
)


# For buildings with >100 units
UNITS = 100

cdf = df.copy()
cdf = cdf[cdf.total_dwelling_units > UNITS]
n_buildings_over_100, odds_over_100 = get_summary_stats_for_type_111(cdf)

print(
    '% of >100 unit buildings with final year fire with also a previous fire.',
    final_year_plus_prev(n_buildings_over_100, odds_over_100, YEARS, SIMS)
)


Sim: 0
Sim: 1
Sim: 2
Sim: 3
Sim: 4
Sim: 5
Sim: 6
Sim: 7
Sim: 8
Sim: 9
Sim: 10
Sim: 11
Sim: 12
Sim: 13
Sim: 14
Sim: 15
Sim: 16
Sim: 17
Sim: 18
Sim: 19
Sim: 20
Sim: 21
Sim: 22
Sim: 23
Sim: 24
Sim: 25
Sim: 26
Sim: 27
Sim: 28
Sim: 29
Sim: 30
Sim: 31
Sim: 32
Sim: 33
Sim: 34
Sim: 35
Sim: 36
Sim: 37
Sim: 38
Sim: 39
Sim: 40
Sim: 41
Sim: 42
Sim: 43
Sim: 44
Sim: 45
Sim: 46
Sim: 47
Sim: 48
Sim: 49
Sim: 50
Sim: 51
Sim: 52
Sim: 53
Sim: 54
Sim: 55
Sim: 56
Sim: 57
Sim: 58
Sim: 59
Sim: 60
Sim: 61
Sim: 62
Sim: 63
Sim: 64
Sim: 65
Sim: 66
Sim: 67
Sim: 68
Sim: 69
Sim: 70
Sim: 71
Sim: 72
Sim: 73
Sim: 74
Sim: 75
Sim: 76
Sim: 77
Sim: 78
Sim: 79
Sim: 80
Sim: 81
Sim: 82
Sim: 83
Sim: 84
Sim: 85
Sim: 86
Sim: 87
Sim: 88
Sim: 89
Sim: 90
Sim: 91
Sim: 92
Sim: 93
Sim: 94
Sim: 95
Sim: 96
Sim: 97
Sim: 98
Sim: 99
% of all buildings with final year fire with also a previous fire. 0.011051509503912725
Sim: 0
Sim: 1
Sim: 2
Sim: 3
Sim: 4
Sim: 5
Sim: 6
Sim: 7
Sim: 8
Sim: 9
Sim: 10
Sim: 11
Sim: 12
Sim: 13
Sim: 14
Sim: 15
Sim: