In [1]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

# Load the dataset that every later cell works from.
df = pd.read_csv('../data/ph_fires_matched_2.1.csv')

For buildings that had an unconfined fire in 2021, how many also had an unconfined fire in a previous year?

In [2]:
# Years covered by the per-year incident-count columns (2012 through 2021).
fire_years = range(2012, 2022)
any_111_cols = [f'any_111_{year}' for year in fire_years]

for year in fire_years:
    # 1 when the building has a positive type-111 count for that year, else 0.
    # Missing counts compare as False, so they also become 0.
    df[f'any_111_{year}'] = (df[f'inc_type_111_{year}'] > 0).astype(int)

# Number of distinct years with a type-111 incident. Sum only the flag columns
# created above, not every column whose name happens to contain 'any_111_'.
df['num_111_years'] = df[any_111_cols].sum(axis=1)

In [3]:
# Count buildings flagged in more than one year (i.e. two or more fire years).
# NOTE(review): this is not restricted to buildings with a 2021 fire, which the
# markdown question above asks about -- confirm the intended definition.
print('Buildings with two unconfined fire years.')
(df['num_111_years'] > 1).sum()

Buildings with two unconfined fire years.


157

In [4]:
# Independent subset: buildings with more than 30 dwelling units.
cdf = df[df['total_dwelling_units'] > 30].copy()

# Same count as for the full sample: buildings flagged in more than one year.
print('Buildings (>30 units) with two unconfined fire years.')
(cdf['num_111_years'] > 1).sum()

Buildings (>30 units) with two unconfined fire years.


135

In [5]:
# Independent subset: buildings with more than 100 dwelling units.
cdf = df[df['total_dwelling_units'] > 100].copy()

# Same count as for the full sample: buildings flagged in more than one year.
print('Buildings (>100 units) with two unconfined fire years.')
(cdf['num_111_years'] > 1).sum()

Buildings (>100 units) with two unconfined fire years.


78

How do those counts compare to what we would expect by chance?

In [6]:
def get_summary_stats_for_type_111(df, years=range(2012, 2022)):
    """Return the sample size and the average annual type-111 incidence rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Each row is treated as one building. Must contain a numeric
        ``inc_type_111_<year>`` column for every year in ``years``.
    years : iterable of int, optional
        Years to average over. Defaults to 2012 through 2021.

    Returns
    -------
    tuple
        ``(n_buildings, inc_type_111_annual_rate)``: the number of rows in
        ``df`` and the mean, across ``years``, of the number of rows with a
        positive count, divided by ``n_buildings``. The rate is NaN when
        ``df`` has no rows or ``years`` is empty.

    Raises
    ------
    KeyError
        If a required ``inc_type_111_<year>`` column is missing.
    """
    n_buildings = len(df)
    year_cols = [f'inc_type_111_{year}' for year in years]

    # One boolean per (building, year): did that building have any incident?
    had_fire = df[year_cols] > 0

    # The rate is undefined without buildings or years; return NaN explicitly
    # rather than relying on a divide-by-zero warning to produce it.
    if n_buildings == 0 or not year_cols:
        return n_buildings, float('nan')

    # Average number of buildings that have a fire across all years,
    #     divided by total buildings in sample.
    buildings_with_fire_per_year = had_fire.sum(axis=0)
    inc_type_111_annual_rate = buildings_with_fire_per_year.mean() / n_buildings
    return n_buildings, inc_type_111_annual_rate


def simulate(odds_fire, num_buildings, years=10, rng=None):
    """Simulate independent yearly fires and count repeat-fire buildings.

    Every building independently has a fire in each year with probability
    ``odds_fire``.

    Parameters
    ----------
    odds_fire : float
        Per-building, per-year probability of a fire.
    num_buildings : int
        Number of buildings to simulate.
    years : int, optional
        Number of years to simulate. Defaults to 10.
    rng : numpy.random.Generator, optional
        Source of randomness, for reproducible runs. When omitted, NumPy's
        global random state is used.

    Returns
    -------
    int
        Number of buildings with 2 or more years with fire.
    """
    if num_buildings <= 0 or years <= 0:
        return 0

    draw = np.random.random_sample if rng is None else rng.random

    # One uniform draw in [0, 1) per (year, building); rows are years.
    # A single array draw replaces num_buildings * years scalar RNG calls.
    had_fire = draw((years, num_buildings)) <= odds_fire

    fire_years_per_building = had_fire.sum(axis=0)
    return int(np.count_nonzero(fire_years_per_building > 1))


def buildings_with_multiple_fire_years(n_buildings, odds_fire, n_years, n_simulations):
    """Run ``simulate`` repeatedly and collect the result of every run.

    Parameters
    ----------
    n_buildings : int
        Number of buildings in each simulated sample.
    odds_fire : float
        Per-building, per-year fire probability passed to ``simulate``.
    n_years : int
        Number of years simulated in each run.
    n_simulations : int
        Number of independent runs.

    Returns
    -------
    list
        One ``simulate`` result per run, in run order.
    """
    counts = []
    for i in range(n_simulations):
        # Progress marker every tenth run.
        if i % 10 == 0:
            print('Sim:', i)
        # Forward n_years explicitly; otherwise simulate() silently falls back
        # to its own default horizon whatever the caller asked for.
        counts.append(simulate(odds_fire, n_buildings, years=n_years))

    return counts


# Simulation horizon; matches the ten yearly columns (2012-2021) that
# get_summary_stats_for_type_111 averages over by default.
YEARS = 10
# Number of Monte Carlo repetitions per sample.
SIMS = 300
# Fixed seed for NumPy's global random state, so the simulated figures below
# can be reproduced on a fresh Restart & Run All.
SEED = 0
np.random.seed(SEED)

# For all buildings
n_buildings, odds = get_summary_stats_for_type_111(df)
buildings = buildings_with_multiple_fire_years(n_buildings, odds, YEARS, SIMS)
print(
    'Buildings (all) with multiple fire years. Mean:',
    np.mean(buildings), 'STD:', np.std(buildings)
)
print('n_buildings:', n_buildings, 'odds:', odds)
print()




Sim: 0
Sim: 10
Sim: 20
Sim: 30
Sim: 40
Sim: 50
Sim: 60
Sim: 70
Sim: 80
Sim: 90
Sim: 100
Sim: 110
Sim: 120
Sim: 130
Sim: 140
Sim: 150
Sim: 160
Sim: 170
Sim: 180
Sim: 190
Sim: 200
Sim: 210
Sim: 220
Sim: 230
Sim: 240
Sim: 250
Sim: 260
Sim: 270
Sim: 280
Sim: 290
Buildings (all) with multiple fire years. Mean: 12.056666666666667 STD: 3.6977635883808952
n_buildings: 179043 odds: 0.0012449523298872338



In [7]:
# --- Buildings with more than 30 dwelling units ---
UNITS = 30

# Independent subset of the full sample, then the same chance baseline as above.
cdf = df[df['total_dwelling_units'] > UNITS].copy()
n_buildings_over_30, odds_over_30 = get_summary_stats_for_type_111(cdf)
buildings = buildings_with_multiple_fire_years(
    n_buildings_over_30, odds_over_30, YEARS, SIMS
)
sim_mean, sim_std = np.mean(buildings), np.std(buildings)
print('Buildings (>30 units) with multiple fire years. Mean:', sim_mean, 'STD:', sim_std)
print('n_buildings:', n_buildings_over_30, 'odds:', odds_over_30)
print()


# --- Buildings with more than 100 dwelling units ---
UNITS = 100

cdf = df[df['total_dwelling_units'] > UNITS].copy()
n_buildings_over_100, odds_over_100 = get_summary_stats_for_type_111(cdf)
buildings = buildings_with_multiple_fire_years(
    n_buildings_over_100, odds_over_100, YEARS, SIMS
)
sim_mean, sim_std = np.mean(buildings), np.std(buildings)
print('Buildings (>100 units) with multiple fire years. Mean:', sim_mean, 'STD:', sim_std)
print('n_buildings:', n_buildings_over_100, 'odds:', odds_over_100)
print()

Sim: 0
Sim: 10
Sim: 20
Sim: 30
Sim: 40
Sim: 50
Sim: 60
Sim: 70
Sim: 80
Sim: 90
Sim: 100
Sim: 110
Sim: 120
Sim: 130
Sim: 140
Sim: 150
Sim: 160
Sim: 170
Sim: 180
Sim: 190
Sim: 200
Sim: 210
Sim: 220
Sim: 230
Sim: 240
Sim: 250
Sim: 260
Sim: 270
Sim: 280
Sim: 290
Buildings (>30 units) with multiple fire years. Mean: 47.403333333333336 STD: 6.9909934121617425
n_buildings: 4064 odds: 0.016929133858267716

Sim: 0
Sim: 10
Sim: 20
Sim: 30
Sim: 40
Sim: 50
Sim: 60
Sim: 70
Sim: 80
Sim: 90
Sim: 100
Sim: 110
Sim: 120
Sim: 130
Sim: 140
Sim: 150
Sim: 160
Sim: 170
Sim: 180
Sim: 190
Sim: 200
Sim: 210
Sim: 220
Sim: 230
Sim: 240
Sim: 250
Sim: 260
Sim: 270
Sim: 280
Sim: 290
Buildings (>100 units) with multiple fire years. Mean: 36.13666666666666 STD: 5.605769845039623
n_buildings: 1333 odds: 0.02663165791447862

