In [1]:
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# Load the combined FEMA-disaster / monthly-temperature table from CSV.
# The first CSV column is used as the row index (index_col=0).
# NOTE(review): the saved output shows a warning raised from this read_csv
# call, but its text is not preserved. If it is a DtypeWarning, consider
# passing explicit dtype=... for the affected columns -- TODO confirm.
data_fn = 'fema_disaster_with_noaa_monthly_temp_combined.csv'
data_df = pd.read_csv(
    data_fn,
    index_col=0,
)

# Quick sanity check: (rows, columns), then a preview of the first rows
print(data_df.shape)
data_df.head(n=5)

(63698, 34)


  data_df = pd.read_csv(data_fn, index_col=0)


Unnamed: 0,fema_declaration_string,disaster_number,state_disaster,declaration_type,declaration_date,fy_declared,incident_type,declaration_title,ih_program_declared,ia_program_declared,...,State,Standard,Postal,year,month,state_temperature,average_temp,monthly_mean_from_1901_to_2000,centroid_lon,centroid_lat
0,DR-1-GA,1,GA,DR,1953-05-02 00:00:00+00:00,1953,Tornado,Tornado,0,1,...,Georgia,Ga.,GA,1953,5,Georgia,68.0,66.9,-83.446342,32.649229
1,DR-2-TX,2,TX,DR,1953-05-15 00:00:00+00:00,1953,Tornado,Tornado & Heavy Rainfall,0,1,...,Texas,Texas,TX,1953,5,Texas,68.5,68.6,-99.350697,31.484464
2,DR-3-LA,3,LA,DR,1953-05-29 00:00:00+00:00,1953,Flood,Flood,0,1,...,Louisiana,La.,LA,1953,5,Louisiana,70.4,69.8,-92.585662,31.058407
3,DR-4-MI,4,MI,DR,1953-06-02 00:00:00+00:00,1953,Tornado,Tornado,0,1,...,Michigan,Mich.,MI,1953,6,Michigan,60.0,58.4,-85.437167,44.350681
4,DR-5-MT,5,MT,DR,1953-06-06 00:00:00+00:00,1953,Flood,Floods,0,1,...,Montana,Mont.,MT,1953,6,Montana,51.6,54.3,-109.645149,47.033536


In [29]:
# Derive a 'date' column pinned to the first day of each (year, month) pair.
# pd.to_datetime assembles timestamps from a frame with year/month/day columns.
data_df['date'] = pd.to_datetime(
    pd.DataFrame({
        'year': data_df['year'],
        'month': data_df['month'],
        'day': 1,
    })
)
data_df.head(n=5)

Unnamed: 0,fema_declaration_string,disaster_number,state_disaster,declaration_type,declaration_date,fy_declared,incident_type,declaration_title,ih_program_declared,ia_program_declared,...,Standard,Postal,year,month,state_temperature,average_temp,monthly_mean_from_1901_to_2000,centroid_lon,centroid_lat,date
0,DR-1-GA,1,GA,DR,1953-05-02 00:00:00+00:00,1953,Tornado,Tornado,0,1,...,Ga.,GA,1953,5,Georgia,68.0,66.9,-83.446342,32.649229,1953-05-01
1,DR-2-TX,2,TX,DR,1953-05-15 00:00:00+00:00,1953,Tornado,Tornado & Heavy Rainfall,0,1,...,Texas,TX,1953,5,Texas,68.5,68.6,-99.350697,31.484464,1953-05-01
2,DR-3-LA,3,LA,DR,1953-05-29 00:00:00+00:00,1953,Flood,Flood,0,1,...,La.,LA,1953,5,Louisiana,70.4,69.8,-92.585662,31.058407,1953-05-01
3,DR-4-MI,4,MI,DR,1953-06-02 00:00:00+00:00,1953,Tornado,Tornado,0,1,...,Mich.,MI,1953,6,Michigan,60.0,58.4,-85.437167,44.350681,1953-06-01
4,DR-5-MT,5,MT,DR,1953-06-06 00:00:00+00:00,1953,Flood,Floods,0,1,...,Mont.,MT,1953,6,Montana,51.6,54.3,-109.645149,47.033536,1953-06-01


In [31]:
# Build one row per hurricane declaration between 1979-01 and 2022-10.

# Select only the columns this cell actually uses. Take an explicit copy so
# later column assignment never writes into a view of data_df.
fema_df = data_df[['date', 'incident_type', 'disaster_number']].copy()

# Filter between 1979 and 2022. The mask is built from fema_df itself rather
# than from data_df, so it does not depend on implicit index alignment.
in_date_range = (fema_df['date'] >= '1979-01-01') & (fema_df['date'] < '2022-11-01')
fema_df = fema_df.loc[in_date_range]

# Drop duplicate disaster numbers (keep the first row of each declaration)
fema_df = fema_df.drop_duplicates('disaster_number', keep='first')

# Keep only hurricane incidents; .copy() gives an independent frame so the
# column assignment below does not raise SettingWithCopyWarning.
fema_df = fema_df.loc[fema_df['incident_type'] == 'Hurricane'].copy()

# Add the calendar year of each declaration (used later as the merge key)
fema_df['year'] = fema_df['date'].dt.year

# Keep only year, date and incident_type
fema_df = fema_df[['year', 'date', 'incident_type']]

print(fema_df.shape)
fema_df.head()

(390, 3)


Unnamed: 0,year,date,incident_type
7112,1979,1979-09-01,Hurricane
7184,1979,1979-09-01,Hurricane
7185,1979,1979-09-01,Hurricane
7188,1979,1979-09-01,Hurricane
7221,1979,1979-09-01,Hurricane


In [26]:
# Load the state-level average monthly temperature table (1950-2022) from CSV.
# The first CSV column is used as the row index (index_col=0).
temp_fn = 'average_monthly_temperature_by_state_1950-2022.csv'
temp_df = pd.read_csv(
    temp_fn,
    index_col=0,
)

# Quick sanity check: (rows, columns), then a preview of the first rows
print(temp_df.shape)
temp_df.head(n=5)

(41856, 7)


Unnamed: 0,month,year,state,average_temp,monthly_mean_from_1901_to_2000,centroid_lon,centroid_lat
0,1,1950,Alabama,53.8,45.9,-86.828372,32.789832
1,1,1950,Arizona,39.6,41.1,-111.664418,34.29311
2,1,1950,Arkansas,45.6,40.4,-92.439268,34.899745
3,1,1950,California,39.4,42.7,-119.610699,37.246071
4,1,1950,Colorado,25.2,24.5,-105.547825,38.998552


In [27]:
# Create a first-of-month date from temp_df's OWN month and year columns.
# (Building it from data_df would index-align FEMA rows onto temperature rows
# and stamp unrelated dates on them.)
temp_df['date'] = pd.to_datetime(temp_df[['year', 'month']].assign(DAY=1))

# Average the temperature by year, disregarding state-level temperatures.
# Selecting 'average_temp' before aggregating keeps only the year and
# average_temp columns and avoids calling mean() on the non-numeric 'state'
# column, which raises TypeError on pandas >= 2.0.
temp_year_df = temp_df.groupby('year', as_index=False)['average_temp'].mean()

print(temp_year_df.shape)
temp_year_df.head()

(73, 2)


Unnamed: 0,year,average_temp
0,1950,50.857118
1,1951,50.825347
2,1952,51.893403
3,1953,52.992535
4,1954,52.607292


In [32]:
# Combine temperature and incidents: attach each year's average temperature
# to every hurricane row.
# - on='year' makes the join key explicit instead of relying on whichever
#   column names the two frames happen to share.
# - how='left' keeps exactly the incident rows, in their original order,
#   rather than adding temperature-only years that are filtered out below.
# - validate='many_to_one' fails loudly if temp_year_df ever holds more than
#   one row per year, which would silently duplicate incidents.
fema_temp_years_df = pd.merge(
    fema_df,
    temp_year_df,
    how='left',
    on='year',
    validate='many_to_one',
)

# Filter between 1979 and 2022
fema_temp_years_df = fema_temp_years_df.loc[
    (fema_temp_years_df['year'] >= 1979) & (fema_temp_years_df['year'] < 2023)]

# Only show 'Hurricane' incidents
fema_temp_years_df = fema_temp_years_df.loc[(fema_temp_years_df['incident_type'] == 'Hurricane')]

print(fema_temp_years_df.shape)
fema_temp_years_df.tail()

(390, 4)


Unnamed: 0,year,date,incident_type,average_temp
385,2022,2022-09-01,Hurricane,52.551562
386,2022,2022-09-01,Hurricane,52.551562
387,2022,2022-09-01,Hurricane,52.551562
388,2022,2022-09-01,Hurricane,52.551562
389,2022,2022-10-01,Hurricane,52.551562


In [33]:
### FINAL ###
# Layered Altair chart: hurricane counts (bars) against average temperature
# (line) with a regression line over the temperature, on a shared date axis.

# Data source
source = fema_temp_years_df

# Explicit x-axis extent shared by every layer built from `base`
domain = ['1979-01-01', '2022-11-01']

# Base encoding the X-axis
base = alt.Chart(source).encode(
    x = alt.X('date:T', title='Year', scale=alt.Scale(domain=domain))
).properties(
    title='Average Temp and Hurricanes',
    width=1500,
    height=600
)

# Line chart for average temperatures (average_temp aggregated per date value)
temperatures = base.mark_line(color="lightgray", strokeWidth=1).encode(
    y = alt.Y('average(average_temp):Q', title='Average Temperature (deg. F)')
)

# Regression line for average temperatures: the temperature line layered with
# a red regression of average_temp on date; axis=None hides that layer's y-axis.
temp_regression = temperatures + temperatures.transform_regression('date','average_temp').mark_line(color='red').encode(
    y = alt.Y('average(average_temp):Q', axis=None)
)

# Bar chart counting hurricane rows per date, colored by incident_type
# (source contains only 'Hurricane' rows, so a single color is used)
disasters = base.mark_bar().encode(
    y = alt.Y('count(incident_type):Q', title='Number of Hurricanes'),
    color = alt.Color('incident_type:N', title='Incident', scale=alt.Scale(scheme='category10'))
)

# Graph containing actual data only (using independent scales)
temp_disaster = (temperatures + disasters).resolve_scale(y='independent')
# NOTE(review): `temperatures` is already part of temp_disaster and is also the
# first layer of temp_regression, so the gray line is layered twice here --
# confirm whether the duplicate layer is intended.
chart = temp_disaster + temp_regression
chart