# Seasonality Modeling of H5N1 Cases

In [1]:
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

In [3]:
# Load the combined case-count sheet; "Collection Date" is parsed to datetimes
# so it can later serve as a time index.
excel_path = "h5n1_case_data.xlsx"
case_data = pd.read_excel(
    excel_path,
    sheet_name="Combined Timeseries",
    parse_dates=["Collection Date"],
)

case_data

Unnamed: 0,Collection Date,Mammalian (Global),Mammalian (USA only),Avian (USA only),Avian (North America wo USA),Total
0,2005-01-01,0,0,0,0,0
1,2005-02-01,0,0,0,0,0
2,2005-03-01,0,0,0,0,0
3,2005-04-01,0,0,0,0,0
4,2005-05-01,0,0,0,0,0
...,...,...,...,...,...,...
247,2025-08-01,0,0,0,0,0
248,2025-09-01,0,25,0,0,25
249,2025-10-01,0,0,0,0,0
250,2025-11-01,0,8,0,0,8


In [8]:
# Index the table by collection date once, then carve out one single-column
# frame per case series (each keeps the date index).
_cases_by_date = case_data.set_index("Collection Date")

mammal_global_cases = _cases_by_date[["Mammalian (Global)"]]
mammal_usa_cases = _cases_by_date[["Mammalian (USA only)"]]
avian_usa_cases = _cases_by_date[["Avian (USA only)"]]
avian_na_cases = _cases_by_date[["Avian (North America wo USA)"]]
total_cases = _cases_by_date[["Total"]]


## Time Series Decomposition

Time series decomposition divides the data into its trend, seasonal, and residual components.

In [10]:
## Perform time series decomposition
##
## NOTE: `seasonal_decompose` is the classical moving-average decomposition and
## only supports model="additive" or model="multiplicative". The previous value
## 'stl' is not a recognised option; it was silently treated as additive (the
## NaN trend/residual values at the series edges are the signature of the
## centred moving average). The model is now named explicitly so the code says
## what it actually computes. Results are unchanged.
## For a true STL (LOESS) decomposition, use `statsmodels.tsa.seasonal.STL`.
## The seasonal period is inferred from the frequency of the date index.
DECOMP_MODEL = "additive"

mammal_global_decomp = seasonal_decompose(mammal_global_cases, model=DECOMP_MODEL)

mammal_usa_decomp = seasonal_decompose(mammal_usa_cases, model=DECOMP_MODEL)

avian_usa_decomp = seasonal_decompose(avian_usa_cases, model=DECOMP_MODEL)

avian_na_decomp = seasonal_decompose(avian_na_cases, model=DECOMP_MODEL)

total_decomp = seasonal_decompose(total_cases, model=DECOMP_MODEL)

## Call `.plot()` on any of the results above to view its components.

In [None]:
# Inspect the first 12 rows of the seasonal component of the total series.
# (The original cell was an unfinished attribute access, which is a
# SyntaxError and stops a full "Run All".)
total_decomp.seasonal.head(12)

In [23]:
## Collect every component of every decomposition into one wide table.
## Columns are named "<component>_<set>", grouped by set in the order below.
decomp_by_set = {
    "mammal_global": mammal_global_decomp,
    "mammal_usa": mammal_usa_decomp,
    "avian_usa": avian_usa_decomp,
    "avian_na": avian_na_decomp,
    "total": total_decomp,
}
component_names = ("resid", "seasonal", "trend", "weights")

case_decomp_df = pd.DataFrame({
    f"{component}_{set_name}": getattr(decomp, component)
    for set_name, decomp in decomp_by_set.items()
    for component in component_names
})

## Rename 'Collection Date' to 'collection_date' for consistency
case_decomp_df.index.name = 'collection_date'
## Turn index into a column
case_decomp_df = case_decomp_df.reset_index()

case_decomp_df.head()

Unnamed: 0,collection_date,resid_mammal_global,seasonal_mammal_global,trend_mammal_global,weights_mammal_global,resid_mammal_usa,seasonal_mammal_usa,trend_mammal_usa,weights_mammal_usa,resid_avian_usa,...,trend_avian_usa,weights_avian_usa,resid_avian_na,seasonal_avian_na,trend_avian_na,weights_avian_na,resid_total,seasonal_total,trend_total,weights_total
0,2005-01-01,,0.351736,,1.0,,-8.928993,,1.0,,...,,1.0,,-6.372049,,1.0,,-24.417014,,1.0
1,2005-02-01,,-0.098264,,1.0,,-8.128993,,1.0,,...,,1.0,,-4.822049,,1.0,,-3.617014,,1.0
2,2005-03-01,,0.251736,,1.0,,1.918924,,1.0,,...,,1.0,,-4.822049,,1.0,,-1.169097,,1.0
3,2005-04-01,,-0.648264,,1.0,,4.61684,,1.0,,...,,1.0,,10.977951,,1.0,,3.178819,,1.0
4,2005-05-01,,-1.298264,,1.0,,0.850174,,1.0,,...,,1.0,,12.277951,,1.0,,5.262153,,1.0


In [28]:
## Reshape wide -> long: one row per (collection_date, set), one column per
## decomposition component. Everything after the first '_' in a wide column
## name (e.g. "mammal_global") becomes the value of the 'set' index level.
component_stubs = ['resid', 'seasonal', 'trend', 'weights']

case_decomp_df_long = pd.wide_to_long(
    case_decomp_df,
    stubnames=component_stubs,
    i=['collection_date'],
    j='set',
    sep='_',
    suffix=r'\w+',
)

case_decomp_df_long.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,resid,seasonal,trend,weights
collection_date,set,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-01,mammal_global,,0.351736,,1.0
2005-02-01,mammal_global,,-0.098264,,1.0
2005-03-01,mammal_global,,0.251736,,1.0
2005-04-01,mammal_global,,-0.648264,,1.0
2005-05-01,mammal_global,,-1.298264,,1.0


In [29]:
## Persist the long-format table; the (collection_date, set) index levels are
## written as the leading columns of the CSV.
output_csv = "case_decomp_df_long.csv"
case_decomp_df_long.to_csv(output_csv)