# 03_early_warning_demo

In [None]:
## Early Warning Demonstration (Prototype)

This notebook demonstrates a simple early-warning concept: it uses lagged and
rolling-window climate indicators (rainfall and temperature) to estimate elevated Lassa fever risk.

This is a proof-of-concept, not a deployed forecasting system.

In [35]:
# All imports for the notebook live at the top of this first code cell so that
# dependencies are visible up front and a fresh-kernel run does not rely on
# imports buried below executable code.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# Pre-built modelling table (relative to the notebook's working directory).
DATA_PATH = "../data/processed/model/lassa_era5_weekly_lagged_rolling_2018_2021.csv"

# NOTE: re-running this cell re-creates `df` from disk and therefore discards
# any columns added to `df` by later cells; re-run those cells afterwards.
df = pd.read_csv(DATA_PATH)
print(df.shape)

# Show (at most 50 of) the columns whose name contains "roll", case-insensitively.
print([c for c in df.columns if "roll" in c.lower()][:50])

(7733, 25)
['rain_mm_roll4', 'rain_mm_roll8', 'rain_mm_roll12']


In [36]:
# Candidate predictors. Only the ones actually present in `df` are used;
# the rest are reported so they can be engineered or dropped deliberately.
desired = [
    "rain_mm_lag4", "rain_mm_lag6", "temp_c_lag4",
    "rain_mm_roll4", "temp_c_roll4",
]

# Partition the candidates by availability, preserving the order of `desired`.
available_cols = set(df.columns)
features = [name for name in desired if name in available_cols]
missing = [name for name in desired if name not in available_cols]

print("Using features:", features)
print("Missing features:", missing)



Using features: ['rain_mm_lag4', 'rain_mm_lag6', 'temp_c_lag4', 'rain_mm_roll4']
Missing features: ['temp_c_roll4']


In [33]:
import numpy as np

# Order rows by state, then year, then week so that each rolling window runs
# over consecutive rows of a single state.
# NOTE(review): this treats consecutive rows as consecutive weeks, i.e. it
# assumes one row per state-week with no gaps -- TODO confirm.
df = df.sort_values(["state", "year", "week"]).reset_index(drop=True)

# Per-state rolling means of temperature. The 4-week window supplies the
# "temp_c_roll4" model feature; 8 and 12 are optional and mirror the existing
# rain_mm_roll8 / rain_mm_roll12 columns.
# min_periods equals the window size, so the first (window - 1) rows of every
# state are NaN rather than partial-window averages.
for window in (4, 8, 12):
    df[f"temp_c_roll{window}"] = (
        df.groupby("state")["temp_c"]
          .rolling(window=window, min_periods=window)
          .mean()
          # drop the "state" level added by groupby so the result aligns
          # with df's own index on assignment
          .reset_index(level=0, drop=True)
    )

print([c for c in df.columns if "temp_c_roll" in c])


['temp_c_roll4', 'temp_c_roll8', 'temp_c_roll12']


In [37]:
features = ["rain_mm_lag4", "rain_mm_lag6", "temp_c_lag4", "rain_mm_roll4", "temp_c_roll4"]

# Fail fast with an actionable message when a required column is absent.
# The usual cause is hidden notebook state: `df` was re-loaded from CSV after
# the temp_c_roll* columns had been created, which silently discards them.
missing_cols = [c for c in features + ["outbreak_week"] if c not in df.columns]
if missing_cols:
    raise KeyError(
        f"{missing_cols} not found in df. If df was re-loaded from CSV, re-run the "
        "cell that creates the temp_c_roll* columns before running this one."
    )

# Drop rows with a NaN in any feature. The 4-week rolling mean leaves the first
# 3 rows of each state empty; the lag columns may contribute further NaNs
# depending on how they were built upstream.
df_model = df.dropna(subset=features).copy()

# Design matrix and target for the classifier.
X = df_model[features]
y = df_model["outbreak_week"]


KeyError: ['temp_c_roll4']

In [31]:
# Sanity check: list every column whose name contains "roll" (case-insensitive).
print(list(filter(lambda name: "roll" in name.lower(), df.columns)))


['rain_mm_roll4', 'rain_mm_roll8', 'rain_mm_roll12']
