In [1]:
import geopandas as gpd
import pandas as pd

# Load dengue data with temporal lags
gdf = gpd.read_file("../data/processed/india_dengue_state_year.geojson")

gdf["cases"] = pd.to_numeric(gdf["cases"], errors="coerce")
gdf["year"] = pd.to_numeric(gdf["year"], errors="coerce")

# Sort properly
gdf = gdf.sort_values(by=["state", "year"]).reset_index(drop=True)

# Recreate temporal lag (if not saved earlier)
gdf["cases_lag1"] = gdf.groupby("state")["cases"].shift(1)

# Load adjacency list
adj_df = pd.read_csv("../data/processed/state_adjacency.csv")

gdf.head(), adj_df.head()


(     GID_1 GID_0 COUNTRY state_std VARNAME_1 NL_NAME_1 TYPE_1 ENGTYPE_1 CC_1  \
 0  IND.4_1   IND   India     Assam        NA        NA  State     State   NA   
 1  IND.4_1   IND   India     Assam        NA        NA  State     State   NA   
 2  IND.4_1   IND   India     Assam        NA        NA  State     State   NA   
 3  IND.4_1   IND   India     Assam        NA        NA  State     State   NA   
 4  IND.4_1   IND   India     Assam        NA        NA  State     State   NA   
 
   HASC_1  ISO_1  state    year   cases deaths  \
 0  IN.AS  IN-AS  Assam  2019.0   196.0      0   
 1  IN.AS  IN-AS  Assam  2020.0    33.0      0   
 2  IN.AS  IN-AS  Assam  2021.0   103.0      0   
 3  IN.AS  IN-AS  Assam  2022.0  1826.0      2   
 4  IN.AS  IN-AS  Assam  2023.0  8208.0      7   
 
                                             geometry  cases_lag1  
 0  MULTIPOLYGON (((92.7545 24.5011, 92.7436 24.47...         NaN  
 1  MULTIPOLYGON (((92.7545 24.5011, 92.7436 24.47...       196.0  
 2  MU

In [2]:
lag_lookup = gdf[["state", "year", "cases_lag1"]].copy()



In [3]:
def compute_spatial_lag(row, lag_lookup, adj_df):
    state = row["state"]
    year = row["year"]
    
    # Spatial lag uses previous year
    target_year = year - 1
    
    # Find neighbors
    neighbors = adj_df[adj_df["state"] == state]["neighbor"].tolist()
    
    if len(neighbors) == 0:
        return None
    
    # Get neighbors' lagged cases
    neighbor_values = lag_lookup[
        (lag_lookup["state"].isin(neighbors)) &
        (lag_lookup["year"] == target_year)
    ]["cases_lag1"]
    
    if neighbor_values.empty:
        return None
    
    return neighbor_values.mean()


In [4]:
gdf["spatial_lag_cases"] = gdf.apply(
    compute_spatial_lag,
    axis=1,
    lag_lookup=lag_lookup,
    adj_df=adj_df
)

gdf[["state", "year", "cases", "cases_lag1", "spatial_lag_cases"]].head(10)


Unnamed: 0,state,year,cases,cases_lag1,spatial_lag_cases
0,Assam,2019.0,196.0,,
1,Assam,2020.0,33.0,196.0,
2,Assam,2021.0,103.0,33.0,200.5
3,Assam,2022.0,1826.0,103.0,52.0
4,Assam,2023.0,8208.0,1826.0,143.0
5,Assam,2024.0,270.0,8208.0,1185.5
6,Bihar,2019.0,6712.0,,
7,Bihar,2020.0,493.0,6712.0,
8,Bihar,2021.0,633.0,493.0,
9,Bihar,2022.0,13972.0,633.0,


In [5]:
state_check = "Bihar"
gdf[gdf["state"] == state_check][
    ["year", "cases", "cases_lag1", "spatial_lag_cases"]
]


Unnamed: 0,year,cases,cases_lag1,spatial_lag_cases
6,2019.0,6712.0,,
7,2020.0,493.0,6712.0,
8,2021.0,633.0,493.0,
9,2022.0,13972.0,633.0,
10,2023.0,20224.0,13972.0,
11,2024.0,7.0,20224.0,


In [6]:
gdf["spatial_lag_cases"].isna().sum()


np.int64(107)