In [2]:
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray
from shapely.geometry import mapping
import gc

## Load and prepare data

In [3]:
# Open the SPEI NetCDF file as an xarray Dataset (ESSD Copernicus SPEI data).
# NOTE(review): the file name suggests the 48-month SPEI scale — confirm.
spei_file = 'Data/spei48.nc'
spei_data = xr.open_dataset(spei_file)

# Replace the 'time' coordinate with pandas datetimes so it can be sliced
# with date strings below
spei_data['time'] = pd.to_datetime(spei_data['time'].values)

# Keep 1999-01-01 through 2023-12-31 (label-based slices include both endpoints).
# NOTE(review): this comment previously said "1999-2024", but the slice ends
# in 2023 — confirm which end year is intended.
spei_data_filtered = spei_data.sel(time=slice('1999-01-01', '2023-12-31'))

# Tell rioxarray that 'lon' is the x dimension and 'lat' is the y dimension.
# inplace=True updates spei_data_filtered itself; the return value is unused.
spei_data_filtered.rio.set_spatial_dims(x_dim = 'lon', y_dim = 'lat', inplace = True)

# Record the CRS as EPSG:4326 on the dataset (in place). This labels the
# existing coordinates; no reprojection call is made here.
spei_data_filtered.rio.write_crs('EPSG:4326', inplace = True)

## Compile monthly SPEI values

In [4]:
# Path to the district boundaries shapefile
district_level = 'Data/district.shp'

# Read the shapefile into a GeoDataFrame
gdf = gpd.read_file(district_level)

# Unique district names (column 'NAME_2'); one output series is built per name
districts = gdf['NAME_2'].unique()

# Shift timestamps to the end of their month. MonthEnd(0) leaves dates that
# already fall on a month end unchanged, so re-running this cell is safe.
spei_data_filtered['time'] = pd.to_datetime(spei_data_filtered.time.values) + pd.offsets.MonthEnd(0)

data_dict = {}
failed_districts = {}  # district name -> error message, reported after the loop

# Compute the area-averaged SPEI time series for each district
for i, district in enumerate(districts, 1):
    try:
        # Use every feature carrying this name, not only the first row, so a
        # district stored as several features is clipped as a whole.
        # NOTE(review): assumes NAME_2 identifies a single district — confirm
        # there are no same-named districts in different provinces.
        district_geoms = gdf[gdf['NAME_2'] == district].geometry
        clipped = spei_data_filtered.rio.clip(
            [mapping(geom) for geom in district_geoms],
            crs=gdf.crs,
        )

        # Spatial mean over the clipped cells; .load() materialises the result
        # so the clipped cube can be released straight away.
        area_avg = clipped['spei'].mean(dim=['lat', 'lon'], skipna=True).load()
        data_dict[district] = area_avg

        # Free the per-district intermediates before the next iteration
        del clipped, area_avg
        gc.collect()

        print(f'[{i}/{len(districts)}] Processing {district}')

    except Exception as e:
        # Best effort: keep going, but remember the failure so it is not
        # silently missing from the exported file.
        failed_districts[district] = str(e)
        print(f'Error in district {district}: {e}')

if failed_districts:
    print(f'{len(failed_districts)} district(s) failed and are absent from the output: '
          f'{sorted(failed_districts)}')

if not data_dict:
    # Without this guard the melt below fails with an opaque KeyError on 'date'
    raise RuntimeError('No district could be processed; nothing to export.')

# Combine the per-district series into one Dataset (one variable per district)
all_means = xr.Dataset(data_dict)

# Values are already loaded above; compute() is kept so the name
# `all_means_computed` stays available to later cells.
all_means_computed = all_means.compute()

# Wide frame: one row per date, one column per district.
# write_crs() attached a scalar 'spatial_ref' coordinate that would otherwise
# become a column here and be melted into a bogus district, so drop all
# non-index coordinates first.
df_wide = all_means_computed.reset_coords(drop=True).to_dataframe()

df_wide = df_wide.reset_index()

df_wide = df_wide.rename(columns={'time': 'date'})

# Melt to long format: one row per (date, district)
df_long = df_wide.melt(id_vars='date', var_name='district', value_name='spei')

df_long = df_long.sort_values(by=['district', 'date'])

# Export to CSV
df_long.to_csv('district_spei_average_long.csv', index=False)

[1/115] Processing Chibombo
[2/115] Processing Chisamba
[3/115] Processing Chitambo
[4/115] Processing Itezhi-tezhi
[5/115] Processing Kabwe
[6/115] Processing Kapiri Mposhi
[7/115] Processing Luano
[8/115] Processing Mkushi
[9/115] Processing Mumbwa
[10/115] Processing Ngabwe
[11/115] Processing Serenje
[12/115] Processing Chililabombwe
[13/115] Processing Chingola
[14/115] Processing Kalulushi
[15/115] Processing Kitwe
[16/115] Processing Luanshya
[17/115] Processing Lufwanyama
[18/115] Processing Masaiti
[19/115] Processing Mpongwe
[20/115] Processing Mufulira
[21/115] Processing Ndola
[22/115] Processing Chadiza
[23/115] Processing Chasefu
[24/115] Processing Chipangali
[25/115] Processing Chipata
[26/115] Processing Kasenengwa
[27/115] Processing Katete
[28/115] Processing Lumezi
[29/115] Processing Lundazi
[30/115] Processing Mambwe
[31/115] Processing Nyimba
[32/115] Processing Petauke
[33/115] Processing Sinda
[34/115] Processing Vubwi
[35/115] Processing Chembe
[36/115] Proces