In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

## Load and prepare data

In [2]:
# Read the three inputs: growing-season table, district SPEI table, district shapefile.
grow_df = pd.read_csv('Data/full_grow_data.csv')
spei_df = pd.read_csv('Data/district_spei_average_long.csv')
gdf = gpd.read_file('Data/district.shp')

# Season start = 1 January of `year` plus (mean_start_day - 1) days, so day 1 is 1 January.
# Built column-wise, like the end date below, instead of a row-wise apply; a missing
# mean_start_day now becomes NaT rather than raising inside a per-row lambda.
year_start = pd.to_datetime(grow_df['year'].astype(int).astype(str), format='%Y')
grow_df['mean_start_date'] = year_start + pd.to_timedelta(grow_df['mean_start_day'] - 1, unit='D')

# Season end = season start plus mean_growing_period days.
grow_df['mean_end_date'] = grow_df['mean_start_date'] + pd.to_timedelta(grow_df['mean_growing_period'], unit='D')

# Parse the SPEI dates so their year and month can be read later.
spei_df['date'] = pd.to_datetime(spei_df['date'])

ERROR 1: PROJ: proj_create_from_database: Open of /mnt/shared/moduleapps/EarthSci/Python-3.9.15-Mamba/share/proj failed


## Calculate seasonal SPEI for each growing season

In [3]:
# (year, month) key of every SPEI record. It does not depend on the growing-season row,
# so it is computed once here instead of on every loop iteration.
spei_keyed = spei_df.assign(
    _year_month=spei_df['date'].apply(lambda x: (x.year, x.month))
)

# Split the SPEI records by district once, rather than re-scanning the full table per row.
spei_by_district = {
    name: records for name, records in spei_keyed.groupby('district', sort=False)
}

# one summary record per (district, season) that has at least one matching SPEI row
seasonal_spei = []

for _, row in grow_df.iterrows():
    district = row['district']
    start = row['mean_start_date']
    end = row['mean_end_date']
    season_year = row['year']

    # list the 15th-of-month dates that fall inside [start, end]
    fifteenth_dates = []
    current = start.replace(day=15)
    if current < start:
        # the 15th of the starting month is before the season start; begin with next month
        current += relativedelta(months=1)

    while current <= end:
        fifteenth_dates.append(current)
        current += relativedelta(months=1)

    # reduce the 15th dates to (year, month) tuples
    valid_months = set((d.year, d.month) for d in fifteenth_dates)

    # SPEI records of this district; districts absent from the SPEI table are skipped
    district_spei = spei_by_district.get(district)
    if district_spei is None:
        continue

    # keep only the months selected for this growing season
    matched_spei = district_spei[district_spei['_year_month'].isin(valid_months)]
    if matched_spei.empty:
        continue

    mean_spei = matched_spei['spei'].mean()
    median_spei = matched_spei['spei'].median()
    seasonal_spei.append({
        'district': district,
        'season_year': season_year,
        'mean_growing_spei': mean_spei,
        'median_growing_spei': median_spei,
        'n_months': matched_spei.shape[0]
    })

# Explicit columns keep the frame mergeable on 'district' even when nothing matched.
seasonal_df = pd.DataFrame(
    seasonal_spei,
    columns=['district', 'season_year', 'mean_growing_spei', 'median_growing_spei', 'n_months'],
)

## Bin into provinces

In [4]:
# District -> province lookup: NAME_2 is used as the district, NAME_1 as the province.
district_to_province = (
    gdf[['NAME_2', 'NAME_1']]
    .drop_duplicates()
    .rename(columns={'NAME_2': 'district', 'NAME_1': 'province'})
)
# NOTE(review): a district name listed under more than one province would duplicate rows
# in the left merge below — confirm district names are unique in the shapefile.

# Attach the province to every district-season row. A 'province' column left over from an
# earlier run of this cell is dropped first; otherwise a re-run would produce
# province_x / province_y and the later merges on 'province' would fail.
seasonal_df = (
    seasonal_df
    .drop(columns='province', errors='ignore')
    .merge(district_to_province, on='district', how='left')
)

def _day_of_year_label(day_of_year):
    """Format a (possibly fractional) day of year as 'Month DD', e.g. 1 -> 'January 01'.

    The day is counted from 1 January 2001, which only serves as a reference year.
    Returns None for a missing value instead of raising inside ``timedelta``.
    """
    if pd.isna(day_of_year):
        return None
    return (datetime(2001, 1, 1) + timedelta(days=day_of_year - 1)).strftime('%B %d')


# Attach the province to the growing-season rows. A 'province' column left over from an
# earlier run of this cell is dropped first so that re-running stays safe.
grow_df = (
    grow_df
    .drop(columns='province', errors='ignore')
    .merge(district_to_province, on='district', how='left')
)

# province-level mean start day, plus a readable 'Month DD' label
province_avg_start = grow_df.groupby('province')['mean_start_day'].mean().reset_index()
province_avg_start['province_avg_start_date'] = province_avg_start['mean_start_day'].apply(_day_of_year_label)

# province-level mean end day, plus a readable 'Month DD' label
# NOTE(review): 'mean_end_day' is not created in the cells shown here; presumably it is a
# column of the growing-season CSV — confirm.
province_avg_end = grow_df.groupby('province')['mean_end_day'].mean().reset_index()
province_avg_end['province_avg_end_date'] = province_avg_end['mean_end_day'].apply(_day_of_year_label)

# Province-level date labels to attach to each district-season row.
start_labels = province_avg_start[['province', 'province_avg_start_date']]
end_labels = province_avg_end[['province', 'province_avg_end_date']]

# Two left merges on 'province': every seasonal row is kept, labels are added where available.
final_df = (
    seasonal_df
    .merge(start_labels, on='province', how='left')
    .merge(end_labels, on='province', how='left')
)

# Write the combined table to CSV without the index column.
final_df.to_csv('district_province_spei_growingperiod.csv', index=False)