## Import Libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colorbar import ColorbarBase
from matplotlib.colors import Normalize
import seaborn as sns
import os
from glob import glob
from tqdm.auto import tqdm
import geopandas as gpd
from scipy.stats import linregress

# Plot typography: both font settings are applied in a single rcParams update.
# NOTE(review): 'font.serif' is normally only consulted when 'font.family' is
# the generic 'serif' family - confirm which of the two fonts is intended.
plt.rcParams.update({
    'font.family': 'DeJavu Serif',
    'font.serif': 'Times New Roman',
})

# Suppress every warning for the rest of the session
import warnings
warnings.filterwarnings('ignore')

# Directory layout: the repository root (which also becomes the working
# directory), the shared data directory, and the repository's own data folder
WORK_DIR = '/beegfs/halder/GITHUB/RESEARCH/landscape-yield-analysis/'
MAIN_DATA_DIR = '/beegfs/halder/DATA/'
WORK_DATA_DIR = os.path.join(WORK_DIR, 'data')
os.chdir(WORK_DIR)

## Read the Data

In [None]:
# Read the DE NUTS shapefile once; the NUTS1 and NUTS3 layers are both
# filtered from this single table instead of reading the file twice.
de_nuts_gdf = gpd.read_file(os.path.join(MAIN_DATA_DIR, 'DE_NUTS', 'DE_NUTS_3.shp'))
de_nuts1_gdf = de_nuts_gdf[de_nuts_gdf['LEVL_CODE'] == 1]  # keep only NUTS1 level rows
de_nuts3_gdf = de_nuts_gdf[de_nuts_gdf['LEVL_CODE'] == 3]  # keep only NUTS3 level rows

# Overview map: NUTS3 polygons coloured by name with NUTS1 outlines on top
fig, ax = plt.subplots(figsize=(8, 8))
de_nuts3_gdf.plot(ax=ax, column='NUTS_NAME', cmap='Set3', edgecolor='grey', linewidth=0.5, label='NUTS3')
de_nuts1_gdf.plot(ax=ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
plt.show()

print(de_nuts1_gdf.shape, de_nuts3_gdf.shape)
de_nuts3_gdf.head()

In [None]:
# Load the master yield table, drop rows without a yield value, keep the four
# columns used below and give them the names used throughout the analysis
yield_csv_path = os.path.join(MAIN_DATA_DIR, 'DE_Crop_Yield', 'DE_Crop_Yield_Master.csv')
yield_df = (
    pd.read_csv(yield_csv_path)
    .dropna(subset=['value'])
    [['nuts_id', 'year', 'var', 'value']]
    .rename(columns={'nuts_id': 'NUTS_ID', 'var': 'crop', 'value': 'yield'})
)

print('Name of the crops:', yield_df['crop'].unique())
print(yield_df.shape)
yield_df.head()

## Analyze the Yield Data

In [None]:
# Crop code to analyse; rows are matched against the 'crop' column
crop = 'ww'
crop_mask = yield_df['crop'] == crop
yield_df_filtered = yield_df.loc[crop_mask].reset_index(drop=True)

# Working copy of the NUTS3 layer that later cells enrich with statistics
nuts3_info = de_nuts3_gdf.copy()

# Figures for this crop are written below output/figures/yield_eda/<crop>
OUT_DIR = os.path.join(WORK_DIR, 'output', 'figures', 'yield_eda', crop)
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR)
    print('Output folder successfully created!')

print(nuts3_info.shape, yield_df_filtered.shape)

### Count of Yield Observations per NUTS Region

In [None]:
# Count the yield observations per NUTS region.
# The key and count columns are named explicitly instead of renaming the
# 'count' column, because the column names produced by value_counts() differ
# between pandas versions.
yield_obs_count = (
    yield_df_filtered['NUTS_ID']
    .value_counts()
    .rename_axis('NUTS_ID')
    .reset_index(name='yield_count')
)
nuts3_info = pd.merge(left=nuts3_info, right=yield_obs_count, on=['NUTS_ID'], how='left')

# Choropleth of the observation count with NUTS1 outlines on top
fig, ax = plt.subplots(figsize=(8, 8))
nuts3_info.plot(ax=ax, column='yield_count', cmap='RdYlGn', edgecolor='grey', linewidth=0.5, label='Yield Count', legend=True)
de_nuts1_gdf.plot(ax=ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
plt.title('Number of Yield Observation during 1979 to 2023')
plt.savefig(os.path.join(OUT_DIR, 'count_of_yield_obs.png'), format='png', bbox_inches='tight')
plt.show()

### Yield Anomaly and Extremes

#### Yield Anomaly Calculation on Observed and Detrended Yield (Loess Method)

In [None]:
from statsmodels.nonparametric.smoothers_lowess import lowess

def detrend_loess(group, frac=0.7):
    """
    Remove a LOWESS trend from the yield series of a single group.

    Parameters:
    - group: DataFrame with 'year' and 'yield' columns (one group of a groupby)
    - frac: smoothing fraction forwarded to `lowess` (default 0.7)

    Returns:
    - the rows of `group` sorted by year, with two added columns:
      'trend_loess' (smoothed yield) and 'detrended_yield' (yield minus trend)
    """
    # sort_values returns a new frame, so the caller's group is left untouched
    ordered = group.sort_values('year')

    # With return_sorted=False only the fitted values are returned,
    # in the same row order as the (year-sorted) input
    fitted_trend = lowess(endog=ordered['yield'], exog=ordered['year'], frac=frac, return_sorted=False)

    ordered['trend_loess'] = fitted_trend
    ordered['detrended_yield'] = ordered['yield'] - ordered['trend_loess']

    return ordered

# Detrend every NUTS region separately and flatten the result to a plain index
yield_detrended = (
    yield_df_filtered
    .groupby(by='NUTS_ID')
    .apply(detrend_loess)
    .reset_index(drop=True)
)

yield_anomaly = yield_detrended.copy()

# Long-term mean and standard deviation per NUTS region, first for the
# observed yield and then for the detrended yield
for source_col, suffix in (('yield', 'yield'), ('detrended_yield', 'detrended')):
    per_region = yield_anomaly.groupby('NUTS_ID')[source_col]
    yield_anomaly[f'mean_{suffix}'] = per_region.transform('mean')
    yield_anomaly[f'std_{suffix}'] = per_region.transform('std')

# Absolute anomalies: deviation from the regional long-term mean
yield_anomaly['anomaly'] = yield_anomaly['yield'].sub(yield_anomaly['mean_yield'])
yield_anomaly['anomaly_detrended'] = yield_anomaly['detrended_yield'].sub(yield_anomaly['mean_detrended'])

# Standardized anomalies (z-scores): anomaly divided by the regional std
yield_anomaly['z_anomaly'] = yield_anomaly['anomaly'].div(yield_anomaly['std_yield'])
yield_anomaly['z_anomaly_detrended'] = yield_anomaly['anomaly_detrended'].div(yield_anomaly['std_detrended'])

# Rows whose z-score exceeds 2 in absolute value are treated as extremes
observed_extreme_mask = yield_anomaly['z_anomaly'].abs().gt(2)
detrended_extreme_mask = yield_anomaly['z_anomaly_detrended'].abs().gt(2)
yield_extremes = yield_anomaly.loc[observed_extreme_mask]
yield_extremes_detrended = yield_anomaly.loc[detrended_extreme_mask]

In [None]:
# One faint line per NUTS region: the LOESS trend in orange first, then the
# observed yield in grey on the same axes
plt.figure(figsize=(14, 4))

for y_column, line_color in (('trend_loess', 'orange'), ('yield', 'grey')):
    sns.lineplot(
        data=yield_detrended, x='year', y=y_column,
        estimator=None, units='NUTS_ID',
        color=line_color, legend=False, alpha=0.05,
    )

plt.title('Yield Trend vs Yield Observed')
trend_figure_path = os.path.join(OUT_DIR, 'yield_trend_vs_yield_observed.png')
plt.savefig(trend_figure_path, format='png', bbox_inches='tight')
plt.show()

#### Plot Mean and Std of Observed and Detrended Yield

In [None]:
# Attach the per-region statistics (one row per NUTS_ID after de-duplication)
# to the NUTS3 layer
region_stats = yield_anomaly[
    ['NUTS_ID', 'mean_yield', 'std_yield', 'mean_detrended', 'std_detrended']
].drop_duplicates()
nuts3_info = pd.merge(nuts3_info, region_stats, on=['NUTS_ID'], how='left')

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 7))
axes = axes.flatten()

# Left panel: mean of the observed yield; right panel: its standard deviation.
# The caption doubles as the colourbar label and the panel title.
map_panels = (
    ('mean_yield', 'RdYlGn', 'Mean Yield', 'Observed Yield (μ) (t/ha)'),
    ('std_yield', 'YlOrRd', 'Std Yield', 'Observed Yield (σ) (t/ha)'),
)
for panel_ax, (stat_col, cmap_name, layer_label, caption) in zip(axes, map_panels):
    nuts3_info.plot(
        ax=panel_ax, column=stat_col, cmap=cmap_name, edgecolor='grey', linewidth=0.5,
        label=layer_label, legend=True,
        legend_kwds={"shrink": 0.7, 'label': caption},
    )
    de_nuts1_gdf.plot(ax=panel_ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
    panel_ax.set_title(caption)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_mean_and_std.png'), format='png', bbox_inches='tight')
plt.show()

#### Plot Yearly Anomaly Map of Observed and Detrended Yield

In [None]:
# One small map per year, laid out on a grid with `ncols` columns
years = sorted(yield_anomaly['year'].unique())

ncols = 5
nrows = (len(years) + ncols - 1) // ncols  # ceiling division

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(4*ncols, 4*nrows))
axes = axes.flatten()

# Variable to map and the shared colour scale used by every panel
var = 'z_anomaly_detrended'
vmin, vmax = -2, 2
cmap = 'RdYlGn'

# The geometry table is the same for every year, so slice it once
nuts3_geometry = nuts3_info[['NUTS_ID', 'geometry']]

# tqdm wraps the list itself (not the enumerate object) so the bar knows the total
for i, year in enumerate(tqdm(years)):
    anomaly_df = yield_anomaly[yield_anomaly['year'] == year][['NUTS_ID', 'year', var]]
    # left merge keeps regions without a value for this year in the map
    anomaly_gdf = pd.merge(
        left=nuts3_geometry,
        right=anomaly_df,
        on='NUTS_ID',
        how='left'
    )

    # plot
    anomaly_gdf.plot(
        ax=axes[i],
        column=var,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        edgecolor='grey',
        linewidth=0.2
    )
    axes[i].set_title(f'Year: {year}', fontsize=10)
    axes[i].axis('off')

# remove the unused axes of the last row (independent of the loop variable)
for unused_ax in axes[len(years):]:
    fig.delaxes(unused_ax)

# add a single colorbar
cax = fig.add_axes([0.15, 0.025, 0.5, 0.01])  # [left, bottom, width, height]
norm = Normalize(vmin=vmin, vmax=vmax)
cb = ColorbarBase(cax, cmap=plt.get_cmap(cmap), norm=norm, orientation='horizontal', extend='both')
cb.set_label(var)

fig.subplots_adjust(left=0.02, right=0.80, top=0.95, bottom=0.05, wspace=0.05, hspace=0.05)
plt.savefig(os.path.join(OUT_DIR, f'yearly_{var}.png'), format='png', bbox_inches='tight')
plt.show()

#### Plot Long-term Timeseries of Observed and Detrended Yield

In [None]:
# z-score magnitude beyond which an observation counts as an extreme
threshold = 2

# For the observed and the detrended z-score, keep the value where it is
# beyond the threshold and NaN elsewhere
for z_column, suffix in (('z_anomaly', ''), ('z_anomaly_detrended', '_detrended')):
    z_values = yield_anomaly[z_column]
    yield_anomaly[f'pos_extreme{suffix}'] = z_values.where(z_values > threshold)
    yield_anomaly[f'neg_extreme{suffix}'] = z_values.where(z_values < -threshold)

In [None]:
# Yearly mean of every numeric column, ordered by year
df_grouped = (
    yield_anomaly.groupby('year')
    .mean(numeric_only=True)
    .reset_index()
    .sort_values('year')
)

# Number of flagged (non-NaN) extremes per year, converted to a truncated
# integer percentage of the number of rows in nuts3_info
extreme_columns = ['pos_extreme', 'neg_extreme', 'pos_extreme_detrended', 'neg_extreme_detrended']
df_perc = (
    yield_anomaly.groupby('year')[extreme_columns]
    .count()
    .add_prefix('perc_')
    .reset_index()
)
df_perc.iloc[:, 1:] = ((df_perc.iloc[:, 1:] / len(nuts3_info)) * 100).astype('int')

df_grouped = df_grouped.merge(df_perc, on='year')

years = np.arange(df_grouped['year'].min(), df_grouped['year'].max() + 1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8), sharex=True)
axes = axes.flatten()

# Top panel: observed z-anomaly; bottom panel: detrended z-anomaly.
# Both panels are built the same way, so they share one loop.
panel_specs = (('z_anomaly', ''), ('z_anomaly_detrended', '_detrended'))
twin_axes = []

for line_ax, (z_column, suffix) in zip(axes, panel_specs):
    pos_col = f'perc_pos_extreme{suffix}'
    neg_col = f'perc_neg_extreme{suffix}'

    # one faint line per NUTS region on the left axis
    sns.lineplot(
        data=yield_anomaly, x='year', y=z_column, estimator=None, units='NUTS_ID',
        color='grey', legend=False, alpha=0.05, ax=line_ax
    )

    # secondary y-axis carrying the percentage bars
    bar_ax = line_ax.twinx()
    twin_axes.append(bar_ax)

    bar_ax.bar(
        df_grouped['year'], df_grouped[pos_col],
        color='none', edgecolor='red', alpha=1, width=0.8, label='Positive extremes (%)'
    )
    bar_ax.bar(
        df_grouped['year'], -df_grouped[neg_col],  # negated so the bars point downward
        color='none', edgecolor='blue', alpha=1, width=0.8, label='Negative extremes (%)'
    )

    # numeric labels just beyond the end of every non-empty bar
    for _, yearly in df_grouped.iterrows():
        bar_x = yearly['year']
        if yearly[pos_col] > 0:
            bar_ax.text(
                bar_x, yearly[pos_col] + 1,
                str(int(round(yearly[pos_col]))),
                ha='center', va='bottom', fontsize=9, color='red'
            )
        if yearly[neg_col] > 0:
            bar_ax.text(
                bar_x, -yearly[neg_col] - 1,
                str(int(round(yearly[neg_col]))),
                ha='center', va='top', fontsize=9, color='blue'
            )

    # zero reference line, title and grid on the z-anomaly axis
    line_ax.axhline(0, color='black', linewidth=0.8, linestyle='--')
    line_ax.set_title(f'Yield {z_column} with % NUTS under extremes')
    line_ax.grid(True, linestyle='--', alpha=0.5)

    bar_ax.set_ylabel('% NUTS under extremes')
    bar_ax.legend(loc='upper right')

    # fixed ranges: z-anomaly on the left axis, percentages on the right axis
    line_ax.set_ylim(-4, 4)
    bar_ax.set_ylim(-40, 40)

# keep the twin axes available under their individual names
ax2_0, ax2_1 = twin_axes

# shared x-axis: one rotated tick per year
axes[1].set_xticks(years)
axes[1].set_xticklabels(years, rotation=90)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_timeseries_with_pos_neg_extremes.png'), format='png', bbox_inches='tight')
plt.show()

### Yield Trends per NUTS

Predict trend_loess and detrended_yield both

In [None]:
# Unique districts, in order of first appearance
nuts = yield_detrended['NUTS_ID'].unique()

# One result record (slope, r-squared, p-value) per district
trend_results = []

# A single groupby pass replaces re-filtering the whole table for every
# district; sort=False yields the groups in order of first appearance,
# i.e. the same order as `nuts`.
for nuts_id, df_nuts in yield_detrended.groupby('NUTS_ID', sort=False):
    df_nuts = df_nuts.sort_values('year')

    # Linear regression of the observed yield on the year
    slope, intercept, r_value, p_value, std_err = linregress(df_nuts['year'], df_nuts['yield'])

    trend_results.append({
        'NUTS_ID': nuts_id,
        'slope': slope,
        'r_squared': r_value**2,
        'p_value': p_value
    })

# Convert results to dataframe
trend_df = pd.DataFrame(trend_results)

# Show the trend table
print(trend_df.shape)
trend_df.head()

In [None]:
# Attach the regression results to the NUTS3 layer, bin slope and p-value
# into labelled classes and map both classifications side by side
nuts3_info = pd.merge(nuts3_info, trend_df, on=['NUTS_ID'], how='left')

# Slope classes (bin edges are right-inclusive, the pd.cut default)
slope_bins = [-np.inf, -0.05, 0, 0.05, np.inf]
slope_labels = ['Strong Decrease', 'Slight Decrease', 'Slight Increase', 'Strong Increase']
nuts3_info['slope_category'] = pd.cut(nuts3_info['slope'], bins=slope_bins, labels=slope_labels)

# P-value classes
pvalue_bins = [-np.inf, 0.01, 0.05, 0.1, np.inf]
pvalue_labels = ['Highly significant', 'Significant', 'Marginally significant', 'Not significant']
nuts3_info['pvalue_category'] = pd.cut(nuts3_info['p_value'], bins=pvalue_bins, labels=pvalue_labels)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 8))
axes = axes.flatten()

category_panels = (
    ('slope_category', 'RdYlGn', 'Slope'),
    ('pvalue_category', 'YlOrRd', 'P Value'),
)
for panel_ax, (category_col, cmap_name, panel_title) in zip(axes, category_panels):
    nuts3_info.plot(ax=panel_ax, column=category_col, cmap=cmap_name, edgecolor='grey', linewidth=0.5, legend=True)
    de_nuts1_gdf.plot(ax=panel_ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_slope_and_trend.png'), format='png', bbox_inches='tight')
plt.show()

## Analyze the Gridded Yield Data

In [None]:
# Gridded yield table: keep the four columns used below and rename
# 'distributed_yield' to 'yield' so the NUTS-level code can be reused
gridded_csv_path = os.path.join(WORK_DATA_DIR, 'OUTPUT', 'Landscape_Data_2.5KM.csv')
gridded_yield_df = (
    pd.read_csv(gridded_csv_path)
    [['id', 'NUTS_ID', 'year', 'distributed_yield']]
    .rename(columns={'distributed_yield': 'yield'})
)

# Grid geometries, reprojected to the CRS of the NUTS1 layer
grid_shp_path = os.path.join(WORK_DATA_DIR, 'VECTOR', 'DE_Hexbins_2.5sqkm_EPSG_25832.shp')
grid_gdf = gpd.read_file(grid_shp_path)
grid_gdf = grid_gdf.to_crs(de_nuts1_gdf.crs)

print(gridded_yield_df.shape, grid_gdf.shape)
gridded_yield_df.head()

### Count of Yield Observations per Grid Cell

In [None]:
# Count the yield observations per grid cell.
# The key and count columns are named explicitly instead of renaming the
# 'count' column, because the column names produced by value_counts() differ
# between pandas versions.
yield_obs_count = (
    gridded_yield_df['id']
    .value_counts()
    .rename_axis('id')
    .reset_index(name='yield_count')
)
grid_info = pd.merge(left=grid_gdf, right=yield_obs_count, on=['id'], how='left')

# Map of the observation count with NUTS1 outlines on top
fig, ax = plt.subplots(figsize=(8, 8))
grid_info.plot(ax=ax, column='yield_count', cmap='RdYlGn', label='Yield Count', legend=True)
de_nuts1_gdf.plot(ax=ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
plt.title('Number of Yield Observation during 2001 to 2023')
plt.savefig(os.path.join(OUT_DIR, 'count_of_yield_obs_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

### Yield Anomaly and Extremes
#### Yield Anomaly Calculation on Observed and Detrended Yield (Loess Method)

In [None]:
# Detrend every grid cell separately and flatten the result to a plain index
yield_detrended = (
    gridded_yield_df
    .groupby(by='id')
    .apply(detrend_loess)
    .reset_index(drop=True)
)

yield_anomaly = yield_detrended.copy()

# Long-term mean and standard deviation per grid cell, first for the
# yield and then for the detrended yield
for source_col, suffix in (('yield', 'yield'), ('detrended_yield', 'detrended')):
    per_cell = yield_anomaly.groupby('id')[source_col]
    yield_anomaly[f'mean_{suffix}'] = per_cell.transform('mean')
    yield_anomaly[f'std_{suffix}'] = per_cell.transform('std')

# Absolute anomalies: deviation from the cell's long-term mean
yield_anomaly['anomaly'] = yield_anomaly['yield'].sub(yield_anomaly['mean_yield'])
yield_anomaly['anomaly_detrended'] = yield_anomaly['detrended_yield'].sub(yield_anomaly['mean_detrended'])

# Standardized anomalies (z-scores): anomaly divided by the cell's std
yield_anomaly['z_anomaly'] = yield_anomaly['anomaly'].div(yield_anomaly['std_yield'])
yield_anomaly['z_anomaly_detrended'] = yield_anomaly['anomaly_detrended'].div(yield_anomaly['std_detrended'])

# Rows whose z-score exceeds 2 in absolute value are treated as extremes
observed_extreme_mask = yield_anomaly['z_anomaly'].abs().gt(2)
detrended_extreme_mask = yield_anomaly['z_anomaly_detrended'].abs().gt(2)
yield_extremes = yield_anomaly.loc[observed_extreme_mask]
yield_extremes_detrended = yield_anomaly.loc[detrended_extreme_mask]

In [None]:
# Two stacked panels with one faint line per grid cell:
# the LOESS trend on top and the yield below
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 10))
axes = axes.flatten()

sns.lineplot(data=yield_detrended, x='year', y='trend_loess', estimator=None, units='id',
    color='orange', legend=False, alpha=0.005, ax=axes[0])

sns.lineplot(data=yield_detrended, x='year', y='yield', estimator=None, units='id',
    color='grey', legend=False, alpha=0.005, ax=axes[1])

axes[0].set_title('Yield Trend')
axes[1].set_title('Yield Observed')

# Apply the layout before saving so the file on disk matches the figure
# shown in the notebook
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_trend_vs_yield_observed_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

#### Plot Mean and Std of Observed and Detrended Yield

In [None]:
# Attach the per-cell statistics (one row per id after de-duplication)
# to the grid layer
cell_stats = yield_anomaly[
    ['id', 'mean_yield', 'std_yield', 'mean_detrended', 'std_detrended']
].drop_duplicates()
grid_info = pd.merge(grid_info, cell_stats, on=['id'], how='left')

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 7))
axes = axes.flatten()

# Left panel: mean yield per cell; right panel: its standard deviation.
# The caption doubles as the colourbar label and the panel title.
map_panels = (
    ('mean_yield', 'RdYlGn', 'Mean Yield', 'Disaggregated Yield (μ) (t/ha)'),
    ('std_yield', 'YlOrRd', 'Std Yield', 'Disaggregated Yield (σ) (t/ha)'),
)
for panel_ax, (stat_col, cmap_name, layer_label, caption) in zip(axes, map_panels):
    grid_info.plot(
        ax=panel_ax, column=stat_col, cmap=cmap_name, label=layer_label, legend=True,
        legend_kwds={"shrink": 0.7, 'label': caption},
    )
    de_nuts1_gdf.plot(ax=panel_ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
    panel_ax.set_title(caption)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_mean_and_std_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

#### Plot Yearly Anomaly Map of Observed and Detrended Yield

In [None]:
# One small map per year, laid out on a grid with `ncols` columns
years = sorted(yield_anomaly['year'].unique())

ncols = 5
nrows = (len(years) + ncols - 1) // ncols  # ceiling division

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(4*ncols, 4*nrows))
axes = axes.flatten()

# Variable to map and the shared colour scale used by every panel
var = 'detrended_yield'
vmin, vmax = -2, 1.8
cmap = 'RdYlGn'

# The geometry table is the same for every year, so slice it once
grid_geometry = grid_info[['id', 'geometry']]

# tqdm wraps the list itself (not the enumerate object) so the bar knows the total
for i, year in enumerate(tqdm(years)):
    anomaly_df = yield_anomaly[yield_anomaly['year'] == year][['id', 'year', var]]
    # left merge keeps cells without a value for this year in the map
    anomaly_gdf = pd.merge(
        left=grid_geometry,
        right=anomaly_df,
        on='id',
        how='left'
    )

    # plot
    anomaly_gdf.plot(
        ax=axes[i],
        column=var,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax
    )
    axes[i].set_title(f'Year: {year}', fontsize=10)
    axes[i].axis('off')

# remove the unused axes of the last row (independent of the loop variable)
for unused_ax in axes[len(years):]:
    fig.delaxes(unused_ax)

# add a single colorbar
cax = fig.add_axes([0.15, 0.025, 0.5, 0.01])  # [left, bottom, width, height]
norm = Normalize(vmin=vmin, vmax=vmax)
cb = ColorbarBase(cax, cmap=plt.get_cmap(cmap), norm=norm, orientation='horizontal', extend='both')
cb.set_label(var)

fig.subplots_adjust(left=0.02, right=0.80, top=0.95, bottom=0.05, wspace=0.05, hspace=0.05)
plt.savefig(os.path.join(OUT_DIR, f'yearly_{var}_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

#### Plot Long-term Timeseries of Observed and Detrended Yield

In [None]:
# z-score magnitude beyond which an observation counts as an extreme
threshold = 2

# For the plain and the detrended z-score, keep the value where it is
# beyond the threshold and NaN elsewhere
for z_column, suffix in (('z_anomaly', ''), ('z_anomaly_detrended', '_detrended')):
    z_values = yield_anomaly[z_column]
    yield_anomaly[f'pos_extreme{suffix}'] = z_values.where(z_values > threshold)
    yield_anomaly[f'neg_extreme{suffix}'] = z_values.where(z_values < -threshold)

In [None]:
# Yearly mean of every numeric column, ordered by year
df_grouped = (
    yield_anomaly.groupby('year')
    .mean(numeric_only=True)
    .reset_index()
    .sort_values('year')
)

# Number of flagged (non-NaN) extremes per year, converted to a truncated
# integer percentage of the number of rows in grid_info
extreme_columns = ['pos_extreme', 'neg_extreme', 'pos_extreme_detrended', 'neg_extreme_detrended']
df_perc = (
    yield_anomaly.groupby('year')[extreme_columns]
    .count()
    .add_prefix('perc_')
    .reset_index()
)
df_perc.iloc[:, 1:] = ((df_perc.iloc[:, 1:] / len(grid_info)) * 100).astype('int')

df_grouped = df_grouped.merge(df_perc, on='year')

years = np.arange(df_grouped['year'].min(), df_grouped['year'].max() + 1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 8), sharex=True)
axes = axes.flatten()

# Top panel: plain z-anomaly; bottom panel: detrended z-anomaly.
# Both panels are built the same way, so they share one loop.
panel_specs = (('z_anomaly', ''), ('z_anomaly_detrended', '_detrended'))
twin_axes = []

for line_ax, (z_column, suffix) in zip(axes, panel_specs):
    pos_col = f'perc_pos_extreme{suffix}'
    neg_col = f'perc_neg_extreme{suffix}'

    # one faint line per grid cell on the left axis
    sns.lineplot(
        data=yield_anomaly, x='year', y=z_column, estimator=None, units='id',
        color='grey', legend=False, alpha=0.05, ax=line_ax
    )

    # secondary y-axis carrying the percentage bars
    bar_ax = line_ax.twinx()
    twin_axes.append(bar_ax)

    bar_ax.bar(
        df_grouped['year'], df_grouped[pos_col],
        color='none', edgecolor='red', alpha=1, width=0.8, label='Positive extremes (%)'
    )
    bar_ax.bar(
        df_grouped['year'], -df_grouped[neg_col],  # negated so the bars point downward
        color='none', edgecolor='blue', alpha=1, width=0.8, label='Negative extremes (%)'
    )

    # numeric labels just beyond the end of every non-empty bar
    for _, yearly in df_grouped.iterrows():
        bar_x = yearly['year']
        if yearly[pos_col] > 0:
            bar_ax.text(
                bar_x, yearly[pos_col] + 1,
                str(int(round(yearly[pos_col]))),
                ha='center', va='bottom', fontsize=9, color='red'
            )
        if yearly[neg_col] > 0:
            bar_ax.text(
                bar_x, -yearly[neg_col] - 1,
                str(int(round(yearly[neg_col]))),
                ha='center', va='top', fontsize=9, color='blue'
            )

    # zero reference line, title and grid on the z-anomaly axis
    line_ax.axhline(0, color='black', linewidth=0.8, linestyle='--')
    line_ax.set_title(f'Yield {z_column} with % area under extremes')
    line_ax.grid(True, linestyle='--', alpha=0.5)

    bar_ax.set_ylabel('% area under extremes')
    bar_ax.legend(loc='upper right')

    # fixed ranges: z-anomaly on the left axis, percentages on the right axis
    line_ax.set_ylim(-4, 4)
    bar_ax.set_ylim(-40, 40)

# keep the twin axes available under their individual names
ax2_0, ax2_1 = twin_axes

# shared x-axis: one rotated tick per year
axes[1].set_xticks(years)
axes[1].set_xticklabels(years, rotation=90)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_timeseries_with_pos_neg_extremes_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

### Yield Trends per Grid

In [None]:
# Unique grid cells, in order of first appearance
nuts = yield_detrended['id'].unique()

# One result record (slope, r-squared, p-value) per grid cell
trend_results = []

# A single groupby pass replaces re-filtering the whole table for every
# grid cell (quadratic in the table size); sort=False yields the groups in
# order of first appearance, i.e. the same order as `nuts`.
cell_groups = yield_detrended.groupby('id', sort=False)
for cell_id, df_nuts in tqdm(cell_groups, total=cell_groups.ngroups):
    df_nuts = df_nuts.sort_values('year')

    # Linear regression of the yield on the year
    slope, intercept, r_value, p_value, std_err = linregress(df_nuts['year'], df_nuts['yield'])

    trend_results.append({
        'id': cell_id,
        'slope': slope,
        'r_squared': r_value**2,
        'p_value': p_value
    })

# Convert results to dataframe
trend_df = pd.DataFrame(trend_results)

# Show the trend table
print(trend_df.shape)
trend_df.head()

In [None]:
# Attach the regression results to the grid layer, bin slope and p-value
# into labelled classes and map both classifications side by side
grid_info = pd.merge(grid_info, trend_df, on=['id'], how='left')

# Slope classes (bin edges are right-inclusive, the pd.cut default)
slope_bins = [-np.inf, -0.05, 0, 0.05, np.inf]
slope_labels = ['Strong Decrease', 'Slight Decrease', 'Slight Increase', 'Strong Increase']
grid_info['slope_category'] = pd.cut(grid_info['slope'], bins=slope_bins, labels=slope_labels)

# P-value classes
pvalue_bins = [-np.inf, 0.01, 0.05, 0.1, np.inf]
pvalue_labels = ['Highly significant', 'Significant', 'Marginally significant', 'Not significant']
grid_info['pvalue_category'] = pd.cut(grid_info['p_value'], bins=pvalue_bins, labels=pvalue_labels)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 8))
axes = axes.flatten()

category_panels = (
    ('slope_category', 'RdYlGn', 'Slope'),
    ('pvalue_category', 'YlOrRd', 'P Value'),
)
for panel_ax, (category_col, cmap_name, panel_title) in zip(axes, category_panels):
    grid_info.plot(ax=panel_ax, column=category_col, cmap=cmap_name, legend=True)
    de_nuts1_gdf.plot(ax=panel_ax, facecolor='none', edgecolor='k', linewidth=1, label='NUTS1')
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, 'yield_slope_and_trend_(grid_scale).png'), format='png', bbox_inches='tight')
plt.show()

## Save the Anomaly Data

In [None]:
# From here on OUT_DIR points at the per-crop data folder instead of the
# figure folder; create it on first use
OUT_DIR = os.path.join(WORK_DATA_DIR, crop)
if not os.path.exists(OUT_DIR):
    os.makedirs(OUT_DIR)
    print('Output folder successfully created!')

# Write the current anomaly table without its index column
anomaly_csv_path = os.path.join(OUT_DIR, 'yield.csv')
yield_anomaly.to_csv(anomaly_csv_path, index=False)
print('Data saved successfully!')