In [1]:
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.plot import show
from rasterio.sample import sample_gen
import numpy as np
import glob
import os
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm


In [32]:
koppen_labels = {
    1: "Af; Tropical, rainforest",
    2: "Am; Tropical, monsoon",
    3: "Aw; Tropical, savannah",
    4: "BWh; Arid, desert, hot",
    5: "BWk; Arid, desert, cold",
    6: "BSh; Arid, steppe, hot",
    7: "BSk; Arid, steppe, cold",
    8: "Csa; Temperate, dry summer, hot summer",
    9: "Csb; Temperate, dry summer, warm summer",
    10: "Csc; Temperate, dry summer, cold summer",
    11: "Cwa; Temperate, dry winter, hot summer",
    12: "Cwb; Temperate, dry winter, warm summer",
    13: "Cwc; Temperate, dry winter, cold summer",
    14: "Cfa; Temperate, no dry season, hot summer",
    15: "Cfb; Temperate, no dry season, warm summer",
    16: "Cfc; Temperate, no dry season, cold summer",
    17: "Dsa; Cold, dry summer, hot summer",
    18: "Dsb; Cold, dry summer, warm summer",
    19: "Dsc; Cold, dry summer, cold summer",
    20: "Dsd; Cold, dry summer, very cold winter",
    21: "Dwa; Cold, dry winter, hot summer",
    22: "Dwb; Cold, dry winter, warm summer",
    23: "Dwc; Cold, dry winter, cold summer",
    24: "Dwd; Cold, dry winter, very cold winter",
    25: "Dfa; Cold, no dry season, hot summer",
    26: "Dfb; Cold, no dry season, warm summer",
    27: "Dfc; Cold, no dry season, cold summer",
    28: "Dfd; Cold, no dry season, very cold winter",
    29: "ET; Polar, tundra",
    30: "EF; Polar, frost"
}

koppen_abbreviations = {
    1: "Af",
    2: "Am",
    3: "Aw",
    4: "BWh",
    5: "BWk",
    6: "BSh",
    7: "BSk",
    8: "Csa",
    9: "Csb",
    10: "Csc",
    11: "Cwa",
    12: "Cwb",
    13: "Cwc",
    14: "Cfa",
    15: "Cfb",
    16: "Cfc",
    17: "Dsa",
    18: "Dsb",
    19: "Dsc",
    20: "Dsd",
    21: "Dwa",
    22: "Dwb",
    23: "Dwc",
    24: "Dwd",
    25: "Dfa",
    26: "Dfb",
    27: "Dfc",
    28: "Dfd",
    29: "ET",
    30: "EF"
}


#### Bestanden

In [8]:
# Cols
cols = ['location_id', 'x', 'y', 'bare', 'crops', 'grassland', 'shrub', 'tree', 'urban_built_up', 'water']

# Prediction files
prediction_files = [
    r"C:\Users\augus\Documents\Studie\MGI\Paper\Data\LSTM\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Paper\Data\LSTM\Dense\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Thesis\DataAugust\RF_LSTMinput\RF\LSTM_formula\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Thesis\DataAugust\RF_LSTMinput\RF\Dense\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Paper\Data\RF-LSTM\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Paper\Data\RF-LSTM\Dense\predict2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Thesis\DataAugust\RF_LSTMinput\RF-Markov\RFMarkov2015.csv",
    r"C:\Users\augus\Documents\Studie\MGI\Thesis\DataAugust\RF_LSTMinput\RF-Markov\Dense\Version 2\predict2015.csv"
]
model_titles = [
    "Annual LSTM", "Dense LSTM", "Annual RF", "Dense RF",
    "Annual RF-PostLSTM", "Dense RF-PostLSTM", "Annual RF-Markov", "Dense RF-Markov"
]
model_dfs = {
    title: pd.read_csv(file)
    for title, file in zip(model_titles, prediction_files)
}

# Validation
val_df = pd.read_csv(r"C:\Users\augus\Documents\Studie\MGI\Thesis\DataAugust\RobMethod_entireTrain_3yearVali\Validation\All Validation\vali2015.csv")
val_df = val_df[cols]

# Köppen-Geiger raster
with rasterio.open(r"C:\Users\augus\Documents\Studie\MGI\Paper\Code\koppen\Beck_KG_V1\Beck_KG_V1_present_0p5.tif") as src:
    coords = [(x, y) for x, y in zip(val_df['x'], val_df['y'])]
    koppen_values = [val[0] for val in src.sample(coords)]

# Col KG vali
val_df['koppen_zone'] = koppen_values
val_df.head()

Unnamed: 0,location_id,x,y,bare,crops,grassland,shrub,tree,urban_built_up,water,koppen_zone
0,1906318,7.71131,11.05655,32,0,0,0,3,65,0,3
1,1906319,-10.75794,6.382937,15,0,24,5,33,23,0,2
2,1906320,-7.561508,33.39583,4,0,10,0,1,85,0,8
3,1906321,30.09722,31.26885,19,0,19,1,8,53,0,4
4,1906322,-5.289683,6.819444,12,0,50,9,7,22,0,3


#### Bereken MAE

In [33]:
land_cover_classes = ['bare', 'crops', 'grassland', 'shrub', 'tree', 'urban_built_up', 'water']

results = []

for model_name, pred_df in tqdm(model_dfs.items(), desc="Processing models"):
    merged = val_df[['location_id', 'koppen_zone'] + land_cover_classes].merge(
        pred_df[['location_id'] + land_cover_classes], 
        on='location_id', 
        suffixes=('_true', '_pred')
    )

    # Compute MAE per row
    mae_cols = []
    for cls in land_cover_classes:
        mae_col = f'mae_{cls}'
        merged[mae_col] = (merged[f'{cls}_true'] - merged[f'{cls}_pred']).abs()
        mae_cols.append(mae_col)

    # Average MAE across classes
    merged['mae_all'] = merged[mae_cols].mean(axis=1)

    # Group by climate zone
    grouped = merged.groupby('koppen_zone')[['mae_all'] + mae_cols].mean().reset_index()
    grouped['model'] = model_name
    results.append(grouped)

final_df = pd.concat(results, ignore_index=True)
final_df.head()

Processing models: 100%|██████████| 8/8 [00:00<00:00, 20.41it/s]


Unnamed: 0,koppen_zone,mae_all,mae_bare,mae_crops,mae_grassland,mae_shrub,mae_tree,mae_urban_built_up,mae_water,model
0,0,9.808971,11.53259,3.481418,16.111035,12.032634,14.685551,1.525116,9.294451,Annual LSTM
1,1,6.330061,2.274815,3.239555,14.563619,6.716085,13.364701,0.921896,3.22976,Annual LSTM
2,2,6.728369,2.539523,4.008223,15.222416,8.874149,10.795663,2.538382,3.120228,Annual LSTM
3,3,10.398285,3.79934,9.800811,23.980892,13.77218,15.524473,1.79043,4.119872,Annual LSTM
4,4,9.708613,20.503359,6.473508,21.983498,9.156007,3.450414,2.015933,4.377574,Annual LSTM


#### Format

In [46]:
# Average MAE
class_cols = ['mae_bare', 'mae_crops', 'mae_grassland', 'mae_shrub', 'mae_tree', 'mae_urban_built_up', 'mae_water']
final_df['mae'] = final_df[class_cols].mean(axis=1)
df_filtered = final_df[['koppen_zone', 'model', 'mae']]

# Pivot
pivot_table = df_filtered.pivot(index='koppen_zone', columns='model', values='mae').reset_index()
pivot_table = pivot_table.round(1) # round naar 1 decimaal

# Rename koppen zones
pivot_table = pivot_table.drop(index=0, errors='ignore') # drop 0 (geen klasse)
pivot_table.rename(index=koppen_abbreviations, inplace=True)
pivot_table = pivot_table.drop(columns=['koppen_zone'])

# Juiste kolom volgorde
pivot_table = pivot_table[model_titles]
pivot_table.head()

model,Annual LSTM,Dense LSTM,Annual RF,Dense RF,Annual RF-PostLSTM,Dense RF-PostLSTM,Annual RF-Markov,Dense RF-Markov
Af,6.3,6.2,6.1,6.0,6.0,6.0,6.1,6.0
Am,6.7,6.5,6.1,6.1,6.2,6.2,6.1,6.1
Aw,10.4,10.5,10.3,10.3,9.9,10.1,10.3,10.2
BWh,9.7,9.6,9.2,9.1,8.9,8.9,9.1,9.0
BWk,10.8,11.1,9.7,9.6,9.4,9.4,9.5,9.4


In [47]:
# Naar LaTeX
latex_code = pivot_table.to_latex(index=True)
pivot_table.to_latex(
    index=True,            # Include the index (climate zone names)
    float_format="%.1f",   # Format floats to 1 decimal
    caption="Average MAE by Climate Zone and Model",
    label="tab:mae_by_climate",
    column_format="l" + "r" * len(pivot_table.columns),  # Align: left for index, right for numbers
    bold_rows=True,
    escape=False           # Set to False if you want to allow special characters like underscore
)
print(pivot_table.to_latex(float_format=lambda x: '%.1f' % x))

\begin{tabular}{lrrrrrrrr}
\toprule
model & Annual LSTM & Dense LSTM & Annual RF & Dense RF & Annual RF-PostLSTM & Dense RF-PostLSTM & Annual RF-Markov & Dense RF-Markov \\
\midrule
Af & 6.3 & 6.2 & 6.1 & 6.0 & 6.0 & 6.0 & 6.1 & 6.0 \\
Am & 6.7 & 6.5 & 6.1 & 6.1 & 6.2 & 6.2 & 6.1 & 6.1 \\
Aw & 10.4 & 10.5 & 10.3 & 10.3 & 9.9 & 10.1 & 10.3 & 10.2 \\
BWh & 9.7 & 9.6 & 9.2 & 9.1 & 8.9 & 8.9 & 9.1 & 9.0 \\
BWk & 10.8 & 11.1 & 9.7 & 9.6 & 9.4 & 9.4 & 9.5 & 9.4 \\
BSh & 11.7 & 12.1 & 11.7 & 11.5 & 11.4 & 11.4 & 11.5 & 11.4 \\
BSk & 12.7 & 12.2 & 11.5 & 11.6 & 11.3 & 11.2 & 11.5 & 11.5 \\
Csa & 12.8 & 12.3 & 12.1 & 12.0 & 12.0 & 11.9 & 12.1 & 12.0 \\
Csb & 13.1 & 12.2 & 12.8 & 12.8 & 12.2 & 12.2 & 12.7 & 12.8 \\
Csc & 9.7 & 9.4 & 9.8 & 9.9 & 9.5 & 9.5 & 9.7 & 9.7 \\
Cwa & 11.3 & 10.6 & 11.0 & 10.9 & 10.5 & 10.5 & 10.9 & 10.8 \\
Cwb & 9.9 & 10.1 & 10.6 & 10.6 & 10.2 & 10.2 & 10.8 & 10.6 \\
Cwc & 11.6 & 12.6 & 11.8 & 11.8 & 11.4 & 11.5 & 11.7 & 11.8 \\
Cfa & 17.1 & 17.2 & 15.0 & 14.8 & 14.8 & 1