# Make plots from the master summary file

Before running any of this code, combine all the individual ecoregion summary files (over quarter-century future periods) into one master file called `master_summary.csv`.

This notebook then makes circumpolar maps, plus supporting bar plots and statistics:

*  Bias-corrected raw/percent changes in climate and fire weather indices over time compared to the observed historic values
*  Bar plots of the modelled future burned area per grouped region (North America boreal/Eurasia boreal/all tundra)
*  Extreme year bar plots
*  Percent and raw change of burned area (BA) per time period
*  Fire Return Interval (FRI) calculations, with plots of the model/actual/change in FRI over time periods
*  Basic ecoregion maps (i.e. boreal vs tundra, with/without numerical labels, with/without permafrost)
*  Spearman correlations of each variable against BA

Edit as necessary.

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize, BoundaryNorm
import matplotlib.patches as mpatches
import os
import numpy as np
from collections import defaultdict
from adjustText import adjust_text

In [None]:
# Ecoregion polygons (RESOLVE shapefile), reprojected to EPSG:6931
shp_path = '/home/users/clelland/Model/Analysis/RESOLVE shapefile from GEE/resolve_shapefile_from_gee.shp'
gdf = gpd.read_file(shp_path).to_crs(epsg=6931)

# Keep only the boreal forest/taiga and tundra biomes and renumber the rows
# from 0 so that values can later be attached by position
selected_ecoregions = gdf.loc[
    gdf['BIOME_NAME'].isin(['Boreal Forests/Taiga', 'Tundra'])
].reset_index(drop=True)

# Country boundaries used as the map backdrop, in the same projection
world_path = '/home/users/clelland/Model/Analysis/Countries shapefile/world-administrative-boundaries.shp'
gdf_world = gpd.read_file(world_path).to_crs(epsg=6931)

## Climate and FWI variables

### % change

In [None]:
# Read the combined per-ecoregion summary table
summary_df = pd.read_csv('/home/users/clelland/Model/Analysis/Summary stats/master_summary.csv')

# Distinct variables and periods present in the table
variables = summary_df['variable'].unique()
periods = summary_df['period'].unique()

# Every model/scenario label except the 'Observed' rows
models = [name for name in summary_df['model'].unique() if name != 'Observed']

In [None]:
# Print the range and the 5th/50th/95th percentiles of the ISI percent change
new = summary_df.loc[summary_df['variable'] == 'ISI']
isi_percent_change = new['percent_change']
quantiles = isi_percent_change.quantile([0.05, 0.5, 0.95])

print("Min: ", isi_percent_change.min())
print(quantiles)
print("Max: ", isi_percent_change.max())

In [None]:
# Map the percent change of a climate/FWI variable for every selected ecoregion,
# producing one figure per (variable, model, period) combination.
# The commented-out loop headers iterate over everything in the summary file;
# the active ones restrict the run to a single combination.
#for var in variables:
for var in ['t2m']:
    #for model in models:
    for model in ['ACCESS_SSP126']:
        #for period in periods:
        for period in ['2025_2050']:
            if period == 'historical':
                continue
            df_filtered = summary_df[
                (summary_df['variable'] == var) &
                (summary_df['model'] == model) &
                (summary_df['period'] == period)
            ].reset_index(drop=True)

            # Values are attached by position below, so the row counts must agree
            if len(df_filtered) != len(selected_ecoregions):
                print(f"Skipping {var}-{model}-{period}: mismatch in rows ({len(df_filtered)} vs {len(selected_ecoregions)})")
                continue

            # Attach percent_change to the ecoregion GeoDataFrame.
            # NOTE(review): this relies on the summary rows being in the same order
            # as the rows of selected_ecoregions - confirm, or join on a shared key.
            selected_ecoregions['percent_change'] = df_filtered['percent_change'].values

            fig, ax = plt.subplots(figsize=(10, 10))
            minx, miny, maxx, maxy = selected_ecoregions.total_bounds
            buffer = 300000 # 300 km
            ax.set_xlim(minx-buffer, maxx+buffer)
            ax.set_ylim(miny-buffer, maxy+buffer)

            # Plot light blue background (ocean) behind everything else
            ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer), (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                       facecolor='#f8fcff', zorder=0))

            gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

            # Colour scale fixed to the range -2 % .. +2 %
            norm = Normalize(vmin=-2, vmax=2)
            # Plot the filtered ecoregions that actually carry the values; plotting
            # the unfiltered gdf with a column of a different length fails or
            # colours the wrong polygons.
            selected_ecoregions.plot(
                column='percent_change',
                cmap='coolwarm',
                linewidth=0.1,
                edgecolor='black',
                norm=norm,
                ax=ax
            )

            # Add colorbar (the mappable only carries the colormap and norm)
            sm = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
            sm.set_array([])
            cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
            cbar.set_label('% Change')

            plt.title(f'{var} - {model} ({period})', fontsize=14)
            plt.axis('off')
            plt.tight_layout()

            # Save figure (saving is currently switched off)
            out_path = f'/home/users/clelland/Model/Analysis/Summary stats/{var}/{var}_{model}_{period}_percent_change.png'
            #plt.savefig(out_path, dpi=300, bbox_inches='tight')
            plt.show()
            plt.close(fig)

### Raw change

In [None]:
# Re-read the master summary so this section starts from the unfiltered table
summary_df = pd.read_csv('/home/users/clelland/Model/Analysis/Summary stats/master_summary.csv')

# Distinct values used to drive the plotting loops
variables = summary_df['variable'].unique()
periods = summary_df['period'].unique()
models = [label for label in summary_df['model'].unique() if label != 'Observed']

In [None]:
# Compute the raw change of t2m relative to the observed mean of each region.

# Restrict the table to the t2m rows. Columns left over from a previous run of
# this cell are dropped so the merge below does not produce _x/_y suffixes.
summary_df = summary_df[summary_df['variable'] == 't2m']
summary_df = summary_df.drop(columns=['observed_mean', 'raw_change'], errors='ignore')

# Step 1: Observed mean per region, taken from the same (t2m) table.
# Using the ISI subset left in `new` by an earlier cell would never match on
# 'variable' and would leave every observed_mean as NaN.
observed_means = summary_df.loc[
    summary_df['model'] == 'Observed', ['region', 'variable', 'mean_value']
].rename(columns={'mean_value': 'observed_mean'})

# Step 2: Merge with the original DataFrame
# NOTE(review): assumes one 'Observed' row per region/variable - more than one
# would duplicate rows in the merge.
summary_df = summary_df.merge(observed_means, on=['region', 'variable'], how='left')

# Step 3: Calculate raw_change
summary_df['raw_change'] = summary_df['mean_value'] - summary_df['observed_mean']
summary_df

In [None]:
# Map the raw change of a climate variable for every selected ecoregion and
# save one figure per (variable, model, period) combination.
# The commented-out loop headers are alternatives that restrict or widen the run.
#for var in variables:
for var in ['t2m']:
    for model in models:
    #for model in ['ACCESS_SSP370']:
        for period in periods:
        #for period in ['2025_2050']:
            if period == 'historical':
                continue
            df_filtered = summary_df[
                (summary_df['variable'] == var) &
                (summary_df['model'] == model) &
                (summary_df['period'] == period)
            ].reset_index(drop=True)

            # Values are attached by position below, so the row counts must agree
            if len(df_filtered) != len(selected_ecoregions):
                print(f"Skipping {var}-{model}-{period}: mismatch in rows ({len(df_filtered)} vs {len(selected_ecoregions)})")
                continue

            # Attach raw_change to the ecoregion GeoDataFrame.
            # NOTE(review): this relies on the summary rows being in the same order
            # as the rows of selected_ecoregions - confirm, or join on a shared key.
            selected_ecoregions['raw_change'] = df_filtered['raw_change'].values

            fig, ax = plt.subplots(figsize=(10, 10))
            minx, miny, maxx, maxy = selected_ecoregions.total_bounds
            buffer = 300000 # 300 km
            ax.set_xlim(minx-buffer, maxx+buffer)
            ax.set_ylim(miny-buffer, maxy+buffer)

            # Plot light blue background (ocean) behind everything else
            ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer), (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                       facecolor='#f8fcff', zorder=0))

            gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

            # Colour scale fixed to the range -9 .. +9
            norm = Normalize(vmin=-9, vmax=9)
            # Plot the filtered ecoregions that actually carry the values; plotting
            # the unfiltered gdf with a column of a different length fails or
            # colours the wrong polygons.
            selected_ecoregions.plot(
                column='raw_change',
                cmap='coolwarm',
                linewidth=0.1,
                edgecolor='black',
                norm=norm,
                ax=ax
            )

            # Add colorbar (the mappable only carries the colormap and norm)
            sm = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
            sm.set_array([])
            cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
            cbar.set_label('Raw Change (°C)')

            plt.title(f'{var} - {model} ({period})', fontsize=14)
            plt.axis('off')
            plt.tight_layout()

            # Save figure
            out_path = f'/home/users/clelland/Model/Analysis/Summary stats/{var}/{var}_{model}_{period}_raw_change.png'
            plt.savefig(out_path, dpi=300, bbox_inches='tight')
            plt.show()
            plt.close(fig)

## Load for BA

In [None]:
# (observed-file column, model-file region code) pairs, one per ecoregion.
# The order must match the row order of selected_ecoregions.
region_mappings = [
    # N America boreal
    ('alaspen', 'alapen'),
    ('centcan', 'cancsh'),
    ('cookinl', 'cookin'),
    ('copppla', 'copper'),
    ('eastcan', 'eastcf'),
    ('eashti', 'eashti'),
    ('inteala', 'intlow'),
    ('mid-bor', 'midbor'),
    ('midwcan', 'midwes'),
    ('musklak', 'muslta'),
    ('nortcan', 'norths'),
    ('southud', 'sohudb'),
    ('watshig', 'watson'),
    ('nortcor', 'norcor'),
    ('nortter', 'nwterr'),
    # Eurasia boreal
    ('eastsib', 'eastsib'),
    ('icelbor', 'icelnd'),
    ('kamcmea', 'kamkurm'),
    ('kamctai', 'kamtaig'),
    ('nesibta', 'nesibta'),
    ('okhotai', 'okhman'),
    ('sakhisl', 'sakhtai'),
    ('trancon', 'trzconf'),
    ('westsib', 'westsib'),
    ('scanand', 'scrusta'),
    ('uralmon', 'uralfor'),
    # All tundra
    ('ahkland', 'ahklun'),
    ('berilow', 'berlow'),
    ('brooran', 'brookr'),
    ('kalanun', 'kalhar'),
    ('pacicoa', 'pacice'),
    ('novoisl', 'novoisl'),
    ('wranisl', 'wrangel'),
    ('alaseli', 'aleias'),
    ('arctcoa', 'arccoa'),
    ('arctfoo', 'arcfoo'),
    ('beriupl', 'berupl'),
    ('canalow', 'canlow'),
    ('davihig', 'davish'),
    ('canahig', 'canhig'),
    ('inteyuk', 'intalp'),
    ('canamid', 'canmid'),
    ('ogilalp', 'ogilvi'),
    ('tornmou', 'tornga'),
    ('kalste', 'kalste'),
    ('russarc', 'rusarc'),
    ('russber', 'rusbert'),
    ('chermou', 'cherski'),
    ('chukpen', 'chukchi'),
    ('kolapen', 'kolapen'),
    ('nortsib', 'nesibco'),
    ('nortrus', 'nwrunz'),
    ('scanmon', 'scambf'),
    ('taimsib', 'taicens'),
    ('tranbal', 'trzbald'),
    ('yamatun', 'yamalgy'),
    ('kamctun', 'kamtund'),
]

# Model-file column name -> scenario label used in tables and file names
model_labels = {
    f'{gcm} {ssp}': f'{gcm.upper()}_SSP{ssp}'
    for gcm in ('access', 'mri')
    for ssp in ('126', '245', '370')
}

# Model/scenario combinations, in the same order as the labels above
models = list(model_labels)

# Projection periods: label -> (first year, last year)
periods = {
    f'{first}_{last}': (first, last)
    for first, last in (('2025', '2050'), ('2051', '2075'), ('2076', '2100'))
}

## Bar plot of regional BA, including extreme years

In [None]:
# Yearly burned-area bar plots for one group of ecoregions, with the observed
# 95th-percentile line as the "extreme year" threshold.
# To plot another group, switch the active region_mappings assignment AND update
# region_group_label / ba_ylim so the title and y-axis stay in step with the data.
region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
#region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
#               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
#               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra
region_group_label = 'North America boreal'
ba_ylim = 18 # 18, 27, 4.5

# Model-file column names -> scenario labels used in the plots
scenario_names = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

# The observed file holds one column per region, so it is read once up front
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# Collect region-level data
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed series for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Load model data
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_names)

        # Annual totals for model and observed data
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        # Combine side by side on the shared year-end index
        df_ba = pd.concat([df_actual_annual, df_model_annual], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        # Best effort: report the failing region and carry on with the others
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No regional burned-area data could be loaded")

# ---- Combine and sum across all regions ----
# min_count=1 keeps a year as NaN when no region has a value for it
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# ---- Compute extreme threshold from actuals (2001–2024) ----
historic_actual = df_ba_all.loc['2001':'2024', 'Actual']
extreme_threshold = historic_actual.quantile(0.95)

# ---- Plot each model/scenario separately ----
model_cols = [
    'ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
    'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370'
]

for col in model_cols:
#for col in ['ACCESS_SSP126']:
    if col in df_ba_all.columns:
        df_future = df_ba_all.loc['2025':, col].dropna()

        plt.figure(figsize=(12, 6))
        plt.bar(df_future.index.year, df_future.values, label=col)
        plt.axhline(extreme_threshold, color='red', linestyle='--', linewidth=2,
                    label='Extreme Threshold (2001–2024 Actuals)')
        plt.title(f'Yearly Burned Area (2025–2100) {region_group_label} ecoregions – {col}', fontsize=18)
        plt.ylabel('Burned Area (Mha)', fontsize=16)
        plt.xlabel('Year', fontsize=16)
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        #plt.legend()
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.ylim(0, ba_ylim)
        plt.tight_layout()

        # Save or show
        #plt.savefig(f'/home/users/clelland/Model/Analysis/Summary stats/BA/Bar plots/ba_barplot_NAbor_{col.replace(" ", "_")}.png', dpi=300)
        plt.show()
        # Release the figure, as the map cells do
        plt.close()

In [None]:
# Print the range and the 5th/50th/95th percentiles of the observed annual totals
new = historic_actual.copy()
quantiles = new.quantile([0.05, 0.5, 0.95])

lowest, highest = new.min(), new.max()
print("Min: ", lowest)
print(quantiles)
print("Max: ", highest)

In [None]:
# Reset region_mappings to the full list of ecoregions
# (N America boreal, then Eurasia boreal, then all tundra).
region_mappings = (
    [  # N America boreal
        ('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'),
        ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
        ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'),
        ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
        ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr'),
    ]
    + [  # Eurasia boreal
        ('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
        ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'),
        ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
        ('scanand', 'scrusta'), ('uralmon', 'uralfor'),
    ]
    + [  # All tundra
        ('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'),
        ('kalanun', 'kalhar'), ('pacicoa', 'pacice'), ('novoisl', 'novoisl'),
        ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'),
        ('arctfoo', 'arcfoo'), ('beriupl', 'berupl'), ('canalow', 'canlow'),
        ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'),
        ('canamid', 'canmid'), ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'),
        ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'),
        ('chermou', 'cherski'), ('chukpen', 'chukchi'), ('kolapen', 'kolapen'),
        ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'),
        ('taimsib', 'taicens'), ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'),
        ('kamctun', 'kamtund'),
    ]
)

In [None]:
# Extreme-year bar plots for one group of ecoregions.
# To plot another group, switch the active region_mappings assignment AND update
# region_group_label / ba_ylim so the title and y-axis stay in step with the data.
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra
# Title text for the active group (the Eurasia boreal list is active above)
region_group_label = 'Eurasia boreal'
# NOTE(review): 18 is the limit this cell has always used; the list
# "18, 27, 4.5 - 35 for all" appears to give one limit per group, so a different
# value may be intended for the Eurasia boreal selection - confirm.
ba_ylim = 18 # 18, 27, 4.5 - 35 for all

# Model-file column names -> scenario labels used in the plots
scenario_names = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

# The observed file holds one column per region, so it is read once up front
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# Collect region-level data
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed series for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Load model data
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_names)

        # Annual totals for model and observed data
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        # Combine side by side on the shared year-end index
        df_ba = pd.concat([df_actual_annual, df_model_annual], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        # Best effort: report the failing region and carry on with the others
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No regional burned-area data could be loaded")

# ---- Combine and sum across all regions ----
# min_count=1 keeps a year as NaN when no region has a value for it
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# ---- Compute extreme threshold from actuals (2001–2024) ----
historic_actual = df_ba_all.loc['2001':'2024', 'Actual']
extreme_threshold = historic_actual.quantile(0.95)

# Define scenario groupings
access_scenarios = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370']
mri_scenarios = ['MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Colors for SSP scenarios
colors = {
    'ACCESS_SSP126': 'blue',
    'ACCESS_SSP245': 'green',
    'ACCESS_SSP370': 'purple',
    'MRI_SSP126': 'orange',
    'MRI_SSP245': 'red',
    'MRI_SSP370': 'brown'
}

def plot_extreme_years(group_name, scenario_list):
    """Plot the future years whose burned area exceeds the observed threshold.

    Reads the notebook-level df_ba_all, extreme_threshold, colors,
    region_group_label and ba_ylim.

    Parameters
    ----------
    group_name : str
        Name shown in the figure title (e.g. "ACCESS").
    scenario_list : list of str
        Scenario columns of df_ba_all to include; missing columns are ignored.
    """
    # Collect (year-end timestamp, value, scenario) for every extreme year
    extreme_data = []
    for col in scenario_list:
        if col in df_ba_all.columns:
            df_future = df_ba_all.loc['2025':, col].dropna()
            extreme_years = df_future[df_future > extreme_threshold]
            for year, value in extreme_years.items():
                extreme_data.append((year, value, col))

    # Plot one bar per extreme year, coloured by scenario
    plt.figure(figsize=(14, 6))
    for year, value, scenario in extreme_data:
        plt.bar(year.year, value, color=colors[scenario], label=scenario)

    # Threshold line; the label names the years the threshold is computed from
    plt.axhline(extreme_threshold, color='black', linestyle='--', linewidth=1.5,
                label='95th Percentile (from 2001–2024)')

    # No legend is drawn on these figures

    # Styling
    plt.title(f'Extreme Years for {group_name} Scenarios in {region_group_label} ecoregions', fontsize=14)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.ylabel('Burned Area (Mha)', fontsize=18)
    plt.xlabel('Year', fontsize=18)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.xlim(2024, 2103)
    plt.ylim(0, ba_ylim)
    plt.tight_layout()
    #plt.savefig(f'/home/users/clelland/Model/Analysis/Summary stats/BA/Bar plots/extreme_years_{group_name.lower()}_NAbor.png', dpi=300)
    plt.show()
    # Release the figure, as the map cells do
    plt.close()

# Plot each group
plot_extreme_years("ACCESS", access_scenarios)
plot_extreme_years("MRI", mri_scenarios)

In [None]:
# Mean burned area of the non-extreme years, per scenario and time period.
# The display labels live in their own dict so that the notebook-level `periods`
# (underscore keys such as '2025_2050', used for labels and file names in other
# cells) is not overwritten by this cell.
period_windows = {
    '2025–2050': ('2025', '2050'),
    '2051–2075': ('2051', '2075'),
    '2076–2100': ('2076', '2100')
}

# Container for results
mean_non_extreme = []

# Loop through each model scenario
for scenario in access_scenarios + mri_scenarios:
    if scenario not in df_ba_all.columns:
        continue

    # Future years only
    df_scenario = df_ba_all.loc['2025':, [scenario]].copy()

    # Keep only the years at or below the extreme threshold
    df_scenario = df_scenario[df_scenario[scenario] <= extreme_threshold]

    # Mean over each period window
    for label, (start, end) in period_windows.items():
        mean_non_extreme.append({
            'scenario': scenario,
            'period': label,
            'mean_burned_area': df_scenario.loc[start:end, scenario].mean()
        })

# Convert to DataFrame; naming the columns keeps the sort below valid even when
# no scenario column was found
df_non_extreme_means = pd.DataFrame(
    mean_non_extreme, columns=['scenario', 'period', 'mean_burned_area']
)

# Optional: Sort and save
df_non_extreme_means.sort_values(by=['scenario', 'period'], inplace=True)
#df_non_extreme_means.to_csv(
#    '/home/users/clelland/Model/Analysis/Summary stats/BA/non_extreme_means_by_period.csv',
#    index=False
#)

# Show result
df_non_extreme_means

In [None]:
# FOR LEGEND ONLY
# Draws the extreme-year bars for every scenario on one figure so that a single
# legend (scenario colours plus the threshold line) can be produced.
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal

# Model-file column names -> scenario labels used in the plot
scenario_names = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

# The observed file holds one column per region, so it is read once up front
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# Collect region-level data
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed series for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Load model data
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_names)

        # Annual totals for model and observed data
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        # Combine side by side on the shared year-end index
        df_ba = pd.concat([df_actual_annual, df_model_annual], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        # Best effort: report the failing region and carry on with the others
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No regional burned-area data could be loaded")

# ---- Combine and sum across all regions ----
# min_count=1 keeps a year as NaN when no region has a value for it
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Recompute 95th percentile threshold from 2001–2024 actual burned area
historic_actual = df_ba_all.loc['2001':'2024', 'Actual']
threshold_95 = historic_actual.quantile(0.95)

# Model scenario columns
model_cols = [
    'ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
    'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370'
]

# Colors for each scenario
colors = {
    'ACCESS_SSP126': 'blue',
    'ACCESS_SSP245': 'green',
    'ACCESS_SSP370': 'purple',
    'MRI_SSP126': 'orange',
    'MRI_SSP245': 'red',
    'MRI_SSP370': 'brown'
}

# Collect extreme-year bars as (year-end timestamp, value, scenario)
extreme_data = []

for col in model_cols:
    if col in df_ba_all.columns:
        df_future = df_ba_all.loc['2025':, col].dropna()
        extreme_years = df_future[df_future > threshold_95]
        for year, value in extreme_years.items():
            extreme_data.append((year, value, col))

# Create plot
plt.figure(figsize=(14, 6))

# Plot each bar
for year, value, scenario in extreme_data:
    plt.bar(year.year, value, color=colors[scenario], label=scenario)

# Add 95th percentile threshold line; the label names the years the threshold
# is actually computed from
plt.axhline(threshold_95, color='black', linestyle='--', linewidth=1.5,
            label='95th Percentile (2001–2024)')

# Every bar registers its own legend entry; keep one handle per label
handles, labels = plt.gca().get_legend_handles_labels()
unique_labels = dict(zip(labels, handles))

# Legend below the plot with 2 columns
plt.legend(
    unique_labels.values(),
    unique_labels.keys(),
    fontsize=10,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.15),
    ncol=2
)
# Labels and styling
plt.title('Extreme Years (Above 95th Percentile) for Burned Area per Scenario (2025–2100)', fontsize=14)
plt.ylabel('Burned Area (Summed Across Regions)', fontsize=12)
plt.xlabel('Year', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()

# Show or save
#plt.savefig('/home/users/clelland/Model/Analysis/Summary stats/BA/Bar plots/extreme_bar_legend.png', dpi=300)
plt.show()

## % Change of BA per time period

In [None]:
# % CHANGE OF BA PER TIME PERIOD
# For every region, compare the modelled mean annual burned area of each future
# period with the observed 2001–2024 mean, then map the percent change.
# Uses the notebook-level region_mappings, models, model_labels and periods.
change_records = []

# The observed file holds one column per region, so it is read once up front
actual_path = '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv'
df_actual_all = pd.read_csv(actual_path, parse_dates=['date'], index_col='date')

for region_actual, region_model in region_mappings:
    model_path = f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv'

    if not os.path.exists(model_path):
        print(f"Missing model file for region: {region_model}")
        continue

    try:
        df_actual = df_actual_all[region_actual]
        df_model = pd.read_csv(model_path, parse_dates=['time'], index_col='time')
    except (OSError, ValueError, KeyError) as e:
        # Unknown region column or unreadable model file: report and skip
        print(f"Error reading data for {region_actual}: {e}")
        continue

    # Monthly -> Annual
    df_actual_annual = df_actual.resample('YE').sum()
    df_model_annual = df_model.resample('YE').sum().astype(float)

    # Historical mean (2001–2024)
    hist_mean = df_actual_annual.loc['2001':'2024'].mean()

    for model in models:
        model_series = df_model_annual[model]

        for period_label, (start, end) in periods.items():
            future_mean = model_series.loc[start:end].mean()
            # A zero historical mean makes the ratio undefined, so 0 is recorded.
            # NOTE(review): a zero future mean is also recorded as 0 and not as
            # -100 % - confirm this is intended.
            if hist_mean == 0 or future_mean == 0:
                percent_change = 0
            else:
                percent_change = 100 * (future_mean - hist_mean) / hist_mean
            change_records.append({
                'region_model': region_model,
                'model': model_labels[model],
                'period': period_label,
                'percent_change': percent_change
            })

# Convert to DataFrame for mapping
df_change = pd.DataFrame(change_records)

# Loop over models/periods to plot maps
for model in model_labels.values():
#for model in ['ACCESS_SSP126']:
    for period in periods.keys():
    #for period in ['2025_2050']:
        df_plot = df_change[(df_change['model'] == model) & (df_change['period'] == period)].reset_index(drop=True)

        # Values are attached by position, so there must be exactly one record per
        # ecoregion (region_mappings has to list every selected ecoregion, in order)
        if len(df_plot) != len(selected_ecoregions):
            print(f"Skipping {model}-{period}: mismatch in row counts")
            continue

        selected_ecoregions['percent_change'] = df_plot['percent_change'].values

        # Plot
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = selected_ecoregions.total_bounds
        buffer = 300000 # 300 km
        ax.set_xlim(minx-buffer, maxx+buffer)
        ax.set_ylim(miny-buffer, maxy+buffer)

        # Plot light blue background (ocean) behind everything else
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer), (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f0f8ff', zorder=0))

        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Colour scale fixed to the range -100 % .. +100 %
        norm = Normalize(vmin=-100, vmax=100)
        selected_ecoregions.plot(
            column='percent_change',
            cmap='coolwarm',
            edgecolor='black',
            linewidth=0.2,
            norm=norm,
            ax=ax
        )

        # Colorbar (the mappable only carries the colormap and norm)
        sm = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
        cbar.set_label('% Change in Burned Area')
        #cbar.set_label('% Change in Burned Area', fontsize=18)
        #cbar.ax.tick_params(labelsize=16)

        plt.title(f'Burned Area % Change - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Percent change/ba_percent_change_{model}_{period}.png'
        #out_path = '/home/users/clelland/Model/Analysis/Summary stats/BA/Percent change/ba_percent_change_legend.png'
        plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

In [None]:
# Print the range and the 5th/50th/95th percentiles of the BA percent change
new = df_change.copy()
ba_percent_change = new['percent_change']
quantiles = ba_percent_change.quantile([0.05, 0.5, 0.95])

print("Min: ", ba_percent_change.min())
print(quantiles)
print("Max: ", ba_percent_change.max())

## Raw change of BA per time period in Mha

In [None]:
# RAW CHANGE OF BA PER TIME PERIOD IN MHA
# For every region, subtract the observed 2001–2024 mean annual burned area from
# the modelled mean of each future period, then map the difference.
# Uses the notebook-level region_mappings, models, model_labels and periods.
change_records = []

# The observed file holds one column per region, so it is read once up front
actual_path = '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv'
df_actual_all = pd.read_csv(actual_path, parse_dates=['date'], index_col='date')

for region_actual, region_model in region_mappings:
    model_path = f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv'

    if not os.path.exists(model_path):
        print(f"Missing model file for region: {region_model}")
        continue

    try:
        df_actual = df_actual_all[region_actual]
        df_model = pd.read_csv(model_path, parse_dates=['time'], index_col='time')
    except (OSError, ValueError, KeyError) as e:
        # Unknown region column or unreadable model file: report and skip
        print(f"Error reading data for {region_actual}: {e}")
        continue

    # Monthly -> Annual
    df_actual_annual = df_actual.resample('YE').sum()
    df_model_annual = df_model.resample('YE').sum().astype(float)

    # Historical mean (2001–2024)
    hist_mean = df_actual_annual.loc['2001':'2024'].mean()

    for model in models:
        model_series = df_model_annual[model]

        for period_label, (start, end) in periods.items():
            future_mean = model_series.loc[start:end].mean()
            raw_change = (future_mean - hist_mean)
            change_records.append({
                'region_model': region_model,
                'model': model_labels[model],
                'period': period_label,
                'raw_change_mha': raw_change
            })

# Convert to DataFrame for mapping
df_change = pd.DataFrame(change_records)

# Loop over models/periods to plot maps
#for model in model_labels.values():
for model in ['ACCESS_SSP370']:
    for period in periods.keys():
        df_plot = df_change[(df_change['model'] == model) & (df_change['period'] == period)].reset_index(drop=True)

        # Values are attached by position, so there must be exactly one record per
        # ecoregion (region_mappings has to list every selected ecoregion, in order)
        if len(df_plot) != len(selected_ecoregions):
            print(f"Skipping {model}-{period}: mismatch in row counts")
            continue

        selected_ecoregions['raw_change_mha'] = df_plot['raw_change_mha'].values

        # Plot
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = selected_ecoregions.total_bounds
        buffer = 300000 # 300 km
        ax.set_xlim(minx-buffer, maxx+buffer)
        ax.set_ylim(miny-buffer, maxy+buffer)

        # Plot light blue background (ocean) behind everything else
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer), (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f8fcff', zorder=0))

        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Colour scale fixed to the range -1 .. +1
        norm = Normalize(vmin=-1, vmax=1)
        selected_ecoregions.plot(
            column='raw_change_mha',
            cmap='coolwarm',
            edgecolor='black',
            linewidth=0.2,
            norm=norm,
            ax=ax
        )

        # Colorbar (the mappable only carries the colormap and norm)
        sm = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
        cbar.set_label('Change in Mean Burned Area (Mha)')

        plt.title(f'Burned Area Change (Mha) - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        # Saving is currently switched off
        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/ba_change_mha_{model}_{period}.png'
        #plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)

In [None]:
# Grouped ecoregions
# Each tuple pairs an ecoregion's column name in the observed file with the
# short name used for its model output: (observed_name, model_name).
nabor = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')]
eubor = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')]
tundra = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')]

# Combine all groups
all_groups = [('nabor', nabor), ('eubor', eubor), ('tundra', tundra)]

# Look-up tables: observed name → group and future (model) name → group
observed_to_group = {}
future_to_group = {}
for grp, pairs in all_groups:
    for obs, fut in pairs:
        observed_to_group[obs] = grp
        future_to_group[fut] = grp

# Mean annual observed BA per ecoregion column
df_actual_all = pd.read_csv(actual_path, parse_dates=['date'], index_col='date')
mean_actual = df_actual_all.resample('YE').sum().mean()

# --- Step 1: Process mean_actual
# Turn the Series into a two-column table and tag each ecoregion with its group
mean_actual_df = mean_actual.rename_axis('observed_name').reset_index(name='sum_actual_mha')
mean_actual_df['group'] = mean_actual_df['observed_name'].map(observed_to_group)

# Sum the per-ecoregion means within each group
grouped_actual = mean_actual_df.groupby('group', as_index=False)['sum_actual_mha'].sum()

# --- Step 2: Process df_change (future data)
# Tag every change record with its group, then sum the raw change
# per group, model and period
df_change['group'] = df_change['region_model'].map(future_to_group)
grouped_means = df_change.groupby(['group', 'model', 'period'], as_index=False)['raw_change_mha'].sum()

# --- Step 3: Merge historical totals into future data
final_df = grouped_means.merge(grouped_actual, on='group', how='left')
final_df = final_df.assign(
    pct_change=final_df['raw_change_mha'].div(final_df['sum_actual_mha']).mul(100),
    final_tot=final_df['raw_change_mha'].add(final_df['sum_actual_mha']),
)

final_df

In [None]:
# Individual ecoregions
# Each tuple is (observed_name, model_name): the ecoregion's column name in
# the observed file and the short name used for its model output.
region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr'), ('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor'), ('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')]

# Create future → observed name mapping
future_to_observed = {future: observed for observed, future in region_mappings}

# Assign observed_name to df_change
df_change['observed_name'] = df_change['region_model'].map(future_to_observed)

# Convert mean_actual to DataFrame
mean_actual_df = mean_actual.reset_index()
mean_actual_df.columns = ['observed_name', 'sum_actual_mha']

# Merge individual ecoregion change data with historical means
df_ecoregion_change = df_change.merge(mean_actual_df, on='observed_name', how='left')

# Compute percent change per ecoregion/model/period
df_ecoregion_change['pct_change'] = (
    df_ecoregion_change['raw_change_mha'] / df_ecoregion_change['sum_actual_mha'] * 100
)
# A zero historical value with a non-zero change would divide by zero;
# those rows are reported as 100 % instead.
df_ecoregion_change.loc[
    (df_ecoregion_change['sum_actual_mha'] == 0) &
    (df_ecoregion_change['raw_change_mha'] != 0),
    'pct_change'
] = 100

# Keep only relevant columns. Take an explicit copy so that adding
# 'final_tot' below writes to an independent frame and does not raise
# SettingWithCopyWarning.
# NOTE: the 'group' column must already exist on df_change (it is added by
# the grouped-ecoregions cell).
result = df_ecoregion_change[['region_model', 'model', 'period', 'raw_change_mha', 'sum_actual_mha', 'pct_change', 'group']].copy()
result['final_tot'] = result['raw_change_mha'] + result['sum_actual_mha']
#result

result[result['region_model'] == 'westsib']
#result[(result['model'] == 'MRI_SSP126') & (result['period'] == '2076_2100')].sort_values(by='pct_change', ascending=False)
#result[(result['group'] == 'nabor') & (result['model'] == 'ACCESS_SSP370') & (result['period'] == '2076_2100')].sort_values(by='pct_change', ascending=False)

# Use those indices to extract full rows
#idx_max = result.groupby(['period', 'model'])['pct_change'].idxmax()
#max_per_group = result.loc[idx_max].reset_index(drop=True)
#max_per_group

In [None]:
# Totals over all ecoregions for each model and period
total_summary = (
    result
    .groupby(['model', 'period'], as_index=False)[['raw_change_mha', 'sum_actual_mha']]
    .sum()
)

# Total percent change and resulting total burned area
total_summary = total_summary.assign(
    total_pct_change=total_summary['raw_change_mha'].div(total_summary['sum_actual_mha']).mul(100),
    final_tot=total_summary['raw_change_mha'].add(total_summary['sum_actual_mha']),
)

total_summary

In [None]:
# Spread of the raw BA change values held in df_change:
# minimum, 5th / 50th / 95th percentiles and maximum.
new = df_change.copy()
raw_values = new['raw_change_mha']
quantiles = raw_values.quantile([0.05, 0.5, 0.95])
print("Min: ", raw_values.min())
print(quantiles)
print("Max: ", raw_values.max())

## Calculate FRI

In [None]:
# Fire Return Interval (FRI) per ecoregion.
# For each region the mean annual burned area is divided by the region area
# to give the mean fraction burned per year (FBA); FRI is its reciprocal.
# Computed once for the observed record and once per model and period.
# Uses `region_mappings`, `models`, `model_labels` and `periods`, which are
# defined in other cells.

# Load region areas (Mha)
df_ecoarea = pd.read_csv('/home/users/clelland/Model/Analysis/ecoregion_area.csv')
area_dict = dict(zip(df_ecoarea['short_name'], df_ecoarea['area_Mha']))

# Store FRI results
fri_records = []

# The observed file is indexed below by one column per ecoregion, so it is
# read once here rather than once per region.
actual_path = '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv'
df_actual_all = pd.read_csv(actual_path, parse_dates=['date'], index_col='date')

for region_actual, region_model in region_mappings:
    model_path = f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv'

    if not os.path.exists(model_path):
        print(f"Missing model file for region: {region_model}")
        continue

    try:
        df_actual = df_actual_all[region_actual]
        df_model = pd.read_csv(model_path, parse_dates=['time'], index_col='time')
    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Error reading data for {region_actual}: {e}")
        continue

    # Monthly -> Annual BA (Mha)
    df_actual_annual = df_actual.resample('YE').sum()
    df_model_annual = df_model.resample('YE').sum().astype(float)

    # Get region area in Mha (areas are keyed by the observed short name)
    area_Mha = area_dict.get(region_actual)
    if area_Mha is None:
        print(f"Missing area for region: {region_actual}")
        continue

    # Compute mean FBA and FRI for historical (actual);
    # a region with no burned area gets an infinite FRI
    hist_mean_fba = df_actual_annual['2001':'2024'].mean() / area_Mha
    hist_fri = 1 / hist_mean_fba if hist_mean_fba > 0 else float('inf')

    fri_records.append({
        'region': region_actual,
        'model': 'Observed',
        'period': '2001–2024',
        'mean_fba': hist_mean_fba,
        'fri': hist_fri
    })

    for model in models:
        model_series = df_model_annual[model]

        for period_label, (start, end) in periods.items():
            mean_ba = model_series[start:end].mean()
            mean_fba = mean_ba / area_Mha
            fri = 1 / mean_fba if mean_fba > 0 else float('inf')

            fri_records.append({
                'region': region_actual,
                'model': model_labels[model],
                'period': period_label,
                'mean_fba': mean_fba,
                'fri': fri
            })

# Convert to DataFrame
df_fri = pd.DataFrame(fri_records)

# Define FRI bins and labels
bins = [0, 100, 200, 300, 400, 500, 600, float('inf')]
labels = ['<100', '100–200', '200–300', '300–400', '400–500', '500–600', '600+']

# Add 'fri_grouped' column
# NOTE(review): with right=False the bins are left-closed, so the last one is
# [600, inf) and an infinite FRI (no burned area) is left uncategorised (NaN)
# rather than labelled '600+'. Confirm this is intended.
df_fri['fri_grouped'] = pd.cut(df_fri['fri'], bins=bins, labels=labels, right=False)

# Optional: Save
#df_fri.to_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv', index=False)
df_fri

## FRI - Model

In [None]:
# Map the modelled FRI class of every ecoregion, one figure per model/period.
# Uses `region_mappings`, `periods`, `selected_ecoregions` and `gdf_world`,
# which are defined in other cells.
df_fri = pd.read_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv')

# Map fri values to selected_ecoregions
# (region names are attached by position, in region_mappings order)
selected_ecoregions['region'] = [pair[0] for pair in region_mappings]

# Set categorical ordering for fri_grouped
fri_labels = ['<100', '100–200', '200–300', '300–400', '400–500', '500–600', '600+']
fri_labels_cat = pd.CategoricalDtype(categories=fri_labels, ordered=True)

# Define colormap: one discrete colour per FRI class. The norm puts a bin
# around every integer class code (boundaries at -0.5, 0.5, ..., 6.5) and is
# shared by map and colourbar, so a class always gets the colour shown for it
# on the colourbar regardless of which classes occur in a given model/period.
cmap = plt.get_cmap('coolwarm', len(fri_labels)).reversed()
norm = BoundaryNorm(np.arange(len(fri_labels) + 1) - 0.5, cmap.N)

# Plotting loop
#for model in model_labels.values():
for model in ['ACCESS_SSP126']:
    for period in periods.keys():

        # Filter for the selected model and period only
        df_fri_sel = df_fri[(df_fri['model'] == model) & (df_fri['period'] == period)][['region', 'fri_grouped']].copy()
        df_fri_sel['fri_grouped'] = df_fri_sel['fri_grouped'].astype(fri_labels_cat)
        df_fri_sel['fri_grouped_int'] = df_fri_sel['fri_grouped'].cat.codes

        # Drop FRI columns that another cell may have left on
        # selected_ecoregions; otherwise the merge would suffix them (_x/_y)
        # and 'fri_grouped' would no longer exist under that name.
        selected_ecoregions_merged = (
            selected_ecoregions
            .drop(columns=['fri_grouped', 'fri_grouped_int'], errors='ignore')
            .merge(df_fri_sel, on='region', how='left')
        )

        # Plotting
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = selected_ecoregions.total_bounds
        buffer = 300000
        ax.set_xlim(minx - buffer, maxx + buffer)
        ax.set_ylim(miny - buffer, maxy + buffer)

        # Light background
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer),
                                   (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f8fcff', zorder=0))

        # Background map
        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Drop rows with NaN in fri_grouped before plotting
        selected_ecoregions_plot = selected_ecoregions_merged.dropna(subset=['fri_grouped'])

        # Plot FRI categories
        selected_ecoregions_plot.plot(
            column='fri_grouped_int',
            cmap=cmap,
            norm=norm,
            edgecolor='black',
            linewidth=0.2,
            ax=ax,
            legend=False
        )

        # Colorbar: ticks sit on the integer class codes, i.e. the bin centres
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01,
                            ticks=np.arange(len(fri_labels)))
        cbar.ax.set_xticklabels(fri_labels)
        cbar.set_label('Fire Return Interval (Years)')

        plt.title(f'Mean Fire Return Interval - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        # Save or show
        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/FRI/ba_fri_{model}_{period}.png'
        #plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close()

In [None]:
# Spread of the FRI values held in df_fri:
# minimum, 5th / 50th / 95th percentiles and maximum.
new = df_fri.copy()
fri_values = new['fri']
quantiles = fri_values.quantile([0.05, 0.5, 0.95])
print("Min: ", fri_values.min())
print(quantiles)
print("Max: ", fri_values.max())

## FRI - Actual

In [None]:
# Map the observed (2001–2024) FRI class of every ecoregion.
# Uses `region_mappings`, `selected_ecoregions` and `gdf_world`, which are
# defined in other cells.
df_fri = pd.read_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv')

# Filter for observed model only
df_fri_obs = df_fri[df_fri['model'] == 'Observed'][['region', 'fri', 'fri_grouped']]

# Map fri values to selected_ecoregions
# (region names are attached by position, in region_mappings order)
selected_ecoregions['region'] = [pair[0] for pair in region_mappings]
# Drop FRI columns left by a previous run of this cell first, so that
# re-running it does not produce suffixed duplicates (fri_x / fri_y, ...).
selected_ecoregions = (
    selected_ecoregions
    .drop(columns=['fri', 'fri_grouped', 'fri_grouped_int'], errors='ignore')
    .merge(df_fri_obs, on='region', how='left')
)

# Set categorical ordering for fri_grouped
fri_labels = ['<100', '100–200', '200–300', '300–400', '400–500', '500–600', '600+']
selected_ecoregions['fri_grouped'] = pd.Categorical(selected_ecoregions['fri_grouped'], categories=fri_labels, ordered=True)
selected_ecoregions['fri_grouped_int'] = selected_ecoregions['fri_grouped'].cat.codes

# Define colormap: one discrete colour per FRI class. The norm puts a bin
# around every integer class code (boundaries at -0.5, 0.5, ..., 6.5) and is
# shared by map and colourbar, so a class always gets the colour shown for it
# on the colourbar even when some classes do not occur in the data.
cmap = plt.get_cmap('coolwarm', len(fri_labels)).reversed()
norm = BoundaryNorm(np.arange(len(fri_labels) + 1) - 0.5, cmap.N)

# Plotting
fig, ax = plt.subplots(figsize=(10, 10))
minx, miny, maxx, maxy = selected_ecoregions.total_bounds
buffer = 300000
ax.set_xlim(minx - buffer, maxx + buffer)
ax.set_ylim(miny - buffer, maxy + buffer)

# Light background
ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer),
                           (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                           facecolor='#f8fcff', zorder=0))

# Background map
gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

# Drop rows with NaN in fri_grouped before plotting
selected_ecoregions_plot = selected_ecoregions.dropna(subset=['fri_grouped'])

# Plot FRI categories
selected_ecoregions_plot.plot(
    column='fri_grouped_int',
    cmap=cmap,
    norm=norm,
    edgecolor='black',
    linewidth=0.2,
    ax=ax,
    legend=False
)

# Colorbar: ticks sit on the integer class codes, i.e. the bin centres
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01,
                    ticks=np.arange(len(fri_labels)))
cbar.ax.set_xticklabels(fri_labels)
cbar.set_label('Fire Return Interval (Years)')

plt.title('Mean Fire Return Interval (FRI) - Observed (2001–2024)', fontsize=14)
plt.axis('off')
plt.tight_layout()

# Save or show
out_path = '/home/users/clelland/Model/Analysis/Summary stats/BA/ba_fri_actual.png'
plt.savefig(out_path, dpi=300, bbox_inches='tight')
plt.show()
plt.close()

In [None]:
# Spread of the FRI values of the ecoregions that were plotted:
# minimum, 5th / 50th / 95th percentiles and maximum.
new = selected_ecoregions_plot.copy()
fri_values = new['fri']
quantiles = fri_values.quantile([0.05, 0.5, 0.95])
print("Min: ", fri_values.min())
print(quantiles)
print("Max: ", fri_values.max())

## FRI - Change

In [None]:
# DISCRETE
# Map the change in FRI class (projected minus observed) per ecoregion, one
# figure per model/period. Uses `region_mappings`, `periods`,
# `selected_ecoregions` and `gdf_world`, which are defined in other cells.
df_fri = pd.read_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv')
# Categorical FRI labels and codes
fri_labels = ['<100', '100–200', '200–300', '300–400', '400–500', '500–600', '600+']
fri_labels_cat = pd.CategoricalDtype(categories=fri_labels, ordered=True)

# Observed FRI mapping (explicit copy: columns are assigned on it below)
df_fri_obs = df_fri[df_fri['model'] == 'Observed'][['region', 'fri_grouped']].copy()
df_fri_obs['fri_grouped'] = pd.Categorical(df_fri_obs['fri_grouped'], categories=fri_labels, ordered=True)
df_fri_obs['fri_grouped_int'] = df_fri_obs['fri_grouped'].cat.codes

# Add region column to geometry
# (region names are attached by position, in region_mappings order)
selected_ecoregions['region'] = [pair[0] for pair in region_mappings]


def compute_diff(row):
    """Return the FRI class change (projected - observed) for one merged row.

    A row with only a projected class returns -1 and a row with only an
    observed class returns +1; when both are present the difference of the
    integer class codes is returned; when neither is present, NaN.
    """
    if pd.isna(row['fri_grouped_obs']) and not pd.isna(row['fri_grouped_proj']):
        return -1
    elif not pd.isna(row['fri_grouped_obs']) and pd.isna(row['fri_grouped_proj']):
        return 1
    elif not pd.isna(row['fri_grouped_int_proj']) and not pd.isna(row['fri_grouped_int_obs']):
        return row['fri_grouped_int_proj'] - row['fri_grouped_int_obs']
    else:
        return np.nan


# Loop over future scenarios and periods
#for model in model_labels.values():
for model in ['ACCESS_SSP126']:
    for period in periods.keys():
        # Filter projected data
        df_fri_sel = df_fri[(df_fri['model'] == model) & (df_fri['period'] == period)][['region', 'fri_grouped']].copy()
        df_fri_sel['fri_grouped'] = pd.Categorical(df_fri_sel['fri_grouped'], categories=fri_labels, ordered=True)
        df_fri_sel['fri_grouped_int'] = df_fri_sel['fri_grouped'].cat.codes

        # Attach the observed class to every projected row (left join on the
        # projected table); regions without an observed row get NaN
        df_diff = df_fri_sel.merge(df_fri_obs, on='region', how='left', suffixes=('_proj', '_obs'))

        # Assign category difference
        df_diff['diff_cat'] = df_diff.apply(compute_diff, axis=1)

        # Merge into geodataframe
        gdf_diff = selected_ecoregions.merge(df_diff[['region', 'diff_cat']], on='region', how='left')

        # Plotting
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = gdf_diff.total_bounds
        buffer = 300000
        ax.set_xlim(minx - buffer, maxx + buffer)
        ax.set_ylim(miny - buffer, maxy + buffer)

        # Background
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer),
                                   (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f8fcff', zorder=0))
        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Drop NaNs
        gdf_diff_plot = gdf_diff.dropna(subset=['diff_cat'])

        # Colormap (diverging for ± values)
        bounds = np.arange(-6.5, 7.5, 1)  # This creates 14 boundaries, which makes 13 bins
        cmap = plt.get_cmap('coolwarm', 13).reversed()
        norm = BoundaryNorm(boundaries=bounds, ncolors=cmap.N)

        gdf_diff_plot.plot(
            column='diff_cat',
            cmap=cmap,
            norm=norm,
            edgecolor='black',
            linewidth=0.2,
            ax=ax,
            legend=False
        )

        # Colorbar (stand-alone mappable; set_array is the public way to give
        # it an empty data array)
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01, ticks=np.arange(-6, 7, 1))
        cbar.set_label('Change in FRI Category (Projected - Observed)', fontsize=11)

        # Title and save
        plt.title(f'FRI Category Change - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/FRI/Change/ba_fri_change_{model}_{period}.png'
        #plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close()

In [None]:
# CONTINUOUS
# Map the change in FRI in years (projected minus observed) per ecoregion,
# one figure per model/period. Uses `region_mappings`, `model_labels`,
# `periods`, `selected_ecoregions` and `gdf_world`, which are defined in
# other cells.
# Load data
df_fri = pd.read_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv')

# Observed FRI (assumes numeric FRI value is in a column named 'fri')
df_fri_obs = df_fri[df_fri['model'] == 'Observed'][['region', 'fri']].rename(columns={'fri': 'fri_obs'})

# Add region column to geometry
# (region names are attached by position, in region_mappings order)
selected_ecoregions['region'] = [pair[0] for pair in region_mappings]

# Loop over future scenarios and periods
for model in model_labels.values():
#for model in ['ACCESS_SSP126']:
    for period in periods.keys():
        # Select projected FRI data
        df_fri_sel = df_fri[(df_fri['model'] == model) & (df_fri['period'] == period)][['region', 'fri']].copy()
        df_fri_sel.rename(columns={'fri': 'fri_proj'}, inplace=True)

        # Merge observed and projected
        df_diff = df_fri_sel.merge(df_fri_obs, on='region', how='left')

        # Calculate continuous difference. An infinite FRI on one side only
        # gives ±inf; infinite on both sides gives NaN, which is dropped below.
        df_diff['diff_fri'] = df_diff['fri_proj'] - df_diff['fri_obs']

        # Merge into geodataframe
        gdf_diff = selected_ecoregions.merge(df_diff[['region', 'diff_fri']], on='region', how='left')

        # Plotting
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = gdf_diff.total_bounds
        buffer = 300000
        ax.set_xlim(minx - buffer, maxx + buffer)
        ax.set_ylim(miny - buffer, maxy + buffer)

        # Background
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer),
                                   (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f8fcff', zorder=0))
        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Drop NaNs
        gdf_diff_plot = gdf_diff.dropna(subset=['diff_fri'])

        # Continuous colormap: -600 to +600
        vmin, vmax = -600, 600
        norm = Normalize(vmin=vmin, vmax=vmax)
        cmap = plt.get_cmap('coolwarm').reversed()

        gdf_diff_plot.plot(
            column='diff_fri',
            cmap=cmap,
            norm=norm,
            edgecolor='black',
            linewidth=0.2,
            ax=ax,
            legend=False
        )

        # Colorbar (stand-alone mappable; set_array is the public way to give
        # it an empty data array)
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
        cbar.set_label('Change in FRI (Years)', fontsize=11)

        # Title and save
        plt.title(f'FRI Change (Years) - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/FRI/Change/ba_fri_change_{model}_{period}_continuous.png'
        #plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close()

In [None]:
# CONTINUOUS - log transformed
# Map the signed log10 of the FRI change (projected minus observed) per
# ecoregion, one figure per model/period. Uses `region_mappings`, `periods`,
# `selected_ecoregions` and `gdf_world`, which are defined in other cells.
# Load data
df_fri = pd.read_csv('/home/users/clelland/Model/Analysis/fri_by_ecoregion.csv')

# Observed FRI (assumes numeric FRI value is in a column named 'fri')
df_fri_obs = df_fri[df_fri['model'] == 'Observed'][['region', 'fri']].rename(columns={'fri': 'fri_obs'})

# Add region column to geometry
# (region names are attached by position, in region_mappings order)
selected_ecoregions['region'] = [pair[0] for pair in region_mappings]


def signed_log_transform(x):
    """Return sign(x) * log10(1 + |x|).

    Accepts a scalar or a pandas Series; NaN inputs stay NaN.
    """
    return np.sign(x) * np.log10(1 + np.abs(x))


# Loop over future scenarios and periods
#for model in model_labels.values():
for model in ['ACCESS_SSP126']:
    for period in periods.keys():
        # Select projected FRI data
        df_fri_sel = df_fri[(df_fri['model'] == model) & (df_fri['period'] == period)][['region', 'fri']].copy()
        df_fri_sel.rename(columns={'fri': 'fri_proj'}, inplace=True)

        # Merge observed and projected
        df_diff = df_fri_sel.merge(df_fri_obs, on='region', how='left')

        # Calculate continuous difference
        df_diff['diff_fri'] = df_diff['fri_proj'] - df_diff['fri_obs']

        # Log-transform while preserving sign (whole column at once)
        df_diff['diff_fri_log'] = signed_log_transform(df_diff['diff_fri'])

        # Merge into geodataframe
        gdf_diff = selected_ecoregions.merge(df_diff[['region', 'diff_fri_log']], on='region', how='left')

        # Plotting
        fig, ax = plt.subplots(figsize=(10, 10))
        minx, miny, maxx, maxy = gdf_diff.total_bounds
        buffer = 300000
        ax.set_xlim(minx - buffer, maxx + buffer)
        ax.set_ylim(miny - buffer, maxy + buffer)

        # Background
        ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer),
                                   (maxx - minx) + 2 * buffer, (maxy - miny) + 2 * buffer,
                                   facecolor='#f8fcff', zorder=0))
        gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

        # Drop NaNs
        gdf_diff_plot = gdf_diff.dropna(subset=['diff_fri_log'])

        # Continuous colormap: -6 to +6
        vmin, vmax = -6, 6
        norm = Normalize(vmin=vmin, vmax=vmax)
        cmap = plt.get_cmap('coolwarm').reversed()

        gdf_diff_plot.plot(
            column='diff_fri_log',
            cmap=cmap,
            norm=norm,
            edgecolor='black',
            linewidth=0.2,
            ax=ax,
            legend=False
        )

        # Colorbar (stand-alone mappable; set_array is the public way to give
        # it an empty data array)
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
        cbar.set_label('Log Change in FRI', fontsize=11)

        # Title and save
        plt.title(f'Log Change in FRI - {model} ({period})', fontsize=14)
        plt.axis('off')
        plt.tight_layout()

        out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/FRI/Change/ba_fri_log_change_{model}_{period}_continuous.png'
        #plt.savefig(out_path, dpi=300, bbox_inches='tight')
        plt.show()
        plt.close()

## Plot basic ecoregion maps

### Original

In [None]:
# Boreal/tundra map: every ecoregion filled with the colour of its biome
fig, ax = plt.subplots(figsize=(10, 10))

# Map extent: ecoregion bounds padded by `buffer` on every side
buffer = 300000
minx, miny, maxx, maxy = selected_ecoregions.total_bounds
ax.set_xlim(minx - buffer, maxx + buffer)
ax.set_ylim(miny - buffer, maxy + buffer)

# Background
background = plt.Rectangle(
    (minx - buffer, miny - buffer),
    (maxx - minx) + 2 * buffer,
    (maxy - miny) + 2 * buffer,
    facecolor='#f8fcff', zorder=0)
ax.add_patch(background)

# Plot world outline
gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

# Fill colour per biome number: 6 is drawn green (legend 'Boreal'),
# 11 is drawn blue (legend 'Tundra')
biome_colours = {6: 'green', 11: 'blue'}
for biome_num, colour in biome_colours.items():
    selected_ecoregions.loc[selected_ecoregions['BIOME_NUM'] == biome_num, 'biome_color'] = colour

# Plot the ecoregions with assigned colors
selected_ecoregions.plot(
    color=selected_ecoregions['biome_color'],
    edgecolor='black',
    linewidth=0.2,
    ax=ax,
    alpha=0.7
)

# Add legend manually
legend_handles = [
    mpatches.Patch(color=colour, label=name, alpha=0.7)
    for colour, name in (('green', 'Boreal'), ('blue', 'Tundra'))
]
ax.legend(handles=legend_handles, loc='lower left')

plt.title('Ecoregions by Biome', fontsize=14)
plt.axis('off')
plt.tight_layout()

# Save or show
#plt.savefig('/home/users/clelland/Model/Analysis/ecoregions_by_biome.png', dpi=300)
plt.show()
plt.close()

In [None]:
# Basic map with numbered ecoregions: each ecoregion is labelled with its
# row index + 1 at its centroid
fig, ax = plt.subplots(figsize=(10, 10))

# Set up map extent with buffer
buffer = 300000
minx, miny, maxx, maxy = selected_ecoregions.total_bounds
ax.set_xlim(minx - buffer, maxx + buffer)
ax.set_ylim(miny - buffer, maxy + buffer)

# Background rectangle
background = plt.Rectangle(
    (minx - buffer, miny - buffer),
    (maxx - minx) + 2 * buffer,
    (maxy - miny) + 2 * buffer,
    facecolor='#f8fcff', zorder=0)
ax.add_patch(background)

# Plot world outline
gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

# Plot all ecoregions, faintly tinted by biome (6 -> green, 11 -> blue)
for biome_num, colour in ((6, 'green'), (11, 'blue')):
    selected_ecoregions.loc[selected_ecoregions['BIOME_NUM'] == biome_num, 'biome_color'] = colour
selected_ecoregions.plot(
    #color='#a6cee3', edgecolor='black', linewidth=0.2, ax=ax, alpha=0.5
    color=selected_ecoregions['biome_color'], edgecolor='black', linewidth=0.2, ax=ax, alpha=0.2
)

# Annotate each ecoregion at its centroid
texts = []
for idx, centroid in selected_ecoregions.geometry.centroid.items():
    # Nothing to anchor a label to
    if centroid.is_empty:
        continue
    x, y = centroid.coords[0]
    texts.append(ax.text(x, y, str(idx + 1), fontsize=7, ha='center', va='center', zorder=5, weight='bold'))
# Nudge overlapping labels apart, linking moved labels back with a thin line
adjust_text(texts, ax=ax, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

# Final styling
plt.title('Ecoregions with Index Labels', fontsize=14)
plt.axis('off')
plt.tight_layout()
#plt.savefig('/home/users/clelland/Model/Analysis/basic_ecoregions_with_numbers_coloured.png', dpi=300)
plt.show()
plt.close()

### With permafrost

In [None]:
# Load the permafrost shapefile and reproject it to EPSG:6931,
# the CRS used for the other layers in this notebook
pfrost_path = '/home/users/clelland/Model/Analysis/Permafrost shapefile/Brown_et_al_permafrost.shp'
gdf_pfrost = gpd.read_file(pfrost_path).to_crs(epsg=6931)

In [None]:
# Boreal map with permafrost: permafrost clipped to the boreal ecoregions,
# drawn underneath the semi-transparent boreal ecoregions themselves
fig, ax = plt.subplots(figsize=(10, 10))
minx, miny, maxx, maxy = selected_ecoregions.total_bounds
buffer = 300000
ax.set_xlim(minx - buffer, maxx + buffer)
ax.set_ylim(miny - buffer, maxy + buffer)

# Background
ax.add_patch(plt.Rectangle(
    (minx - buffer, miny - buffer), 
    (maxx - minx) + 2 * buffer, 
    (maxy - miny) + 2 * buffer, 
    facecolor='#f0f8ff', zorder=0)
)

# Plot world outline
gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

# Colour the boreal ecoregions (BIOME_NUM 6) and select them once, so the
# overlay and the plot below work on exactly the same rows
selected_ecoregions.loc[selected_ecoregions['BIOME_NUM'] == 6, 'biome_color'] = 'green'
boreal_ecoregions = selected_ecoregions.loc[selected_ecoregions['BIOME_NUM'] == 6]
pfrost_in_boreal = gpd.overlay(gdf_pfrost, boreal_ecoregions, how='intersection')

# Permafrost within the boreal ecoregions
pfrost_in_boreal.plot(
    color='blue',
    ax=ax
)

# Boreal ecoregions on top. The colours are taken from the filtered frame:
# passing the colour column of the full frame gives a list whose length does
# not match the plotted rows, so colours would not pair up with polygons.
boreal_ecoregions.plot(
    color=boreal_ecoregions['biome_color'],
    ax=ax,
    alpha=0.7
)

# Add legend manually
#legend_handles = [
#    mpatches.Patch(color='green', label='Boreal', alpha=0.7),
#    mpatches.Patch(color='blue', label='Permafrost', alpha=0.6),
#]
#ax.legend(handles=legend_handles, loc='lower left')

plt.title('Boreal Biome with Permafrost', fontsize=14)
plt.axis('off')
plt.tight_layout()

# Save or show
#plt.savefig('/home/users/clelland/Model/Analysis/boreal_with_permafrost.png', dpi=300)
plt.show()
plt.close()

## Correlation plots

### Processing

In [None]:
# Define your variables and time periods
climate_vars = ['rh', 'tp', 'rlds', 'rsds', 'wsp', 't2m', 'mx2t', 'mn2t']
fwi_vars = ['BUI', 'DC', 'DMC', 'FFMC', 'FWI', 'ISI']
all_vars = climate_vars + fwi_vars

periods = {
    'historical': ('2001-01-01', '2023-12-31'),
    '2025_2050': ('2025-01-01', '2050-12-31'),
    '2051_2075': ('2051-01-01', '2075-12-31'),
    '2076_2100': ('2076-01-01', '2100-12-31')
}

# Model mapping
model_groups = {
    'ssp': ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
            'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']
}

# Provide region list as tuples: (region_code, region_model_code)
region_pairs = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr'), ('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor'), ('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')]

# File-name stems per data source; each file holds every variable of its family,
# so it only needs to be read once per region (not once per variable).
climate_files = {
    'Observed': 'e5l_2001_2023',
    'ACCESS_SSP126': 'access_ssp126_climate_2015_2100',
    'ACCESS_SSP245': 'access_ssp245_climate_2015_2100',
    'ACCESS_SSP370': 'access_ssp370_climate_2015_2100',
    'MRI_SSP126': 'mri_ssp126_climate_2015_2100',
    'MRI_SSP245': 'mri_ssp245_climate_2015_2100',
    'MRI_SSP370': 'mri_ssp370_climate_2015_2100',
}
fwi_files = {
    'Observed': 'cems_2001_2023',
    'ACCESS_SSP126': 'access_ssp126_fwi_2015_2100',
    'ACCESS_SSP245': 'access_ssp245_fwi_2015_2100',
    'ACCESS_SSP370': 'access_ssp370_fwi_2015_2100',
    'MRI_SSP126': 'mri_ssp126_fwi_2015_2100',
    'MRI_SSP245': 'mri_ssp245_fwi_2015_2100',
    'MRI_SSP370': 'mri_ssp370_fwi_2015_2100',
}

# Window over which the monthly Observed-minus-model offset is estimated
period = ('2015-01-01', '2023-12-31')

for region, region_model in region_pairs:
    print(f"Processing {region}...")
    root = f'/home/users/clelland/Model/Analysis/CMIP and FWI time series/Ecoregion CSVs/{region}'

    def read_df(suffix):  # Helper to build file path
        return pd.read_csv(
            f'{root}/{suffix}_{region}.csv', parse_dates=['date'], index_col='date'
        )

    # Load CSVs: 14 reads per region, shared by all variables of the same family
    climate_frames = {model: read_df(stem) for model, stem in climate_files.items()}
    fwi_frames = {model: read_df(stem) for model, stem in fwi_files.items()}

    # NOTE: csvs is rebuilt on every iteration, so once this loop finishes it
    # only holds the frames of the last region in region_pairs.
    csvs = {
        var: dict(climate_frames if var in climate_vars else fwi_frames)
        for var in all_vars
    }

    # Output container for plotting
    # NOTE: nothing in this cell appends to these lists, and corrected_df below is
    # overwritten for every variable.
    results = []
    raw_means = []

    for var in all_vars:
        df_all = pd.DataFrame({model: df[var] for model, df in csvs[var].items()})
        df_all.index = pd.to_datetime(df_all.index)
        df_all['month'] = df_all.index.month

        # --- Save raw historical mean (2001–2023) before any bias correction ---
        raw_historical_mask = (df_all.index >= periods['historical'][0]) & (df_all.index <= periods['historical'][1])
        raw_historical_mean = df_all.loc[raw_historical_mask, 'Observed'].mean()

        # Compute monthly bias correction for SSPs: mean (Observed - model) per
        # calendar month over the bias window. The mask does not depend on the
        # model, so it is built once per variable.
        period_mask = (df_all.index >= period[0]) & (df_all.index <= period[1])
        window_months = df_all.loc[period_mask, 'month']
        bias_diffs = {}
        for model in model_groups['ssp']:
            if model in df_all.columns:
                diff = df_all.loc[period_mask, 'Observed'] - df_all.loc[period_mask, model]
                bias_diffs[model] = diff.groupby(window_months).mean()

        # Apply correction: add each date's calendar-month offset to the model series
        corrected = {}
        corrected['Observed'] = df_all['Observed']
        for model in df_all.columns.drop(['Observed', 'month']):
            corrected_series = df_all[model].copy()
            corrected_series.index = pd.to_datetime(corrected_series.index)
            corrected_series = corrected_series + df_all['month'].map(bias_diffs[model])
            corrected[model] = corrected_series

        corrected_df = pd.DataFrame(corrected)

In [None]:
# Initialize nested dictionary
correlation_dict = defaultdict(lambda: defaultdict(dict))  # model -> period -> region -> {var: corr}

# Only the future windows are correlated; the 'historical' entry is skipped.
future_periods = [(name, span) for name, span in periods.items() if name != 'historical']

for region_actual, region_model in region_pairs:
    print(f"Calculating correlations for {region_actual}...")

    # Load modelled burned area (actual_path is defined but not read in this cell)
    actual_path = '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv'
    model_path = f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv'
    try:
        df_ba = pd.read_csv(model_path, parse_dates=['time'], index_col='time')
    except (OSError, ValueError) as e:  # missing/unreadable file or missing 'time' column
        print(f"Skipping {region_actual}: {e}")
        continue

    # Burned-area series is the same for every variable/model of this region
    df_ba_model = df_ba['mean_ba'] if 'mean_ba' in df_ba else df_ba.iloc[:, 0]

    # Read THIS region's model time series. The `csvs` dict left behind by the
    # processing cell is overwritten per region and therefore only contains the
    # last region, so it must not be reused here. File layout matches that cell:
    # <root>/<model>_<climate|fwi>_2015_2100_<region>.csv
    root = f'/home/users/clelland/Model/Analysis/CMIP and FWI time series/Ecoregion CSVs/{region_actual}'
    region_frames = {}  # (model, family) -> DataFrame, each file read once per region

    for var in all_vars:
        family = 'climate' if var in climate_vars else 'fwi'

        for model in model_groups['ssp']:
            frame_key = (model, family)
            if frame_key not in region_frames:
                region_frames[frame_key] = pd.read_csv(
                    f'{root}/{model.lower()}_{family}_2015_2100_{region_actual}.csv',
                    parse_dates=['date'], index_col='date'
                )
            frame = region_frames[frame_key]

            if var not in frame.columns:
                print(f"Missing variable: {var}")
                continue

            # Raw (not bias-corrected) model series for this variable
            df_var = frame[var].copy()
            df_var.index = pd.to_datetime(df_var.index)

            for period_name, (start, end) in future_periods:
                # Slice both time series
                var_period = df_var.loc[start:end]
                ba_period = df_ba_model.loc[start:end]

                # Align by date; rows missing in either series are dropped
                df_combined = pd.concat([var_period, ba_period], axis=1, keys=[var, 'BA']).dropna()

                if df_combined.empty:
                    corr = np.nan
                else:
                    corr = df_combined[var].corr(df_combined['BA'], method='spearman')

                # Store
                correlation_dict[model][period_name].setdefault(region_actual, {})[var] = corr

In [None]:
# Flatten correlation_dict into one long table per variable
# (columns: region, scenario, period, correlation).
for var in all_vars:
    combined_data = [
        {
            'region': region,
            'scenario': model,
            'period': period,
            'correlation': value_dict[var]
        }
        for model in model_groups['ssp']
        for period in ['2025_2050', '2051_2075', '2076_2100']
        for region, value_dict in correlation_dict[model][period].items()
        if var in value_dict
    ]

    # Build the table; writing it out is currently disabled
    df_combined = pd.DataFrame(combined_data)
    out_path = f'/home/users/clelland/Model/Analysis/Summary stats/Correlations/{var}/correlations_{var}_spearman.csv'
    #df_combined.to_csv(out_path, index=False)

### Plots

In [None]:
# Define your variables and time periods
climate_vars = ['rh', 'tp', 'rlds', 'rsds', 'wsp', 't2m', 'mx2t', 'mn2t']
fwi_vars = ['BUI', 'DC', 'DMC', 'FFMC', 'FWI', 'ISI']
all_vars = climate_vars + fwi_vars

# Provide region list as tuples: (region_code, region_model_code)
region_pairs = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr'), ('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor'), ('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')]

# Attach the region codes to the shapefile rows positionally.
# NOTE(review): this assumes selected_ecoregions has one row per entry of
# region_pairs, in the same order — confirm against the shapefile.
selected_ecoregions['region'] = [pair[0] for pair in region_pairs]

# One correlation table per variable, as written by the processing step
correlation_data = {}
for var in all_vars:
    csv_path = f'/home/users/clelland/Model/Analysis/Summary stats/Correlations/{var}/correlations_{var}_spearman.csv'
    correlation_data[var] = pd.read_csv(csv_path)

# Loop through models, periods, and variables to filter the relevant data.
# The commented-out loop headers run the full set; the active ones draw a single map.
#for model in model_groups['ssp']:
for model in ['ACCESS_SSP126']:
    #for period in ['2025_2050', '2051_2075', '2076_2100']:
    for period in ['2025_2050']:
        #for var in all_vars:
        for var in ['rh']:
            df_var = correlation_data[var]
            wanted = (df_var['scenario'] == model) & (df_var['period'] == period)
            df_corr = df_var.loc[wanted, ['region', 'correlation']]

            # Join to shapefile by region code; ecoregions without a value keep NaN
            plot_gdf = selected_ecoregions.merge(df_corr, on='region', how='left')

            fig, ax = plt.subplots(figsize=(10, 10))
            minx, miny, maxx, maxy = plot_gdf.total_bounds
            buffer = 300000

            ax.set_xlim(minx - buffer, maxx + buffer)
            ax.set_ylim(miny - buffer, maxy + buffer)
            ax.add_patch(plt.Rectangle((minx - buffer, miny - buffer), (maxx - minx) + 2*buffer, (maxy - miny) + 2*buffer,
                                       facecolor='#f8fcff', zorder=0))
            gdf_world.plot(ax=ax, color='white', edgecolor='black', linewidth=0.1)

            # Fixed [-1, 1] scale so maps are comparable across variables/periods
            norm = Normalize(vmin=-1, vmax=1)
            plot_gdf.plot(
                column='correlation',
                cmap='coolwarm',
                edgecolor='black',
                linewidth=0.2,
                norm=norm,
                ax=ax
            )

            # Stand-alone mappable for the colorbar; use the public setter rather
            # than assigning the private `_A` attribute.
            sm = plt.cm.ScalarMappable(cmap='coolwarm', norm=norm)
            sm.set_array([])
            cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.01)
            cbar.set_label('Correlation')

            plt.title(f'Spearman Correlation of {var} with Burned Area\n{model}, {period}', fontsize=14)
            plt.axis('off')
            plt.tight_layout()

            out_path = f'/home/users/clelland/Model/Analysis/Summary stats/Correlations/{var}/corr_spearman_{var}_{model}_{period}.png'
            #plt.savefig(out_path, dpi=300)
            plt.show()
            plt.close()

### Table

In [None]:
# Variables whose correlation tables are summarised
climate_vars = ['rh', 'tp', 'rlds', 'rsds', 'wsp', 't2m', 'mx2t', 'mn2t']
fwi_vars = ['BUI', 'DC', 'DMC', 'FFMC', 'FWI', 'ISI']
all_vars = climate_vars + fwi_vars

# Ecoregion codes per reporting group
nabor  = ['alaspen', 'centcan', 'cookinl', 'copppla', 'eastcan', 'eashti', 'inteala', 'mid-bor',
          'midwcan', 'musklak', 'nortcan', 'southud', 'watshig', 'nortcor', 'nortter']
eubor  = ['eastsib', 'icelbor', 'kamcmea', 'kamctai', 'nesibta', 'okhotai',
          'sakhisl', 'trancon', 'westsib', 'scanand', 'uralmon']
tundra  = ['ahkland', 'berilow', 'brooran', 'kalanun', 'pacicoa', 'novoisl', 'wranisl', 'alaseli', 'arctcoa', 'arctfoo',
               'beriupl', 'canalow', 'davihig', 'canahig', 'inteyuk', 'canamid', 'ogilalp', 'tornmou', 'kalste', 'russarc', 'russber', 'chermou',
               'chukpen', 'kolapen', 'nortsib', 'nortrus', 'scanmon', 'taimsib', 'tranbal', 'yamatun', 'kamctun']

# (group label, member regions), in reporting order
all_groups = [('nabor', nabor), ('eubor', eubor), ('tundra', tundra)]

# Read the saved per-variable correlation tables
correlation_data = {
    var: pd.read_csv(f'/home/users/clelland/Model/Analysis/Summary stats/Correlations/{var}/correlations_{var}_spearman.csv')
    for var in all_vars
}

group_means_by_var = {}  # variable -> DataFrame of group means for that variable
all_results = []         # every per-(variable, group) frame, in loop order

for var, df in correlation_data.items():
    per_group = []  # this variable's frames, one per group

    for group_name, region_list in all_groups:
        in_group = df['region'].isin(region_list)

        # Mean correlation per scenario over the group's regions (all periods pooled).
        # To split by period as well, group on ['scenario', 'period'] instead.
        grouped_mean = (
            df[in_group]
            .groupby(['scenario'])['correlation']
            .mean()
            .reset_index()
            .assign(group=group_name, variable=var)
        )

        per_group.append(grouped_mean)
        all_results.append(grouped_mean)

    group_means_by_var[var] = pd.concat(per_group, ignore_index=True)

# Single long table: columns scenario, correlation, group, variable
combined_df = pd.concat(all_results, ignore_index=True)

# Results:
# - group_means_by_var['rh'] is one DataFrame covering all three groups for 'rh'
# - combined_df holds every variable and group in one place

In [None]:
combined_df