Notebook that tests each ecoregion's time series for normality (Shapiro-Wilk test), runs a Mann-Kendall trend analysis, and produces seasonality heatmaps (use only for normally distributed data) and moving-window monthly boxplots. It also computes the annual IQR and the month of maximum burn.

Ecoregions are grouped by North America boreal, Eurasia boreal, and tundra (all).

Edit as necessary

In [None]:
import geopandas as gpd
import os
import pandas as pd
import numpy as np
import pymannkendall as mk
from scipy.stats import shapiro
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

## Grouped regions

In [None]:
# Read the RESOLVE ecoregion polygons, reproject them to EPSG:6931 and keep
# only the 'Boreal Forests/Taiga' and 'Tundra' biomes.
shp_path = '/home/users/clelland/Model/Analysis/RESOLVE shapefile from GEE/resolve_shapefile_from_gee.shp'
gdf = gpd.read_file(shp_path).to_crs(epsg=6931)
selected_ecoregions = gdf.loc[
    lambda frame: frame['BIOME_NAME'].isin(['Boreal Forests/Taiga', 'Tundra'])
].reset_index(drop=True)

# Country boundaries, reprojected to the same CRS as the ecoregions.
world_path = '/home/users/clelland/Model/Analysis/Countries shapefile/world-administrative-boundaries.shp'
gdf_world = gpd.read_file(world_path).to_crs(epsg=6931)

# List of region names — must match order of selected_ecoregions
# (NOTE(review): the ordering requirement is not checked anywhere in the visible
# code — confirm it still holds if the shapefile changes.)
# Each tuple is (observed-data column name, model-file region code): the first
# element is looked up as a column of 'Fire actual 2001-2024.csv', the second is
# inserted into the 'area_timeseries_{code}_all.csv' file name.
# This is the complete list (all groups); later cells rebind `region_mappings`
# to one group at a time.
region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr'), ('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor'), ('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')]

# Model/scenario combinations, written as they appear in the model CSV headers
# ("<gcm> <ssp>"), in GCM-major order.
models = [f'{gcm} {ssp}' for gcm in ('access', 'mri') for ssp in ('126', '245', '370')]

# Display label for each combination, e.g. 'access 126' -> 'ACCESS_SSP126'.
model_labels = {name: name.upper().replace(' ', '_SSP') for name in models}

# Time periods for projections: key '<start>_<end>' -> (start, end) as strings.
periods = {
    f'{start}_{end}': (start, end)
    for start, end in (('2025', '2050'), ('2051', '2075'), ('2076', '2100'))
}

### Shapiro-Wilk test

In [None]:
# Ignore large N warnings
warnings.filterwarnings("ignore", message="scipy.stats.shapiro: For N > 5000, computed p-value may not be accurate.")


def _report_shapiro(label, series):
    """Run a Shapiro-Wilk test on one series and print a one-line verdict.

    Parameters
    ----------
    label : str
        Text printed in front of the result, e.g. ``"Actual - eastsib"``.
    series : pandas.Series
        Values to test; missing values are dropped first.

    Series with three or fewer values, or with no variation at all, are
    reported as skipped instead of being tested: the statistic is not
    meaningful for them. Any error raised by the test itself is printed and
    does not stop the remaining tests.
    """
    data = series.dropna()
    if len(data) <= 3:
        print(f"  {label}: skipped (only {len(data)} non-missing values)")
        return
    if data.nunique() <= 1:
        # A constant series (e.g. a region that never burns) has zero range,
        # so a "Normal" verdict for it would be meaningless.
        print(f"  {label}: skipped (constant series)")
        return
    try:
        stat, p = shapiro(data)
    except Exception as e:
        print(f"  {label}: skipped ({e})")
        return
    print(f"  {label}: W={stat:.4f}, p={p:.4g} → {'Normal' if p > 0.05 else 'Not normal'}")


actual_path = '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv'

# The observed table is shared by every region, so read it once instead of once
# per loop iteration. If it cannot be read, the model series are still tested.
try:
    df_actual = pd.read_csv(actual_path, parse_dates=['date'], index_col='date')
except Exception as e:
    print(f"Could not read observed data ({actual_path}): {e}")
    df_actual = None

for region_actual, region_model in region_mappings:
    model_path = f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv'

    if not os.path.exists(model_path):
        print(f"Missing model file for region: {region_model}")
        continue

    try:
        df_model = pd.read_csv(model_path, parse_dates=['time'], index_col='time')
    except Exception as e:
        print(f"Error processing region {region_actual}: {e}")
        continue

    print(f"\nRegion: {region_actual} / {region_model}")

    # Observed data: test this region's column; if the table has no such
    # column, fall back to testing every column of the observed table.
    if df_actual is not None:
        if region_actual in df_actual.columns:
            _report_shapiro(f"Actual - {region_actual}", df_actual[region_actual])
        else:
            for col in df_actual.columns:
                _report_shapiro(f"Actual - {col}", df_actual[col])

    # Model data: test every model/scenario column of this region's file.
    for col in df_model.columns:
        _report_shapiro(f"Model  - {col}", df_model[col])

### Mann-Kendall trend analysis

In [None]:
# Mann-Kendall trend analysis - whole time period
# Region group for this cell: leave exactly ONE of the three assignments below
# uncommented. The assignment rebinds the notebook-wide `region_mappings`, so it
# also affects any cell run afterwards that does not set its own list.
# Each tuple is (observed-data column name, model-file region code).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Step 1: Load every selected region's observed and modelled series as annual totals
models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# CSV header -> label used throughout the analysis
scenario_columns = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

# The observed table holds every region as a column, so read it once rather
# than once per region.
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# Placeholder for BA data
ba_dfs = []

for region, region_model in region_mappings:
    # Best-effort per region: a missing column or file skips that region only.
    try:
        # Observed data for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Modelled data for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_columns)

        # Resample both to annual totals (no scaling is applied)
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        # Combine both
        df_ba = pd.concat([df_actual_annual, df_model_annual], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No region could be loaded; check region_mappings and the input paths.")

# Combine all regions and sum across them; min_count=1 keeps a year as NaN when
# no region has a value for it.
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Not used in this cell; kept because other cells may read it.
periods = {
    '2025-2050': (2025, 2050),
    '2051-2075': (2051, 2075),
    '2076-2100': (2076, 2100),
}

# Step 2: Mann-Kendall test on each model's annual totals, 2025-2100 only
analysis_start, analysis_end = '2025', '2100'
trend_columns = ['Model', 'Period', 'Trend', 'H', 'PValue', 'Tau', 'Slope']
trend_records_full = []

for model in models:
    if model not in df_ba_all.columns:
        continue

    # Restrict to the labelled period and drop years with no model value.
    # Rows that exist only because of the observed record would otherwise be
    # turned into zeros by a plain sum and bias the trend test.
    annual = df_ba_all.loc[analysis_start:analysis_end, model].dropna()

    if len(annual) < 10:
        print(f"Skipping {model}: insufficient data")
        continue

    result = mk.original_test(annual.values)

    trend_records_full.append({
        'Model': model,
        'Period': '2025–2100',
        'Trend': result.trend,
        'H': result.h,
        'PValue': result.p,
        'Tau': result.Tau,
        'Slope': result.slope
    })

# Compile results; passing the column list keeps an empty result well-formed
df_mk_full = pd.DataFrame(trend_records_full, columns=trend_columns)

# Save or display
#df_mk_full.to_csv('/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/mk_trends_2025_2100_full.csv', index=False)
df_mk_full

### Yearly/decadal heatmaps - ONLY USE FOR NORMALLY-DISTRIBUTED DATA

In [None]:
# Seasonality by year heatmap
# Region group for this cell: leave exactly ONE of the three assignments below
# uncommented, and keep the plot titles / output file names in this cell in step
# with the group chosen here. The assignment rebinds the notebook-wide
# `region_mappings`.
# Each tuple is (observed-data column name, model-file region code).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Model/scenario columns to plot (names after renaming the CSV headers)
models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Labels for the plot title and output file name. These MUST describe the
# region group assigned to `region_mappings` for this cell.
region_label = 'Eurasia boreal'
region_tag = 'EUbor'

# CSV header -> label used throughout the analysis
scenario_columns = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# The observed table holds every region as a column, so read it once.
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# List to store individual region DataFrames
ba_dfs = []

for region, region_model in region_mappings:
    # Best-effort per region: a missing column or file skips that region only.
    try:
        # Observed burned area for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Modelled burned area for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_columns)

        # Combine actual and model
        df_ba = pd.concat([df_actual, df_model], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No region could be loaded; check region_mappings and the input paths.")

# Combine and sum across all regions by timestamp
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Select the model columns by name. The columns are 'Actual' plus the labels in
# `models`, so filtering on a 'Model' prefix would select nothing.
model_columns = [name for name in models if name in df_ba_all.columns]
heatmap_data_by_model = {}

# Projection period only, with calendar keys for grouping
df_season = df_ba_all.loc['2025-01-01':].copy()
df_season['year'] = df_season.index.year
df_season['month'] = df_season.index.month

for burn_column in model_columns:
    # Burn per (year, month) and the matching yearly total on the same index
    monthly_burn = df_season.groupby(['year', 'month'])[burn_column].sum()
    yearly_total = monthly_burn.groupby(level='year').transform('sum')

    # Share of the year's burn in each month (NaN for a year with zero burn)
    percent = monthly_burn / yearly_total * 100

    # Months as rows, years as columns
    heatmap_data_by_model[burn_column] = percent.unstack('year')

# Plot the heatmaps
for model in model_columns:
    heatmap_data = heatmap_data_by_model[model]
    plt.figure(figsize=(16, 6))
    sns.heatmap(heatmap_data, cmap='YlOrRd', linewidths=0.5, linecolor='grey', annot=False, fmt=".1f",
                cbar_kws={'label': '% of Yearly Burn'})

    # Label only the months that are actually present in the data
    plt.yticks(ticks=np.arange(len(heatmap_data.index)) + 0.5,
               labels=[month_names[int(m) - 1] for m in heatmap_data.index], rotation=0)
    plt.xlabel('Year')
    plt.ylabel('Month')
    plt.title(f'Monthly Contribution to Yearly Burn (2025–2100) for {region_label} ecoregions - {model}')
    plt.tight_layout()
    out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/Seasonality by year/ba_seasonal_heatmap_{region_tag}_{model}.png'
    #plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Seasonality by decade heatmap
# NOTE: this cell summarises each month with mean ± standard deviation, which is
# only appropriate for normally-distributed data. Check the Shapiro-Wilk results
# first and do not use these figures for series that are not normal.
# Region group for this cell: leave exactly ONE of the three assignments below
# uncommented, and keep the plot titles / output file names in this cell in step
# with the group chosen here. The assignment rebinds the notebook-wide
# `region_mappings`.
# Each tuple is (observed-data column name, model-file region code).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Labels for the plot title and output file name. These MUST describe the
# region group assigned to `region_mappings` for this cell.
region_label = 'Eurasia boreal'
region_tag = 'EUbor'

# CSV header -> label used throughout the analysis
scenario_columns = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Step 1: Load all region files into a list of DataFrames
# The observed table holds every region as a column, so read it once.
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

ba_dfs = []

for region, region_model in region_mappings:
    # Best-effort per region: a missing column or file skips that region only.
    try:
        # Observed burned area for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Modelled burned area for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_columns)

        # Combine actual and model
        df_ba = pd.concat([df_actual, df_model], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No region could be loaded; check region_mappings and the input paths.")

# Step 2: Combine all DataFrames by aligning on time index.
# Column names repeat once per region after this step.
df_ba_all = pd.concat(ba_dfs, axis=1)

# Step 3: Sum each model's columns across regions
df_ba = pd.DataFrame(index=df_ba_all.index)

for model in models:
    # Boolean mask over the (repeated) column names for this model
    same_model = df_ba_all.columns == model
    if not same_model.any():
        continue
    df_ba[model] = df_ba_all.loc[:, same_model].sum(axis=1)

# Step 4: Plot monthly mean ± std dev (% of the year's burn) per decade for each model
for model in models:
    if model not in df_ba.columns:
        continue

    burn_column = model

    # Projection period only; sort first so the date slice is well defined
    df_season = df_ba[[burn_column]].copy()
    df_season.index = pd.to_datetime(df_season.index)
    df_season = df_season.sort_index().loc['2025-01-01':].copy()

    df_season['year'] = df_season.index.year
    df_season['month'] = df_season.index.month
    # Decades run xxx1-xxx0 (e.g. 2091-2100), keyed by their first year
    df_season['decade'] = ((df_season['year'] - 1) // 10) * 10 + 1

    # Burn per (decade, year, month)
    monthly_grouped = df_season.groupby(['decade', 'year', 'month'])[burn_column].sum()

    # Each month's share of its own year's total. `transform` returns the yearly
    # total on exactly the same index as the monthly values, so the division
    # does not depend on aligning indexes of different depth.
    yearly_total = monthly_grouped.groupby(level=['decade', 'year']).transform('sum')
    monthly_percent = monthly_grouped / yearly_total * 100

    # Mean and standard deviation across the years of each decade,
    # months as rows and decades as columns
    by_decade_month = monthly_percent.groupby(['decade', 'month'])
    mean_pivot = by_decade_month.mean().unstack('decade')
    std_pivot = by_decade_month.std().unstack('decade')

    # Column label is the decade's first year minus one, e.g. 2021 -> "2020s"
    formatted_columns = [f"{int(dec) - 1}s" for dec in mean_pivot.columns]
    mean_pivot.columns = formatted_columns
    std_pivot.columns = formatted_columns

    # Round mean and std for formatting
    mean_rounded = mean_pivot.round(1)
    std_rounded = std_pivot.round(1)

    # Create annotation DataFrame with "mean ± std", but use "0" if both are 0
    annot = mean_rounded.astype(str) + " ± " + std_rounded.astype(str)
    annot[(mean_rounded == 0) & (std_rounded == 0)] = "0"

    # Plot
    plt.figure(figsize=(12, 8))
    sns.heatmap(mean_pivot, cmap='YlOrRd', annot=annot, fmt='', linewidths=0.5, linecolor='grey',
                cbar_kws={'label': 'Mean % of Yearly Burn'})

    # Label only the months that are actually present in the data
    plt.yticks(ticks=np.arange(len(mean_pivot.index)) + 0.5,
               labels=[month_names[int(m) - 1] for m in mean_pivot.index], rotation=0)
    plt.xlabel('Decade')
    plt.ylabel('Month')
    plt.title(f'Monthly Mean ± Std Dev Burn (% of Yearly) by Decade\n{region_label} ecoregions - {model}')
    plt.tight_layout()
    out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/Seasonality by decade/ba_seasonal_decade_with_sd_{region_tag}_{model}.png'
    #plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.show()

### Moving window monthly boxplots

In [None]:
# Moving window monthly boxplots - NOT WINTER
# Region group for this cell: leave exactly ONE of the three assignments below
# uncommented, and keep the plot titles / output file names in this cell in step
# with the group chosen here. The assignment rebinds the notebook-wide
# `region_mappings`.
# Each tuple is (observed-data column name, model-file region code).
region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
#region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
#               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
#               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Labels for the plot title and output file name. These MUST describe the
# region group assigned to `region_mappings` for this cell.
region_label = 'North America boreal'
region_tag = 'NAbor'

# CSV header -> label used throughout the analysis
scenario_columns = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370'
}

# Step 1: Load all region files into a list of DataFrames
# The observed table holds every region as a column, so read it once.
df_actual_all = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

ba_dfs = []

for region, region_model in region_mappings:
    # Best-effort per region: a missing column or file skips that region only.
    try:
        # Observed burned area for this region
        df_actual = df_actual_all[[region]].rename(columns={region: 'Actual'})

        # Modelled burned area for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=scenario_columns)

        # Combine actual and model
        df_ba = pd.concat([df_actual, df_model], axis=1)
        ba_dfs.append(df_ba)

    except Exception as e:
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError("No region could be loaded; check region_mappings and the input paths.")

# Step 2: Combine all DataFrames by aligning on time index.
# Column names repeat once per region after this step.
df_ba_all = pd.concat(ba_dfs, axis=1)

# Step 3: Sum each model's columns across regions
df_ba = pd.DataFrame(index=df_ba_all.index)

for model in models:
    # Boolean mask over the (repeated) column names for this model
    same_model = df_ba_all.columns == model
    if not same_model.any():
        continue
    df_ba[model] = df_ba_all.loc[:, same_model].sum(axis=1)

# Step 4: One figure per model, one panel per month, one box per moving window
window_size = 30   # window length in years
window_step = 5    # years between consecutive window starts

# Months to include (March to November)
months_to_plot = list(range(3, 12))

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for model in models:
#for model in ['ACCESS_SSP126']:
    if model not in df_ba.columns:
        continue

    # Projection period only; sort first so the date slice is well defined
    df_season = df_ba[[model]].copy()
    df_season.index = pd.to_datetime(df_season.index)
    df_season = df_season.sort_index().loc['2025-01-01':].copy()

    df_season['year'] = df_season.index.year
    df_season['month'] = df_season.index.month

    # Burn per (year, month) and each month's share of its own year's total
    monthly_grouped = df_season.groupby(['year', 'month'])[model].sum()
    yearly_total = monthly_grouped.groupby(level='year').transform('sum')
    monthly_percent = (monthly_grouped / yearly_total * 100).reset_index()

    fig, axes = plt.subplots(3, 3, figsize=(15, 10), sharey=True, sharex=True)
    fig.suptitle(f'Seasonality Boxplots ({window_size}-yr Moving Window)\n{region_label} ecoregions - {model}', fontsize=16)
    axes = axes.flatten()

    for idx, month in enumerate(months_to_plot):
        ax = axes[idx]
        ax.set_title(month_names[month - 1], fontsize=14)
        ax.set_ylabel('% of Annual Burn' if idx % 3 == 0 else '', fontsize=14)
        ax.set_ylim(-5, 100)

        data_month = monthly_percent[monthly_percent['month'] == month]
        if data_month.empty:
            continue

        # Build sliding windows. The last valid start is last_year - window_size + 1,
        # hence "+ 2" on the exclusive range stop.
        first_year = int(data_month['year'].min())
        last_year = int(data_month['year'].max())
        box_data = []
        window_labels = []
        for start_year in range(first_year, last_year - window_size + 2, window_step):
            in_window = (data_month['year'] >= start_year) & (data_month['year'] < start_year + window_size)
            window = data_month[in_window]
            # Only complete windows (one row per year) are plotted
            if len(window) == window_size:
                # Years with zero total burn give NaN shares; drop them so they
                # do not blank out the whole box.
                box_data.append(window[model].dropna().values)
                window_labels.append(f"{start_year}-{start_year + window_size - 1}")

        if not box_data:
            continue

        # Boxplot with transparent boxes and blue median line; tick labels are
        # set explicitly below.
        box = ax.boxplot(box_data, patch_artist=True,
                         flierprops={'marker': 'o', 'markersize': 5})

        for patch in box['boxes']:
            patch.set(facecolor='none', edgecolor='black', linewidth=1)

        for median in box['medians']:
            median.set(color='blue', linewidth=2)

        # Label every box with its window range
        ax.set_xticks(np.arange(1, len(box_data) + 1))
        ax.set_xticklabels(window_labels, rotation=45, ha='right', fontsize=13)

    # Remove any unused axes (only applies if fewer months than panels are plotted)
    for ax in axes[len(months_to_plot):]:
        fig.delaxes(ax)

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/Moving window boxplots/ba_seasonal_boxplot_{region_tag}_{model}.png'
    #plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Find medians: median monthly share of annual burn for each model, month and
# moving window. Uses `models` and `df_ba` from the boxplot cell.
window_size = 30   # window length in years
step = 5           # years between consecutive window starts
months_to_include = list(range(3, 12))  # March–November
median_columns = ['model', 'month', 'window_start', 'window_end', 'median_percent']
summary_medians = []

for model in models:
    if model not in df_ba.columns:
        continue

    # Projection period only; copy so the helper columns are not written to a slice
    df_model = df_ba.loc['2025-01-01':, [model]].copy()
    df_model['year'] = df_model.index.year
    df_model['month'] = df_model.index.month

    # Group by year and month, sum monthly burn
    monthly_grouped = df_model.groupby(['year', 'month'])[model].sum()

    # Annual total repeated on the monthly index, so the division is row-for-row
    yearly_totals = monthly_grouped.groupby(level='year').transform('sum')

    # Normalize monthly by annual total (%); years with zero burn drop out as NaN
    monthly_percent = (monthly_grouped / yearly_totals).dropna() * 100

    # Convert to DataFrame with 'year' and 'month' as columns
    monthly_percent = monthly_percent.reset_index()

    # Filter to relevant months
    monthly_percent = monthly_percent[monthly_percent['month'].isin(months_to_include)]

    for month in months_to_include:
        data_month = monthly_percent[monthly_percent['month'] == month]
        if data_month.empty:
            continue

        # The last valid start is last_year - window_size + 1, hence "+ 2" on
        # the exclusive range stop.
        first_year = int(data_month['year'].min())
        last_year = int(data_month['year'].max())

        for start_year in range(first_year, last_year - window_size + 2, step):
            window = data_month[(data_month['year'] >= start_year) & (data_month['year'] < start_year + window_size)]

            # Only complete windows (one row per year) are summarised
            if len(window) == window_size:
                summary_medians.append({
                    'model': model,
                    'month': month,
                    'window_start': start_year,
                    'window_end': start_year + window_size - 1,
                    'median_percent': window[model].median()
                })

# Create final summary DataFrame; passing the column list keeps an empty result
# sortable and filterable
df_medians = pd.DataFrame(summary_medians, columns=median_columns)

# Optional: Sort for readability
df_medians.sort_values(by=['model', 'month', 'window_start'], inplace=True)

# Save or inspect
#df_medians.to_csv('/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/ba_median_percent_by_model_month_window.csv', index=False)
df_medians[(df_medians['month']==5) & (df_medians['model'] == 'MRI_SSP370')]

### Annual IQR

In [None]:
# Annual IQR - fixed periods (nominally 25-yr; note the first one, 2025–2050, spans 26 years)
#
# Sums annual burned area (BA) over every ecoregion of the selected group, then
# reports, per model and period, the interquartile range (Q3 - Q1) and the
# 95th percentile of the annual totals.
#
# Choose the region group by leaving exactly ONE `region_mappings` assignment
# uncommented. Each tuple is (column name in the observed-BA CSV,
# file-name stem of the modelled-BA CSV).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
#region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
#               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
#               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Step 1: Load all region files into a list of DataFrames
models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Column names in the modelled-BA CSVs -> names used throughout this notebook
model_column_renames = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370',
}

# The observed-BA table is one file shared by every region, so parse it once
# here rather than once per loop iteration. A missing file now fails
# immediately instead of printing a "Skipping region" line for every region.
actual_ba_table = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# One annual DataFrame (Actual + model columns) per successfully loaded region
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed BA for this region (KeyError -> region is skipped below)
        df_actual = actual_ba_table[[region]].rename(columns={region: 'Actual'})

        # Modelled BA for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=model_column_renames)

        # Aggregate both series to calendar-year totals (year-end timestamps)
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        ba_dfs.append(pd.concat([df_actual_annual, df_model_annual], axis=1))

    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError('No region could be loaded - check region_mappings and the input file paths')

# Combine all regions and sum across them; min_count=1 keeps a year as NaN
# (rather than 0) when no region has a value for that column
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Define the analysis periods as inclusive (first year, last year) bounds
periods = {
    '2025–2050': (2025, 2050),
    '2051–2075': (2051, 2075),
    '2076–2100': (2076, 2100)
}

# Calculate IQR for each model and time period
iqr_records = []

for model in models:
    if model not in df_ba_all.columns:
        continue

    df_model = df_ba_all[[model]].copy()
    df_model = df_model[df_model.index.year >= 2025]
    df_model['year'] = df_model.index.year

    # Get annual sums (already annual, so just group to be safe)
    df_annual = df_model.groupby('year')[model].sum().reset_index()

    for period_name, (start_year, end_year) in periods.items():
        in_period = (df_annual['year'] >= start_year) & (df_annual['year'] <= end_year)
        window = df_annual.loc[in_period, model]
        if window.empty:
            continue

        # All three percentiles in a single pass over the window
        q1, q3, extreme = np.percentile(window, [25, 75, 95])
        iqr_records.append({
            'Model': model,
            'Period': period_name,
            'IQR': q3 - q1,
            '95th percentile': extreme
        })

# Compile results into DataFrame
df_iqr = pd.DataFrame(iqr_records)
df_iqr

# Optional export
#output_path = '/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/annual_iqr_by_period.csv'
#df_iqr.to_csv(output_path, index=False)
#print(f"Annual IQR results saved to {output_path}")

In [None]:
# Annual IQR - Moving 30-yr window
#
# Sums annual burned area (BA) over every ecoregion of the selected group, then
# slides a 30-year window forward in 5-year steps and reports, per model and
# window, the interquartile range (Q3 - Q1) and the 95th percentile of the
# annual totals.
#
# Choose the region group by leaving exactly ONE `region_mappings` assignment
# uncommented. Each tuple is (column name in the observed-BA CSV,
# file-name stem of the modelled-BA CSV).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Step 1: Load all region files into a list of DataFrames
models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Column names in the modelled-BA CSVs -> names used throughout this notebook
model_column_renames = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370',
}

# The observed-BA table is one file shared by every region, so parse it once
# here rather than once per loop iteration. A missing file now fails
# immediately instead of printing a "Skipping region" line for every region.
actual_ba_table = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# One annual DataFrame (Actual + model columns) per successfully loaded region
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed BA for this region (KeyError -> region is skipped below)
        df_actual = actual_ba_table[[region]].rename(columns={region: 'Actual'})

        # Modelled BA for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=model_column_renames)

        # Aggregate both series to calendar-year totals (year-end timestamps)
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        ba_dfs.append(pd.concat([df_actual_annual, df_model_annual], axis=1))

    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError('No region could be loaded - check region_mappings and the input file paths')

# Combine all regions and sum across them; min_count=1 keeps a year as NaN
# (rather than 0) when no region has a value for that column
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Moving-window settings. The names are specific to this cell so they do not
# overwrite `window_size` / `step` used by other cells of the notebook.
iqr_window_years = 30   # window length in years (inclusive of both end years)
iqr_min_years = 10      # skip a window holding fewer annual values than this

# Calculate IQR for each model and moving window
iqr_records = []

for model in models:
    if model not in df_ba_all.columns:
        continue

    df_model = df_ba_all[[model]].copy()
    df_model = df_model[df_model.index.year >= 2025]
    df_model['year'] = df_model.index.year

    # Get annual sums (already annual, so just group to be safe)
    df_annual = df_model.groupby('year')[model].sum().reset_index()

    # Apply 30-year moving window
    for start_year in range(2025, 2071, 5):  # Last window is 2070–2099
        end_year = start_year + iqr_window_years - 1
        in_window = (df_annual['year'] >= start_year) & (df_annual['year'] <= end_year)
        window = df_annual.loc[in_window, model]

        # Require at least iqr_min_years years of data (this also rules out an empty window)
        if len(window) < iqr_min_years:
            continue

        # All three percentiles in a single pass over the window
        q1, q3, extreme = np.percentile(window, [25, 75, 95])

        iqr_records.append({
            'Model': model,
            'StartYear': start_year,
            'EndYear': end_year,
            'IQR': q3 - q1,
            '95th percentile': extreme
        })

# Compile results into DataFrame
df_iqr = pd.DataFrame(iqr_records)
df_iqr

# Optional export
# NOTE: this is the same file name as the fixed-period IQR cell's export;
# change it before enabling, otherwise one result overwrites the other.
#output_path = '/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/annual_iqr_by_period.csv'
#df_iqr.to_csv(output_path, index=False)
#print(f"Annual IQR results saved to {output_path}")

In [None]:
# Mann-Kendall trend analysis - fixed periods (nominally 25-yr; 2025-2050 spans 26 years)
#
# Sums annual burned area (BA) over every ecoregion of the selected group, then
# runs the original Mann-Kendall test (pymannkendall) on each model's annual
# totals within each period.
#
# Choose the region group by leaving exactly ONE `region_mappings` assignment
# uncommented. Each tuple is (column name in the observed-BA CSV,
# file-name stem of the modelled-BA CSV).
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
#region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
#               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
#               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Step 1: Load all region files into a list of DataFrames
models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Column names in the modelled-BA CSVs -> names used throughout this notebook
model_column_renames = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370',
}

# The observed-BA table is one file shared by every region, so parse it once
# here rather than once per loop iteration. A missing file now fails
# immediately instead of printing a "Skipping region" line for every region.
actual_ba_table = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# One annual DataFrame (Actual + model columns) per successfully loaded region
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed BA for this region (KeyError -> region is skipped below)
        df_actual = actual_ba_table[[region]].rename(columns={region: 'Actual'})

        # Modelled BA for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=model_column_renames)

        # Aggregate both series to calendar-year totals (year-end timestamps)
        df_model_annual = df_model.resample('YE').sum().astype(float)
        df_actual_annual = df_actual.resample('YE').sum()

        ba_dfs.append(pd.concat([df_actual_annual, df_model_annual], axis=1))

    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError('No region could be loaded - check region_mappings and the input file paths')

# Combine all regions and sum across them; min_count=1 keeps a year as NaN
# (rather than 0) when no region has a value for that column
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Analysis periods as inclusive (first year, last year) bounds
periods = {
    '2025-2050': (2025, 2050),
    '2051-2075': (2051, 2075),
    '2076-2100': (2076, 2100),
}

# Output column order; also used so an empty result still has these columns
mk_columns = ['Model', 'Period', 'Trend', 'H', 'PValue', 'Tau', 'Slope']

trend_records = []

for model in models:
    if model not in df_ba_all.columns:
        continue

    df_model = df_ba_all[[model]].copy()
    df_model['year'] = df_model.index.year
    df_annual = df_model.groupby('year')[model].sum().reset_index()

    for period_name, (start_year, end_year) in periods.items():
        in_period = (df_annual['year'] >= start_year) & (df_annual['year'] <= end_year)
        df_period = df_annual[in_period]

        # Skip periods with fewer than 10 annual values
        if len(df_period) < 10:
            print(f"Skipping {model} in {period_name}: insufficient data")
            continue

        result = mk.original_test(df_period[model].values)

        trend_records.append({
            'Model': model,
            'Period': period_name,
            'Trend': result.trend,  # 'increasing', 'decreasing', 'no trend'
            'PValue': result.p,
            'Tau': result.Tau,      # Kendall's Tau
            'Slope': result.slope,
            'H': result.h           # True if trend is significant at alpha=0.05
        })

# Compile results. Passing `columns` fixes the column order and, unlike
# selecting columns afterwards, does not raise KeyError when every
# model/period was skipped and trend_records is empty.
df_mk = pd.DataFrame(trend_records, columns=mk_columns)

# Save or display
#df_mk.to_csv('/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/mk_trends_2025_2100_by_period.csv', index=False)
df_mk

### Moving window monthly PDFs

In [None]:
# Moving window monthly PDFs - not great
#
# For each model, expresses every month's burned area (BA) as a percentage of
# that year's total (summed over the selected ecoregion group) and draws, per
# calendar month, KDEs of that percentage over 30-year windows whose start
# years are 5 years apart.
#
# Choose the region group by leaving exactly ONE `region_mappings` assignment
# uncommented. Each tuple is (column name in the observed-BA CSV,
# file-name stem of the modelled-BA CSV). The figure title and output file
# name below are written for the Eurasia boreal group.
#region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
#               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
#               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

models = ['ACCESS_SSP126', 'ACCESS_SSP245', 'ACCESS_SSP370',
          'MRI_SSP126', 'MRI_SSP245', 'MRI_SSP370']

# Column names in the modelled-BA CSVs -> names used throughout this notebook
model_column_renames = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370',
}

# The observed-BA table is one file shared by every region, so parse it once
# here rather than once per loop iteration. A missing file now fails
# immediately instead of printing a "Skipping region" line for every region.
actual_ba_table = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# Step 1: Load all region files into a list of DataFrames
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed BA for this region (KeyError -> region is skipped below)
        df_actual = actual_ba_table[[region]].rename(columns={region: 'Actual'})

        # Modelled BA for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=model_column_renames)

        # Combine actual and model
        ba_dfs.append(pd.concat([df_actual, df_model], axis=1))

    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError('No region could be loaded - check region_mappings and the input file paths')

# Step 2: Combine all DataFrames by aligning on time index.
# The result has one identically named column per region for each model.
df_ba_all = pd.concat(ba_dfs, axis=1)

# Step 3: Sum each model's columns across regions
df_ba = pd.DataFrame(index=df_ba_all.index)

for model in models:
    # Select this model's columns with a boolean mask. Indexing with a list that
    # repeats the label (df_ba_all[[model, model, ...]]) returns every matching
    # column once PER list entry, i.e. N x N columns for N regions, which
    # inflated the regional sum N-fold.
    is_model_col = df_ba_all.columns == model
    if not is_model_col.any():
        continue
    df_ba[model] = df_ba_all.loc[:, is_model_col].sum(axis=1)

window_size = 30   # window length in years
window_stride = 5  # plot only every 5th window start to reduce clutter
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# The seaborn style only affects axes created AFTER it is set, so set it before
# the first plt.subplots() call rather than after it.
sns.set_style("whitegrid")

#for model in models:
for model in ['ACCESS_SSP126']:
    if model not in df_ba.columns:
        continue

    # Monthly series for this model from 2025 onwards, on a sorted datetime index
    df_season = df_ba[[model]].copy()
    df_season.index = pd.to_datetime(df_season.index)
    df_season = df_season.sort_index().loc['2025-01-01':]

    df_season['year'] = df_season.index.year
    df_season['month'] = df_season.index.month

    # Group by year and month
    monthly_grouped = df_season.groupby(['year', 'month'])[model].sum()
    yearly_grouped = df_season.groupby('year')[model].sum()

    # Normalize by year total to get percentage
    year_of_row = monthly_grouped.index.get_level_values('year')
    monthly_percent = monthly_grouped / year_of_row.map(yearly_grouped) * 100
    monthly_percent = pd.DataFrame({model: monthly_percent}).reset_index()

    fig, axes = plt.subplots(3, 4, figsize=(20, 12))

    for month in range(1, 13):
        ax = axes[(month - 1) // 4, (month - 1) % 4]
        data_month = monthly_percent[monthly_percent['month'] == month].copy()

        if data_month.empty:
            # No data for this calendar month: leave the panel empty but labelled
            window_starts = range(0)
        else:
            # range() excludes its stop value and the last full window starts at
            # (last year - window_size + 1), hence the "+ 2".
            window_starts = range(
                int(data_month['year'].min()),
                int(data_month['year'].max()) - window_size + 2,
                window_stride,
            )

        plotted = 0  # Counter for plotted KDEs

        for start_year in window_starts:
            in_window = (
                (data_month['year'] >= start_year) &
                (data_month['year'] < start_year + window_size)
            )
            window = data_month[in_window]

            # Only plot windows that contain a value for every year
            if len(window) != window_size:
                continue

            sns.kdeplot(
                window[model].values,
                ax=ax,
                label=f"{start_year}-{start_year + window_size - 1}",
                fill=True,
                alpha=0.3,
                clip=(0, None),
                warn_singular=False
            )
            plotted += 1

        ax.set_title(month_labels[month - 1])
        ax.set_xlabel('% of Annual Burn')
        # Label the y-axis on the left-hand column of panels only
        ax.set_ylabel('Density' if month in [1, 5, 9] else '')

        # A single legend (on the January panel) serves the whole figure
        if plotted > 0 and month == 1:
            ax.legend(loc='upper right', fontsize=8)

    plt.suptitle(f'Monthly Burn Distribution (KDE) in 30-Year Windows\nEurasia Boreal Ecoregions – {model}', fontsize=16)
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/KDE/ba_seasonal_kde_EUbor_{model}.png'
    #plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.show()

### Grouped regions max month

In [None]:
# Month of maximum burned area (BA) per year, for the selected ecoregion group.
#
# Choose the region group by leaving exactly ONE `region_mappings` assignment
# uncommented, and set `region_group_label` / `region_group_tag` below to match.
# Each tuple is (column name in the observed-BA CSV, file-name stem of the
# modelled-BA CSV).
region_mappings  = [('alaspen', 'alapen'), ('centcan', 'cancsh'), ('cookinl', 'cookin'), ('copppla', 'copper'), ('eastcan', 'eastcf'), ('eashti', 'eashti'),
               ('inteala', 'intlow'), ('mid-bor', 'midbor'), ('midwcan', 'midwes'), ('musklak', 'muslta'), ('nortcan', 'norths'), ('southud', 'sohudb'),
               ('watshig', 'watson'), ('nortcor', 'norcor'), ('nortter', 'nwterr')] # N America boreal
#region_mappings  = [('eastsib', 'eastsib'), ('icelbor', 'icelnd'), ('kamcmea', 'kamkurm'),
#               ('kamctai', 'kamtaig'), ('nesibta', 'nesibta'), ('okhotai', 'okhman'), ('sakhisl', 'sakhtai'), ('trancon', 'trzconf'), ('westsib', 'westsib'),
#               ('scanand', 'scrusta'), ('uralmon', 'uralfor')] # Eurasia boreal
#region_mappings  = [('ahkland', 'ahklun'), ('berilow', 'berlow'), ('brooran', 'brookr'), ('kalanun', 'kalhar'),
#               ('pacicoa', 'pacice'), ('novoisl', 'novoisl'), ('wranisl', 'wrangel'), ('alaseli', 'aleias'), ('arctcoa', 'arccoa'), ('arctfoo', 'arcfoo'),
#               ('beriupl', 'berupl'), ('canalow', 'canlow'), ('davihig', 'davish'), ('canahig', 'canhig'), ('inteyuk', 'intalp'), ('canamid', 'canmid'),
#               ('ogilalp', 'ogilvi'), ('tornmou', 'tornga'), ('kalste', 'kalste'), ('russarc', 'rusarc'), ('russber', 'rusbert'), ('chermou', 'cherski'),
#               ('chukpen', 'chukchi'), ('kolapen', 'kolapen'), ('nortsib', 'nesibco'), ('nortrus', 'nwrunz'), ('scanmon', 'scambf'), ('taimsib', 'taicens'),
#               ('tranbal', 'trzbald'), ('yamatun', 'yamalgy'), ('kamctun', 'kamtund')] # All tundra

# Label used in the plot title and tag used in the output file name. These
# previously read "Tundra"/"tundra" regardless of the active mapping, which
# mislabelled the figure. Suggested pairs: ('North America boreal', 'NAbor'),
# ('Eurasia boreal', 'EUbor'), ('Tundra', 'tundra').
region_group_label = 'North America boreal'
region_group_tag = 'NAbor'

# Column names in the modelled-BA CSVs -> names used throughout this notebook
model_column_renames = {
    'access 126': 'ACCESS_SSP126',
    'access 245': 'ACCESS_SSP245',
    'access 370': 'ACCESS_SSP370',
    'mri 126': 'MRI_SSP126',
    'mri 245': 'MRI_SSP245',
    'mri 370': 'MRI_SSP370',
}

# The observed-BA table is one file shared by every region, so parse it once
# here rather than once per loop iteration. A missing file now fails
# immediately instead of printing a "Skipping region" line for every region.
actual_ba_table = pd.read_csv(
    '/home/users/clelland/Model/Analysis/Fire actual 2001-2024.csv',
    parse_dates=['date'], index_col='date'
)

# List to store individual region DataFrames
ba_dfs = []

for region, region_model in region_mappings:
    try:
        # Observed BA for this region (KeyError -> region is skipped below)
        df_actual = actual_ba_table[[region]].rename(columns={region: 'Actual'})

        # Modelled BA for this region
        df_model = pd.read_csv(
            f'/home/users/clelland/Model/Analysis/Ecoregion plots combined/area_timeseries_{region_model}_all.csv',
            parse_dates=['time'], index_col='time'
        ).rename(columns=model_column_renames)

        # Combine actual and model
        ba_dfs.append(pd.concat([df_actual, df_model], axis=1))

    except Exception as e:
        # Best effort: report the problem region and carry on with the rest
        print(f"Skipping region {region} ({region_model}) due to error: {e}")

if not ba_dfs:
    raise ValueError('No region could be loaded - check region_mappings and the input file paths')

# Combine and sum across all regions by timestamp; min_count=1 keeps a
# timestamp as NaN (rather than 0) when no region has a value for that column
df_ba_all = pd.concat(ba_dfs, axis=0).groupby(level=0).sum(min_count=1)
df_ba_all.index = pd.to_datetime(df_ba_all.index)
df_ba_all.sort_index(inplace=True)

# Year range
year_range = pd.Index(range(df_ba_all.index.year.min(), df_ba_all.index.year.max() + 1))


def get_max_month(group):
    """Return the month number of the largest value in one year's series.

    `group` is a Series with a DatetimeIndex covering a single year. Returns
    NaN when every value is missing or the values sum to zero. When several
    months share the maximum, the one closest to June is returned.
    """
    if group.isna().all() or group.sum() == 0:
        return np.nan
    max_months = group[group == group.max()].index.month
    return max_months[np.abs(max_months - 6).argmin()]  # Closest to June


# Get month of max BA, resolving ties using proximity to June
max_month_df = pd.DataFrame(index=year_range)

for column in df_ba_all.columns:
    by_year = df_ba_all[column].groupby(df_ba_all.index.year)
    # reindex so every year in the range has a row, NaN where a series has no data
    max_month_df[column] = by_year.apply(get_max_month).reindex(year_range)

# Plot
plt.figure(figsize=(14, 6))

for column in max_month_df.columns:
    # Raw line
#    plt.plot(
#        max_month_df.index,
#        max_month_df[column],
#        linestyle='-',
#        label=f"{column} raw",
#        alpha=0.4
#    )
    # 5-year rolling mean (needs at least 3 non-missing years in the window)
    rolling = max_month_df[column].rolling(window=5, min_periods=3).mean()
    plt.plot(
        rolling.index,
        rolling,
        label=column,
        linewidth=2
    )

plt.yticks(ticks=range(1, 13), labels=[
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
])
# Limits given high-to-low, so the axis is inverted (earlier months at the top)
# and only Mar-Oct is shown
plt.ylim(10.5, 2.5)
plt.xticks(ticks=range(2000, 2101, 5))

plt.title(f'5-year Rolling Mean of Month of Maximum Burned Area – {region_group_label} ecoregions')
plt.xlabel('Year')
plt.ylabel('Month of Max Burn')
plt.grid(True)
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
           ncol=3, fancybox=True, shadow=True)
plt.tight_layout()
out_path = f'/home/users/clelland/Model/Analysis/Summary stats/BA/Seasonality/ba_max_month_{region_group_tag}.png'
#plt.savefig(out_path, dpi=300, bbox_inches='tight')
plt.show()