# BIOCLIM Mapping – Notebook Guide

This notebook maps model predictions across geographic extents.

## What this notebook does
- Load predictor rasters and trained model outputs
- Generate spatial prediction maps
- Post-process and export map layers

## Inputs
- Predictor rasters, trained model, and thresholds
- Optional: masks, study area extent, CRS, and resolution settings

## Workflow
1. Load rasters and model
2. Predict scores across grid
3. Apply thresholds/masks and smoothing if needed
4. Save map layers and figures

## Outputs
- Raster layers of predictions
- Summary maps/figures
- Exported files for GIS use

## Notes
- Ensure CRS/resolution alignment
- Be mindful of memory with large rasters; consider tiling or chunked reads


# Notebook Overview

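This notebook maps BIOCLIM predictions to geospatial outputs. It loads fitted BIOCLIM models and variable stacks, then generates raster maps and visualizations. The cells shown here plot historical and future bioclimatic variable rasters side by side with their difference, and tabulate the change in the mean of each variable per model.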

- Key steps: load model and rasters, predict across grid, postprocess thresholds, plot/export maps
- Inputs: trained BIOCLIM model, aligned raster stack
- Outputs: prediction rasters, thresholded presence/absence maps, figures
- Run order: After BIOCLIM training/evaluation.


In [None]:
# =============================================================================
# BIOCLIMATIC VARIABLES MAPPING AND VISUALIZATION
# =============================================================================

# Purpose
# - Create publication-quality maps of historical vs. future BIOCLIM variables
# - Visualize per-variable differences to highlight spatial change
# - Export figures for reporting/GIS use
#
# Requirements
# - Predictor rasters must be spatially aligned (same CRS, resolution, extent)
# - Rasters follow the naming scheme used later in this notebook
# - Variables `models`, `specie`, `region`, `savefig` are defined upstream
#
# Notes
# - Large rasters: consider tiling/chunked reads if memory is constrained
# - Colormaps are chosen to reflect variable types (temp vs precip) and differences

import os  # File system operations
import matplotlib.pyplot as plt  # Plotting and visualization
import rioxarray  # Raster I/O for xarray
import geopandas as gpd  # Geospatial data handling
import numpy as np  # Numerical operations
import pandas as pd  # Data analysis and tables

############### CONFIGURATION - MODIFY AS NEEDED ###############

# specie = 'leptocybe-invasa'  # Target species; example values shown
# region = region_train        # Study region identifier (string)
# training = False             # Whether this refers to training-region data
# savefig = True               # If True, figures are written to `figs_path`

###########################################################

In [None]:
# Output/input locations, all resolved relative to the parent of the
# current working directory:
# - `figs_path`  -> <parent>/figs            (figures, written when `savefig` is True)
# - `out_path`   -> <parent>/out/<specie>    (output root for the current `specie`)
# - `input_path` -> <out_path>/input         (input rasters used for plotting)
_parent_dir = os.path.dirname(os.getcwd())
figs_path = os.path.join(_parent_dir, 'figs')
out_path = os.path.join(_parent_dir, 'out', specie)
input_path = os.path.join(out_path, 'input')


In [None]:
# =============================================================================
# BIOCLIMATIC VARIABLE MAPS AND CHANGE STATISTICS
# =============================================================================
# For every model prefix in `models` this cell
#   1. draws one figure with a row per BIO variable and three columns
#      (Historical, Future, Future - Historical),
#   2. computes the change in the raster mean per variable,
#   3. prints (and, if `savefig` is True, writes) a per-model statistics table,
# and finally prints/writes one table combining all models.
#
# Each raster pair is read once and reused for both the maps and the
# statistics. A variable whose files are missing, unreadable or contain no
# valid cells is reported and skipped: its figure row stays empty and it does
# not appear in the tables.
#
# Reads from upstream cells: `models`, `specie`, `region`, `savefig`,
# `input_path`, `figs_path`.

# Choose which WorldClim bioclimatic variables to visualize.
# Convention: 1–11 are temperature-related, 12–19 are precipitation-related.
# Adjust `bioclim_plot` to a subset (e.g., [1, 5, 12]) to limit output.
bioclim_plot = list(range(1, 20))

# Human-readable names used in figure titles and in the statistics tables.
bioclim_names = {
    1: 'Annual Mean Temperature',
    2: 'Mean Diurnal Range',
    3: 'Isothermality (×100)',
    4: 'Temperature Seasonality (standard deviation ×100)',
    5: 'Max Temperature of Warmest Month',
    6: 'Min Temperature of Coldest Month',
    7: 'Temperature Annual Range',
    8: 'Mean Temperature of Wettest Quarter',
    9: 'Mean Temperature of Driest Quarter',
    10: 'Mean Temperature of Warmest Quarter',
    11: 'Mean Temperature of Coldest Quarter',
    12: 'Annual Precipitation',
    13: 'Precipitation of Wettest Month',
    14: 'Precipitation of Driest Month',
    15: 'Precipitation Seasonality',
    16: 'Precipitation of Wettest Quarter',
    17: 'Precipitation of Driest Quarter',
    18: 'Precipitation of Warmest Quarter',
    19: 'Precipitation of Coldest Quarter'
}

# Figure layout: one row per variable; three columns for Hist, Future, Difference
nrows = len(bioclim_plot)
ncols = 3
figsize = (18, 4 * nrows)

# One statistics record per (model, variable), accumulated across all models
all_stats_list = []

# Make sure the output directory exists before any figure/CSV is written
if savefig:
    os.makedirs(figs_path, exist_ok=True)

# Imported once here instead of on every iteration of the plotting loop;
# kept inside this cell so the cell does not depend on an edited import cell.
from matplotlib.colors import TwoSlopeNorm

# REQUIRE: `models` is an iterable of model prefixes that match input filenames
for model_prefix in models:
    print(f"\nProcessing model: {model_prefix}")
    # Shared axes and constrained layout align panels and colorbars.
    # squeeze=False keeps `ax` two-dimensional even when only one variable is
    # selected, so `ax[idx, col]` indexing always works.
    fig, ax = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        figsize=figsize,
        sharex=True,
        sharey=True,
        constrained_layout=True,
        squeeze=False,
    )

    # Statistics records for this model only
    stats_list = []

    for idx, bc_no in enumerate(bioclim_plot):
        # File naming convention: {model}_bio_{N}_{region}.tif for the historical
        # raster and {model}_bio_{N}_{region}_future.tif for the future raster
        hist_file = os.path.join(input_path, f'{model_prefix}_bio_{bc_no}_{region}.tif')
        future_file = os.path.join(input_path, f'{model_prefix}_bio_{bc_no}_{region}_future.tif')

        # Debug: print file paths for first BIO variable only
        if bc_no == bioclim_plot[0]:
            print(f"  Checking files for model {model_prefix}:")
            print(f"    Historical: {hist_file}")
            print(f"    Future: {future_file}")

        # Skip the variable (row stays empty) when either raster is missing
        if not os.path.exists(hist_file):
            print(f"WARNING: File not found: {hist_file}")
            continue
        if not os.path.exists(future_file):
            print(f"WARNING: File not found: {future_file}")
            continue

        # Read both rasters fully into memory once; they are reused for the
        # maps and the statistics. masked=True so nodata is read as NaN, which
        # the NaN filtering below relies on.
        try:
            hist_ds = rioxarray.open_rasterio(hist_file, masked=True).load()
            future_ds = rioxarray.open_rasterio(future_file, masked=True).load()
        except Exception as e:
            print(f"ERROR reading files for BIO {bc_no}: {e}")
            continue

        # Valid (non-NaN) cell values of each raster
        hist_values = hist_ds.values.ravel()
        hist_values = hist_values[~np.isnan(hist_values)]
        if hist_values.size == 0:
            print(f"WARNING: No valid data in historical file for BIO {bc_no}")
            continue

        future_values = future_ds.values.ravel()
        future_values = future_values[~np.isnan(future_values)]
        if future_values.size == 0:
            print(f"WARNING: No valid data in future file for BIO {bc_no}")
            continue

        # ---------------------------------------------------------------------
        # Maps
        # ---------------------------------------------------------------------
        # Colormap by variable group: temperature variables (1–11), annual
        # precipitation (12), remaining precipitation variables (13–19)
        if bc_no < 12:
            cmap = plt.cm.Spectral_r
        elif bc_no == 12:
            cmap = plt.cm.BuGn
        else:
            cmap = plt.cm.BrBG

        # Hist and Future share ONE colorbar, so both panels must be drawn with
        # the same explicit colour limits; otherwise the colorbar would only be
        # valid for the panel it was created from.
        shared_vmin = float(min(hist_values.min(), future_values.min()))
        shared_vmax = float(max(hist_values.max(), future_values.max()))

        # Column 1: Historical
        pcol_hist = hist_ds.plot(
            ax=ax[idx, 0],
            cmap=cmap,
            vmin=shared_vmin,
            vmax=shared_vmax,
            add_colorbar=False,
            add_labels=False,
        )
        ax[idx, 0].set_title(f'BIO {bc_no}: {bioclim_names[bc_no]} - Hist')

        # Column 2: Future (same limits as Historical)
        future_ds.plot(
            ax=ax[idx, 1],
            cmap=cmap,
            vmin=shared_vmin,
            vmax=shared_vmax,
            add_colorbar=False,
            add_labels=False,
        )
        ax[idx, 1].set_title(f'BIO {bc_no}: {bioclim_names[bc_no]} - Future')

        # Column 3: Difference (Future − Hist)
        # NOTE(review): assumes both rasters share identical coordinates, as
        # stated in the notebook requirements — confirm for new inputs.
        difference = future_ds - hist_ds

        # Symmetric range around 0 for the diverging colormap. Only finite
        # values are considered; if there are none, or the rasters are
        # identical, fall back to a unit range because TwoSlopeNorm requires
        # vmin < vcenter < vmax.
        diff_values = difference.values
        finite_abs_diff = np.abs(diff_values[np.isfinite(diff_values)])
        absmax = float(finite_abs_diff.max()) if finite_abs_diff.size else 0.0
        if absmax == 0.0:
            absmax = 1.0

        # Diverging map: negatives blue, positives red, centered at 0
        norm = TwoSlopeNorm(vmin=-absmax, vcenter=0.0, vmax=absmax)
        pcol_diff = difference.plot(
            ax=ax[idx, 2],
            cmap='seismic',
            norm=norm,
            add_colorbar=False,
            add_labels=False,
        )
        ax[idx, 2].set_title(f'BIO {bc_no}: {bioclim_names[bc_no]} - Difference')

        # Colorbars: vertical for Difference, shared horizontal for Hist/Future
        cbar = fig.colorbar(pcol_diff, ax=ax[idx, 2], orientation='vertical', fraction=0.075, pad=0.01)
        cbar.ax.tick_params(labelsize=10)
        # 11 evenly spaced ticks across the symmetric difference range
        ticks = np.linspace(-absmax, absmax, 11)
        cbar.set_ticks(ticks)
        cbar.set_ticklabels([f'{tick:.2f}' for tick in ticks])

        cbar = fig.colorbar(pcol_hist, ax=[ax[idx, 0], ax[idx, 1]], orientation='horizontal', fraction=0.05, pad=0.1)
        cbar.ax.tick_params(labelsize=8)
        # 9 evenly spaced ticks across the limits shared by Hist and Future
        ticks_hist = np.linspace(shared_vmin, shared_vmax, 9)
        cbar.set_ticks(ticks_hist)
        cbar.set_ticklabels([f'{tick:.2f}' for tick in ticks_hist])

        # ---------------------------------------------------------------------
        # Statistics (Historical vs Future)
        # ---------------------------------------------------------------------
        hist_mean = float(hist_values.mean())
        future_mean = float(future_values.mean())

        # Difference of the raster means (Future - Historical)
        diff_mean = future_mean - hist_mean

        # Percentage change relative to the historical mean; undefined (NaN)
        # when that mean is exactly 0.
        # NOTE: the denominator is signed, so a negative historical mean flips
        # the sign of the percentage relative to `diff_mean`.
        pct_change_mean = (diff_mean / hist_mean * 100) if hist_mean != 0 else np.nan

        # Debug: print values for first BIO variable only
        if bc_no == bioclim_plot[0]:
            print(f"  Model {model_prefix}, BIO {bc_no}:")
            print(f"    Files: {os.path.basename(hist_file)} | {os.path.basename(future_file)}")
            print(f"    Hist_Mean={hist_mean:.2f}, Future_Mean={future_mean:.2f}, Diff_Mean={diff_mean:.2f}")

        # Only Diff_Mean and Pct_Change_Mean are reported (round() leaves NaN as NaN)
        stats_list.append({
            'Model': model_prefix,
            'BIO': bc_no,
            'Variable': bioclim_names[bc_no],
            'Diff_Mean': round(diff_mean, 2),
            'Pct_Change_Mean': round(pct_change_mean, 2),
        })

    # Save one figure per model prefix (all BIO variables stacked vertically)
    if savefig:
        file_path = os.path.join(figs_path, f'07_Bioclim_{model_prefix}_{specie}_{region}.png')
        fig.savefig(file_path, transparent=True)

    # The same records feed the combined (all models) table
    all_stats_list.extend(stats_list)

    # =============================================================================
    # BIOCLIM STATISTICAL ANALYSIS TABLE (THIS MODEL)
    # =============================================================================
    if stats_list:
        stats_df = pd.DataFrame(stats_list)

        # Display the table
        print(f"\n{'='*80}")
        print(f"BIOCLIM STATISTICAL ANALYSIS: {model_prefix} - {specie} - {region}")
        print(f"{'='*80}\n")
        print(stats_df.to_string(index=False))
        print(f"\n{'='*80}\n")

        # Save table to CSV if savefig is True
        if savefig:
            csv_path = os.path.join(figs_path, f'07_Bioclim_stats_{model_prefix}_{specie}_{region}.csv')
            stats_df.to_csv(csv_path, index=False)
            print(f"Statistics table saved to: {csv_path}\n")
    else:
        print(f"WARNING: No statistics calculated for model {model_prefix}")

# =============================================================================
# COMBINED STATISTICAL ANALYSIS TABLE (ALL MODELS)
# =============================================================================
if all_stats_list:
    combined_stats_df = pd.DataFrame(all_stats_list)

    # Display combined table
    print(f"\n{'='*80}")
    print(f"COMBINED BIOCLIM STATISTICAL ANALYSIS (ALL MODELS): {specie} - {region}")
    print(f"{'='*80}\n")
    print(combined_stats_df.to_string(index=False))
    print(f"\n{'='*80}\n")

    # Save combined table to CSV if savefig is True
    if savefig:
        combined_csv_path = os.path.join(figs_path, f'07_Bioclim_stats_ALL_MODELS_{specie}_{region}.csv')
        combined_stats_df.to_csv(combined_csv_path, index=False)
        print(f"Combined statistics table saved to: {combined_csv_path}\n")
else:
    print("\nWARNING: No statistics collected from any models. Check file paths and model names.")