## Initialisation

In [None]:
import logging
import os
import re
import sys
import warnings
from collections import namedtuple
from functools import reduce
from itertools import combinations
from operator import mul

import cloudpickle
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import shap
from joblib import Memory, Parallel, delayed
from loguru import logger as loguru_logger
from matplotlib.patches import Rectangle
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from tqdm import tqdm

import wildfires.analysis
from alepython import ale_plot
from alepython.ale import _second_order_ale_quant
from wildfires.analysis import *
from wildfires.dask_cx1 import *
from wildfires.data import *
from wildfires.logging_config import enable_logging
from wildfires.qstat import get_ncpus
from wildfires.utils import *

# Enable loguru messages from `alepython`, then replace the existing loguru
# handlers with a single stderr sink that only shows WARNING and above.
loguru_logger.enable("alepython")
loguru_logger.remove()
loguru_logger.add(sys.stderr, level="WARNING")

logger = logging.getLogger(__name__)

# Project helper from wildfires.logging_config; presumably configures the
# standard-library logging for notebook use -- confirm there.
enable_logging("jupyter")

# Silence known, noisy warnings. The message argument is a regular expression
# that must match from the start of the warning text, hence the leading ".*".
# The trailing wildcard is spelled ".*" so that it is not a quantifier applied
# to the last literal character of the phrase.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")
warnings.filterwarnings("ignore", ".*DEFAULT_SPHERICAL_EARTH_RADIUS.*")
warnings.filterwarnings("ignore", ".*guessing contiguous bounds.*")

# Default plot styling for the notebook (individual cells override the figure
# size where needed).
normal_coast_linewidth = 0.5
mpl.rc("figure", figsize=(14, 6))
mpl.rc("font", size=9.0)

# Common name used for the figure directory, the cache and the pickle folder.
save_name = "analysis_variable_diagnostics"

# NOTE(review): "~" is passed through unexpanded by os.path.join; presumably
# FigureSaver expands it -- confirm.
figure_saver = FigureSaver(directories=os.path.join("~", "tmp", save_name), debug=True)
memory = get_memory(save_name, verbose=100)
CACHE_DIR = os.path.join(DATA_DIR, ".pickle", save_name)

### Load the customized `get_data()` function for this experiment.

In [None]:
from get_lags_rf_cross_val_data import get_data

## Creating the Data Structures used for Fitting

In [None]:
# Temporal shifts (in months) requested from `get_data`.
shift_months = [1, 3, 6, 9, 12, 18, 24]

# Fetch everything in one call, then give each of the six returned items its
# own name. The temporary holder is removed again so that only the `e_s_*`
# names remain defined.
_e_s_results = get_data(shift_months=shift_months, selection_variables=None)
(
    e_s_endog_data,
    e_s_exog_data,
    e_s_master_mask,
    e_s_filled_datasets,
    e_s_masked_datasets,
    e_s_land_mask,
) = _e_s_results
del _e_s_results

### Offset data from 12 or more months before the current month in order to ease analysis (interpretability).

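We are interested in the trends in these properties rather than their absolute values, so from each variable lagged by 12 or more months we subtract a more recent 'seasonal cycle' analogue: the same variable at a lag that differs by a whole number of years (e.g. the 18 month lag minus the 6 month lag, and the 12 and 24 month lags minus the current month).
This hopefully avoids capturing the same relationships for a variable and its 12 month counterpart due to their high correlation.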

In [None]:
# Replace every column lagged by 12 or more months with its difference from a
# more recent lag of the same variable (the lag modulo 12 months), e.g.
# "<name> -18 Month" becomes "<name> -18 - -6 Month".
offset_pattern = re.compile(r"-\d{1,2}")

to_delete = []
# Iterate over a snapshot of the column labels: new columns are added to the
# frame inside the loop and must not be visited themselves.
for column in list(e_s_exog_data.columns):
    match = offset_pattern.search(column)
    if match is None:
        # No lag in the column name, so there is nothing to offset.
        continue
    span = match.span()
    original_offset = int(match.group())
    if original_offset > -12:
        # Only shift months that are 12 or more months before the current month.
        continue
    # Lag to subtract: -18 -> -6, while -12 and -24 -> 0 (the unshifted column).
    comp = -(-original_offset % 12)
    # Text on either side of the matched offset, without the single separator
    # character that precedes and follows it.
    prefix = column[: span[0] - 1]
    suffix = column[span[1] + 1 :]
    # Change the string to reflect the shift.
    new_column = " ".join((prefix, f"{original_offset} - {comp}", suffix))
    if comp == 0:
        # The unshifted column carries no offset or trailing text in its name.
        comp_column = prefix
    else:
        comp_column = " ".join((prefix, f"{comp}", suffix))
    print(column, comp_column)
    e_s_exog_data[new_column] = e_s_exog_data[column] - e_s_exog_data[comp_column]
    to_delete.append(column)
# Drop the original long-lag columns only after all differences are computed.
for column in to_delete:
    del e_s_exog_data[column]

## Mapping

In [None]:
# Map the grid cells where FAPAR and Dry Day Period both reach their thresholds.
FAPAR_lim = 0.39
DRY_DAY_lim = 20

# Start from fully masked grids with the shape of the master mask. The mask is
# allocated as booleans directly, since numpy.ma stores it as booleans anyway.
fapar_data = np.ma.MaskedArray(
    np.zeros_like(e_s_master_mask, dtype=np.float64),
    mask=np.ones_like(e_s_master_mask, dtype=bool),
)
dry_day_data = fapar_data.copy()

# Write the column values into the positions the master mask leaves unmasked.
# Assumes the rows of e_s_exog_data are ordered like those positions -- TODO confirm.
fapar_data[~e_s_master_mask] = e_s_exog_data["FAPAR"].values
dry_day_data[~e_s_master_mask] = e_s_exog_data["Dry Day Period"].values

# Flag elements where either variable is masked or below its threshold.
combined_mask = (
    dry_day_data.mask
    | fapar_data.mask
    | (dry_day_data < DRY_DAY_lim)
    | (fapar_data < FAPAR_lim)
)
# Combine with the land mask and flag everything outside the lat/lon box.
# NOTE(review): the polarity of get_land_mask() and box_mask() is defined in
# wildfires -- confirm that `&=` / `|= ~` express the intended selection.
combined_mask &= match_shape(get_land_mask(), dry_day_data.shape)
combined_mask |= ~match_shape(
    box_mask(lats=(-60, 90), lons=(-180, 180)), dry_day_data.shape
)

# Keep a location if it is unflagged for at least one index along axis 0
# (presumably time -- confirm), expanded back to the full shape.
selection = match_shape(np.any(~combined_mask, axis=0), combined_mask.shape)
new_fapar_data = fapar_data.copy()
new_fapar_data.mask = ~selection

new_fapar = dummy_lat_lon_cube(new_fapar_data)

mpl.rc("figure", figsize=(9, 3.8), dpi=100)
with figure_saver("fapar_upper_quadrant_map"):
    _ = cube_plotting(
        new_fapar,
        select_valid=True,
        title=f"FAPAR > {FAPAR_lim} & Dry Day Period > {DRY_DAY_lim}",
        label="Mean FAPAR",
        coastline_kwargs={"linewidth": 0.3},
    )

In [None]:
# Map the grid cells where the 18 month Dry Day Period difference reaches its
# threshold while AGB Tree lies within the given limits.

# Start from fully masked grids with the shape of the master mask. The mask is
# allocated as booleans directly, since numpy.ma stores it as booleans anyway.
dry_day_data18 = np.ma.MaskedArray(
    np.zeros_like(e_s_master_mask, dtype=np.float64),
    mask=np.ones_like(e_s_master_mask, dtype=bool),
)
agb_tree_data = dry_day_data18.copy()

# Write the column values into the positions the master mask leaves unmasked.
# Assumes the rows of e_s_exog_data are ordered like those positions -- TODO confirm.
dry_day_data18[~e_s_master_mask] = e_s_exog_data["Dry Day Period -18 - -6 Month"].values
agb_tree_data[~e_s_master_mask] = e_s_exog_data["AGB Tree"].values

dry_day_lim = 22
agb_tree_lims = (0.9, 20)

# Flag elements where either variable is masked, the dry day difference is
# below its limit, or AGB Tree falls outside its limits.
combined_mask = (
    dry_day_data18.mask
    | agb_tree_data.mask
    | (dry_day_data18 < dry_day_lim)
    | (agb_tree_data < agb_tree_lims[0])
    | (agb_tree_data > agb_tree_lims[1])
)
# Use this cell's own array for the target shape so that the cell does not
# depend on variables created by other cells.
# NOTE(review): the polarity of get_land_mask() and box_mask() is defined in
# wildfires -- confirm that `&=` / `|= ~` express the intended selection.
combined_mask &= match_shape(get_land_mask(), dry_day_data18.shape)
combined_mask |= ~match_shape(
    box_mask(lats=(-60, 90), lons=(-180, 180)), dry_day_data18.shape
)

# Keep a location if it is unflagged for at least one index along axis 0
# (presumably time -- confirm), expanded back to the full shape.
selection = match_shape(np.any(~combined_mask, axis=0), combined_mask.shape)
show_data = agb_tree_data.copy()
show_data.mask = ~selection

show_data = dummy_lat_lon_cube(show_data)

mpl.rc("figure", figsize=(9, 3.8), dpi=100)
with figure_saver("tree_wedge_map"):
    _ = cube_plotting(
        show_data,
        select_valid=True,
        title=f"Dry Day Period Δ18 Month > {dry_day_lim} & {agb_tree_lims[0]} < AGBTree < {agb_tree_lims[1]}",
        label="Mean AGBTree",
        coastline_kwargs={"linewidth": 0.3},
    )

## Correlation Plot

In [None]:
X_corr = e_s_exog_data

# Split the column labels once: everything but the last 12 columns is plotted,
# and the last 12 are reported as excluded.
_corr_kept, _corr_excluded = X_corr.columns[:-12], X_corr.columns[-12:]

with figure_saver("corr_plot_with_sif"):
    corr_plot(X_corr[_corr_kept], fig_kwargs=dict(figsize=(12, 8)))

print("Excluded columns:", _corr_excluded)

In [None]:
# Correlation plot over every column of X_corr.
_corr_all = X_corr.columns
with figure_saver("corr_plot_full"):
    corr_plot(X_corr[_corr_all], fig_kwargs=dict(figsize=(12, 8)))