In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension, then enable mode 2 so that imported modules
# are reloaded before each cell executes (edits to project code are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [None]:
import logging
import logging.config
import os
import pickle
import warnings
from copy import deepcopy

import iris
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from IPython.display import HTML
from matplotlib import animation, rc
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm, tqdm_notebook

import wildfires.utils as utils
from joblib import Memory, Parallel, delayed
from wildfires.analysis.analysis import *
from wildfires.analysis.plotting import (
    cube_plotting,
    get_cubes_vmin_vmax,
    map_model_output,
    partial_dependence_plot,
)
from wildfires.analysis.processing import log_map, log_modulus, map_name, vif
from wildfires.data.cube_aggregation import *
from wildfires.data.cube_aggregation import Datasets, get_ncpus, prepare_selection
from wildfires.data.datasets import *
from wildfires.data.datasets import DATA_DIR, ERA5_DryDayPeriod, data_map_plot
from wildfires.logging_config import LOGGING
from wildfires.utils import Time, TqdmContext
from wildfires.utils import land_mask as get_land_mask
from wildfires.utils import match_shape

# Create the notebook's logger and apply the project-wide logging configuration
# imported from wildfires.logging_config.
# NOTE(review): the logger is created *before* dictConfig() runs. If LOGGING
# leaves `disable_existing_loggers` at its default (True) and does not name
# this logger, dictConfig() will disable it — confirm against the LOGGING dict.
logger = logging.getLogger(__name__)
logging.config.dictConfig(LOGGING)

# NOTE: tqdm_notebook (imported above) does not work in this notebook for an unknown reason.

# Value reported by the project's get_ncpus() helper (presumably the number of
# usable CPUs — confirm in wildfires.data.cube_aggregation).
ncpus = get_ncpus()
# Suppress warnings whose message matches this regular expression.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")
# joblib cache object rooted at the project's DATA_DIR.
memory = Memory(DATA_DIR)

In [None]:
# Shared figure-styling constants, listed alphabetically.
# NOTE(review): units are not stated in this notebook; `dpi` is presumably the
# resolution for saved figures and `normal_size` a font size in points — confirm.
dpi: int = 600
normal_coast_linewidth: float = 0.5
normal_size: float = 9.0

In [None]:
# General setup.
# NOTE(review): the same LOGGING configuration is already applied in the
# imports cell; this call re-applies it.
logging.config.dictConfig(LOGGING)

# Class-level FigureSaver settings: output directory and debug flag.
FigureSaver.directory = "~/tmp/to_send"
FigureSaver.debug = True

# TODO: Plotting setup in a more rigorous manner.
# The styling constants (`normal_size`, `normal_coast_linewidth`) are defined
# once in the preceding constants cell; reuse them here instead of repeating
# their literal values so the two cells cannot drift apart.
mpl.rcParams["font.size"] = normal_size
verbose = True

# Target variable name; the same name is included in the variable selection
# further down in this cell.
target_variable = "monthly burned area"

# Derived variables: each key is the name of a new variable and each value is a
# callable that builds it from the existing exogenous data.
transformations = {
    "Temp Range": lambda exog_data: exog_data["Max Temp"] - exog_data["Min Temp"],
}
# Variables to drop once the derived variables above have been created.
deletions = ("Min Temp",)

# Names handed to data_processing() as `log_var_names` / `sqrt_var_names`; the
# transformed variables replace the originals in the process.
log_var_names = [
    "Temp Range",
    "Dry Day Period",
]
sqrt_var_names = [
    # "Lightning Climatology",
    "popd",
]

# Dataset selection: instantiate the datasets of interest and immediately
# restrict them to the listed variables, binding only the final result.
# Alternative starting point (kept for reference):
# selection = get_all_datasets(ignore_names=IGNORED_DATASETS)
# selection.remove_datasets("GSMaP Dry Day Period")
selection = Datasets(
    (
        AvitabileThurnerAGB(),
        CHELSA(),
        Copernicus_SWI(),
        ERA5_CAPEPrecip(),
        ERA5_DryDayPeriod(),
        ESA_CCI_Landcover_PFT(),
        GFEDv4(),
        GlobFluo_SIF(),
        HYDE(),
        # LIS_OTD_lightning_climatology(),
        MOD15A2H_LAI_fPAR(),
        VODCA(),
    )
).select_variables(
    [
        "AGBtree",
        "maximum temperature",
        "minimum temperature",
        "Soil Water Index with T=1",
        "Product of CAPE and Precipitation",
        "dry_day_period",
        "ShrubAll",
        "TreeAll",
        # "pftBare",
        "pftCrop",
        "pftHerb",
        "monthly burned area",
        "SIF",
        "popd",
        # "Combined Flash Rate Monthly Climatology",
        "Fraction of Absorbed Photosynthetically Active Radiation",
        # "Leaf Area Index",
        # "Vegetation optical depth Ku-band (18.7 GHz - 19.35 GHz)",
        "Vegetation optical depth X-band (10.65 GHz - 10.7 GHz)",
    ]
)
# Keyword options for the project's data_processing() helper, gathered in one
# place so they can be inspected separately from the call itself.
processing_options = dict(
    which="mean",
    transformations=transformations,
    deletions=deletions,
    log_var_names=log_var_names,
    sqrt_var_names=sqrt_var_names,
)

# data_processing() returns six values, unpacked here in order.
(
    endog_data,
    exog_data,
    master_mask,
    filled_datasets,
    masked_datasets,
    land_mask,
) = data_processing(selection, **processing_options)

In [None]:
# Set the default figure size globally (this persists for all later cells),
# then draw histograms for the masked datasets.
mpl.rc("figure", figsize=(20, 12))
plot_histograms(masked_datasets)

In [None]:
# Threshold forwarded to print_vifs() as `thres`.
vif_threshold = 6
print_vifs(exog_data, thres=vif_threshold)

In [None]:
# Call the project's corr_plot() helper on the exogenous data (presumably a
# correlation-matrix plot of the predictors — confirm in wildfires.analysis).
corr_plot(exog_data)