In [None]:
import logging
import logging.config
import os
import pickle
import warnings
from copy import deepcopy

import cf_units
import ipdb
import iris
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from IPython.display import HTML
from joblib import Memory, Parallel, delayed
from matplotlib import animation, rc
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm, tqdm_notebook

import wildfires.utils as utils
from wildfires.analysis.analysis import get_no_fire_mask
from wildfires.analysis.plotting import (
    cube_plotting,
    get_cubes_vmin_vmax,
    map_model_output,
    partial_dependence_plot,
)
from wildfires.analysis.processing import log_map, log_modulus, map_name, vif
from wildfires.data.cube_aggregation import *
from wildfires.data.cube_aggregation import Datasets, get_ncpus, prepare_selection
from wildfires.data.datasets import *
from wildfires.data.datasets import DATA_DIR, GSMaP_dry_day_period, data_map_plot
from wildfires.logging_config import LOGGING
from wildfires.utils import Time, TqdmContext
from wildfires.utils import land_mask as get_land_mask
from wildfires.utils import match_shape

# Apply the project's logging configuration *before* creating this notebook's
# logger.  logging.config.dictConfig() disables every non-root logger that
# already exists unless the configuration sets `disable_existing_loggers` to
# False, so a logger created first could end up silenced.
# NOTE(review): LOGGING is defined in wildfires.logging_config and is not
# visible here - if it already sets `disable_existing_loggers: False` this
# ordering is merely a precaution.
logging.config.dictConfig(LOGGING)
logger = logging.getLogger(__name__)

# tqdm_notebook does not work for some reason
# NOTE(review): the cause was not recorded - confirm whether this still applies.

# Hide warnings whose message matches the regular expression below.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")

In [None]:
# Shared figure-styling constants.
# NOTE(review): none of these are referenced in the visible cells - the
# meanings below are inferred from the names only; confirm against their users.
dpi = 600  # presumably the resolution used when saving figures
normal_coast_linewidth = 0.5  # presumably the coastline line width on maps
normal_size = 9.0  # presumably a base (font or figure) size

In [None]:
# Retrieve the "no fire" mask from the analysis helpers.  It is combined with
# the land mask in the later climatology cells.
# NOTE(review): presumably True where no fire was observed - confirm against
# get_no_fire_mask().
fire_mask = get_no_fire_mask()
# Plot the mask; binding the return value to `_` keeps the notebook from
# echoing it as the cell output.
_ = cube_plotting(fire_mask, title="No Fire Mask")

In [None]:
# The two datasets that are compared throughout this notebook.
lightning_sources = (LIS_OTD_lightning_climatology(), ERA5_CAPEPrecip())
selection = Datasets(lightning_sources)
selection.show("pretty")

# Tabulate the time information reported for the selected datasets.
time_summary = dataset_times(selection.datasets)
min_time, max_time, times_df = time_summary
print(times_df.to_string(index=False))
# For a LaTeX table use `times_df.to_latex(index=False)` instead.

# prepare_selection() yields three collections, unpacked here as the monthly,
# mean and climatology datasets.
prepared_selection = prepare_selection(selection)
monthly_datasets, mean_datasets, climatology_datasets = prepared_selection

# Element-wise inverse of the mask returned by get_land_mask().
# NOTE(review): despite its name this is presumably True over *non-land*
# cells (i.e. the cells to hide) - confirm against wildfires.utils.land_mask.
land_mask = ~get_land_mask(n_lon=1440)

In [None]:
# Work on an independent deep copy; `mean_datasets` itself stays untouched.
masked_mean_datasets = mean_datasets.copy(deep=True)

# Add `land_mask` (expanded to each cube's shape) to every cube's own mask.
for cube in masked_mean_datasets.cubes:
    expanded_land_mask = match_shape(land_mask, cube.shape)
    cube.data.mask |= expanded_land_mask

mpl.rcParams.update({"figure.figsize": (11, 6)})

# Options shared by every mean plot; only the cube and the title vary.
mean_plot_options = dict(log=True, auto_log_title=True, transform_vmin_vmax=False)

for dataset in masked_mean_datasets:
    cube_plotting(dataset.cube, title=f"Mean ({dataset.name})", **mean_plot_options)

## Climatology without fire mask, only land mask

In [None]:
# Make a deep copy so that the original cubes are preserved.
masked_climatology_datasets = climatology_datasets.copy(deep=True)

# Apply the masks: add `land_mask` (expanded to each cube's shape) to every
# cube's own mask.
for cube in masked_climatology_datasets.cubes:
    cube.data.mask |= match_shape(land_mask, cube.shape)

mpl.rcParams["figure.figsize"] = (11, 6)

era5 = "ERA5_CAPEPrecip"
lis = "LIS_OTD_lightning_climatology"

era5_cube = masked_climatology_datasets.select_datasets(
    era5, inplace=False
).dataset.cube
lis_cube = masked_climatology_datasets.select_datasets(lis, inplace=False).dataset.cube

# Difference between the two climatologies as a percentage of the LIS/OTD
# value, placed on the time/latitude/longitude coordinates of the ERA5 cube.
relative_climatology_differences = dummy_lat_lon_cube(
    100 * (lis_cube.data - era5_cube.data) / lis_cube.data,
    units=cf_units.Unit("%"),
    dim_coords_and_dims=(
        (era5_cube.coord("time"), 0),
        (era5_cube.coord("latitude"), 1),
        (era5_cube.coord("longitude"), 2),
    ),
)

# --- Mean over time --------------------------------------------------------
mean_relative_climatology_differences = relative_climatology_differences.collapsed(
    "time", iris.analysis.MEAN
)
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([mean_relative_climatology_differences]), (10, 90)
)
cube_plotting(
    mean_relative_climatology_differences,
    log=False,
    title="Mean Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Per-cell best case ----------------------------------------------------
# For every grid cell pick the value from the time step with the smallest
# absolute difference.  "Non-local": neighbouring cells may come from
# different time steps.  The trailing [0] drops the length-1 time axis.
min_relative_climatology_differences = dummy_lat_lon_cube(
    np.take_along_axis(
        relative_climatology_differences.data,
        np.expand_dims(
            np.argmin(np.abs(relative_climatology_differences.data), axis=0), axis=0
        ),
        axis=0,
    ),
    units=cf_units.Unit("%"),
)[0]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([min_relative_climatology_differences]), (0, 100)
)
cube_plotting(
    min_relative_climatology_differences,
    log=False,
    title="(Best-Case) Non-local Min Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Per-cell worst case ---------------------------------------------------
# As above, but picking the time step with the largest absolute difference.
max_relative_climatology_differences = dummy_lat_lon_cube(
    np.take_along_axis(
        relative_climatology_differences.data,
        np.expand_dims(
            np.argmax(np.abs(relative_climatology_differences.data), axis=0), axis=0
        ),
        axis=0,
    ),
    units=cf_units.Unit("%"),
)[0]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([max_relative_climatology_differences]), (0, 100)
)
_ = cube_plotting(
    max_relative_climatology_differences,
    log=False,
    title="(Worst-Case) Non-local Max Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Single time steps with the smallest / largest overall difference -------
# Frobenius norm of every time slice, computed with numpy.ma so that masked
# cells are left out.  np.linalg.norm() converts its input with np.asarray(),
# which drops the mask and would let whatever values are stored underneath
# the masked cells influence which time step is selected.
time_slice_norms = np.ma.sqrt(
    np.ma.sum(relative_climatology_differences.data ** 2, axis=(1, 2))
)

min_relative_climatology_difference = relative_climatology_differences[
    int(np.ma.argmin(time_slice_norms))
]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([min_relative_climatology_difference]), (2, 98)
)
cube_plotting(
    min_relative_climatology_difference,
    log=False,
    title="Min Relative Climatology Difference Single Time (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

max_relative_climatology_difference = relative_climatology_differences[
    int(np.ma.argmax(time_slice_norms))
]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([max_relative_climatology_difference]), (2, 98)
)
_ = cube_plotting(
    max_relative_climatology_difference,
    log=False,
    title="Max Relative Climatology Difference Single Time (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

## Climatology with fire mask and land mask

In [None]:
# Make a deep copy so that the original cubes are preserved.
masked_climatology_datasets = climatology_datasets.copy(deep=True)

# Apply the masks: add both `land_mask` and `fire_mask` (each expanded to the
# cube's shape) to every cube's own mask.
for cube in masked_climatology_datasets.cubes:
    cube.data.mask |= match_shape(land_mask, cube.shape) | match_shape(
        fire_mask, cube.shape
    )

mpl.rcParams["figure.figsize"] = (11, 6)

era5 = "ERA5_CAPEPrecip"
lis = "LIS_OTD_lightning_climatology"

era5_cube = masked_climatology_datasets.select_datasets(
    era5, inplace=False
).dataset.cube
lis_cube = masked_climatology_datasets.select_datasets(lis, inplace=False).dataset.cube

# Difference between the two climatologies as a percentage of the LIS/OTD
# value, placed on the time/latitude/longitude coordinates of the ERA5 cube.
relative_climatology_differences = dummy_lat_lon_cube(
    100 * (lis_cube.data - era5_cube.data) / lis_cube.data,
    units=cf_units.Unit("%"),
    dim_coords_and_dims=(
        (era5_cube.coord("time"), 0),
        (era5_cube.coord("latitude"), 1),
        (era5_cube.coord("longitude"), 2),
    ),
)

# --- Mean over time --------------------------------------------------------
mean_relative_climatology_differences = relative_climatology_differences.collapsed(
    "time", iris.analysis.MEAN
)
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([mean_relative_climatology_differences]), (10, 90)
)
cube_plotting(
    mean_relative_climatology_differences,
    log=False,
    title="Mean Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Per-cell best case ----------------------------------------------------
# For every grid cell pick the value from the time step with the smallest
# absolute difference.  "Non-local": neighbouring cells may come from
# different time steps.  The trailing [0] drops the length-1 time axis.
min_relative_climatology_differences = dummy_lat_lon_cube(
    np.take_along_axis(
        relative_climatology_differences.data,
        np.expand_dims(
            np.argmin(np.abs(relative_climatology_differences.data), axis=0), axis=0
        ),
        axis=0,
    ),
    units=cf_units.Unit("%"),
)[0]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([min_relative_climatology_differences]), (0, 100)
)
cube_plotting(
    min_relative_climatology_differences,
    log=False,
    title="(Best-Case) Non-local Min Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Per-cell worst case ---------------------------------------------------
# As above, but picking the time step with the largest absolute difference.
max_relative_climatology_differences = dummy_lat_lon_cube(
    np.take_along_axis(
        relative_climatology_differences.data,
        np.expand_dims(
            np.argmax(np.abs(relative_climatology_differences.data), axis=0), axis=0
        ),
        axis=0,
    ),
    units=cf_units.Unit("%"),
)[0]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([max_relative_climatology_differences]), (0, 100)
)
_ = cube_plotting(
    max_relative_climatology_differences,
    log=False,
    title="(Worst-Case) Non-local Max Relative Climatology Differences (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

# --- Single time steps with the smallest / largest overall difference -------
# Frobenius norm of every time slice, computed with numpy.ma so that masked
# cells are left out.  np.linalg.norm() converts its input with np.asarray(),
# which drops the mask and would let whatever values are stored underneath
# the masked cells influence which time step is selected.
time_slice_norms = np.ma.sqrt(
    np.ma.sum(relative_climatology_differences.data ** 2, axis=(1, 2))
)

min_relative_climatology_difference = relative_climatology_differences[
    int(np.ma.argmin(time_slice_norms))
]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([min_relative_climatology_difference]), (2, 98)
)
cube_plotting(
    min_relative_climatology_difference,
    log=False,
    title="Min Relative Climatology Difference Single Time (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

max_relative_climatology_difference = relative_climatology_differences[
    int(np.ma.argmax(time_slice_norms))
]
vmin, vmax = get_cubes_vmin_vmax(
    iris.cube.CubeList([max_relative_climatology_difference]), (2, 98)
)
_ = cube_plotting(
    max_relative_climatology_difference,
    log=False,
    title="Max Relative Climatology Difference Single Time (LIS/OTD - ERA5)",
    vmin=vmin,
    vmax=vmax,
)

In [None]:
# Make a deep copy so that the original cubes are preserved.
masked_climatology_datasets = climatology_datasets.copy(deep=True)

# Apply the masks: add both `land_mask` and `fire_mask` (each expanded to the
# cube's shape) to every cube's own mask.
for cube in masked_climatology_datasets.cubes:
    cube.data.mask |= match_shape(land_mask, cube.shape) | match_shape(
        fire_mask, cube.shape
    )

mpl.rcParams["figure.figsize"] = (11, 6)

era5 = "ERA5_CAPEPrecip"
lis = "LIS_OTD_lightning_climatology"

era5_cube = masked_climatology_datasets.select_datasets(
    era5, inplace=False
).dataset.cube
lis_cube = masked_climatology_datasets.select_datasets(lis, inplace=False).dataset.cube

# Give both cubes the union of their masks so that the flattened selections
# taken below pair up element by element.
era5_cube.data.mask |= lis_cube.data.mask
lis_cube.data.mask |= era5_cube.data.mask

# NOTE: normalising each dataset by its mean, and masking values > 2, were
# tried here previously and are intentionally not applied.


def plot_log_hexbin(era5_values, lis_values, axis_limits):
    """Show a hexbin plot of log(LIS/OTD) against log(ERA5).

    Parameters
    ----------
    era5_values, lis_values : array-like
        Paired 1D samples of equal length (x and y values respectively).
    axis_limits : tuple of float
        (lower, upper) limits applied to both axes.

    Pairs in which either value is not strictly positive (including NaN) are
    dropped with a warning: their logarithm is -inf or NaN, which would
    otherwise break the automatically determined hexbin extent.
    """
    era5_values = np.asarray(era5_values)
    lis_values = np.asarray(lis_values)

    positive = (era5_values > 0) & (lis_values > 0)
    n_dropped = positive.size - np.count_nonzero(positive)
    if n_dropped:
        warnings.warn(
            f"Dropping {n_dropped} sample pair(s) with non-positive values "
            "before taking logarithms."
        )

    fig, ax = plt.subplots()
    hexbins = ax.hexbin(
        np.log(era5_values[positive]), np.log(lis_values[positive]), bins="log"
    )
    ax.set_xlabel("ERA5")
    ax.set_ylabel("LIS/OTD")
    ax.axis("equal")
    ax.set_xlim(axis_limits)
    ax.set_ylim(axis_limits)
    fig.colorbar(hexbins, ax=ax)
    plt.show()


# All cells that remain unmasked in both datasets.
era5_data = era5_cube.data.data[~era5_cube.data.mask]
lis_data = lis_cube.data.data[~lis_cube.data.mask]
plot_log_hexbin(era5_data, lis_data, (-30, 0))

# Additionally exclude cells where either dataset falls below its own 1st
# percentile (percentiles are computed over the unmasked values only).
mask1 = era5_cube.data.data < np.percentile(era5_data, 1)
mask1 |= lis_cube.data.data < np.percentile(lis_data, 1)

selected_era5_data = era5_cube.data.data[~(era5_cube.data.mask | mask1)]
selected_lis_data = lis_cube.data.data[~(lis_cube.data.mask | mask1)]
plot_log_hexbin(selected_era5_data, selected_lis_data, (-15, 0))