In [None]:
import logging
import warnings
from copy import copy, deepcopy
from functools import partial, reduce
from itertools import islice
from pprint import pprint

import cartopy.crs as ccrs
import cf_units
import iris
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from wildfires.analysis.plotting import cube_plotting
from wildfires.data.cube_aggregation import Datasets, prepare_selection
from wildfires.data.datasets import (
    MCD64CMQ_C6,
    CCI_BurnedArea_MERIS_4_1,
    CCI_BurnedArea_MODIS_5_1,
    GFEDv4,
    GFEDv4s,
    regions_GFED,
    translate_longitudes,
)
from wildfires.logging_config import enable_logging
from wildfires.utils import get_masked_array, get_ncpus, get_unmasked
from wildfires.utils import land_mask as get_land_mask
from wildfires.utils import match_shape, polygon_mask, select_valid_subset

In [None]:
# Switch on the project's logging configuration named "jupyter".
enable_logging("jupyter")
# Silence two known, noisy warnings. `filterwarnings` matches its regular expression
# against the start of the warning message, hence the leading `.*` in both patterns.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")
# The pattern ends in `.*` like the one above; a bare trailing `*` would merely make
# the final "S" of the constant's name optional.
warnings.filterwarnings("ignore", ".*DEFAULT_SPHERICAL_EARTH_RADIUS.*")

In [None]:
# Instantiate each burned area (BA) dataset class and keep only the listed BA variables.
fire_datasets = Datasets(
    map(
        lambda fire_dataset: fire_dataset(),
        (
            GFEDv4s,
            GFEDv4,
            CCI_BurnedArea_MODIS_5_1,
            MCD64CMQ_C6,
            CCI_BurnedArea_MERIS_4_1,
        ),
    )
).select_variables(
    ["CCI MODIS BA", "GFED4 BA", "GFED4s BA", "MCD64CMQ BA", "CCI MERIS BA"]
)

# With `which="all"` three selections are returned: monthly, mean and climatology.
monthly, mean, climatology = prepare_selection(fire_datasets, which="all")
pprint(list(monthly))

# Inverted land mask - presumably True over non-land cells, i.e. usable directly as a
# "mask this out" array. NOTE(review): confirm the polarity of `get_land_mask()`.
land_mask = ~get_land_mask()

# True for cells where every monthly dataset is (close to) zero at every index along
# axis 0 (presumably time). This is computed before the masks are modified below.
no_fire_mask = np.all(
    reduce(
        np.logical_and,
        map(partial(np.isclose, b=0), (cube.data for cube in monthly.cubes)),
    ),
    axis=0,
)

# Apply the combined land / no-fire mask to every cube of every selection.
# The loop variable is deliberately NOT called `fire_datasets`, so that the selection
# created at the top of this cell is not silently re-bound to `climatology`.
for selection in (monthly, mean, climatology):
    selection.homogenise_masks()
    for cube in selection.cubes:
        # Bring both masks to the cube's shape, OR them together and add the result
        # to the cube's existing mask.
        cube.data.mask |= reduce(
            np.logical_or,
            map(
                partial(match_shape, target_shape=cube.shape), (land_mask, no_fire_mask)
            ),
        )

In [None]:
# For each monthly dataset, list the distinct values taken by the per-cell masked
# fraction, i.e. the mask (as 0.0 / 1.0) averaged along axis 0.
for name, cube in zip(monthly.pretty_variable_names, monthly.cubes):
    masked_fraction = cube.data.mask.astype(float).mean(axis=0)
    print(name, np.unique(masked_fraction))

In [None]:
mpl.rcParams["figure.figsize"] = (14, 6)
for name, cube in zip(monthly.pretty_variable_names, monthly.cubes):
    # Swap the burned area values for the mask itself (1.0 = masked, 0.0 = valid), so
    # that the mean over time is the fraction of time steps for which a cell is masked.
    masked_fraction = cube.copy()
    masked_fraction.data = cube.data.mask.astype(float)
    masked_fraction = masked_fraction.collapsed("time", iris.analysis.MEAN)
    # A fraction is dimensionless.
    masked_fraction.units = cf_units.Unit("1")

    cube_plotting(masked_fraction, title=f"{name} Average Masked Fraction")

In [None]:
mpl.rcParams["figure.figsize"] = (14, 6)
for name, cube in zip(mean.pretty_variable_names, mean.cubes):
    # Area-weighted spatial mean of the (masked) dataset, shown in the plot title.
    cell_weights = iris.analysis.cartography.area_weights(cube)
    spatial_mean = cube.collapsed(
        ("latitude", "longitude"), iris.analysis.MEAN, weights=cell_weights
    )
    cube_plotting(cube, log=True, title=f"{name} Average: {spatial_mean.data:0.5f}")

## Seasonality

Seasonal variation of the burned area in each dataset, shown separately for every GFED region.

In [None]:
regions = regions_GFED()
regions_dict = regions.attributes["regions"].copy()
# Skip region index 0, i.e. the ocean.
del regions_dict[0]

# One figure per region: climatological time series (left), multi-dataset mean map
# (middle) and the relative spread between datasets (right).
for region_index, region_name in sorted(regions_dict.items()):
    # True everywhere outside the current region, so it can be OR-ed into data masks.
    region_mask = regions.data != region_index

    fig = plt.figure(figsize=(26, 6.5))
    axes = [plt.subplot(1, 3, 1)]

    # Deep copies are used so that applying the region mask does not modify the cubes
    # held by `climatology`.
    for cube, name in zip(
        deepcopy(climatology.cubes), climatology.pretty_variable_names
    ):
        cube.data.mask |= match_shape(region_mask, cube.shape)
        # Area-weighted regional mean for each of the 12 climatological months.
        axes[0].plot(
            range(1, 13),
            cube.collapsed(
                ("latitude", "longitude"),
                iris.analysis.MEAN,
                weights=iris.analysis.cartography.area_weights(cube),
            ).data,
            label=name,
        )
    axes[0].set_title("Climatological Time Dependence")
    axes[0].legend(loc="best")
    axes[0].set_ylabel("Average Burned Area Fraction")
    axes[0].set_xlabel("Month")
    axes[0].set_yscale("log")

    # Add the two remaining axes with a map projection centred on the selected region.
    # The first mean cube, restricted to the region, is only used to determine the
    # longitudes spanned by valid data.
    # NOTE(review): this runs before the "no data" check below - confirm that
    # `select_valid_subset` copes with a region that has no valid cells.
    cube = deepcopy(mean.cubes[0])
    cube.data.mask |= match_shape(region_mask, cube.shape)
    cube, longitudes = select_valid_subset(
        cube, longitudes=cube.coord("longitude").points
    )

    central_longitude = np.mean(longitudes)

    axes.extend(
        [
            plt.subplot(
                1, 3, 2, projection=ccrs.Robinson(central_longitude=central_longitude)
            ),
            plt.subplot(
                1, 3, 3, projection=ccrs.Robinson(central_longitude=central_longitude)
            ),
        ]
    )

    # Stack the per-dataset mean fields along a new leading axis.
    # `np.ma.vstack` must be used here: plain `np.vstack` returns a MaskedArray but
    # does not preserve the input masks, so masked-out cells would otherwise enter
    # the statistics below. The stacked result is a new array, so the inputs do not
    # need to be copied first.
    all_mean_data = np.ma.vstack(
        tuple(cube.data[np.newaxis] for cube in mean.cubes)
    )
    all_mean_data.mask |= match_shape(region_mask, all_mean_data.shape)

    if np.all(all_mean_data.mask):
        print(f"No data for {region_name}")
    else:
        # Plot the mean and standard deviation across datasets for the selected region.
        rmean = all_mean_data.mean(axis=0)
        rstd = all_mean_data.std(axis=0)
        cube_plotting(
            rmean,
            ax=axes[1],
            log=True,
            title=None,
            select_valid=True,
            orientation="horizontal",
            label="1",
        )
        axes[1].set_title(f"Mean BA {region_name} {len(mean.cubes)} Datasets")
        # NOTE: This is an underestimation, as the monthly (raw) data has already been averaged!
        cube_plotting(
            rstd / rmean,
            ax=axes[2],
            log=True,
            title=None,
            select_valid=True,
            orientation="horizontal",
            label="1",
        )
        axes[2].set_title(f"BA STD / MEAN {region_name} {len(mean.cubes)} Datasets")