<img width="50" src="https://carbonplan-assets.s3.amazonaws.com/monogram/dark-small.png" style="margin-left:0px;margin-top:20px"/>

# MTBS Fire Model

_by Jeremy Freeman (CarbonPlan), September 19, 2020_

This notebook loads downsampled MTBS fire data and TerraClimate climate data and
fits a logistic regression model.


In [None]:
import numpy as np
import pandas as pd
import scipy as sp
from carbonplan_forests import load, setup, plot, fit, utils, prepare, collect

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from showit import image, tile
import xarray as xr
from astropy.convolution import Gaussian2DKernel
from astropy.convolution import convolve, convolve_fft
import altair as alt

In [None]:
# Configure plotting through the project's `setup` helper (remote=True).
setup.plotting(remote=True)

In [None]:
%load_ext autoreload
%autoreload 2

First we set some top-level parameters: the level of spatial coarsening (which
can speed up fitting substantially, especially useful when testing), the time
range (MTBS spans 1984 to 2018), and the climatic variables we'll use in the
model.


In [None]:
# Top-level parameters shared by the loading and fitting cells below.

# Spatial coarsening factor passed as `coarsen=` to the `load.*` helpers.
coarsen = 4
# Years requested from `load.terraclim`; starts one year before the fire
# record so that 1983 can later be selected as `climate_prepend`.
full_climate_period = (1983, 2018)
# Years requested from `load.mtbs` and used to slice `climate` before fitting.
analysis_tlim = (1984, 2018)
# Climate variables requested from the climate loaders.
variables = ["ppt", "tmean", "cwd"]  # , "tmax", "tmin"]
# Storage backend identifier passed as `store=` to the loaders.
store = "az"

First we load data from `nlcd`, which we will use to mask out non-land areas, and
data from `nftd` to use as regressors for forest type groups.


In [None]:
# Build a 0/1 float mask from the 2001 NLCD layer: select bands 41, 42, 43 and
# 90, sum them, and keep cells where the summed value exceeds 0.25.
# NOTE(review): these band codes are presumably the NLCD forest / woody-wetland
# classes — confirm against the NLCD legend.
mask = (
    load.nlcd(store=store, year=2001).sel(band=[41, 42, 43, 90]).sum("band")
    > 0.25
).astype("float")

In [None]:
# Load NLCD (2001) and NFTD at the working coarsening level, both with `mask`
# applied. `nftd` is later passed to `prepare.fire` as a regressor source.
nlcd = load.nlcd(store=store, year=2001, coarsen=coarsen, mask=mask)
nftd = load.nftd(store=store, area_threshold=1500, coarsen=coarsen, mask=mask)

In [None]:
# Load the project's mask at the working coarsening level.
# NOTE(review): this name is not referenced again in the visible cells.
coarsened_conus_domain = load.mask(coarsen=coarsen)

Now we load the `terraclim` data from the same time range.


In [None]:
# Load monthly TerraClimate data for `full_climate_period` (which begins one
# year before `analysis_tlim`), restricted to `variables` and `mask`.
climate = load.terraclim(
    store=store,
    tlim=full_climate_period,
    coarsen=coarsen,
    variables=variables,
    mask=mask,
    sampling="monthly",
)

And finally load the `mtbs` data


In [None]:
# Load the MTBS fire record for the analysis years, masked like the climate data.
mtbs = load.mtbs(store=store, coarsen=coarsen, tlim=analysis_tlim, mask=mask)

### Inspecting the data


We can plot the `mtbs` data for a specific year to get a sense of the raw data.


In [None]:
# Show the MTBS data for 2018 only, with the color range fixed to (0, 0.2).
plot.fire.monthly(mtbs.sel(time="2018"), clim=(0, 0.2))

Here is the same thing but averaged over all years


In [None]:
# Same plot for the full MTBS record; a much narrower color range is used.
plot.fire.monthly(mtbs, clim=(0.00005, 0.0015))

We can plot spatially averaged data over time to see both seasonal trends and
the fact that fire frequency and magnitude have increased over time.


In [None]:
# Spatially averaged monthly fire signal plotted against time.
x = mtbs["time"]
y = mtbs["monthly"].mean(["x", "y"])

plot.line(
    x=x, y=y, width=900, height=200, color="rgb(175,91,92)", strokeWidth=2
)

We can summarize the data by plotting annual, seasonal, and spatial trends by
averaging across different groupings and dimensions.


In [None]:
# Summary plot of the MTBS data with the color range fixed to (0, 0.005).
plot.fire.summary(mtbs, clim=(0, 0.005))

As a motivation for fitting fires as functions of climatic variables, we can
look at spatially averaged climatic variables alongside fires, e.g. average
temperature, and see that there is a clear relationship (in this case a positive
correlation).


In [None]:
# Compare annual fire totals with annual maximum mean temperature, both
# spatially averaged and z-scored so they share one axis.

# `sp.stats` is only guaranteed to exist once the submodule has been imported.
import scipy.stats  # noqa: F401

# Annual fire total per pixel, then the spatial mean.
y1 = mtbs["monthly"].groupby("time.year").sum().mean(["x", "y"])
# `climate` is loaded from `full_climate_period` (one year earlier than
# `mtbs`), so restrict it to the analysis years first; otherwise the annual
# series has one more entry than `x` and `y1`.
y2 = (
    climate["tmean"]
    .sel(time=slice(*[str(t) for t in analysis_tlim]))
    .groupby("time.year")
    .max()
    .mean(["x", "y"])
)
# Take the year axis from the grouped result instead of averaging the whole
# dataset a second time just to read its coordinate.
x = y1["year"]

(
    plot.line(x=x, y=sp.stats.zscore(y1), color="rgb(175,91,92)")
    + plot.line(x=x, y=sp.stats.zscore(y2), color="rgb(175,91,92)", opacity=0.5)
)

# np.corrcoef(y1,y2)[0,1]

# Tmean precip comparisons


In [None]:
# Time window used by the single-pixel comparison plot below.
sl = slice("1984", "2008")

In [None]:
# Climate at one grid cell (integer index 25 along both x and y).
single_pixel = climate.isel(x=25, y=25)

In [None]:
# Single-pixel tmean over `sl`, drawn three ways on one figure:
#   "groupby" - calendar-year maximum, repeated 12x to span each year's months
#   "rolling" - trailing 12-month maximum
#   "raw"     - the monthly series itself
plt.figure(figsize=(15, 8))
xr.DataArray(
    np.repeat(
        single_pixel["tmean"].sel(time=sl).groupby("time.year").max().values, 12
    ),
    coords=single_pixel.sel(time=sl).coords,
).plot(label="groupby")
# The rolling maximum is computed on the full series before selecting `sl`.
single_pixel["tmean"].rolling(time=12).max().sel(time=sl).plot(label="rolling")
single_pixel["tmean"].sel(time=sl).plot(label="raw", alpha=0.5)
plt.legend()
plt.ylim(0, 18)

In [None]:
# Two ways of collapsing tmean to one value per calendar year. Each annual
# series is repeated 12x and given the coordinates of `single_pixel`.

# (a) annual maximum at every pixel first, then the spatial mean
ts = climate["tmean"].groupby("time.year").max().mean(["x", "y"]).values
groupby_global_max_then_mean = xr.DataArray(
    data=ts.repeat(12),
    coords=single_pixel.coords,
)

# (b) spatial mean first, then the annual maximum of that single series
ts = climate["tmean"].mean(["x", "y"]).groupby("time.year").max().values
groupby_global_mean_then_max = xr.DataArray(
    data=ts.repeat(12),
    coords=single_pixel.coords,
)

In [None]:
# Overlay the two annual aggregates to see how the order of operations matters.
groupby_global_mean_then_max.plot(label="mean then max")
groupby_global_max_then_mean.plot(label="max then mean")
plt.legend()

In [None]:
# Window inspected in this and the following cells. It is defined here because
# this is the first cell that uses it; on a top-to-bottom run the name would
# otherwise not exist yet.
inspection_slice = slice("1984", "1990")

# Trailing 12-month maximum of the spatially averaged tmean. `min_periods=8`
# lets a window produce a value once 8 of its 12 months are available.
rolling_global_max = (
    climate["tmean"]
    .mean(dim=["x", "y"])
    .rolling(dim={"time": 12}, min_periods=8, center=False)
    .max()
    .sel(time=inspection_slice)
)

In [None]:
# Compare the rolling maximum, the calendar-year ("groupby") maximum and the
# raw spatial-mean tmean over a short inspection window.
plt.figure(figsize=(10, 8))
inspection_slice = slice("1984", "1990")
rolling_global_max.sel(time=inspection_slice).plot(label="rolling")
# groupby_global_max_annual.sel(time=inspection_slice).plot(label='groupby')

groupby_global_mean_then_max.sel(time=inspection_slice).plot(label="groupby")
climate["tmean"].sel(time=inspection_slice).mean(dim=["x", "y"]).plot(
    label="raw"
)
plt.ylim(16, 24)
plt.legend()

In [None]:
# Trailing 12-month precipitation sum, spatially averaged, over the full record.
climate["ppt"].rolling(time=12, center=False).sum().mean(dim=["x", "y"]).plot()

In [None]:
# Map of the trailing 12-month tmean maximum at time index 30.
climate["tmean"].rolling(time=12, center=False).max().isel(time=30).plot()

In [None]:
# Map of year-start-resampled mean precipitation for the fourth year in the
# record. "YS" is the year-start alias; the older spelling "AS" is deprecated
# in recent pandas releases.
climate["ppt"].resample(time="YS").mean().isel(time=3).plot()

In [None]:
# Calendar-year precipitation totals, spatially averaged.
climate["ppt"].groupby("time.year").sum().mean(dim=["x", "y"]).plot()

In [None]:
# Precipitation over `inspection_slice`: trailing 12-month sum ("rolling")
# against calendar-year totals repeated across each year's months ("groupby").

# The rolling sum runs over the full record and is windowed afterwards.
rolling = (
    climate["ppt"]
    .rolling(time=12, center=False)
    .sum()
    .mean(["x", "y"])
    .sel(time=inspection_slice)
)
rolling.plot(label="rolling")

# Annual totals are computed from the already-windowed data, then stretched
# to monthly length so they can reuse the coordinates of `rolling`.
xr.DataArray(
    data=(
        climate["ppt"]
        .sel(time=inspection_slice)
        .groupby("time.year")
        .sum()
        .mean(["x", "y"])
        .values.repeat(12)
    ),
    coords=rolling.coords,
).plot(label="groupby")
plt.legend()

# Original


In [None]:
# Quick look at the tmean field at time index 3.
climate.isel(time=3).tmean.plot()

In [None]:
# NOTE(review): this slice is not referenced in the visible cells; the fit
# cells below pass an integer `rolling_period=` keyword instead.
rolling_period = slice("1984", "2018")

In [None]:
# Baseline fit: global tmean/ppt trends are requested with no prepended
# climate and no Gaussian kernel, and `rolling_period` is left unspecified.
# Pipeline: prepare.fire -> zscore_2d -> fit.hurdle -> predict -> collect.fire
# -> full_eval, whose chart is displayed as the cell output.
# NOTE(review): `climate_prepend` is assigned but every "climate_prepend"
# entry passed below is None, so it has no effect in this cell.
climate_prepend = climate.sel(time=slice("1983", "1983"))
x, y = prepare.fire(
    climate.sel(time=slice(*[str(t) for t in analysis_tlim])),  #
    nftd,
    mtbs,
    add_global_climate_trends={
        "tmean": {"climate_prepend": None, "gaussian_kernel_size": None},
        "ppt": {"climate_prepend": None, "gaussian_kernel_size": None},
    },
    add_local_climate_trends=None,
)
#                     {'tmean': {'climate_prepend': None,
#                                                        'gaussian_kernel_size': None},
#                                                 'ppt': {'climate_prepend': None,
#                                                        'gaussian_kernel_size': None}},)
#                    rolling_period=12)
# Keep the mean/std so the same scaling can be applied to other data later.
x_z, x_mean, x_std = utils.zscore_2d(x)
model = fit.hurdle(x_z, y, log=False)
yhat = model.predict(x_z)
prediction_original = collect.fire(yhat, mtbs)
metrics, chart = plot.fire.full_eval(
    mtbs,
    prediction_original,
    "monthly",
    "prediction",
    clim=(-0.08, 0.08),
    cmap="purplegreen",
)
chart

In [None]:
# Variant fit: the 1983 climate is passed as `climate_prepend` for both tmean
# and ppt global trends, with `rolling_period=8`.
# NOTE(review): the result is stored in `prediction_original`, the same name
# the baseline cell uses, so running this cell replaces the baseline result.
climate_prepend = climate.sel(time=slice("1983", "1983"))
x, y = prepare.fire(
    climate.sel(time=slice(*[str(t) for t in analysis_tlim])),  #
    nftd,
    mtbs,
    add_global_climate_trends={
        "tmean": {
            "climate_prepend": climate_prepend,
            "gaussian_kernel_size": None,
        },
        "ppt": {
            "climate_prepend": climate_prepend,
            "gaussian_kernel_size": None,
        },
    },
    add_local_climate_trends=None,
    #                     {'tmean': {'climate_prepend': None,
    #                                                        'gaussian_kernel_size': None},
    #                                                 'ppt': {'climate_prepend': None,
    #                                                        'gaussian_kernel_size': None}},)
    rolling_period=8,
)
# Keep the mean/std so the same scaling can be applied to other data later.
x_z, x_mean, x_std = utils.zscore_2d(x)
model = fit.hurdle(x_z, y, log=False)
yhat = model.predict(x_z)
prediction_original = collect.fire(yhat, mtbs)
metrics, chart = plot.fire.full_eval(
    mtbs,
    prediction_original,
    "monthly",
    "prediction",
    clim=(-0.08, 0.08),
    cmap="purplegreen",
)
chart

In [None]:
# Fit used by the rest of the notebook: 1983 climate prepended for the tmean
# and ppt global trends, with `rolling_period=11`. The names this cell leaves
# behind (`model`, `x_mean`, `x_std`, `prediction`) are read by later cells.
climate_prepend = climate.sel(time=slice("1983", "1983"))
x, y = prepare.fire(
    climate.sel(time=slice(*[str(t) for t in analysis_tlim])),  #
    nftd,
    mtbs,
    add_global_climate_trends={
        "tmean": {
            "climate_prepend": climate_prepend,
            "gaussian_kernel_size": None,
        },
        "ppt": {
            "climate_prepend": climate_prepend,
            "gaussian_kernel_size": None,
        },
    },
    add_local_climate_trends=None,
    #                     {'tmean': {'climate_prepend': None,
    #                                                        'gaussian_kernel_size': None},
    #                                                 'ppt': {'climate_prepend': None,
    #                                                        'gaussian_kernel_size': None}},)
    rolling_period=11,
)
# `x_mean` / `x_std` are reused to scale the future-climate regressors.
x_z, x_mean, x_std = utils.zscore_2d(x)
model = fit.hurdle(x_z, y, log=False)
yhat = model.predict(x_z)
prediction = collect.fire(yhat, mtbs)

In [None]:
# Plot the predictions with the same color range used for the all-years MTBS plot.
plot.fire.monthly(prediction, "prediction", clim=(0.00005, 0.0015))

In [None]:
# Evaluation plot of the observed "monthly" field against the "prediction" field.
plot.fire.evaluation(mtbs, prediction, "monthly", "prediction", clim=(0, 0.005))

In [None]:
# Display the mean returned by `utils.zscore_2d` for the training regressors.
x_mean

We can also compare the predicted and real probabilities over time.


In [None]:
# Spatially averaged observed and predicted series on a shared time axis.
# The observed line is drawn at half opacity, the predicted line at full.
x = mtbs["time"]
y1 = mtbs["monthly"].mean(["x", "y"])
y2 = prediction["prediction"].mean(["x", "y"])

plot.line(
    x=x,
    y=y1,
    color="rgb(175,91,92)",
    opacity=0.5,
    strokeWidth=2,
    width=900,
    height=200,
) + plot.line(
    x=x,
    y=y2,
    color="rgb(175,91,92)",
    strokeWidth=2,
    width=900,
    height=200,
)

Given that we fit a logistic regression, the natural model performance metric is
area under the ROC curve, which we computed on the training data (you can use
the variable `crossval` to repeat the model fit and compute a score on 25% held
out data).


Finally, we can compute correlations between the observations and the model
predictions for annual, seasonal, and spatial trends. Note that these were not
metrics used to explicitly fit the model, but in general a better fitting model
ought to reproduce at least some of these trends.


In [None]:
# Pearson correlation between the observed and predicted annual means, each
# averaged over space; the off-diagonal entry of the 2x2 matrix is shown.
print("correlation of annual trends")
np.corrcoef(
    *[
        ds[name].groupby("time.year").mean().mean(["x", "y"])
        for ds, name in ((mtbs, "monthly"), (prediction, "prediction"))
    ]
)[0, 1]

In [None]:
# Pearson correlation between the observed and predicted month-of-year means,
# each averaged over space.
print("correlation of monthly trends")
np.corrcoef(
    *[
        ds[name].groupby("time.month").mean().mean(["x", "y"])
        for ds, name in ((mtbs, "monthly"), (prediction, "prediction"))
    ]
)[0, 1]

In [None]:
# Pearson correlation between the time-averaged observed and predicted maps,
# using only the cells where both maps hold a value.
print("correlation of spatial trends")
a = mtbs["monthly"].mean(dim="time").values.flatten()
b = prediction["prediction"].mean(dim="time").values.flatten()
# A cell is kept when neither map is NaN there.
inds = ~(np.isnan(a) | np.isnan(b))
np.corrcoef(a[inds], b[inds])[0, 1]

### Prediction on future climate data


In [None]:
# GCM name later passed to `load.cmip(model=...)`.
cmip_model = "CanESM5"
# Emissions scenario; reassigned again before `load.cmip` is called.
scenario = "ssp245"
# NOTE(review): `target` is not referenced in the visible cells.
target = 2025

In [None]:
# (model name, ensemble member) pairs; both parts are interpolated into the
# zarr store prefix when the stores are opened below.
cmip_models = [
    ("CanESM5-CanOE", "r3i1p2f1"),
    ("MIROC-ES2L", "r1i1p1f2"),  #
    ("ACCESS-CM2", "r1i1p1f1"),  #
    ("ACCESS-ESM1-5", "r10i1p1f1"),
    ("MRI-ESM2-0", "r1i1p1f1"),
    ("MPI-ESM1-2-LR", "r10i1p1f1"),
]

In [None]:
import zarr

In [None]:
# Parameters describing one downscaled CMIP6 run (the first entry of
# `cmip_models`). The GCM name is stored as `gcm` rather than `model`, because
# `model` holds the fitted estimator whose `.predict` is called on the
# future-climate regressors later in the notebook.
downscaling = "bias-corrected"
sampling = "monthly"
gcm = "CanESM5-CanOE"
scenario = "ssp245"
member = "r3i1p2f1"
store = "az"

In [None]:
# Open the quantile-mapped monthly zarr store for every (GCM, member) pair in
# `cmip_models` under each scenario. Only the last opened dataset stays bound
# to `ds`.
downscaling = "quantile-mapping"
sampling = "monthly"

# Only the Azure-backed store is handled; fail with a clear message instead of
# reaching `xr.open_zarr` with `mapper` undefined.
if store != "az":
    raise ValueError(f"unsupported store {store!r}; only 'az' is handled here")

# The loop variable is `gcm`, not `model`, so iterating does not rebind the
# fitted estimator that is used for prediction later in the notebook.
for gcm, member in cmip_models:
    for scenario in ["historical", "ssp245", "ssp370", "ssp585"]:
        prefix = (
            f"cmip6/{downscaling}/conus/4000m/{sampling}/"
            f"{gcm}.{scenario}.{member}.zarr"
        )
        mapper = zarr.storage.ABSStore(
            "carbonplan-downscaling",
            prefix=prefix,
            account_name="carbonplan",
        )
        ds = xr.open_zarr(mapper, consolidated=True)

In [None]:
# Load monthly CMIP6 projections for 2020-2100 under ssp370.
# The GCM is selected by `cmip_model`. This cell does not assign to `model`:
# that name holds the fitted estimator whose `.predict` is applied to these
# data further down, and a string there would break the prediction step.
# NOTE(review): "ACCESS-CM2" (member r1i1p1f1) appears to have been the GCM of
# interest here; if so, set `cmip_model` accordingly — confirm.
# NOTE(review): `coarsen=16` differs from the `coarsen` used for `nftd` and
# the training climate — confirm the grids passed to `prepare.fire` match.
scenario = "ssp370"
future_climate = load.cmip(
    store=store,
    model=cmip_model,
    coarsen=16,
    scenario=scenario,
    tlim=(2020, 2100),
    variables=variables,
    mask=mask,
    sampling="monthly",
)

In [None]:
# Build regressors for the projected climate (no fire target is passed;
# eval_only=True) and scale them with the mean/std obtained from the training
# regressors instead of recomputing the statistics.
# NOTE(review): the training cells passed `add_global_climate_trends` and
# `rolling_period` to `prepare.fire` and this call does not — confirm that the
# resulting feature columns line up with what the model was fitted on.
x = prepare.fire(future_climate, nftd, eval_only=True)
x_z = utils.zscore_2d(x, mean=x_mean, std=x_std)

In [None]:
# Apply the fitted model to the future regressors and collect the result
# against `future_climate`.
# NOTE(review): `model` must still be the estimator returned by `fit.hurdle`
# at this point; it must not have been rebound (e.g. to a CMIP model-name
# string) in between.
yhat = model.predict(x_z)
prediction = collect.fire(yhat, future_climate)

In [None]:
# Summary plot of the future predictions with a fixed color range.
plot.fire.summary(prediction, "prediction", clim=(0.0006, 0.008))

In [None]:
# Spatially averaged predicted series over the projection period.
x = prediction["time"]
y2 = prediction["prediction"].mean(["x", "y"])

(
    plot.line(
        x=x, y=y2, width=900, height=200, color="rgb(175,91,92)", strokeWidth=2
    )
)