<img width="50" src="https://carbonplan-assets.s3.amazonaws.com/monogram/dark-small.png" style="margin-left:0px;margin-top:20px"/>

# MTBS Fire Model

_by Jeremy Freeman (CarbonPlan), September 19, 2020_

This notebook loads downsampled MTBS fire data and TerraClimate climate data and
fits a logistic regression model.


In [None]:
import numpy as np
import pandas as pd
import scipy as sp
from carbonplan_forests import load, setup, plot, fit, utils, prepare, collect
import xarray as xr

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from showit import image, tile

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Data store to load from.
# NOTE(review): this value is overwritten by `store = "local"` in the
# parameters cell further down before any loader reads it, so "az" is not what
# the loaders receive when the cells are run in order.
store = "az"

In [None]:
import altair as alt

# Select the Altair theme by its registered name.
# NOTE(review): "carbonplan_light" is not registered anywhere in this notebook;
# presumably one of the carbonplan imports registers it — confirm.
alt.themes.enable("carbonplan_light")

### wind analysis code


In [None]:
# NOTE: everything in this cell is disabled (commented out) and kept for
# reference only. As written it loads gridMET wind speed for 1984-2018, takes
# the per-month maximum within each year, derives a multi-year mean
# (climatology) and a multi-year maximum, and regrids the climatology and the
# full series onto the 4000 m CONUS grid.
# NOTE(review): re-enabling it would also need `fsspec` and `xe` (presumably
# xesmf — confirm), neither of which is imported in this notebook.

# from carbonplan.data import cat
# ds_dict = {}
# for year in np.arange(1984,2019):
#     print(year)
#     ds_dict[year] = cat.gridmet.raw_gridmet(variable='vs',
#                             year=year).to_dask().wind_speed.groupby('day.month').max().load()

# ds_concat = xr.concat(ds_dict.values(), dim='year')#, coords=ds_dict.keys())

# ds_concat = ds_concat.assign_coords({'year': list(ds_dict.keys())})

# full_ds_wind = ds_concat.stack(time=('year', 'month'))

# full_ds_wind = full_ds_wind.transpose('time', 'lat', 'lon')

# wind_climatology = ds_concat.mean(dim='year')

# monthly_max = ds_concat.max(dim='year')
# wind_climatology.to_netcdf('wind_climatology.nc')
# monthly_max.to_netcdf('period_max.nc')
# target_grid = "gs://carbonplan-data/processed/grids/conus/4000m/domain.zarr"
# mapper = fsspec.get_mapper(target_grid)
# target_ds = xr.open_zarr(
#     mapper, consolidated=True)
# regridder = xe.Regridder(wind_climatology, target_ds, method='bilinear', reuse_weights=True)
# regridded_wind = regridder(wind_climatology)
# regridded_full_wind = regridder(full_ds_wind)
# regridded_full_wind = regridded_full_wind.assign_coords({'x': target_ds.x,
#                              'y': target_ds.y})
# regridded_wind = regridded_wind.assign_coords({'x': target_ds.x,
#                              'y': target_ds.y})

First we set some top-level parameters: the level of spatial coarsening (which
can speed up fitting substantially, especially useful when testing), the time
range (MTBS spans 1984 to 2018), and the climatic variables we'll use in the
model.


In [None]:
# Top-level parameters shared by the loading and analysis cells below.
coarsen = 16  # spatial coarsening factor passed to each loader
tlim = (1984, 2018)  # first and last year of the analysis period
variables = ["ppt", "tmean", "cwd", "pdsi", "tmax", "tmin"]  # climate variables
store = "local"  # data store passed to each loader

# Temperature variable read by the correlation cells further down as
# `climate[temp_var]`; those cells referenced it without it ever being assigned.
# NOTE(review): "tmean" matches the "average temperature" wording in the text;
# switch to "tmax" if the maximum-temperature series was intended.
temp_var = "tmean"

First we load data from `nlcd` (National Land Cover Database), which we will use
to mask out non-land areas, and data from `nftd` (National Forest Type Database)
to use as regressors for forest type groups.


In [None]:
# Load the mask for year 2001; it is passed to every loader below.
mask = load.mask(store=store, year=2001)

In [None]:
# Land cover (NLCD, 2001) and forest type groups (NFTD), both loaded with the
# same store, coarsening factor, and mask.
grid_kwargs = dict(store=store, coarsen=coarsen, mask=mask)
nlcd = load.nlcd(year=2001, **grid_kwargs)
nftd = load.nftd(area_threshold=1500, **grid_kwargs)

Now we load the `terraclim` climate data from the same time range.


In [None]:
# Monthly TerraClimate fields for the requested variables and time range.
climate = load.terraclim(
    variables=variables,
    tlim=tlim,
    sampling="monthly",
    store=store,
    coarsen=coarsen,
    mask=mask,
)
# Replace the x/y coordinates with the ones from `nftd` so the climate fields
# carry exactly the same coordinate values as the forest-type data.
climate = climate.assign_coords(x=nftd.x, y=nftd.y)

And finally we load the `mtbs` data (Monitoring Trends in Burn Severity), which
will give us a record of all the fires we'll want to model.


In [None]:
# MTBS fire record, loaded with the same store, coarsening factor, time range,
# and mask as the other inputs.
mtbs = load.mtbs(store=store, coarsen=coarsen, tlim=tlim, mask=mask)

### Inspecting the data


We can plot the `mtbs` data for a specific year to get a sense of the raw data.


In [None]:
# Restrict the fire record to 2018, then plot it with a fixed color range.
fires_2018 = mtbs.sel(time="2018")
plot.fire.monthly(fires_2018, clim=(0, 0.2))

Here is the same thing but averaged over all years


In [None]:
# Plot the fire data for the whole record; the color limits are much narrower
# than the (0, 0.2) range used for the single-year plot.
plot.fire.monthly(mtbs, clim=(0.00005, 0.0015))

We can plot spatially averaged data over time to see both seasonal trends and
the fact that fire frequency and magnitude have increased over time.


In [None]:
# Spatial mean of the monthly fire data at every time step, drawn as a wide,
# short line chart.
y = mtbs["monthly"].mean(dim=["x", "y"])
x = mtbs["time"]

plot.line(
    x=x,
    y=y,
    width=900,
    height=200,
    color="rgb(175,91,92)",
    strokeWidth=2,
)

We can summarize the data by plotting annual, seasonal, and spatial trends by
averaging across different groupings and dimensions.


In [None]:
# Summary view of the observed fire record, with the color range capped at 0.005.
plot.fire.summary(mtbs, clim=(0, 0.005))

As a motivation for fitting fires as functions of climatic variables, we can
look at spatially averaged climatic variables alongside fires, e.g. average
temperature, and see that there is a clear relationship (in this case a positive
correlation).


In [None]:
# Imported explicitly: `import scipy as sp` alone does not guarantee that the
# `scipy.stats` submodule is loaded, so `sp.stats.zscore` can raise
# AttributeError depending on the SciPy version and on what else was imported.
from scipy import stats

# One value per calendar year: the year labels, the fire data (monthly values
# summed within each year, then averaged over space), and the annual maximum
# of the chosen temperature variable (also averaged over space).
x = mtbs.groupby("time.year").mean()["year"]
y1 = mtbs["monthly"].groupby("time.year").sum().mean(["x", "y"])
y2 = climate[temp_var].groupby("time.year").max().mean(["x", "y"])

# Z-score both series so they can share one axis; the temperature line uses
# the same color at half opacity.
(
    plot.line(x=x, y=stats.zscore(y1), color="rgb(175,91,92)")
    + plot.line(x=x, y=stats.zscore(y2), color="rgb(175,91,92)", opacity=0.5)
)

# np.corrcoef(y1,y2)[0,1]

We could even look at how fires at the individual pixel level are related to the
annual temperature (since we will likely include larger scale climate averages
as variables in the model.) So we'll take the correlation along the time
dimension between the CONUS average temperature (`y2` from above) and the `mtbs`
monthly data.


All this gives us is a measure of how correlated a given location is with the
broader climate.


If instead we wanted to look at how correlated the fires were with their
specific climate maxes....


In [None]:
# Hacky way to make every month look like the annual average

In [None]:
# Annual maximum of the temperature variable at every pixel, broadcast back
# onto the monthly time axis so it can be correlated with the monthly fire data.
# Each yearly value is stamped with every 12th monthly timestamp (a stride
# rather than a hard-coded 0..420 range, so the cell keeps working if `tlim`
# changes) and then forward-filled month by month.
# NOTE(review): assumes the monthly axis starts in the first month of a year.
max_temp = (
    climate[temp_var]
    .groupby("time.year")
    .max()
    .assign_coords({"year": mtbs["monthly"].time[::12].values})
    .rename({"year": "time"})
    .resample(time="MS")
    .ffill()
)
# Reindex onto the full monthly axis so the months after the last yearly stamp
# are forward-filled as well.
max_temp = max_temp.reindex({"time": mtbs["monthly"].time}, method="ffill")
# Same shape as `max_temp`, but every pixel holds the spatial mean.
conus_average_max_temp = xr.ones_like(max_temp) * max_temp.mean(dim=["x", "y"])

# Per-pixel correlation over time between fire and (a) the CONUS-average and
# (b) the local annual-max temperature. These mirror the precipitation cells
# and provide the `*_temp_corr` maps that the later plotting cells read.
conus_average_temp_corr = xr.corr(
    mtbs["monthly"], conus_average_max_temp, dim="time"
)
local_temp_corr = xr.corr(mtbs["monthly"], max_temp, dim="time")

In [None]:
# Display the model object.
# NOTE(review): `model` is not assigned in any cell shown in this notebook —
# the fitting cell appears to be missing; confirm before running top to bottom.
model

In [None]:
# Predict from the model inputs, then collect the predictions alongside the
# observed fire data (presumably reshaped to match `mtbs` — confirm).
# NOTE(review): `x_z`, like `model`, is not assigned in any cell shown here.
yhat = model.predict(x_z)
prediction = collect.fire(yhat, mtbs)

In [None]:
# Plot the `prediction` variable with the same color limits used for the
# observed all-years plot.
plot.fire.monthly(prediction, "prediction", clim=(0.00005, 0.0015))

In [None]:
# Compare the observed data (`monthly` in `mtbs`) against the model output
# (`prediction` in `prediction`).
plot.fire.evaluation(mtbs, prediction, "monthly", "prediction", clim=(0, 0.005))

Then we can look at the same analysis except make it local temperature changes.
This will likely have a stronger climate change signal.


In [None]:
# Map the per-pixel correlation between fire and local temperature.
# NOTE(review): `local_temp_corr` is not assigned in any cell shown in this
# notebook as written — confirm where it is computed.
plot.fire.simple_map(
    local_temp_corr, clabel="correlation", clim=(-0.2, 0.2), cmap="purplegreen"
)

We can subtract the two to see where we have differences between the different
methods.


So, we deduce that the local changes in fire _are_ more strongly correlated with
the local changes in temperature. But mainly in Texas. We could do a difference
between the two to see the regions that would likely be impacted if we were to
change something in the model.


In [None]:
# Positive values mark pixels where fire tracks local temperature more closely
# than it tracks the CONUS-average temperature.
temp_corr_difference = local_temp_corr - conus_average_temp_corr
plot.fire.simple_map(
    temp_corr_difference,
    cmap="purplegreen",
    clim=(-0.2, 0.2),
    clabel="correlation",
)

Hmmm now I'm a little confused. Let's try it with precip.


Let's try out precipitation now too!


In [None]:
# Annual maximum precipitation at every pixel, broadcast back onto the monthly
# time axis so it can be correlated with the monthly fire data. Each yearly
# value is stamped with every 12th monthly timestamp (a stride rather than a
# hard-coded 0..420 range, so the cell keeps working if `tlim` changes) and
# then forward-filled month by month.
# NOTE(review): assumes the monthly axis starts in the first month of a year.
max_var = (
    climate["ppt"]
    .groupby("time.year")
    .max()
    .assign_coords({"year": mtbs["monthly"].time[::12].values})
    .rename({"year": "time"})
    .resample(time="MS")
    .ffill()
)
# Reindex onto the full monthly axis so the months after the last yearly stamp
# are forward-filled as well.
max_var = max_var.reindex({"time": mtbs["monthly"].time}, method="ffill")
# Same shape as `max_var`, but every pixel holds the spatial mean.
conus_average_max_var = xr.ones_like(max_var) * max_var.mean(dim=["x", "y"])

# Per-pixel correlation over time between fire and (a) the CONUS-average and
# (b) the local annual-max precipitation.
conus_average_var_corr = xr.corr(
    mtbs["monthly"], conus_average_max_var, dim="time"
)
local_var_corr = xr.corr(mtbs["monthly"], max_var, dim="time")

So first we'll look at the correlation of local fire with CONUS-average precip change.


In [None]:
# Map the per-pixel correlation between fire and the CONUS-average series held
# in `conus_average_var_corr`.
plot.fire.simple_map(
    conus_average_var_corr,
    clabel="correlation",
    clim=(-0.2, 0.2),
    cmap="purplegreen",
)

Then we'll look at the correlation of local fire with local precip change.


In [None]:
# Map the per-pixel correlation between fire and the local series held in
# `local_var_corr`, using the same color range as the CONUS-average map.
plot.fire.simple_map(
    local_var_corr, clabel="correlation", clim=(-0.2, 0.2), cmap="purplegreen"
)

### Let's instead try sum precip (instead of annual max) to try to get total precip trends


In [None]:
# Annual total precipitation at every pixel, broadcast back onto the monthly
# time axis so it can be correlated with the monthly fire data. Each yearly
# value is stamped with every 12th monthly timestamp (a stride rather than a
# hard-coded 0..420 range, so the cell keeps working if `tlim` changes) and
# then forward-filled month by month.
# NOTE(review): assumes the monthly axis starts in the first month of a year.
sum_var = (
    climate["ppt"]
    .groupby("time.year")
    .sum()
    .assign_coords({"year": mtbs["monthly"].time[::12].values})
    .rename({"year": "time"})
    .resample(time="MS")
    .ffill()
)
# Reindex onto the full monthly axis so the months after the last yearly stamp
# are forward-filled as well.
sum_var = sum_var.reindex({"time": mtbs["monthly"].time}, method="ffill")
# Same shape as `sum_var`, but every pixel holds the spatial mean.
conus_average_sum_var = xr.ones_like(sum_var) * sum_var.mean(dim=["x", "y"])

# Per-pixel correlation over time between fire and (a) the CONUS-average and
# (b) the local annual-sum precipitation. These reuse, and therefore replace,
# the `*_var_corr` names from the annual-max cell.
conus_average_var_corr = xr.corr(
    mtbs["monthly"], conus_average_sum_var, dim="time"
)
local_var_corr = xr.corr(mtbs["monthly"], sum_var, dim="time")

In [None]:
# Map the CONUS-average correlation again; `conus_average_var_corr` now holds
# the annual-sum version computed in the cell before this one.
plot.fire.simple_map(
    conus_average_var_corr,
    clabel="correlation",
    clim=(-0.2, 0.2),
    cmap="purplegreen",
)

In [None]:
# Map the local correlation again; `local_var_corr` now holds the annual-sum
# version.
plot.fire.simple_map(
    local_var_corr, clabel="correlation", clim=(-0.2, 0.2), cmap="purplegreen"
)

## Finding #2: annual sum precip is a better indicator than annual max precip.


How much of this is just because the trends in max temperature are stronger in
certain places? Let's make a map of the pixel-level trends in fire and temp


In [None]:
# Settings for the future-climate load below.
cmip_model = "CanESM5"  # passed to the loader as `model`
scenario = "ssp245"  # passed to the loader as `scenario`
# NOTE(review): `target` is not read by any cell shown in this notebook —
# presumably a target year for a later analysis; confirm or remove.
target = 2025

In [None]:
# Monthly CMIP projections for 2020-2100 for the chosen model and scenario.
# The notebook-wide `coarsen` setting is used (instead of a literal 16) so the
# projections stay on the same grid as the historical inputs whenever the
# coarsening factor is changed.
future_climate = load.cmip(
    store=store,
    model=cmip_model,
    coarsen=coarsen,
    scenario=scenario,
    tlim=(2020, 2100),
    variables=variables,
    mask=mask,
    sampling="monthly",
)

In [None]:
plot.fire.simple_map(
    precip_for_regression.polyfit(dim="time", deg=1)
    .sel(degree=1)
    .polyfit_coefficients.assign_coords({"lat": precip_for_regression.lat})
    / sum_var.mean(dim="time"),
    clim=(-0.02, 0.02),
    clabel="trend in precip [%/year]",
    cmap="brownbluegreen",
)

FINDING #3: Looks like spatial trends in precip haven't been observed in
historic record, so we're not giving it any knowledge of changing precip. Does
this mean that the model will be entirely temperature driven?


Now, let's look at four different regions to get a sense of the variance in the
variables compared to the variance in the mtbs fire.


In [None]:
# Region name -> (x index range, y index range). The ranges are positional
# indices (the boxes are consumed with `.isel`), so they are tied to the grid
# produced by the current coarsening factor.
_BOX_BOUNDS = (
    ("Southeast", (45, 70), (30, 50)),
    ("California", (0, 10), (15, 35)),
    ("Intermountain West", (10, 20), (15, 35)),
    ("Northwest", (0, 20), (0, 15)),
)

geographic_boxes = {
    name: {"x": slice(*x_bounds), "y": slice(*y_bounds)}
    for name, x_bounds, y_bounds in _BOX_BOUNDS
}

In [None]:
# Package observed fire, climate, and predicted fire for each region box,
# restricted to 1985-2018, into one collection keyed by region name.
analysis_period = slice("1985", "2018")


def _package_region(name):
    """Package the data inside the `name` box over the analysis period."""
    box = geographic_boxes[name]
    return plot.fire.package_for_altair(
        mtbs.isel(box).sel(time=analysis_period),
        climate.isel(box).sel(time=analysis_period),
        prediction=prediction.isel(box).sel(time=analysis_period),
        label=name,
    )


# California seeds the collection; every other region contributes only its own
# entry from its packaged result.
data = _package_region("California")
for region in ["Intermountain West", "Northwest", "Southeast"]:
    data[region] = _package_region(region)[region]

In [None]:
# Summary view of the model output, reading the `prediction` variable.
plot.fire.summary(prediction, "prediction", clim=(0.0006, 0.008))

In [None]:
# Spatial mean of the predicted fire at every time step, drawn as a wide,
# short line chart.
y2 = prediction["prediction"].mean(dim=["x", "y"])
x = prediction["time"]

plot.line(
    x=x,
    y=y2,
    width=900,
    height=200,
    color="rgb(175,91,92)",
    strokeWidth=2,
)