<img width="50" src="https://carbonplan-assets.s3.amazonaws.com/monogram/dark-small.png" style="margin-left:0px;margin-top:20px"/>

# MTBS Perimeters to Zarr

_by Joe Hamman (CarbonPlan), November 3, 2020_

This notebook converts MTBS fire perimeters to monthly burned area rasters.

**Inputs:**

- MTBS fire perimeters shapefile

**Outputs:**

- 1 Zarr archive:
  `gs://carbonplan-data/processed/mtbs/conus/{res}m/monthly_perims_raster.zarr`

**Notes:**

- Text defining large and very large fires from Barbero et al. (2015):
  > The Monitoring Trends in Burn Severity (MTBS) database was used to acquire
  > fire location, fire discovery date and burned area for LFs over the
  > contiguous US from 1984 to 2010. We excluded fires smaller than 404ha and
  > further eliminated 'unburned to low' burned area for each fire as classified
  > by MTBS to more accurately portray the true area burned (Kolden et al 2012).
  > While the definition of VLFs is subjective and likely geographically
  > dependent, we define VLFs as fires whose size exceeds the 90th percentile
  > (5073 ha) of MTBS fires greater than 404 ha (n = 927) (figure 1(b)) and LF
  > as fires whose size was below the 90th percentile but greater than 404 ha (n
  > = 8343)(figure 1(c)).


In [None]:
from carbonplan.data import cat
import xarray as xr
import numpy as np

import pandas as pd

import geopandas

import rasterio
from rasterio import Affine
from rasterio.transform import rowcol
from rasterio.features import rasterize
from rasterio.transform import from_bounds

import matplotlib.pyplot as plt
import zarr

import hvplot.pandas  # noqa

In [None]:
# Month-start timestamps for every month from January 1984 through December 2018.
months = pd.date_range(start="1984-01", end="2018-12", freq="MS")

In [None]:
# mask = cat.nlcd.raster.read().squeeze(drop=True)

# Region label interpolated into the output paths further down.
region = "conus"

# The raw MTBS raster acts as the template grid: its affine transform, array
# shape and rasterio profile are reused for every raster produced below.
# The dataset handle stays open because its CRS is read again later on.
mask = rasterio.open(cat.mtbs.raw_raster._urlpath)
transform, shape, src_profile = mask.transform, mask.shape, mask.profile

# TODO: replace with intake use
perims = geopandas.read_file(
    "mtbs_perimeter_data/mtbs_perims_DD/mtbs_perims_DD.shp"
)

In [None]:
# note we set all start days to 1 (so we can easily group by month later)
# Only the Year and StartMonth columns are needed, so iterate over those two
# columns directly instead of materialising every row with iterrows().
dates = pd.DatetimeIndex(
    [
        pd.to_datetime(f"{year}-{start_month}-1")
        for year, start_month in zip(perims["Year"], perims["StartMonth"])
    ]
)
perims.index = dates
perims = perims.sort_index()
perims["ha"] = perims["Acres"] * 0.40468564224  # acres -> hectares
# Take the month column from the (now sorted) index. Assigning the unsorted
# `dates` here would be positional and could pair rows with the wrong month.
perims["ym"] = perims.index
# Reproject the perimeters onto the CRS of the template raster.
perims = perims.to_crs(crs=mask.crs)
perims

In [None]:
# Keep only the fire types of interest.
# NOTE: str.contains treats this as a regular expression, so `*` is a
# quantifier on the preceding letter rather than a glob wildcard.
pattern = "Wild*|Out*|Unknown|Complex"
perims = perims.loc[perims["Fire_Type"].str.contains(pattern)]

# Size classes in hectares, using the 404 ha and 5073 ha thresholds quoted from
# Barbero et al. (2015) in the notes at the top of this notebook:
#   large fires (lf):       404 <= ha <= 5073
#   very large fires (vlf): ha > 5073
perims_lf = perims.loc[(perims["ha"] >= 404) & (perims["ha"] <= 5073)]
perims_vlf = perims.loc[perims["ha"] > 5073]
perims_vlf

In [None]:
def rasterize_geom(geoms):
    """Burn a collection of geometries onto the template grid.

    Parameters
    ----------
    geoms : iterable
        Geometries to rasterize. Every geometry is burned with the value 1.

    Returns
    -------
    numpy.ndarray
        uint8 array sized by the module-level ``shape`` and georeferenced by
        the module-level ``transform``; cells not covered by any geometry
        hold the fill value 0.
    """
    # rasterize() expects (geometry, value) pairs
    burn_shapes = [(geometry, 1) for geometry in geoms]
    return rasterize(
        burn_shapes,
        out_shape=shape,
        transform=transform,
        fill=0,
        merge_alg=rasterio.enums.MergeAlg.replace,
        all_touched=True,
        dtype=rasterio.uint8,
    )

In [None]:
# Interactive map of the very large fires indexed to 2018, coloured by area (ha).
(
    perims_vlf.loc["2018":"2018", ["ha", "geometry", "ym"]]
    .to_crs("EPSG:4326")
    .hvplot(c="ha", geo=True, coastline=True)
)

In [None]:
from rio_cogeo.profiles import cog_profiles
from rasterio.io import MemoryFile
from rio_cogeo.cogeo import cog_translate

In [None]:
from gcsfs import GCSFileSystem


def copy_to_fs(source, dst, fs):
    """Copy a local file to a path on a filesystem object.

    Parameters
    ----------
    source : str
        Path of the local file to read (opened in binary mode).
    dst : str
        Destination path, passed to ``fs.open``.
    fs : object
        Filesystem exposing ``open(path, mode)`` that returns a writable
        binary file object usable as a context manager (a ``GCSFileSystem``
        in this notebook).
    """
    import shutil

    with open(source, "rb") as fsource, fs.open(dst, "wb") as fdst:
        # Stream in chunks instead of reading the whole file into memory.
        shutil.copyfileobj(fsource, fdst)


def numpy_to_cog(data, out_fname="temp_cog.tif"):
    """Write an array to a local Cloud Optimized GeoTIFF.

    The array is first written as band 1 of an in-memory dataset created
    from the module-level ``src_profile`` and then translated to a COG with
    the "deflate" profile.

    Parameters
    ----------
    data : numpy.ndarray
        Array to write as band 1; it must be compatible with ``src_profile``.
    out_fname : str, optional
        Path of the COG file to create. Defaults to ``"temp_cog.tif"``.
    """
    with MemoryFile() as memfile:
        with memfile.open(**src_profile) as mem:
            # Write the array passed by the caller (not a module-level
            # variable) into band 1 of the in-memory dataset.
            mem.write(data, indexes=1)

            dst_profile = cog_profiles.get("deflate")
            cog_translate(
                mem,
                out_fname,
                dst_profile,
                in_memory=True,
                quiet=True,
            )


# Google Cloud Storage filesystem handle; used below to check for existing
# outputs and to upload the generated files.
fs = GCSFileSystem()

In [None]:
# uncomment to start over
# paths = fs.glob('carbonplan-data/processed/mtbs/conus/30m/*f_????.??.tif')
# fs.rm(paths)

In [None]:
# make an empty file we can copy to each month without any fires
# (an all-zero uint8 array on the template grid, written once to "empty_cog.tif")
r = np.zeros(shape, dtype=rasterio.uint8)
numpy_to_cog(r, "empty_cog.tif")

In [None]:
# NOTE(review): this module-level `dst_profile` is not referenced by the loop
# below; it is kept so the notebook namespace is unchanged.
dst_profile = cog_profiles.get("deflate")

# For every month and fire-size class, write one COG to cloud storage: the
# rasterized perimeters when there are fires, otherwise a copy of the empty COG.
for month in months:

    for name, df in [("lf", perims_lf), ("vlf", perims_vlf)]:

        out_fname = f"carbonplan-data/processed/mtbs/{region}/30m/{name}_{month.strftime('%Y.%m')}.tif"

        # Skip outputs that already exist so an interrupted run can be resumed.
        if fs.exists(out_fname):
            print(f"{out_fname} exists, skipping...")
            continue

        # Only selection and rasterization are guarded: a KeyError means no
        # perimeter is indexed at this month, and rasterize raises ValueError
        # when it is given no valid geometry. Errors raised while writing or
        # uploading the COG are deliberately NOT caught, so a failed write can
        # no longer be recorded as an empty (no-fire) month.
        try:
            geom = df.loc[[month]].geometry
            print(geom)
            print(f"rasterizing {month}")
            r = rasterize_geom(geom)
        except (KeyError, ValueError) as e:
            print(f"raised error: {e}")
            print(f"copying empty cog to {out_fname}")
            copy_to_fs("empty_cog.tif", out_fname, fs)
        else:
            numpy_to_cog(r, "temp_cog.tif")
            copy_to_fs("temp_cog.tif", out_fname, fs)

In [None]:
import intake
import xarray as xr
from dask.diagnostics import ProgressBar

# Open the MTBS intake catalog published in the carbonplan/data repository.
cat2 = intake.open_catalog(
    "https://raw.githubusercontent.com/carbonplan/data/master/carbonplan_data/catalogs/mtbs.yaml"
)
# Date keys for the twelve months of 2018, formatted "YYYY.MM".
# NOTE: this rebinds `dates`, which earlier held the perimeter DatetimeIndex.
dates = [f"2018.{m:02d}" for m in range(1, 13)]
# Load the "vlf" raster for each month through the catalog and stack them
# along a new "time" dimension labelled with the date keys.
da = xr.concat(
    [
        cat2.rasterized_perims(size="vlf", date=d).to_dask().squeeze(drop=True)
        for d in dates
    ],
    dim=xr.Variable("time", dates),
)

# Sum over the months, then average over 133 x 133 pixel blocks (edge pixels
# that do not fill a whole block are trimmed). `.load()` triggers the
# computation, with a dask progress bar shown while it runs.
with ProgressBar():
    da_sum = da.sum("time").coarsen(x=133, y=133, boundary="trim").mean().load()
da_sum

In [None]:
# Plot the coarsened result; `.where(da_sum)` masks cells whose value is 0
# (falsy) so they are not drawn.
da_sum.where(da_sum).plot(vmax=0.01, vmin=0, cmap="Greys")