In [None]:
import xarray as xr
from numcodecs.zlib import Zlib
import pandas as pd
import numpy as np

In [None]:
# Version tag interpolated into every s3://carbonplan-climatetrace/... path below.
version = "v1.2"
# Target number of latitude / longitude cells in the coarsened example grids;
# the coarsening factor is derived as len(sub.lat) / final_lat_size (same for lon).
final_lat_size = 200
final_lon_size = 300
# Seven year-end timestamps, 2014-12-31 through 2020-12-31. These are the values
# pd.date_range("2014", "2021", freq="A") yields, but built explicitly because the
# "A" alias is deprecated in recent pandas releases (replaced by "YE"), while "YE"
# is not understood by older ones.
time_coords = {"time": pd.to_datetime([f"{year}-12-31" for year in range(2014, 2021)])}
# -1 asks for a single chunk spanning the whole dimension.
chunk_dict = {"lat": -1, "lon": -1, "time": -1}

In [None]:
def make_abs_diff(ds):
    """Return the step-to-step change of ``ds`` along ``time``.

    Each time step has the previous step subtracted from it. The first step
    has no predecessor, so it is dropped and the result is one step shorter
    than the input along ``time``.
    """
    after_first = slice(1, None)
    return (ds - ds.shift(time=1)).isel(time=after_first)


def make_pct_diff(ds):
    """Return the step-to-step change of ``ds`` along ``time`` in percent.

    The change from the previous time step is divided by the previous step's
    value and multiplied by 100. The first step has no predecessor and is
    dropped, so the result is one step shorter than the input along ``time``.
    """
    after_first = slice(1, None)
    previous = ds.shift(time=1)
    change = (ds - previous).isel(time=after_first)
    baseline = previous.isel(time=after_first)
    return change / baseline * 100

## example of deforestation in the Amazon


In [None]:
# Tile id and short name for the Amazon example; `name` is reused in the
# output store paths further down.
tile = "00N_060W"
name = "amazon"

tile_store = f"s3://carbonplan-climatetrace/{version}/results/tiles/{tile}.zarr"
ds = xr.open_zarr(tile_store)

# Crop the tile to the example window (label-based lat/lon slices).
sub = ds.sel(
    lat=slice(-6.4, -6.2),
    lon=slice(-53.55, -53.25),
)
# sub = ds.sel(lat=slice(-6.7, -6.2), lon=slice(-53.6, -53.1))

In [None]:
# coarsen_lat = len(sub.lat) / final_lat_size
# coarsen_lon = len(sub.lon) / final_lon_size
# assert coarsen_lat == coarsen_lon
# final_res = int(30 * coarsen_lat)
# print(final_res)

In [None]:
# Total biomass is the sum of the four pools, added left to right.
biomass_pools = ["AGB", "BGB", "dead_wood", "litter"]
sub["total_biomass"] = sum(
    (sub[pool] for pool in biomass_pools[1:]),
    sub[biomass_pools[0]],
)
# coarsen = 10
# s = sub[['total_biomass']].coarsen(lat=coarsen, lon=coarsen).mean()

# Step-to-step change of every variable in `sub`, in absolute and percent terms.
abs_diff, pct_diff = make_abs_diff(sub), make_pct_diff(sub)

In [None]:
# One panel per time step; the colour scale is capped at 0.
abs_diff["total_biomass"].plot(
    col="time",
    col_wrap=3,
    vmax=0,
)

In [None]:
# One panel per time step; discrete colour levels between -80 % and -10 %.
pct_diff["total_biomass"].plot(
    col="time",
    col_wrap=3,
    vmax=-10,
    vmin=-80,
    levels=8,
    cmap="RdBu",
)

In [None]:
# Derive the coarsening factor for THIS subset in the cell that uses it.
# Without this, `coarsen_lat`, `coarsen_lon` and `final_res` are undefined on a
# fresh kernel, or silently carry over values from a different example when cells
# are run out of order.
coarsen_lat = len(sub.lat) / final_lat_size
coarsen_lon = len(sub.lon) / final_lon_size
# Both axes must shrink by the same factor so the coarsened cells stay square.
assert coarsen_lat == coarsen_lon
# 30 is presumably the native pixel size in metres (the output stores are named
# `..._{final_res}m.zarr`) — TODO confirm.
final_res = int(30 * coarsen_lat)

# Block-average AGB down to the target grid, load it as a single chunk and
# relabel the time axis with the shared `time_coords`.
s = sub[["AGB"]].coarsen(lat=int(coarsen_lat), lon=int(coarsen_lon)).mean()
s = s.chunk(chunk_dict)
s = s.assign_coords(time_coords)

s.AGB.plot(col="time", col_wrap=3, vmin=0)

In [None]:
# Step-to-step AGB change. `make_diff` is not defined anywhere in this notebook;
# the absolute-difference helper is `make_abs_diff`.
diff = make_abs_diff(s)
diff.AGB.plot(col="time", col_wrap=3)

In [None]:
# Write the coarsened biomass without its first time step (presumably so it lines
# up with the diff store, which has no first step either), overwriting any
# existing store and compressing AGB with zlib.
biomass_store = f"s3://carbonplan-climatetrace/{version}/examples/{name}_biomass_{final_res}m.zarr"
zlib_encoding = {"AGB": {"compressor": Zlib()}}
s.isel(time=slice(1, None)).to_zarr(biomass_store, mode="w", encoding=zlib_encoding)

In [None]:
# Write the step-to-step AGB change, overwriting any existing store and
# compressing AGB with zlib.
diff_store = f"s3://carbonplan-climatetrace/{version}/examples/{name}_diff_{final_res}m.zarr"
zlib_encoding = {"AGB": {"compressor": Zlib()}}
diff.to_zarr(diff_store, mode="w", encoding=zlib_encoding)

## example of forest fire in Mendocino


In [None]:
# Tile id and short name for the Mendocino example; `name` is reused in the
# output store paths further down.
tile = "40N_130W"
name = "mendocino"

tile_store = f"s3://carbonplan-climatetrace/{version}/results/tiles/{tile}.zarr"
ds = xr.open_zarr(tile_store)

# Crop the tile to the example window (label-based lat/lon slices).
sub = ds.sel(
    lat=slice(39, 39.6),
    lon=slice(-123.4, -122.5),
)

In [None]:
# Coarsening factor needed to bring the subset down to the target grid size.
coarsen_lat = sub.sizes["lat"] / final_lat_size
coarsen_lon = sub.sizes["lon"] / final_lon_size
# Both axes must shrink by the same factor.
assert coarsen_lat == coarsen_lon
# Resolution label used in the output store names.
final_res = int(30 * coarsen_lat)

# Block-average AGB, load it as a single chunk and relabel the time axis.
s = (
    sub[["AGB"]]
    .coarsen(lat=int(coarsen_lat), lon=int(coarsen_lon))
    .mean()
    .chunk(chunk_dict)
    .assign_coords(time_coords)
)

s.AGB.plot(col="time", col_wrap=3, vmin=0)

In [None]:
# Step-to-step AGB change. `make_diff` is not defined anywhere in this notebook;
# the absolute-difference helper is `make_abs_diff`.
diff = make_abs_diff(s)
diff.AGB.plot(col="time", col_wrap=3)

In [None]:
# Write the coarsened biomass without its first time step (presumably so it lines
# up with the diff store, which has no first step either), overwriting any
# existing store and compressing AGB with zlib.
biomass_store = f"s3://carbonplan-climatetrace/{version}/examples/{name}_biomass_{final_res}m.zarr"
zlib_encoding = {"AGB": {"compressor": Zlib()}}
s.isel(time=slice(1, None)).to_zarr(biomass_store, mode="w", encoding=zlib_encoding)

In [None]:
# Write the step-to-step AGB change, overwriting any existing store and
# compressing AGB with zlib.
diff_store = f"s3://carbonplan-climatetrace/{version}/examples/{name}_diff_{final_res}m.zarr"
zlib_encoding = {"AGB": {"compressor": Zlib()}}
diff.to_zarr(diff_store, mode="w", encoding=zlib_encoding)

# get stats


## fraction of each model class


In [None]:
# NOTE(review): `tiles` and `open_biomass_tile` are not defined or imported in the
# cells visible in this notebook — confirm where they come from before running
# on a fresh kernel.
counts_path = "value_counts.csv"

# Start a fresh CSV containing only the header row.
with open(counts_path, "w") as f:
    f.write("tile_id,raw_not_null,final_not_null,with_break,no_break,no_model\n")

for tile in tiles:
    print(tile)
    ds = open_biomass_tile(tile, version)[["AGB", "AGB_raw", "breakpoint", "pvalue"]]

    has_break = ds.breakpoint.notnull()
    lacks_break = ds.breakpoint.isnull()
    significant = ds.pvalue < 0.05

    # Non-null pixel counts before (AGB_raw) and after (AGB) processing.
    raw_not_null = ds.AGB_raw.notnull().sum().values
    final_not_null = ds.AGB.notnull().sum().values

    # Pixel counts per class, split on breakpoint presence and p-value < 0.05.
    with_break = (has_break & significant).astype(int).sum().values
    no_break = (lacks_break & significant).astype(int).sum().values
    no_model = (lacks_break & (ds.pvalue >= 0.05)).astype(int).sum().values

    # Re-open in append mode for every tile, so each row is written as soon as
    # it has been computed.
    with open(counts_path, "a") as f:
        f.write(f"{tile},{raw_not_null},{final_not_null},{with_break},{no_break},{no_model}\n")

In [None]:
# Load the per-tile counts from value_counts.csv (one row per tile).
df = pd.read_csv("value_counts.csv")

In [None]:
# Share of final non-null pixels that were null in the raw data, in percent.
filled_pct = 100 * (1.0 - (df.raw_not_null.sum() / df.final_not_null.sum()))
print("fraction of filled pixels")
print(round(filled_pct, 2))

# Denominator for the class shares: all pixels that fell into one of the three classes.
total = df.with_break.sum() + df.no_break.sum() + df.no_model.sum()

class_counts = (
    ("fraction of pixels classified as with change point", df.with_break),
    ("fraction of pixels classified as with linear trend", df.no_break),
    ("fraction of pixels classified as with no trend", df.no_model),
)
for label, counts in class_counts:
    print(label)
    print(round(100 * counts.sum() / total, 1))

## fraction of pixels identified to have experienced stand replacing disturbances


In [None]:
from s3fs import S3FileSystem
import geopandas
from carbonplan_trace.v1 import utils
import regionmask

fs = S3FileSystem()
landsat_shape_path = f"s3://carbonplan-climatetrace/{version}/masks/valid_landsat.shp.zip"
with fs.open(landsat_shape_path) as f:
    landsat_shape = geopandas.read_file(f)
# Constant column; it is passed as `numbers=` to regionmask.mask_geopandas below,
# and the resulting mask is compared against 1.
landsat_shape["valid_landsat"] = 1

In [None]:
# Count, per tile, how many valid pixels have any positive `emissions` value in
# the v0.4 tiles, and write one CSV row per tile.
# NOTE(review): `tiles` is not defined in the cells visible in this notebook —
# confirm where it comes from.
with open("hansen_value_counts.csv", "w") as f:
    f.write("tile_id,all_pixel,with_change\n")

for tile in tiles:
    print(tile)
    # NOTE(review): this loop selects years 2014-2021 while the growth-rate loop
    # further down selects 2015-2021 from the same store — confirm that the
    # difference is intended.
    ds = xr.open_zarr(f"s3://carbonplan-climatetrace/v0.4/tiles/30m/{tile}_tot.zarr").sel(
        year=slice(2014, 2021)
    )

    # Land mask from IGBP land cover: a pixel counts as land if its igbp value is
    # > 0 in any year. The mask is then matched onto this tile's lat/lon grid via
    # the project helper before being applied.
    lat, lon = utils.get_lat_lon_tags_from_tile_path(tile)
    bounding_box = utils.parse_bounding_box_from_lat_lon_tags(lat, lon)
    igbp = utils.open_global_igbp_data(lat_lon_box=bounding_box)
    land_mask = (igbp.igbp > 0).any(dim="year")
    land_mask = utils.find_matching_records(data=land_mask, lats=ds.lat, lons=ds.lon)
    ds = ds.where(land_mask)

    # Landsat mask: rasterise the valid-landsat polygons onto this tile's grid
    # (a single-year slice is used only as a lat/lon template) and keep pixels
    # whose mask value is 1.
    example = ds.isel(year=0)[["emissions"]].drop("year")
    landsat_mask = regionmask.mask_geopandas(
        landsat_shape, numbers="valid_landsat", lon_or_obj=example.lon, lat=example.lat
    )
    ds = ds.where(landsat_mask == 1)

    # with_change: pixels whose emissions summed over the selected years are > 0.
    # all_pixel: pixels that are still non-null in the first year after masking.
    with_change = (ds.emissions.sum(dim="year") > 0).astype(int).sum().values
    all_pixel = ds.emissions.isel(year=0).notnull().sum().values

    # Append this tile's row; the file is re-opened on every iteration.
    with open("hansen_value_counts.csv", "a") as f:
        f.write(f"{tile},{all_pixel},{with_change}\n")

In [None]:
# Share of valid pixels with any positive emissions, pooled over all tiles.
df = pd.read_csv("hansen_value_counts.csv")
disturbed_pct = 100 * df.with_change.sum() / df.all_pixel.sum()

print("% of pixels with stand replacing disturbances")
print(round(disturbed_pct, 2))

## average % of biomass change within pixels experiencing stand replacing disturbances and growth rate


In [None]:
import dask

# Turn off dask's automatic splitting of large chunks created by slicing
# (the `array.slicing.split_large_chunks` option).
dask.config.set({"array.slicing.split_large_chunks": False})
from carbonplan_trace.v0.core import compute_grid_area

In [None]:
with open("growth_rate_plus.csv", "w") as f:
    f.write("tile_id,area_summed,pct_change_summed,growth_rate_summed\n")

In [None]:
# this should be combined with the above section

for tile in tiles:
    print(tile)

    # open hansen and crop to landsat area
    # doesn't need land mask because emissions there should be 0
    hansen = xr.open_zarr(f"s3://carbonplan-climatetrace/v0.4/tiles/30m/{tile}_tot.zarr").sel(
        year=slice(2015, 2021)
    )["emissions"]
    example = hansen.isel(year=0).drop("year")
    landsat_mask = regionmask.mask_geopandas(
        landsat_shape, numbers="valid_landsat", lon_or_obj=example.lon, lat=example.lat
    )
    hansen = hansen.where(landsat_mask == 1)
    disturbed = (hansen > 0).any(dim="year")
    if disturbed.lat[0] > disturbed.lat[-1]:
        disturbed = disturbed.reindex(lat=disturbed.lat[::-1])

    # open v1, constrain to hansen emission > 0, and calculate % change of each pixel
    v1 = xr.open_zarr(f"s3://carbonplan-climatetrace/{version}/results/tiles/{tile}.zarr")
    disturbed = disturbed.assign_coords({"lat": v1.lat, "lon": v1.lon})

    v1 = v1.where(disturbed)
    vmax = v1.AGB.max(dim="time")
    vmin = v1.AGB.min(dim="time")
    pct_change = (vmax - vmin) / vmax * 100
    da_area = compute_grid_area(pct_change)
    da_area = da_area.where(disturbed)

    # calculate summary states
    # we want the weighted average of pct change
    # sum of area * pct change divided by sum of area
    area_summed = da_area.sum().values
    pct_change_summed = (da_area * pct_change).sum().values

    # growth rate
    flatline_and_linear_positive = v1.breakpoint.isnull() & (
        v1.AGB.isel(time=-1) >= v1.AGB.isel(time=0)
    )
    growth_rate = (v1.AGB.isel(time=-1) - v1.AGB.isel(time=0)).where(
        flatline_and_linear_positive
    ) / 6.0
    growth_rate_summed = (da_area * growth_rate).sum().values

    with open("growth_rate_plus.csv", "a") as f:
        f.write(f"{tile},{area_summed},{pct_change_summed},{growth_rate_summed}\n")

In [None]:
df = pd.read_csv("growth_rate_plus.csv")

print("pct change within v0 stand replacing change areas")
print(df.pct_change_summed.sum() / df.area_summed.sum())

print("growth rate")
print(df.growth_rate_summed.sum() / df.growth_area_summed.sum())