In [None]:
%load_ext autoreload
%autoreload 2

import xarray as xr
from matplotlib import cm
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from carbonplan_styles.mpl import set_theme

set_theme()
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
import numpy as np
import seaborn as sns
import pandas as pd
from carbonplan_trace.tiles import tiles
from carbonplan_trace.v0.core import coarsen_emissions
from carbonplan_data.utils import set_zarr_encoding
import rasterio

# Get comparison data


In [None]:
from carbonplan_trace.v1.landsat_preprocess import access_credentials
import fsspec

# Fetch the AWS key pair through the project helper and build an authenticated
# S3 filesystem object; `fs` is reused by later cells to list and write objects.
access_key_id, secret_access_key = access_credentials()
fs = fsspec.get_filesystem_class("s3")(
    key=access_key_id,
    secret=secret_access_key,
)

In [None]:
# Results version; interpolated into the versioned S3 paths read below.
version = "v1.2"

In [None]:
# List the changepoint log objects and reduce every path to its bare subtile
# name (last path component with everything from ".txt" onwards removed).
log_bucket = f"s3://carbonplan-climatetrace/{version}/changepoint_log2/"
completed_subtiles = [
    log_path.split("/")[-1].split(".txt")[0] for log_path in fs.ls(log_bucket)
]
len(completed_subtiles)

In [None]:
# Open the CarbonPlan 3000m biomass raster for the selected version.
ds = xr.open_zarr(
    f"s3://carbonplan-climatetrace/{version}/results/global/3000m/raster_biomass.zarr"
)
# average carbonplan data for comparison since all other datasets are ~snapshots
ds = ds.mean(dim="time").compute()
# keep only AGB and rename it so each variable in `ds` is named after its source
ds = ds[["AGB"]].rename({"AGB": "CarbonPlan"})
# round coordinates to 4 decimals; the mask datasets loaded later are rounded the same way
ds = ds.assign_coords({"lat": ds.lat.round(4), "lon": ds.lon.round(4)})

In [None]:
# Load every comparison product (plus the training biomass) onto the CarbonPlan
# grid and store each one in `ds` under its own name.
comps = ["Harris", "Spawn", "GEDI", "Xu"]
for name in comps + ["training_biomass"]:
    temp = xr.open_zarr(f"s3://carbonplan-climatetrace/validation/{name.lower()}_3000m_agbd.zarr")
    # overwrite the coordinates with the (rounded) CarbonPlan ones so the variables align exactly
    # NOTE(review): assumes each store is already on the same 3000m grid as `ds` -- confirm
    temp = temp.assign_coords({"lat": ds.lat, "lon": ds.lon})
    if name == "training_biomass":
        ds[name] = temp["biomass"]
    elif name == "Xu":
        # Xu is a yearly series: average 2014-2020 to match the CarbonPlan time mean
        temp = temp.drop("spatial_ref").sel(year=slice(2014, 2020)).mean(dim="year").compute()
        # the averaged field was previously computed but never stored, so every
        # later `ds["Xu"]` lookup failed
        ds[name] = temp["agbd"]
    else:
        ds[name] = temp["agbd"]

# comparison to Xu dataset at 10km


In [None]:
# xu dataset obtained from https://zenodo.org/record/4161694#.YVToCGZueX2
# Bands are relabelled as the years 2000-2019, dimensions renamed to
# time/lat/lon, and only 2014 onwards is kept, stored as the variable "Xu".
ds10km = xr.open_rasterio("/home/jovyan/test10a_cd_ab_pred_corr_2000_2019_v2.tif")
ds10km = ds10km.assign_coords({"band": np.arange(2000, 2020)})
ds10km = ds10km.rename({"band": "time", "y": "lat", "x": "lon"})
ds10km = ds10km.sel(time=slice(2014, None)).to_dataset(name="Xu")
# flip latitude to ascending order when the raster is stored north-to-south,
# so the ascending slice below selects the intended band
lat_is_descending = ds10km.lat[0] > ds10km.lat[-1]
if lat_is_descending:
    ds10km = ds10km.reindex(lat=ds10km.lat[::-1])
ds10km = ds10km.sel(lat=slice(-60, 80))

In [None]:
# Re-open the CarbonPlan 3000m biomass (all time steps this time) and keep AGB only.
carbonplan = xr.open_zarr(
    f"s3://carbonplan-climatetrace/{version}/results/global/3000m/raster_biomass.zarr"
)[["AGB"]]

# Mean-coarsen by a factor of 4 to bring the data onto the Xu raster grid.
# `coarsening_factor` is reused by the v0 emissions cell below.
coarsening_factor = 4
carbonplan10km = coarsen_emissions(
    carbonplan, factor=coarsening_factor, mask_var="AGB", method="mean"
)
# Overwrite lat/lon with the Xu coordinates and label the time axis with the years 2014-2020.
# NOTE(review): assumes the coarsened grid has the same shape and orientation as ds10km -- confirm
carbonplan10km = carbonplan10km.assign_coords(
    {"lat": ds10km.lat, "lon": ds10km.lon, "time": np.arange(2014, 2021)}
).sel(time=slice(2014, 2020))
ds10km["CarbonPlan"] = carbonplan10km["AGB"]

In [None]:
# v0 emissions, mean-coarsened with the same factor as the biomass above and
# relabelled onto the ds10km lat/lon coordinates for 2014-2019.
v0_emission = xr.open_zarr("s3://carbonplan-climatetrace/v0.4/global/3000m/raster_tot.zarr")
v0_emission10km = coarsen_emissions(
    v0_emission, factor=coarsening_factor, mask_var="emissions", method="mean"
)
v0_emission10km = v0_emission10km.rename({"year": "time"})
v0_emission10km = v0_emission10km.assign_coords({"lat": ds10km.lat, "lon": ds10km.lon})
v0_emission10km = v0_emission10km.sel(time=slice(2014, 2019))

In [None]:
# pick an arbitrary high emission value
# A cell is flagged when its emissions exceed 20000 in at least one time step.
high_change = (v0_emission10km.emissions > 20000).any(dim="time")
high_change.plot()

In [None]:
# Positional (row, column) indices of the flagged cells; used with isel() below.
# NOTE(review): assumes high_change dims are ordered (lat, lon) -- confirm
lats, lons = np.where(high_change)
len(lats)

In [None]:
# Per-cell range over time, expressed as a percentage of the per-cell maximum.
vmax = ds10km.max(dim="time")
vmin = ds10km.min(dim="time")
pct_change = (vmax - vmin) / vmax * 100

In [None]:
# Mean percentage range over the high-emission cells only, printed per dataset.
avg_change = pct_change.where(high_change).mean()
print(avg_change.Xu.values)
print(avg_change.CarbonPlan.values)

In [None]:
import random

# instead of using emission amount to pick pixels, use % of area disturbed to pick

# Plot 50 randomly drawn high-emission cells: v0 emissions on the left, and the
# CarbonPlan / Xu biomass series (each divided by its own maximum) on the right.
# The draw is unseeded, so the selected cells differ between runs.
for _ in range(50):
    # randrange excludes its upper bound; random.randint(0, len(lats)) includes it
    # and could index one past the end of `lats` / `lons`.
    ind = random.randrange(len(lats))
    lat = lats[ind]
    lon = lons[ind]
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    v0_emission10km.emissions.isel(lat=lat, lon=lon).plot(label="emission")
    plt.subplot(1, 2, 2)
    pixel = ds10km.isel(lat=lat, lon=lon).load()
    (pixel.CarbonPlan / pixel.CarbonPlan.max()).plot(label="CarbonPlan")
    (pixel.Xu / pixel.Xu.max()).plot(label="Xu")
    plt.legend()
    plt.show()
    plt.close()

In [None]:
# also load realm
# Both masks get their coordinates rounded to 4 decimals, like `ds`, before
# being attached as extra variables.
realms = xr.open_zarr("s3://carbonplan-climatetrace/validation/realm_mask.zarr")
realms = realms.assign_coords({axis: realms[axis].round(4) for axis in ("lat", "lon")})
ds["realm"] = realms["realm"]

ar6_mask = xr.open_zarr("s3://carbonplan-climatetrace/validation/ar6_mask.zarr")
ar6_mask = ar6_mask.assign_coords({axis: ar6_mask[axis].round(4) for axis in ("lat", "lon")})
ds["ar6_mask"] = ar6_mask["ar6_mask"]

In [None]:
# mask to land only
# Cells where the mask value is not 0 are replaced with NaN in every variable of `ds`.
# NOTE(review): presumably 0 encodes land in this mask -- confirm against the mask's source
land_mask = xr.open_zarr("s3://carbonplan-climatetrace/validation/land_mask.zarr")
land_mask = land_mask.assign_coords({"lat": land_mask.lat.round(4), "lon": land_mask.lon.round(4)})
ds = ds.where(land_mask.land_mask == 0)

# Maps


In [None]:
from cartopy.io import shapereader
import geopandas as gpd


def cartopy_proj_plate_carree():
    """Return a new cartopy ``PlateCarree`` CRS instance."""
    return ccrs.PlateCarree()


def cartopy_borders_global():
    """Load Natural Earth 50m state/province and country outlines.

    Each layer is read with geopandas, tagged as EPSG:4326 and reprojected to
    EPSG:32662 before its geometry column is extracted.

    NOTE(review): EPSG:32662 coordinates are presumably in metres, while callers
    pass these geometries with a cartopy PlateCarree CRS -- confirm the units agree.

    Returns:
        tuple: ``(states, countries)`` arrays of geometries.
    """

    def _load_outlines(layer):
        # read one "cultural" layer and return its reprojected geometries
        frame = gpd.read_file(shapereader.natural_earth("50m", "cultural", layer))
        return frame.set_crs(epsg=4326).to_crs(epsg=32662)["geometry"].values

    return _load_outlines("admin_1_states_provinces"), _load_outlines("admin_0_countries")

In [None]:
import matplotlib as mpl
from mpl_toolkits.axes_grid1 import make_axes_locatable


def map_pretty(ax, title="", min_lat=-90, max_lat=90, min_lon=-180, max_lon=180):
    """Decorate a map axes with border outlines, an extent and a title.

    Args:
        ax: cartopy axes to decorate.
        title: text placed above the axes.
        min_lat, max_lat, min_lon, max_lon: extent passed to ``ax.set_extent``.
    """
    state_borders, country_borders = cartopy_borders_global()

    # states first with a hairline stroke, then countries slightly heavier
    for outlines, stroke in ((state_borders, 0.1), (country_borders, 0.3)):
        ax.add_geometries(
            outlines,
            facecolor="none",
            edgecolor="k",
            crs=cartopy_proj_plate_carree(),
            linewidth=stroke,
            zorder=0,
        )

    ax.axis("off")
    ax.set_extent([min_lon, max_lon, min_lat, max_lat])
    ax.text(0.35, 1.05, title, transform=ax.transAxes)


def add_colorbar(
    fig,
    to_plot=None,
    x_location=1.08,
    y_location=0.76,
    height=0.12,
    width=0.018,
    vmin=None,
    vmax=None,
    cbar_label="",
    cmap="viridis",
):
    """Add a tick-less vertical colorbar with hand-placed limit and label text.

    Args:
        fig: figure that receives the colorbar axes.
        to_plot: mappable to take colours from; when None a ScalarMappable is
            built from ``vmin``, ``vmax`` and ``cmap``.
        x_location, y_location, width, height: colorbar axes rectangle, in
            figure coordinates.
        vmin, vmax: values written below and above the bar.
        cbar_label: text written to the right of the bar, rotated by -90 degrees.
        cmap: colormap used only when ``to_plot`` is None.

    Returns:
        The created colorbar.
    """
    cax = fig.add_axes([x_location, y_location, width, height])

    # limit annotations: lower value just under the bar, upper value just above it
    for y_pos, limit in ((-0.08, vmin), (1.08, vmax)):
        cax.text(
            0.5,
            y_pos,
            limit,
            transform=cax.transAxes,
            horizontalalignment="center",
            verticalalignment="center",
        )
    cax.text(
        1.8,
        0.5,
        cbar_label,
        transform=cax.transAxes,
        verticalalignment="center",
        multialignment="center",
        rotation=-90,
    )

    if to_plot is None:
        # nothing was drawn yet: synthesise a mappable from the requested range
        norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
        to_plot = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
    cbar = fig.colorbar(to_plot, cax=cax, orientation="vertical")

    cbar.outline.set_visible(False)
    cbar.set_ticks([])
    return cbar

In [None]:
def plot_map(
    data,
    names,
    titles,
    plot_params,
    add_colorbar=False,
    figsize=(20, 10),
    nrows=None,
    ncols=None,
):
    """Draw one map panel per entry of ``names`` on an ``nrows`` x ``ncols`` grid.

    Args:
        data: mapping of variable name to DataArray (e.g. an xarray Dataset).
        names: variable names, one per grid slot; ``None`` leaves the slot empty.
        titles: panel titles, parallel to ``names``.
        plot_params: dict with keys "cmap", "var_lims" and "label". When "cmap"
            is a list, all three are indexed per panel; otherwise they are
            shared by every panel.
        add_colorbar: when True each panel gets its own colorbar; when False a
            single shared colorbar is drawn at the right edge of the figure.
        figsize: figure size passed to ``plt.figure``.
        nrows, ncols: grid shape. When omitted, the notebook-level ``nrows`` /
            ``ncols`` variables are used, which is how existing cells call this.

    Raises:
        ValueError: if the grid shape is neither passed nor defined at notebook level.
    """
    # Existing callers set `nrows` / `ncols` as notebook globals before calling;
    # keep that working while allowing the shape to be passed explicitly.
    if nrows is None:
        nrows = globals().get("nrows")
    if ncols is None:
        ncols = globals().get("ncols")
    if nrows is None or ncols is None:
        raise ValueError("plot_map needs nrows and ncols (pass them or define them first)")

    plt.figure(figsize=figsize)
    map_plot = None  # last drawn image; the shared colorbar is built from it
    for i, name in enumerate(names):
        print(name)
        if name is None:
            continue
        da = data[name]
        plt.subplot(nrows, ncols, i + 1, projection=cartopy_proj_plate_carree())
        ax = plt.gca()

        if not isinstance(plot_params["cmap"], list):
            cmap = plot_params["cmap"]
            vmin, vmax = plot_params["var_lims"][0], plot_params["var_lims"][1]
            label = plot_params["label"]
        else:
            cmap = plot_params["cmap"][i]
            vmin, vmax = plot_params["var_lims"][i][0], plot_params["var_lims"][i][1]
            label = plot_params["label"][i]

        map_plot = da.plot.imshow(
            ax=ax,
            cmap=cmap,
            vmin=vmin,
            vmax=vmax,
            add_colorbar=add_colorbar,
            cbar_kwargs=None if not add_colorbar else {"label": label, "shrink": 0.5},
            transform=ccrs.PlateCarree(),
        )

        map_pretty(
            ax,
            title=titles[i],
        )

    # Shared colorbar; skipped when no panel was drawn (every name was None),
    # which previously raised a NameError on `map_plot`.
    if not add_colorbar and map_plot is not None:
        fig = plt.gcf()
        cax = fig.add_axes([1.05, 0.33, 0.03, 0.35])
        cbar = fig.colorbar(map_plot, cax=cax, orientation="vertical")
        cax.text(
            0.5,
            -0.12,
            plot_params["var_lims"][0],
            transform=cax.transAxes,
            horizontalalignment="center",
        )
        cax.text(
            0.5,
            1.05,
            plot_params["var_lims"][1],
            transform=cax.transAxes,
            horizontalalignment="center",
        )
        cax.text(
            1.8,
            0.5,
            plot_params["label"],
            transform=cax.transAxes,
            verticalalignment="center",
            multialignment="center",
            rotation=-90,
        )
        cbar.outline.set_visible(False)
        cbar.set_ticks([])
    plt.tight_layout()

In [None]:
# Biomass maps: a 3x2 grid whose first slot is left empty (None), followed by
# CarbonPlan and the four comparison products. plot_map reads `nrows`/`ncols`
# from these notebook-level variables.
nrows = 3
ncols = 2
names = [None, "CarbonPlan"] + comps
titles = [
    None,
    "CarbonPlan (2014-2020)",
    "Harris (2000)",
    "GlobBiomass (2010)",
    "GEDI (2019-2020)",
    "Xu (2014-2019)",
]
plot_params = {
    "cmap": cm.Greens,
    "var_lims": (0, 400),
    "label": "Aboveground Woody Biomass Density (Mg/ha)",
}

plot_map(data=ds, names=names, titles=titles, plot_params=plot_params)
plt.savefig("biomass_maps.png")
plt.show()
plt.close()

In [None]:
# training biomass difference plot

# diff = xr.Dataset()
# diff['training_biomass'] = ds['training_biomass']
# diff['Harris'] = ds['training_biomass'] - ds['Harris']

# nrows = 2
# ncols = 1
# names = ['training_biomass', 'Harris']
# titles = [
#     "This Study",
#     "Difference to Harris",
# ]
# plot_params = {
#     "cmap": [cm.Greens, cm.RdBu],
#     "var_lims": [(0, 500), (-200, 200)],
#     "label": "Aboveground Woody Biomass Density (Mg/ha)",
# }

# plot_map(data=diff, names=names, titles=titles, plot_params=plot_params, add_colorbar=True)

In [None]:
# Absolute difference CarbonPlan minus each comparison product, one variable per product.
abs_diff = xr.Dataset()
for name in comps:
    abs_diff[name] = ds["CarbonPlan"] - ds[name]

# keep the CarbonPlan field itself alongside the differences
abs_diff["CarbonPlan"] = ds["CarbonPlan"]

In [None]:
# Display the dataset summary as a quick sanity check.
abs_diff

In [None]:
# Difference maps: the first two grid slots are left empty (None) and each
# panel gets its own colorbar (add_colorbar=True).
nrows = 3
ncols = 2
names = [None, None] + comps
titles = [
    None,
    None,
    "Difference to Harris",
    "Difference to Spawn",
    "Difference to GEDI",
    "Difference to Xu",
]
plot_params = {
    "cmap": cm.RdBu,
    "var_lims": (-200, 200),
    "label": "Absolute Difference (Mg/ha)",
}

plot_map(
    data=abs_diff,
    names=names,
    titles=titles,
    plot_params=plot_params,
    add_colorbar=True,
    figsize=(10, 15),
)
plt.savefig("biomass_diff_maps.png")
plt.show()
plt.close()

In [None]:
# roll up to 0.5 degree

# NOTE: this rebinds `coarsening_factor` (4 in the 10km section above).
coarsening_factor = 20
# `abs_diff` stores the Spawn comparison under "Spawn" (see `comps`), so selecting
# "GlobBiomass" raised a KeyError. Select "Spawn" and rename it, so the written
# variable keeps the "GlobBiomass" label used for that product in the map titles.
# NOTE(review): confirm the consumer of this store expects the "GlobBiomass" label.
half_degree_diff = coarsen_emissions(
    abs_diff[["Harris", "Spawn", "GEDI"]].rename({"Spawn": "GlobBiomass"}),
    factor=coarsening_factor,
    mask_var="Harris",
    method="mean",
)
# stack the three difference fields along a "variable" dimension in one array
half_degree_diff = half_degree_diff.to_array(dim="variable")
half_degree_diff = half_degree_diff.to_dataset(name="AGB_diff")
# single chunk per dimension, and x/y dimension names for the written store
half_degree_diff = half_degree_diff.chunk({"lat": -1, "lon": -1, "variable": -1}).rename(
    {"lat": "y", "lon": "x"}
)
half_degree_diff = set_zarr_encoding(
    half_degree_diff, codec_config={"id": "zlib", "level": 1}, float_dtype="float32", int_dtype="i4"
)
# mode="w" overwrites any existing store at this path
half_degree_diff.to_zarr(
    "s3://carbonplan-climatetrace/v1.2/map/v2/half_degree_AGB_diff.zarr",
    mode="w",
    consolidated=True,
)
half_degree_diff

In [None]:
# Percent difference of CarbonPlan relative to each comparison product.
# The denominator is the comparison product, so cells where it is 0 yield inf/NaN.
pct_diff = xr.Dataset()
for name in comps:
    pct_diff[name] = 100.0 * (ds["CarbonPlan"] - ds[name]) / ds[name]

In [None]:
# Percent-difference maps on the same 3x2 layout, with one shared colorbar
# (add_colorbar is left at its default).
nrows = 3
ncols = 2
names = [None, None] + comps
titles = [
    None,
    None,
    "Difference to Harris",
    "Difference to Spawn",
    "Difference to GEDI",
    "Difference to Xu",
]
plot_params = {
    "cmap": cm.RdBu,
    "var_lims": (-200, 200),
    "label": "Percent Difference in Biomass Density (%)",
}

plot_map(data=pct_diff, names=names, titles=titles, plot_params=plot_params)

# Scatter plots


In [None]:
# Flatten the gridded dataset to one row per cell; the realm labels "nan" and
# "ice" are replaced with NaN so that dropna() removes those rows later.
df = ds.to_dataframe()
df["realm"] = df.realm.replace(["nan", "ice"], np.nan)

In [None]:
def subplot_hist(
    ax,
    data,
    x_col_name,
    y_col_name,
    x_name,
    y_name,
    plot_params,
    hue_name=None,
    plot_metrics=True,
):
    """Draw a 2-D histogram of two biomass columns and report agreement metrics.

    Args:
        ax: matplotlib axes to draw on.
        data: DataFrame holding both columns (and the hue column, if used).
        x_col_name: column on the x axis; treated as the reference values.
        y_col_name: column on the y axis; treated as the predictions.
        x_name, y_name: names used in the axis labels.
        plot_params: dict with "xmin", "xmax", "unit" and "text_x"; optional
            "ymin" / "ymax" set the y limits (x limits are reused otherwise).
        hue_name: optional column used to colour the histogram.
        plot_metrics: when False the bias / MAE / R2 annotations are not drawn.

    Returns:
        dict with "r2", "bias" and "mae" as plain Python floats. The metrics use
        every row of ``data``; only the drawn histogram is limited to values
        below ``xmax``.
    """
    xmin = plot_params["xmin"]
    xmax = plot_params["xmax"]
    # optional y limits; a missing key simply means "reuse the x limits"
    ymin = plot_params.get("ymin")
    ymax = plot_params.get("ymax")
    unit = plot_params["unit"]

    # 1:1 reference line
    ax.plot([xmin, xmax], [xmin, xmax], "0.5")
    x_col = data[x_col_name].values
    y_col = data[y_col_name].values
    # cast to built-in floats so the returned dict serialises with json
    # whatever the input dtype is
    bias = float(np.mean(y_col - x_col))
    r2 = float(r2_score(x_col, y_col))
    mae = float(mean_absolute_error(x_col, y_col))

    sub = data.loc[(data[x_col_name] < xmax) & (data[y_col_name] < xmax)]
    sns.histplot(
        data=sub, ax=ax, x=x_col_name, y=y_col_name, hue=hue_name, bins=100, pthresh=0.01, pmax=0.3
    )
    if plot_metrics:
        ax.text(plot_params["text_x"], xmax * 0.9, f"bias = {round(bias, 2)} {unit}")
        ax.text(plot_params["text_x"], xmax * 0.81, f"MAE = {round(mae, 2)} {unit}")
        ax.text(plot_params["text_x"], xmax * 0.72, f"R$^2$ = {round(r2, 2)}")
    if unit != "":
        unit_str = f"({unit})"
    else:
        unit_str = ""
    ax.set_xlabel(f"Biomass from {x_name} {unit_str}")
    ax.set_ylabel(f"Biomass from {y_name} {unit_str}")
    ax.set_xlim(xmin, xmax)
    if ymin is not None:
        ax.set_ylim(ymin, ymax)
    else:
        ax.set_ylim(xmin, xmax)

    return {"r2": r2, "bias": bias, "mae": mae}

In [None]:
# Shared settings for the scatter/histogram helpers: axis limits, the unit shown
# in labels, and the x position of the metric annotations.
# NOTE: this rebinds the `plot_params` used by the map cells above.
plot_params = {
    "xmin": -10,
    "xmax": 410,
    "unit": "Mg/ha",
    "text_x": 10,
    #     "text_y1": 450,
    #     "text_y2": 420,
    #     "text_y3": 390,
    #     "ticks": np.arange(0, 510, 100),
}

In [None]:
# Rows with a value in every column (all products, realm and AR6 mask).
nonull = df.dropna()

In [None]:
# Paired CarbonPlan (y) and Harris (x) values for rows where Harris is below 1000.
harris_below_1000 = nonull.loc[nonull.Harris < 1000]
y = harris_below_1000.CarbonPlan.values
x = harris_below_1000.Harris.values

In [None]:
# R2 with Harris passed as the first argument and CarbonPlan as the second.
r2_score(x, y)

In [None]:
# One histogram panel per comparison product; the metrics returned by
# subplot_hist are collected per product in `out_stats`.
fig, axarr = plt.subplots(nrows=1, ncols=4, figsize=(15, 5))
out_stats = {}
for panel, name in zip(axarr, comps):
    sub = df[["CarbonPlan", name]].dropna(how="any")
    out_stats[name] = subplot_hist(
        ax=panel,
        data=sub,
        x_col_name=name,
        y_col_name="CarbonPlan",
        x_name=name,
        y_name="CarbonPlan",
        plot_params=plot_params,
    )
plt.tight_layout()
plt.savefig("biomass_comparison_scatter.png")
plt.show()
plt.close()

In [None]:
import json

# Show the metrics as a JSON string (also checks that they serialise).
json.dumps(out_stats)

In [None]:
# Write the per-product metrics to S3 through the authenticated filesystem.
# The path hard-codes "v1.2" rather than using `version`.
with fs.open("s3://carbonplan-climatetrace/v1.2/map/v2/diff_metrics.json", "w") as f:
    json.dump(out_stats, f)

In [None]:
# Same comparison as above, with the 2-D histograms coloured by realm.
# One panel per entry of `comps`: the previous fixed ncols=3 left no axes for the
# fourth product and raised an IndexError.
fig, axarr = plt.subplots(nrows=1, ncols=len(comps), figsize=(15, 5))
for i, name in enumerate(comps):
    sub = df[["CarbonPlan", name, "realm"]].dropna(how="any")
    subplot_hist(
        ax=axarr[i],
        data=sub,
        x_col_name=name,
        y_col_name="CarbonPlan",
        x_name=name,
        y_name="CarbonPlan",
        plot_params=plot_params,
        hue_name="realm",
    )
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# Distinct realm labels present in the data.
# NOTE: this rebinds `realms`, which earlier held the realm-mask dataset.
realms = df.realm.dropna().unique()

In [None]:
# Grid of histograms: one row per realm, one column per comparison product.
# The grid is sized from the data (5 inches of height per row) instead of a fixed
# 6 rows, and squeeze=False keeps `axarr` two-dimensional for any realm count.
fig, axarr = plt.subplots(
    nrows=len(realms), ncols=len(comps), figsize=(15, 5 * len(realms)), squeeze=False
)
for i, realm in enumerate(realms):
    for j, name in enumerate(comps):
        sub = df.loc[df.realm == realm][["CarbonPlan", name]].dropna(how="any")
        # subplot_hist takes the frame plus column names; the x_col= / y_col=
        # keywords used here before are not part of its signature (TypeError)
        subplot_hist(
            ax=axarr[i, j],
            data=sub,
            x_col_name=name,
            y_col_name="CarbonPlan",
            x_name=name,
            y_name="CarbonPlan",
            plot_params=plot_params,
        )
        # label each row once, on its second panel
        if j == 1:
            axarr[i, j].set_title(realm)
plt.tight_layout()
plt.show()
plt.close()

In [None]:
def subplot_kde(
    ax, data, x_col_name, y_col_name, hue_name, x_name, y_name, plot_params, sample_size=int(1e6)
):
    """Draw bivariate KDE contours of two biomass columns with agreement metrics.

    Args:
        ax: matplotlib axes to draw on.
        data: DataFrame holding both columns and the hue column.
        x_col_name: column on the x axis; treated as the reference values.
        y_col_name: column on the y axis; treated as the predictions.
        hue_name: column used to split the contours into groups.
        x_name, y_name: names used in the axis labels.
        plot_params: dict with "xmin", "xmax", "unit" and "text_x".
        sample_size: the plotted rows are randomly (unseeded) down-sampled to
            this many rows when there are more.

    The bias / MAE / R2 annotations use every row of ``data``; only the drawn
    contours are limited to values below ``xmax`` and to the sample.
    """
    xmin = plot_params["xmin"]
    xmax = plot_params["xmax"]
    unit = plot_params["unit"]

    # 1:1 reference line
    ax.plot([xmin, xmax], [xmin, xmax], "0.5")
    x_col = data[x_col_name].values
    y_col = data[y_col_name].values
    # use this call's own columns; the previous `ycol - xcol` read notebook
    # globals left behind by another cell
    bias = np.mean(y_col - x_col)
    r2 = r2_score(x_col, y_col)
    mae = mean_absolute_error(x_col, y_col)

    sub = data.loc[(data[x_col_name] < xmax) & (data[y_col_name] < xmax)]
    if len(sub) > sample_size:
        sub = sub.sample(sample_size)
    # honour the caller's hue column (it was hard-coded to "realm" before)
    sns.kdeplot(
        data=sub, x=x_col_name, y=y_col_name, hue=hue_name, levels=7, linewidth=0.5, ax=ax
    )
    ax.text(plot_params["text_x"], xmax * 0.9, f"bias = {round(bias, 2)} {unit}")
    ax.text(plot_params["text_x"], xmax * 0.81, f"MAE = {round(mae, 2)} {unit}")
    ax.text(plot_params["text_x"], xmax * 0.72, f"R$^2$ = {round(r2, 2)}")
    if unit != "":
        unit_str = f"({unit})"
    else:
        unit_str = ""
    ax.set_xlabel(f"Biomass from {x_name} {unit_str}")
    ax.set_ylabel(f"Biomass from {y_name} {unit_str}")
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(xmin, xmax)


#     ticks = np.arange(0, xmax, step)
#     ax.set_xticks(ticks)
#     ax.set_yticks(ticks)

In [None]:
# KDE contours per comparison product, split by realm.
fig, axarr = plt.subplots(nrows=1, ncols=4, figsize=(15, 5))
for panel, name in zip(axarr, comps):
    sub = df[["CarbonPlan", name, "realm"]].dropna(how="any")

    subplot_kde(
        ax=panel,
        data=sub,
        x_col_name=name,
        y_col_name="CarbonPlan",
        hue_name="realm",
        x_name=name,
        y_name="CarbonPlan",
        plot_params=plot_params,
    )
plt.tight_layout()
plt.show()
plt.close()

### Training data comparison


In [None]:
# Training biomass against Harris as KDE contours split by realm, drawn from a
# 1e5-row random sample.
plt.figure(figsize=(6, 6))
sub = df[["training_biomass", "Harris", "realm"]].dropna(how="any")
ax = plt.gca()
subplot_kde(
    ax=ax,
    data=sub,
    x_col_name="Harris",
    y_col_name="training_biomass",
    hue_name="realm",
    # the x axis is always Harris here; `name` was a leftover loop variable
    # from an earlier cell and labelled the axis with whatever it last held
    x_name="Harris",
    y_name="this study",
    plot_params=plot_params,
    sample_size=int(1e5),
)
# move the realm legend outside the plotting area
ax.legend_.set_bbox_to_anchor((1.2, 0.6))
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# Training biomass against Harris as a 2-D histogram coloured by realm.
fig, ax = plt.subplots(figsize=(6, 6))
sub = df[["training_biomass", "Harris", "realm"]].dropna(how="any")
subplot_hist(
    ax=ax,
    data=sub,
    x_col_name="Harris",
    y_col_name="training_biomass",
    x_name="Harris",
    y_name="This study",
    plot_params=plot_params,
    hue_name="realm",
)
plt.tight_layout()
plt.show()
plt.close()

In [None]:
import regionmask

# Attach a readable AR6 region name to every row with training and Harris values.
sub = df[["training_biomass", "Harris", "ar6_mask"]].dropna(how="any")
ar6_names = regionmask.defined_regions.ar6.all
# region numbers present in the data, as integers
ar6_nums = sub.ar6_mask.dropna().astype(int).unique()
# region number -> region name
ar6_mapping = {i: ar6_names[i].name for i in ar6_nums}
# mask values are looked up directly as dictionary keys
# NOTE(review): assumes the mask holds whole numbers, so they hash equal to the integer keys -- confirm
sub["ar6_region"] = sub.ar6_mask.apply(lambda x: ar6_mapping[x])

In [None]:
# Number of AR6 regions present; the panel grid below has 8 x 7 slots.
len(ar6_nums)

In [None]:
# One small 2-D histogram (Harris vs training biomass) per AR6 region.
xmin = plot_params["xmin"]
xmax = plot_params["xmax"]
unit = plot_params["unit"]


plt.figure(figsize=(16, 17))
for i, num in enumerate(sorted(ar6_mapping)):
    # fixed 8 x 7 grid, i.e. room for at most 56 regions
    plt.subplot(8, 7, i + 1)
    to_plot = sub.loc[sub.ar6_mask == num]
    plt.title(ar6_mapping[num])
    ax = plt.gca()
    sns.histplot(
        data=to_plot, ax=ax, x="Harris", y="training_biomass", bins=100, pthresh=0.05, pmax=0.7
    )
    # annotate each panel with its row count
    n = len(to_plot)
    ax.text(plot_params["text_x"], xmax * 0.85, f"n = {round(n, 2)}")
    ax.set(xlabel=None, ylabel=None)
    # 1:1 reference line
    plt.plot([xmin, xmax], [xmin, xmax], "0.5")
    plt.xlim(xmin, xmax)
    plt.ylim(xmin, xmax)

plt.tight_layout()
plt.savefig("ar6_region_comparison_training.png")
plt.show()
plt.close()

In [None]:
# Box plot of (training biomass - Harris) per AR6 region.
sub["diff"] = sub["training_biomass"] - sub["Harris"]

plt.figure(figsize=(12, 5))
sns.boxplot(ax=plt.gca(), x="ar6_region", y="diff", data=sub, whis=1.5, width=0.6)
plt.ylim(-200, 200)
# zero line across the plot; the right edge 51 is hard-coded
# NOTE(review): presumably chosen to cover the number of regions plotted -- confirm
plt.plot([-0.5, 51], [0, 0], "0.5")
plt.xlim(-0.5, 51)
plt.xticks(rotation=90)
plt.xlabel("AR6 Regions")
plt.ylabel("Biomass Density Difference")
plt.savefig("ar6_region_comparison_boxplot_training.png")
plt.show()
plt.close()

# country level comparisons


In [None]:
# Country-level CarbonPlan biomass: mean of `agbd` per ISO3 country code.
# NOTE: this rebinds `df`, which held the per-cell dataframe in the sections above.
df = pd.read_csv(f"s3://carbonplan-climatetrace/{version}/country_rollups_AGB.csv")
df = df.groupby("iso3_country").agbd.mean().reset_index()
df = df.rename(columns={"agbd": "CarbonPlan"})

In [None]:
# Add one column per comparison product. merge() defaults to an inner join, so
# only countries present in every table are kept.
comps = ["Harris", "Spawn", "GEDI", "Xu"]
for name in comps:
    temp = pd.read_csv(
        f"s3://carbonplan-climatetrace/validation/{name.lower()}_country_rollups_agbd.csv"
    )
    temp = temp.rename(columns={"agbd": name})
    df = df.merge(temp, on=["iso3_country"])

In [None]:
# Preview the merged country table.
df.head()

In [None]:
# Country means: each comparison product (x) against CarbonPlan (y), one colour per product.
plt.figure(figsize=(5, 5))
for name in comps:
    plt.scatter(df[name].values, df.CarbonPlan.values, label=name, s=11)

xmin = -10
xmax = 350
# 1:1 reference line
plt.plot([xmin, xmax], [xmin, xmax], "0.5")
plt.legend()

plt.xlim(xmin, xmax)
plt.ylim(xmin, xmax)
plt.xlabel("Average AGBD from other studies (Mg/ha)")
plt.ylabel("Average AGBD from CarbonPlan (Mg/ha)")
plt.show()
plt.close()

In [None]:
# Countries where CarbonPlan is below 10 while Harris is above 50.
df.loc[(df.CarbonPlan < 10) & (df.Harris > 50)]

In [None]:
# Countries where CarbonPlan is below 10 while Spawn is above 50.
df.loc[(df.CarbonPlan < 10) & (df.Spawn > 50)]

# Compare with Xu 2021 to assess interannual biomass changes and emissions


In [None]:
# Container for the yearly (not time-averaged) CarbonPlan and Xu biomass.
ds_time_varying = xr.Dataset()

In [None]:
# Re-open the CarbonPlan 3000m biomass, keeping its time axis this time.
temp = xr.open_zarr(
    f"s3://carbonplan-climatetrace/{version}/results/global/3000m/raster_biomass.zarr"
)

In [None]:
# Rename time -> year and label it with the integer years 2014-2020, so it lines
# up with the "year" dimension of the Xu data added below.
ds_time_varying["CarbonPlan"] = temp.rename({"time": "year"}).assign_coords(
    {"year": np.arange(2014, 2021)}
)["AGB"]

In [None]:
# Yearly Xu biomass on the 3000m grid.
# NOTE(review): coordinates are not rounded here, unlike in the snapshot comparison above -- confirm they align
temp = xr.open_zarr("s3://carbonplan-climatetrace/validation/xu_3000m_agbd.zarr/")
ds_time_varying["Xu"] = temp["agbd"]

In [None]:
import copy

# Diverging colormap that draws masked/NaN cells in light gray.
# It is bound to its own name: the previous `cm = ...` shadowed the
# `from matplotlib import cm` import, so re-running any earlier cell that uses
# `cm.Greens` / `cm.RdBu` failed. The colormap is copied before being customised
# so the object returned by get_cmap is not modified in place.
rdbu_cmap = copy.copy(plt.get_cmap("RdBu"))
rdbu_cmap.set_bad("lightgray")

In [None]:
# Year-over-year change: each year minus the previous one (the first year has no predecessor, giving NaN).
delta_biomass_absolute = ds_time_varying - ds_time_varying.shift(year=1)

In [None]:
# The same change as a percentage of the previous year's value.
delta_biomass_percentage = delta_biomass_absolute / ds_time_varying.shift(year=1) * 100

In [None]:
# Stack both change measures along a new "metric" dimension labelled by unit,
# keep 2015-2020 and load the result into memory.
delta_ds = (
    xr.concat([delta_biomass_absolute, delta_biomass_percentage], dim="metric")
    .assign_coords({"metric": ["Mg/ha", "%"]})
    .sel(year=slice(2015, 2020))
    .compute()
)

In [None]:
# mask to land only
delta_ds = delta_ds.where(land_mask.land_mask == 0).compute()
# `realms` was rebound to an array of realm names in the scatter-plot section, so
# `realms.realm` no longer works here. Re-open the realm mask (with the same
# 4-decimal coordinate rounding as before) under its own name instead.
realm_mask = xr.open_zarr("s3://carbonplan-climatetrace/validation/realm_mask.zarr")
realm_mask = realm_mask.assign_coords(
    {"lat": realm_mask.lat.round(4), "lon": realm_mask.lon.round(4)}
)
delta_ds["realm"] = realm_mask.realm

In [None]:
# Flatten the absolute (Mg/ha) changes to a dataframe.
delta_df = delta_ds.sel(metric="Mg/ha").to_dataframe()

In [None]:
# Replace the realm labels "nan" and "ice" with NaN so dropna() removes them.
delta_df["realm"] = delta_df.realm.replace(["nan", "ice"], np.nan)

In [None]:
# Keep only rows where both changes and the realm are present.
delta_df = delta_df[["CarbonPlan", "Xu", "realm"]].dropna(how="any")

### For small changes, Xu's change is much higher


In [None]:
# Limits for the small-change comparison: x spans -5..5 and y spans -1..1.
# NOTE: this rebinds the `plot_params` used by earlier sections.
plot_params = {"xmin": -5, "xmax": 5, "unit": "Mg/ha", "text_x": 5, "ymin": -1, "ymax": 1}

In [None]:
# 2-D histogram of yearly biomass change, limited to CarbonPlan changes within
# +/-1 and Xu changes within +/-5. With a 1x1 grid `axarr` is a single Axes.
fig, axarr = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
for i, name in enumerate(["Xu"]):
    sub = delta_df[["CarbonPlan", name]].loc[
        delta_df["CarbonPlan"].between(-1, 1) & delta_df["Xu"].between(-5, 5)
    ]
    # subplot_hist takes the frame plus column names; the x_col= / y_col=
    # keywords used here before are not part of its signature (TypeError)
    subplot_hist(
        ax=axarr,
        data=sub,
        x_col_name=name,
        y_col_name="CarbonPlan",
        x_name=name,
        y_name="CarbonPlan",
        plot_params=plot_params,
        plot_metrics=False,
    )
plt.tight_layout()
plt.show()
plt.close()

In [None]:
# our stdev is much lower than Xu's

In [None]:
# Map of the per-cell standard deviation of CarbonPlan biomass across years.
ds_time_varying["CarbonPlan"].std(dim="year").plot(robust=True)

In [None]:
# The same map for the Xu biomass.
ds_time_varying["Xu"].std(dim="year").plot(robust=True)

# assess emissions from Xu 2021 and compare with v0 and v1


In [None]:
# CarbonPlan biomass at 10km with the Xu 10km biomass attached as a second variable.
ds_10km = xr.open_zarr("s3://carbonplan-climatetrace/v1.2/results/global/10km/raster_biomass.zarr")
ds_10km = ds_10km.rename({"AGB": "CarbonPlan"}).rename({"x": "lon", "y": "lat"}).drop("spatial_ref")
xu = xr.open_zarr("s3://carbonplan-climatetrace/validation/xu_10km_agbd.zarr/")["agbd"]
# overwrite the Xu coordinates with the CarbonPlan ones so the two variables align exactly
# NOTE(review): assumes both stores share the same grid shape and orientation -- confirm
ds_10km["Xu"] = xu.assign_coords({"lat": ds_10km.lat.values, "lon": ds_10km.lon.values})
# rechunk, then pull everything into memory
ds_10km = ds_10km.chunk({"year": -1, "lat": 400, "lon": 400}).load()

In [None]:
# v0.4 total emissions at 3000m.
v0_emissions = xr.open_zarr("s3://carbonplan-climatetrace/v0.4/global/3000m/raster_tot.zarr/")

In [None]:
# Prepare v0 emissions for reprojection: tag the CRS and use x/y dimension names
# in (year, y, x) order.
# NOTE(review): the `.rio` accessor presumably needs rioxarray to have been
# imported; no such import is visible in this notebook -- confirm.
v0_emissions = (
    v0_emissions.sel(year=slice(2014, 2020))
    .rio.write_crs("EPSG:4326")
    .rename({"lat": "y", "lon": "x"})
    .transpose("year", "y", "x")
)

# Resample onto the 10km grid, summing the source cells that fall into each target cell.
v0_10km = v0_emissions.rio.reproject_match(
    ds_10km.sel(year=2015).rename({"lat": "y", "lon": "x"}),
    resampling=rasterio.enums.Resampling.sum,
)

In [None]:
# Xu emission rasters, one variable per emission mechanism.
xu_emissions = xr.Dataset()
mechanisms = ["deforestation", "degradation", "fireforest", "firenonforest"]
for variable in mechanisms:
    xu_emissions[variable] = xr.open_rasterio(
        f"s3://carbonplan-climatetrace/validation/xu2021/{variable}_emission_0119_v2_inTg.tif"
    )

In [None]:
# Bands become years 2001-2019 and x/y become lon/lat.
# NOTE(review): assumes the rasters hold 19 bands in chronological order -- confirm
xu_emissions = xu_emissions.rename({"band": "year", "x": "lon", "y": "lat"}).assign_coords(
    {"year": np.arange(2001, 2020)}
)

In [None]:
# NOTE: in-place scaling -- re-running this cell applies the factor a second time.
xu_emissions *= 3.67 * 1e12 / 1e6  # xu reports emissions in Tg C so we convert to tCO2

In [None]:
# v1.2 emissions at 3000m; the fire and clearing components are read from it below.
v1_emissions = xr.open_zarr(
    "s3://carbonplan-climatetrace/v1.2/results/global/3000m/raster_split.zarr"
)

In [None]:
# Same preparation and sum-resampling onto the 10km grid as for the v0 emissions.
# NOTE(review): the `.rio` accessor presumably needs rioxarray to have been
# imported; no such import is visible in this notebook -- confirm.
v1_emissions = (
    v1_emissions.sel(year=slice(2014, 2020))
    .rio.write_crs("EPSG:4326")
    .rename({"lat": "y", "lon": "x"})
    .transpose("year", "y", "x")
)
v1_10km = v1_emissions.rio.reproject_match(
    ds_10km.sel(year=2015).rename({"lat": "y", "lon": "x"}),
    resampling=rasterio.enums.Resampling.sum,
)

### for a regional subset look at how the emissions differ among v0, v1, and Xu


In [None]:
# Regional window used by the three plots below. The latitude slice runs from
# high to low.
# NOTE(review): that ordering only selects data if latitude is stored north-to-south -- confirm
subset = {"lat": slice(39.7, 38.9), "lon": slice(-123.3, -122.6)}  # mendocino
# subset = {'lat': slice(34.9,34.2), 'lon': slice(-120.2, -119.5)} #thomas

In [None]:
# v0 emissions in the window, one panel per year, on a shared colour scale.
v0_10km.sel({"x": subset["lon"], "y": subset["lat"]}).emissions.plot(col="year", vmax=1.25e6)

In [None]:
# Xu emissions in the window: one row of yearly panels per mechanism.
for mechanism in mechanisms:
    xu_emissions[mechanism].sel(subset).sel(year=slice(2014, 2019)).plot(col="year", vmax=1.25e6)

In [None]:
# v1 emissions in the window: fire plus clearing, one panel per year.
(v1_10km["emissions_from_fire"] + v1_10km["emissions_from_clearing"]).sel(
    {"x": subset["lon"], "y": subset["lat"]}
).sel(year=slice(2014, 2019)).plot(col="year", vmax=1.25e6)

### for some sample gridcells look at how the biomass in xu fluctuates and the resulting emissions


In [None]:
# For a few sample cells: Xu biomass over 2014-2019 (top row) and the Xu
# emissions per mechanism (bottom row), one column per cell.
comparison_cells = [(52.55, 107.65), (-15.95, -62.45), (0.35, 111.55), (63.25, 128.75)]
fig, axarr = plt.subplots(nrows=2, ncols=len(comparison_cells), figsize=(20, 4), sharex=True)
# rename once so the automatic axis label reads "Biomass [t/ha]"
xu_biomass = ds_10km.rename({"Xu": "Biomass [t/ha]"})["Biomass [t/ha]"].sel(year=slice(2014, 2019))
for i, (lat, lon) in enumerate(comparison_cells):
    # The biomass series does not depend on the mechanism, so it is drawn once
    # per cell; it used to be redrawn (and labelled) once per mechanism.
    xu_biomass.sel(lat=lat, lon=lon, method="nearest").plot(ax=axarr[0, i])
    for mechanism in mechanisms:
        xu_emissions[mechanism].sel(year=slice(2014, 2019)).sel(
            lat=lat, lon=lon, method="nearest"
        ).plot(ax=axarr[1, i], label=mechanism)
    axarr[1, i].set_ylabel("Emissions [tCO2]")
plt.legend()
plt.tight_layout()

# change point detection validation


1. First randomly select one of the 280 tiles, then randomly select a pixel within that tile.
2. If that pixel is all null, discard it and repeat from step 1 without incrementing any pixel counter.
3. If that pixel is not null, plot the fillna version and the smoothed version.
4. Increment the pixel counter for either the "with break point" or the "without break point" group.
5. Repeat until we have 100 points in each group.


In [None]:
import geopandas as gpd

# Country polygons used to attach an area and a bounding box to each country.
shapes_file = "s3://carbonplan-climatetrace/inputs/shapes/countries.shp"
shapes_df = gpd.read_file(shapes_file)

In [None]:
# Polygon area in the units of the file's CRS; used below only to sort countries by size.
# NOTE(review): if the shapefile is in a geographic CRS this is in squared degrees -- confirm
shapes_df["country_area"] = shapes_df.geometry.area

In [None]:
# Per-country bounding boxes as a dataframe, one column per bound.
bounds = shapes_df.geometry.bounds

In [None]:
# Copy each bounding-box column onto the shapes table.
for col in bounds.columns:
    shapes_df[col] = bounds[col]

In [None]:
# Attach area and bounding box to the country table by ISO3 code (inner join by default).
# NOTE: this rebinds `df`, so re-running the cell merges the shape columns a second time.
df = df.merge(
    shapes_df[["alpha3", "country_area"] + list(bounds.columns)],
    left_on="iso3_country",
    right_on="alpha3",
)

In [None]:
# Countries whose CarbonPlan mean is exactly 0, largest area first.
df.loc[(df.CarbonPlan == 0)].sort_values(by="country_area", ascending=False)