<img width="100" src="https://carbonplan-assets.s3.amazonaws.com/monogram/dark-small.png" style="margin-left:0px;margin-top:20px"/>

# Forest Emissions Tracking - Validation

_CarbonPlan ClimateTrace Team_

This notebook compares our estimates of country-level forest emissions to prior estimates from other
groups. The notebook currently compares against:

- Global Forest Watch (Zarin et al. 2016)
- Global Carbon Project (Friedlingstein et al. 2020)


In [None]:
import geopandas
import pandas as pd
from io import StringIO
import matplotlib.pyplot as plt
import numpy as np
from carbonplan_styles.mpl import set_theme

# Activate the plotting theme from carbonplan_styles.mpl before any figure is created.
set_theme()

In [None]:
# Font size passed to every axis label and subplot title in the figures below.
axis_name_size = 12

In [None]:
# country shapes from GADM36 (the `name` and `alpha3` columns are used as join keys below)
countries = geopandas.read_file("s3://carbonplan-climatetrace/inputs/shapes/countries.shp")

# CarbonPlan's emissions (v0.4 country rollups; keyed by `iso3_country` and `begin_date` below)
emissions = pd.read_csv("s3://carbonplan-climatetrace/v0.4/country_rollups_emissions.csv")

In [None]:
# CarbonPlan's v0.4 country rollup of AGB; joined to the country shapes on `iso3_country` below.
agb = pd.read_csv("s3://carbonplan-climatetrace/v0.4/country_rollups_agb.csv")

In [None]:
# Input data
# ----------

# GFW emissions: read the per-country sheet and drop every row that has a missing value.
gfw_emissions = pd.read_excel(
    "s3://carbonplan-climatetrace/validation/gfw_global_emissions.xlsx",
    sheet_name="Country co2 emissions",
).dropna(axis=0)

# Select threshold == 10. The explicit copy makes the filtered result an independent
# frame, so the rename below does not write into a slice of the unfiltered table
# (which is what triggers pandas' SettingWithCopyWarning).
gfw_emissions = gfw_emissions[gfw_emissions["threshold"] == 10].copy()

# Rename GFW country names to the spellings used by the `countries` table, because the
# two are merged on the country name further down.
# NOTE(review): several targets look odd ("Côte dIvoire", "Lao Peoples Democratic
# Republic", "Zimbabwe)"). They presumably mirror the exact strings stored in the
# shapefile -- confirm against `countries.name` before "correcting" any of them.
gfw_name_fixes = {
    "Republic of Congo": "Congo",
    "Bolivia": "Bolivia (Plurinational State of)",
    "Brunei": "Brunei Darussalam",
    "Côte d'Ivoire": "Côte dIvoire",
    "Laos": "Lao Peoples Democratic Republic",
    "Swaziland": "Eswatini",
    "Tanzania": "United Republic of Tanzania",
    "Venezuela": "Venezuela (Bolivarian Republic of)",
    "Vietnam": "Viet Nam",
    "Virgin Islands, U.S.": "United States Virgin Islands",
    "Zimbabwe": "Zimbabwe)",
}
# Exact-match replacement; names that are not keys of the mapping are left untouched.
gfw_emissions["country"] = gfw_emissions["country"].replace(gfw_name_fixes)

In [None]:
# Sum all countries per `begin_date`, average those totals over time, and scale by 1e9.
# numeric_only=True keeps the string columns (country code, end date) out of the sum;
# current pandas would otherwise concatenate them and then fail inside mean().
emissions.groupby("begin_date").sum(numeric_only=True).mean() / 1e9

In [None]:
# Attach each tabular dataset to the country shapes (inner joins, the merge default).
# GFW rows are matched on the country name; the CarbonPlan rollups on the alpha-3 code.
gfw_by_name = gfw_emissions.rename(columns={"country": "name"})
gfw_countries = countries.merge(gfw_by_name, on="name")

iso_to_alpha3 = {"iso3_country": "alpha3"}
trace_countries = countries.merge(emissions.rename(columns=iso_to_alpha3), on="alpha3")
agb_countries = countries.merge(agb.rename(columns=iso_to_alpha3), on="alpha3")

In [None]:
# Build one table holding both AGB stock estimates per country, joined on `alpha3`.
trace_side = agb_countries.rename(columns={"agb": "trace_agb"})
gfw_side = gfw_countries[["alpha3", "abg_co2_stock_2000__Mg"]].rename(
    columns={"abg_co2_stock_2000__Mg": "gfw_agb_co2"}
)
agb = pd.merge(left=trace_side, right=gfw_side, on="alpha3")

# Convert the CarbonPlan value with the factors 0.5 and 3.67 (presumably the carbon
# fraction of biomass and the CO2-to-C mass ratio, 44/12 -- confirm), then divide both
# columns by 1e6 (Mg -> Mt for the GFW column, per its name).
agb["trace_agb_co2"] = agb["trace_agb"] * 0.5 * 3.67 / 1e6
agb["gfw_agb_co2"] = agb["gfw_agb_co2"] / 1e6

# Keep only the identifiers, the geometry and the two comparable columns.
agb = agb[["name", "alpha3", "geometry", "trace_agb_co2", "gfw_agb_co2"]]

In [None]:
# Reshape both datasets to "wide" format: one row per date, one column per country.
trace_long = emissions.drop(columns=["end_date"])
trace_wide = trace_long.pivot(index="begin_date", columns="iso3_country")
# pivot leaves a two-level column index (value column, country); keep only the country level
trace_wide = trace_wide.droplevel(0, axis=1)
trace_wide.index = pd.to_datetime(trace_wide.index)

# GFW stores one column per year; select those columns and flip them into rows.
gfw_yearly = gfw_emissions.set_index("country").filter(
    regex="whrc_aboveground_co2_emissions_Mg_.*"
)
gfw_wide = gfw_yearly.transpose()
# The last four characters of each column name are the year; index by January 1st of it.
gfw_wide.index = [pd.to_datetime(f"{label[-4:]}-01-01") for label in gfw_wide.index]

gfw_wide.head()

In [None]:
# v0.4 country rollup restricted to emissions from clearing (inspected in the next cells).
df = pd.read_csv("s3://carbonplan-climatetrace/v0.4/country_rollups_emissions_from_clearing.csv")

In [None]:
# Preview the first rows of the clearing rollup.
df.head()

In [None]:
# Clearing emissions for the rows with country code "AGO", divided by 1e6
# (t -> Mt if `tCO2eq` is in tonnes, as the column name suggests).
df.loc[df.iso3_country == "AGO"].tCO2eq / 1e6

## Part 1 - Compare time-averaged country emissions (tropics only)


In [None]:
# Time-averaged emissions per country from both sources, divided by 1e6.
# To compare a single year instead of the mean, swap in e.g.
# `trace_wide.loc['2020-01-01'] / 1e6` and `gfw_wide.loc['2020-01-01'] / 1e6`.
trace_mean = trace_wide.mean() / 1e6  # indexed by alpha-3 code
gfw_mean = gfw_wide.mean() / 1e6  # indexed by country name

# Each assignment aligns on the current index, so the table is keyed by `alpha3`
# for the CarbonPlan column and re-keyed by `name` for the GFW column.
avg_emissions = (
    countries.set_index("alpha3")
    .assign(trace=trace_mean)
    .reset_index()
    .set_index("name")
    .assign(gfw=gfw_mean)
    .dropna()  # drops every row with a missing value in any column
)

In [None]:
# Number of countries left after dropping rows with missing values.
len(avg_emissions)

In [None]:
from sklearn.metrics import r2_score

In [None]:
# R^2 with GFW passed as the first argument (y_true) and CarbonPlan as the second (y_pred).
r2_score(avg_emissions.gfw, avg_emissions.trace)

In [None]:
# Per-country error of CarbonPlan relative to GFW: signed ("me"), absolute ("mae")
# and absolute percentage ("mape"). The column means printed at the end are the
# aggregate ME / MAE / MAPE.
avg_emissions["me"] = avg_emissions.trace - avg_emissions.gfw
avg_emissions["mae"] = (avg_emissions.trace - avg_emissions.gfw).abs()
avg_emissions["mape"] = (avg_emissions.trace - avg_emissions.gfw).abs() / avg_emissions.gfw * 100
# Division by a zero GFW value yields inf; turn it into NaN so mean() skips it.
avg_emissions = avg_emissions.replace(np.inf, np.nan)
# numeric_only=True leaves out the geometry and string columns, which current pandas
# refuses to average (TypeError) instead of silently dropping them.
avg_emissions.mean(numeric_only=True).round(2)

In [None]:
# Countries whose GFW average exceeds 1 (after the 1e6 scaling above) and whose
# absolute percentage error exceeds 1 %.
sub = avg_emissions.loc[(avg_emissions.mape > 1) & (avg_emissions.gfw > 1)]
sub

In [None]:
# Fraction of countries whose GFW average exceeds 1 (mean of a boolean column).
(avg_emissions.gfw > 1).mean()

In [None]:
# The 20 countries with the largest absolute error between the two estimates.
top20 = avg_emissions.sort_values(by="mae", ascending=False).head(20)

In [None]:
# Short display names used for subplot titles; keys are the long country names
# that index `avg_emissions` / `top20`.
names = {
    "Democratic Republic of the Congo": "DRC",
    "Lao Peoples Democratic Republic": "Laos",
    "Bolivia (Plurinational State of)": "Bolivia",
    "Côte dIvoire": "Côte d'Ivoire",
    "United Republic of Tanzania": "Tanzania",
    "Viet Nam": "Vietnam",
    "Venezuela (Bolivarian Republic of)": "Venezuela",
}

In [None]:
# Small-multiples figure: one panel per `top20` country with both time series,
# laid out on a 5 x 4 grid in the order of `top20`.
plt.figure(figsize=(12, 10))

panel_keys = top20.reset_index()[["name", "alpha3"]]
tick_positions = ["2001-01-01", "2010-01-01", "2020-01-01"]
tick_labels = [2001, 2010, 2020]

for panel, (country, iso3) in enumerate(zip(panel_keys["name"], panel_keys["alpha3"])):
    plt.subplot(5, 4, panel + 1)

    # GFW columns are keyed by country name, CarbonPlan columns by alpha-3 code.
    gfw_series = gfw_wide[country]
    trace_series = trace_wide[iso3]
    plt.plot(gfw_series.index, gfw_series.values / 1e6, label="Zarin et al.")
    plt.plot(trace_series.index, trace_series.values / 1e6, label="CarbonPlan")
    plt.xticks(tick_positions, tick_labels)

    # Use the short display name when one is defined, otherwise the full name.
    plt.title(names.get(country, country), fontsize=axis_name_size)

    # The first row of panels (0-3) keeps autoscaled limits; the rest share 0-200.
    if panel > 3:
        plt.ylim(0, 200)
    # A single y-axis label, placed on panel 8, serves the whole figure.
    if panel == 8:
        plt.ylabel("Emissions [Mt CO2 / yr]", fontsize=axis_name_size)

# Every panel carries the same two labels, so build one figure-level legend from
# the handles of the last panel drawn.
fig = plt.gcf()
ax = plt.gca()
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc="upper center", ncol=2, bbox_to_anchor=(0.5, 1.03))

plt.tight_layout()
plt.savefig("top20_time_series.png", bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Scatter plots: CarbonPlan vs. Zarin on log-log axes with a grey 1:1 reference line.
# Each entry: (table, x column, y column, (axis min, axis max), x label, y label, title).
scatter_panels = [
    (
        avg_emissions,
        "gfw",
        "trace",
        (1e-6, 1e4),
        "Zarin [Mt CO$_2$ / yr]",
        "CarbonPlan [Mt CO$_2$ / yr]",
        "a) Forest related carbon emissions",
    ),
    (
        agb,
        "gfw_agb_co2",
        "trace_agb_co2",
        (1e-4, 1e6),
        "Zarin [Mt CO$_2$]",
        "CarbonPlan [Mt CO$_2$]",
        "b) Forest AGB stock in 2000",
    ),
]

plt.figure(figsize=(10, 5))
for position, spec in enumerate(scatter_panels, start=1):
    frame, x_col, y_col, (xmin, xmax), x_label, y_label, panel_title = spec

    plt.subplot(1, 2, position)
    plt.plot([xmin, xmax], [xmin, xmax], "0.5")  # 1:1 line
    frame.plot.scatter(x_col, y_col, ax=plt.gca())

    current = plt.gca()
    current.set_xscale("log")
    current.set_yscale("log")

    plt.xlabel(x_label, fontsize=axis_name_size)
    plt.ylabel(y_label, fontsize=axis_name_size)
    plt.xlim(xmin, xmax)
    plt.ylim(xmin, xmax)
    plt.title(panel_title, fontsize=axis_name_size)

plt.tight_layout()
plt.savefig("gfw_scatter.png")

## Part 2 - Maps of Tropical Emissions


In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
# Two stacked maps: CarbonPlan's average emissions (top) and the percentage
# difference from GFW (bottom), each with its own colorbar axis on the right.

# Signed percentage difference relative to GFW, used by the bottom map.
avg_emissions["pdiff"] = (
    (avg_emissions["trace"] - avg_emissions["gfw"]) / avg_emissions["gfw"]
) * 100

# Each entry: (column, colorbar label, colormap, vmin, vmax, title).
map_panels = [
    (
        "trace",
        "Emissions [Mt CO$_2$ / yr]",
        "Reds",
        0,
        1000,
        "Forest related carbon emissions from CarbonPlan",
    ),
    (
        "pdiff",
        "Emissions Difference [%]",
        "RdBu_r",
        -20,
        20,
        "% difference from Zarin",
    ),
]

plt.figure(figsize=(14, 8))
for map_row, (column, bar_label, cmap, vmin, vmax, map_title) in enumerate(map_panels, start=1):
    ax = plt.subplot(2, 1, map_row)

    # Carve a narrow axis out of the right edge of the map to hold the colorbar.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="2%", pad=0.2)

    kwargs = dict(
        legend=True,
        legend_kwds={
            "orientation": "vertical",
            "label": bar_label,
        },
        lw=0.25,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
    )
    avg_emissions.plot(column, ax=ax, cax=cax, **kwargs)

    ax.set_title(map_title, fontsize=axis_name_size)
    ax.set_xlabel("Longitude", fontsize=axis_name_size)
    ax.set_ylabel("Latitude", fontsize=axis_name_size)

plt.tight_layout()
plt.savefig("gfw_map.png")

## Part 3 - Compare fire emissions


In [None]:
# Fire emissions, collected in a dict keyed by "<version>-<mechanism>".
# Note: this rebinds `emissions`, which previously held the total-emissions DataFrame.
emissions = {}
versions = ["v0.4"]
# Plain loops (not a comprehension) so that `version` stays bound at module level;
# the next cell reads it.
for version in versions:
    for mechanism in ["fire"]:
        rollup_key = f"{version}-{mechanism}"
        rollup_url = (
            f"s3://carbonplan-climatetrace/{version}/"
            f"country_rollups_emissions_from_{mechanism}.csv"
        )
        emissions[rollup_key] = pd.read_csv(rollup_url)

# Blue Sky fire emissions, read from a CSV in the working directory.
emissions["Blue Sky"] = pd.read_csv("forest-fires_bsa.csv")

In [None]:
# Display the CarbonPlan fire rollup; `version` is the variable left over from the
# loading loop in the previous cell.
emissions[f"{version}-fire"]

In [None]:
# Display the Blue Sky fire emissions table.
emissions["Blue Sky"]

In [None]:
version = "v0.4"

# Give the two value columns distinct names so they can sit side by side.
carbonplan_fire = emissions[f"{version}-fire"].rename(columns={"tCO2eq": "CarbonPlan"})
blue_sky_fire = emissions["Blue Sky"].rename(columns={"tCO2": "BSA"})

# Inner join: keep only country/date pairs present in both datasets
# (use how="left" to keep every CarbonPlan row instead).
join_keys = ["iso3_country", "begin_date"]
comparison = pd.merge(
    carbonplan_fire,
    blue_sky_fire,
    how="inner",
    left_on=join_keys,
    right_on=join_keys,
)

# Scale both estimates by 1e6 (axis labels below read Mt CO2 / yr) and treat a
# missing Blue Sky value as zero.
comparison["CarbonPlan"] = comparison["CarbonPlan"] / 1e6
comparison["BSA"] = (comparison["BSA"] / 1e6).fillna(0)

comparison["year"] = pd.to_datetime(comparison["begin_date"]).dt.year

In [None]:
# R^2 with Blue Sky passed as the first argument (y_true) and CarbonPlan as the second (y_pred).
r2_score(comparison.BSA, comparison.CarbonPlan)

In [None]:
# Mean signed difference (CarbonPlan minus Blue Sky) over all country/date rows.
(comparison.CarbonPlan - comparison.BSA).mean()

In [None]:
# Fraction of rows where the CarbonPlan estimate does not exceed the Blue Sky estimate.
(comparison.CarbonPlan <= comparison.BSA).mean()

In [None]:
# Number of distinct countries present in the joined table.
len(comparison.iso3_country.unique())

In [None]:
# Log-log scatter of every country/date pair: Blue Sky (x) vs. CarbonPlan (y),
# with a grey 1:1 reference line.
xmin, xmax = 1e-4, 1e4
one_to_one = [xmin, xmax]

plt.figure(figsize=(5, 5))
plt.plot(one_to_one, one_to_one, "0.5")

fire_axes = plt.gca()
comparison.plot.scatter("BSA", "CarbonPlan", ax=fire_axes)
fire_axes.set_xscale("log")
fire_axes.set_yscale("log")

plt.xlabel("BSA [Mt CO$_2$ / yr]", fontsize=axis_name_size)
plt.ylabel("CarbonPlan [Mt CO$_2$ / yr]", fontsize=axis_name_size)
plt.yticks()  # called without arguments: only queries the current ticks
plt.xlim(xmin, xmax)
plt.ylim(xmin, xmax)
plt.title("Forest fire emissions", fontsize=axis_name_size)
plt.savefig("bsa_scatter.png", bbox_inches="tight")

In [None]:
# Average every numeric column per country over all dates. numeric_only=True keeps
# the string date columns out of the mean, which current pandas would otherwise
# reject with a TypeError.
avg_yr = comparison.groupby("iso3_country").mean(numeric_only=True)

# Log-log scatter of the per-country averages: Blue Sky (x) vs. CarbonPlan (y),
# with a grey 1:1 reference line.
xmin = 1e-4
xmax = 1e4
plt.figure(figsize=(5, 5))
plt.plot([xmin, xmax], [xmin, xmax], "0.5")
avg_yr.plot.scatter("BSA", "CarbonPlan", ax=plt.gca())
plt.gca().set_xscale("log")
plt.gca().set_yscale("log")
plt.ylabel("CarbonPlan [Mt CO$_2$ / yr]", fontsize=axis_name_size)
plt.xlabel("BSA [Mt CO$_2$ / yr]", fontsize=axis_name_size)
plt.xlim(xmin, xmax)
plt.ylim(xmin, xmax)
plt.title("Forest fire emissions", fontsize=axis_name_size)
plt.tight_layout()
plt.savefig("bsa_scatter_avg.png")

In [None]:
# Preview the first rows of the joined fire-emissions table.
comparison.head()

In [None]:
# Inspect the rows for the country codes RUS and USA.
comparison.loc[comparison.iso3_country.isin(["RUS", "USA"])]

In [None]:
# Inspect the rows for the country code BRA.
comparison.loc[comparison.iso3_country.isin(["BRA"])]

In [None]:
# `emissions` was rebound to a dict of DataFrames in Part 3, so attribute access such
# as `emissions.tCO2eq` raises AttributeError when the notebook runs top to bottom.
# Reload the country rollup under its own name instead of reusing that variable.
# NOTE(review): this assumes the cell was meant to inspect the total-emissions rollup
# loaded at the top of the notebook -- confirm (the fire rollup is the alternative).
total_emissions = pd.read_csv("s3://carbonplan-climatetrace/v0.4/country_rollups_emissions.csv")
total_emissions["Mt CO2"] = total_emissions.tCO2eq / 1e6

# Rows for country code LKA; the copy makes `sub` independent so adding `year`
# does not write into a slice of `total_emissions`.
sub = total_emissions.loc[
    total_emissions.iso3_country == "LKA", ["begin_date", "Mt CO2", "iso3_country"]
].copy()
sub["year"] = pd.to_datetime(sub.begin_date).dt.year

In [None]:
# Line plot with point markers of the yearly values held in `sub`.
plt.plot(sub.year, sub["Mt CO2"], "o-")
plt.xticks([2001, 2005, 2010, 2015, 2020], [2001, 2005, 2010, 2015, 2020])
plt.ylabel("Mt CO2")
plt.grid()

In [None]:
# Show the plotted values as a table.
sub[["iso3_country", "year", "Mt CO2"]]