In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import statsmodels.formula.api as smf

from sklearn.metrics import mean_squared_error


from carbonplan_retro.data import cat
from carbonplan_retro.analysis import rfia

from carbonplan import styles

# Apply the "carbonplan_light" matplotlib theme with a 1.5x font scale; this is global
# plotting state, so it affects every figure drawn below.
styles.mpl.set_theme(style="carbonplan_light", font_scale=1.5)
# Handle on the light palette from the styles package (not referenced by the cells visible here).
colors = styles.colors.light

In [None]:
# Read the retro database from the project data catalog. The cells below iterate over it
# as a sequence of per-project mappings and read the keys "assessment_areas", "acreage",
# "opr_id", and "carbon" from each entry.
retro_json = cat.retro_db_light_json.read()

In [None]:
# Distinct assessment-area codes that appear in at least one project, as a list.
used_assessment_areas = list(
    {area["code"] for proj in retro_json for area in proj["assessment_areas"]}
)

In [None]:
# Lookup table read from CSV; later cells use its `aa_code`, `site_class`, and
# `common_practice` columns.
# NOTE(review): the path points into a user's home directory, so this cell only runs on a
# machine that has this exact file -- consider a configurable data directory.
arb_numbers = pd.read_csv("~/lost+found/2015_aa_lut.csv")

In [None]:
# Mapping of assessment-area code -> site class. Not referenced by the cells visible here;
# kept so that anything else relying on the name still works.
assessment_areas = arb_numbers.set_index("aa_code")["site_class"].to_dict()

# Constant-time membership test for the loop below; the original list is left untouched.
used_codes = set(used_assessment_areas)

# One (aa_code, site_class, rFIA median) record per lookup-table row whose assessment
# area is referenced by at least one project.
rfia_results = []
for row in arb_numbers.itertuples():
    if row.aa_code not in used_codes:
        continue

    try:
        rfia_data = rfia.load_rfia_data(row.aa_code, site_class=row.site_class)
    except Exception:
        # Report which assessment area failed to load, then let the error propagate.
        print(row.aa_code)
        raise

    # One value per inventory year.
    cp_per_inventory = rfia_data.groupby("YEAR").apply(rfia.get_rfia_slag_co2e_acre)

    # Preferred estimate: median over the 2010-2011 inventories.
    in_window = (cp_per_inventory.index >= 2010) & (cp_per_inventory.index <= 2011)
    median_cp = cp_per_inventory.loc[in_window].median()
    if np.isnan(median_cp):
        # When stitching evals across states/supersections, some old inventories are missing,
        # specifically assessment areas: 82, 93, 94, 228, 229 [WA/OR/ID mix?].
        # In this instance -- and only in this instance -- look to more recent inventories.
        median_cp = cp_per_inventory.loc[cp_per_inventory.index >= 2010].median()

    rfia_results.append((row.aa_code, row.site_class, median_cp))

# Build the comparison table once, after the loop. Doing this inside the loop rebuilt the
# frame and redid the join for every lookup-table row, and left `comparison` undefined
# when the lookup table had no rows.
rfia_df = pd.DataFrame(rfia_results, columns=["aa_code", "site_class", "rfia"])
comparison = arb_numbers.join(
    rfia_df.set_index(["aa_code", "site_class"]), on=["aa_code", "site_class"]
)

In [None]:
# Unit string appended to the axis labels of the scatter plots below; the `$...$` part
# is matplotlib mathtext for the superscript.
units = "(tCO2e acre$^{-1}$)"

In [None]:
# Overlaid histograms of the rFIA-derived and ARB common-practice values, restricted to
# rows where both values are present.
both_present = comparison.dropna(subset=["rfia", "common_practice"])[["rfia", "common_practice"]]
sns.displot(
    data=both_present,
    bins=25,
    alpha=0.25,
    palette=["g", "k"],
)

In [None]:
# From here on, work only with rows that have no missing value in any column.
comparison = comparison.dropna()

# Ordinary least squares of ARB common practice on the rFIA estimate; only the fitted
# coefficients are kept.
ols_fit = smf.ols("common_practice~rfia", data=comparison).fit()
params = ols_fit.params

# Root-mean-square error between the two series.
mse = mean_squared_error(comparison["common_practice"], comparison["rfia"])
rmse = mse ** 0.5

In [None]:
# Squared Pearson correlation between the ARB and rFIA common-practice values.
# The correlation is restricted to the two numeric columns: `comparison` also carries
# `site_class`, which later cells compare against the string "all", and DataFrame.corr()
# raises on non-numeric columns under pandas >= 2.0 (numeric_only defaults to False).
# Older pandas silently dropped such columns, so the value is unchanged there.
r2 = (comparison[["common_practice", "rfia"]].corr() ** 2).loc["rfia", "common_practice"]

In [None]:
# Display the squared correlation computed in the previous cell.
r2

In [None]:
# Scatter of ARB common practice (y) against the rFIA-derived value (x), one point per
# row of `comparison`.
g = sns.FacetGrid(data=comparison, height=4.5)
g.map(plt.scatter, "rfia", "common_practice", s=175, alpha=0.5, color=".3")

# Identical limits and ticks on both axes, so the 1:1 line runs corner to corner.
EXTENT = 215
ticks = np.arange(0, 201, 50)
plt.xlim(0, EXTENT)
plt.xticks(ticks)
plt.ylim(0, EXTENT)
plt.yticks(ticks)

# Dashed red 1:1 reference line.
plt.plot((0, EXTENT), (0, EXTENT), lw=3, ls="--", color="r")

plt.xlabel(f"rFIA Common Practice\n{units}")
plt.ylabel(f"ARB Common Practice\n{units}")

# Inside the f-string `{2}` evaluates to the integer 2, so the label renders as R^2.
summary_text = f"R$^{2}$: {r2:.2f}\nSlope: {params['rfia']:.2f}\nRMSE: {rmse:.2f}"
g.axes[0][0].annotate(summary_text, xy=(0.6, 0.25), xycoords="figure fraction")

In [None]:
# Acreage-weighted rFIA common practice per project, keyed by `opr_id`. Each assessment
# area is matched on both its code and its site class.
store = {}
for project in retro_json:
    slag_store = []
    for assessment_area in project["assessment_areas"]:
        # Assessment areas with code 999 are left out of the weighted sum.
        if assessment_area["code"] == 999:
            continue

        code_match = comparison["aa_code"] == assessment_area["code"]
        aa_slag = comparison.loc[
            code_match & (comparison["site_class"] == assessment_area["site_class"]),
            "rfia",
        ].mean()

        if np.isnan(aa_slag):
            if assessment_area["site_class"] == "all":
                # used to have `all` but got split in 2015: average over every site class
                aa_slag = comparison.loc[code_match, "rfia"].mean()
            else:
                # used to be split but got combined in 2015: fall back to the `all` row
                aa_slag = comparison.loc[
                    code_match & (comparison["site_class"] == "all"), "rfia"
                ].mean()

        if np.isnan(aa_slag):
            # A bare `raise` here has no active exception and only produces
            # "RuntimeError: No active exception to reraise"; keep the exception type
            # but say what actually went wrong.
            raise RuntimeError(
                f"no rFIA value for assessment area {assessment_area['code']} "
                f"(site class {assessment_area['site_class']!r}) "
                f"in project {project['opr_id']}"
            )

        slag_store.append(aa_slag * assessment_area["acreage"] / project["acreage"])

    # Projects with no usable assessment area get no entry, exactly as before.
    if slag_store:
        store[project["opr_id"]] = sum(slag_store)

In [None]:
# Common practice reported by each project, keyed by `opr_id`.
cp = {project["opr_id"]: project["carbon"]["common_practice"]["value"] for project in retro_json}

# Recalculated ("ours") and reported ("theirs") values side by side; projects missing
# either value are dropped.
per_proj_cp = pd.concat(
    [pd.Series(store).rename("ours"), pd.Series(cp).rename("theirs")], axis=1
).dropna()

# Squared Pearson correlation. The unsquared coefficient was previously stored under this
# name even though the figure labels it R^2 and the assessment-area comparison squares it.
per_proj_r2 = per_proj_cp.corr()["ours"]["theirs"] ** 2

# Root-mean-square error between recalculated and reported values.
per_proj_rmse = mean_squared_error(per_proj_cp["ours"], per_proj_cp["theirs"]) ** 0.5

In [None]:
# Scatter of project-reported common practice (y) against the recalculated value (x),
# one point per project.
g = sns.FacetGrid(data=per_proj_cp, height=4.5)
g.map(plt.scatter, "ours", "theirs", s=175, alpha=0.5, color=".3")

# Identical limits and ticks on both axes, so the 1:1 line runs corner to corner.
EXTENT = 215
ticks = np.arange(0, 201, 50)
plt.xlim(0, EXTENT)
plt.xticks(ticks)
plt.ylim(0, EXTENT)
plt.yticks(ticks)

# Dashed red 1:1 reference line.
plt.plot((0, EXTENT), (0, EXTENT), lw=3, ls="--", color="r")

plt.xlabel(f"Project Recalculated\nCommon Practice\n{units}")
plt.ylabel(f"Project Reported\nCommon Practice\n{units}")

# Inside the f-string `{2}` evaluates to the integer 2, so the label renders as R^2.
summary_text = f"R$^{2}$: {per_proj_r2:.2f}\nRMSE: {per_proj_rmse:.2f}"
g.axes[0][0].annotate(summary_text, xy=(0.6, 0.35), xycoords="figure fraction")

# Write the figure to disk as a PDF.
fn = "/home/jovyan/pub-figs/per_project_cp_compare.pdf"
plt.savefig(fn, dpi=300, bbox_inches="tight")

In [None]:
# Summary statistics of the difference between recalculated and reported common
# practice, expressed relative to the mean of the two values.
difference = per_proj_cp["ours"] - per_proj_cp["theirs"]
pair_mean = (per_proj_cp["ours"] + per_proj_cp["theirs"]) / 2
(difference / pair_mean).describe()

In [None]:
# Acreage-weighted rFIA common practice per project, keyed by `opr_id`, this time
# ignoring site class: each assessment area uses the mean over every row with its code.
store = {}
for project in retro_json:
    slag_store = []
    for assessment_area in project["assessment_areas"]:
        # Assessment areas with code 999 are left out of the weighted sum.
        if assessment_area["code"] == 999:
            continue

        aa_slag = comparison.loc[
            comparison["aa_code"] == assessment_area["code"], "rfia"
        ].mean()

        # The site-class fallbacks from the site-class-aware calculation were dropped
        # here: they re-ran this same code-only selection, or a subset of it, so they
        # could never turn a NaN into a value.
        if np.isnan(aa_slag):
            # A bare `raise` here has no active exception and only produces
            # "RuntimeError: No active exception to reraise"; keep the exception type
            # but say what actually went wrong.
            raise RuntimeError(
                f"no rFIA value for assessment area {assessment_area['code']} "
                f"in project {project['opr_id']}"
            )

        slag_store.append(aa_slag * assessment_area["acreage"] / project["acreage"])

    # Projects with no usable assessment area get no entry, exactly as before.
    if slag_store:
        store[project["opr_id"]] = sum(slag_store)

In [None]:
# Common practice reported by each project, keyed by `opr_id`.
cp = {project["opr_id"]: project["carbon"]["common_practice"]["value"] for project in retro_json}

# Recalculated ("ours") and reported ("theirs") values side by side; projects missing
# either value are dropped.
per_proj_cp = pd.concat(
    [pd.Series(store).rename("ours"), pd.Series(cp).rename("theirs")], axis=1
).dropna()

# Squared Pearson correlation. The unsquared coefficient was previously stored under this
# name even though the figure labels it R^2 and the assessment-area comparison squares it.
per_proj_r2 = per_proj_cp.corr()["ours"]["theirs"] ** 2

# Root-mean-square error between recalculated and reported values.
per_proj_rmse = mean_squared_error(per_proj_cp["ours"], per_proj_cp["theirs"]) ** 0.5

In [None]:
# Scatter of project-reported common practice (y) against the recalculated value (x),
# one point per project.
g = sns.FacetGrid(data=per_proj_cp, height=4.5)
g.map(plt.scatter, "ours", "theirs", s=175, alpha=0.5, color=".3")

# Identical limits and ticks on both axes, so the 1:1 line runs corner to corner.
EXTENT = 215
ticks = np.arange(0, 201, 50)
plt.xlim(0, EXTENT)
plt.xticks(ticks)
plt.ylim(0, EXTENT)
plt.yticks(ticks)

# Dashed red 1:1 reference line.
plt.plot((0, EXTENT), (0, EXTENT), lw=3, ls="--", color="r")

plt.xlabel(f"Project Recalculated\nCommon Practice\n{units}")
plt.ylabel(f"Project Reported\nCommon Practice\n{units}")

# Inside the f-string `{2}` evaluates to the integer 2, so the label renders as R^2.
summary_text = f"R$^{2}$: {per_proj_r2:.2f}\nRMSE: {per_proj_rmse:.2f}"
g.axes[0][0].annotate(summary_text, xy=(0.6, 0.35), xycoords="figure fraction")

# Write the figure to disk as a PDF.
fn = "/home/jovyan/pub-figs/per_project_cp_compare_no_site_class.pdf"
plt.savefig(fn, dpi=300, bbox_inches="tight")