In [None]:
import os
import json
import fsspec

import numpy as np
import pandas as pd

from carbonplan_retro.data import cat
from carbonplan_retro.load.issuance import load_issuance_table, ifm_opr_ids

In [None]:
# Issuance table, loaded with forest_only=False, plus the lightweight retro
# project database read from the data catalog.
df = load_issuance_table(forest_only=False)
retro_json = cat.retro_db_light_json.read()

In [None]:
# Sum of the `allocation` column over the whole issuance table.
total_arbocs = df["allocation"].sum()
display(total_arbocs)

## Total Value


In [None]:
# NOTE(review): 13.67 is an unexplained constant (presumably a price per ARBOC
# in USD) — record its source and date.
price_per_arboc = 13.67

# Total value = assumed price times total allocation.
display(price_per_arboc * total_arbocs)

### figure 3a


In [None]:
def subset_stats(criteria):
    """Summarize the rows of the global issuance table ``df`` selected by a mask.

    Parameters
    ----------
    criteria
        Boolean mask used to index ``df``.

    Returns
    -------
    tuple
        ``(number of distinct opr_id values, summed allocation in millions
        rounded to one decimal)``.
    """
    selected = df[criteria]
    project_count = selected["opr_id"].nunique()
    arbocs_in_millions = (selected["allocation"].sum() / 1_000_000).round(1)
    return (project_count, arbocs_in_millions)


def _starts_above_common_practice(project):
    """True when the project's initial carbon stock exceeds its common practice value."""
    carbon = project["carbon"]
    return carbon["initial_carbon_stock"]["value"] > carbon["common_practice"]["value"]


# IDs of projects starting above common practice, and IDs of every project in retro_json.
upfront_opr_ids = [p["opr_id"] for p in retro_json if _starts_above_common_practice(p)]
compliance_opr_ids = [p["opr_id"] for p in retro_json]

# Rows flagged "COP" in the Early Action/Compliance column.
is_compliance_period = df["Early Action/ Compliance"] == "COP"

# Named boolean masks over the issuance table.
subsets = {
    "all": np.ones(len(df), dtype=bool),
    "all_forest": df["project_type"] == "forest",
    "compliance_ifm": (df["opr_id"].isin(ifm_opr_ids)) & is_compliance_period,
    "non_graduated_compliance_ifms": (df["opr_id"].isin(compliance_opr_ids))
    & is_compliance_period,
    "upfront_ifm": (df["opr_id"].isin(upfront_opr_ids)) & (df["arb_rp_id"].isin(["A"])),
}
# (distinct projects, ARBOCs in millions) for each subset.
{label: subset_stats(mask) for label, mask in subsets.items()}

## Carbon stocks for SC by species


In [None]:
# Load the rFIA output for assessment area 297 and keep only the 2010 rows.
sc_data = cat.rfia_all(assessment_area_id=297).read()

sc_data = sc_data[sc_data["YEAR"] == 2010].copy()  # use 2010 because comparable to CP data


# Rescale CARB_ACRE by 44/12 and 0.907185 — presumably carbon -> CO2e and
# short tons -> metric tons; TODO confirm units.
sc_data["CARB_ACRE"] = sc_data["CARB_ACRE"] * 44 / 12 * 0.907185

In [None]:
# Display name -> FORTYPCD code for the forest types reported.
fortyps_of_interest = {"Douglas fir": 201, "Ponderosa pine": 221, "Tanoak": 941}

In [None]:
# CARB_ACRE for each forest type of interest; `.item()` requires exactly one
# matching row per FORTYPCD and raises otherwise.
standing_carbon = {
    name: sc_data.loc[sc_data["FORTYPCD"] == code, "CARB_ACRE"].item()
    for name, code in fortyps_of_interest.items()
}
display(standing_carbon)

In [None]:
# Show forest type code, CARB_ACRE and nPlots_TREE for every retained row.
sc_data[["FORTYPCD", "CARB_ACRE", "nPlots_TREE"]]

### Coastal Alaska


In [None]:
import pandas as pd

In [None]:
# AK has three assessment areas, but let's summarize across all of them to
# report a single inline value.
ak_assessment_areas = [285, 286, 287]
ak_all = pd.concat([cat.rfia_all(assessment_area_id=aa_id).read() for aa_id in ak_assessment_areas])

ak_all = ak_all[ak_all["YEAR"] == 2013].copy()  # 2013 to match what used in CP

# Select the columns before aggregating: only these three are summed, so any
# non-numeric column in the rFIA output cannot break or slow down the sum.
sums = ak_all.groupby("FORTYPCD")[["CARB_TOTAL", "AREA_TOTAL", "nPlots_TREE"]].sum()

In [None]:
# Display name -> FORTYPCD code for the Alaska forest types reported.
# The names are shown verbatim in the output, so spelling matters.
ak_forests_of_interest = {
    "Sitka spruce": 305,
    "Western hemlock": 301,
    "Cottonwood": 703,
    "Paper birch": 902,
}

In [None]:
# Summed CARB_TOTAL over summed AREA_TOTAL per FORTYPCD, scaled by 44/12 and
# 0.907185 (presumably carbon -> CO2e and short -> metric tons; TODO confirm).
ak_carb_acre = sums["CARB_TOTAL"] / sums["AREA_TOTAL"] * 44 / 12 * 0.907185

In [None]:
# Inspect the per-forest-type totals.
sums

In [None]:
# Per-area value for each Alaska forest type of interest, rounded to one decimal.
{k: ak_carb_acre[v].round(1) for k, v in ak_forests_of_interest.items()}

### Project Examples


In [None]:
# Read the reclassification crediting-error results (JSON) from Azure blob
# storage. Requires BLOB_ACCOUNT_KEY in the environment; a missing key raises KeyError.
with fsspec.open(
    # carbonplan-retro/remapping/
    "az://carbonplan-retro/results/reclassification-crediting-error.json",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
    account_name="carbonplan",
    mode="r",
) as f:
    reclass_credit = json.load(f)

In [None]:
# Median crediting error (delta ARBOCs) for one example project.
# The ID is spelled out because no `project` variable exists yet at this point
# in a top-to-bottom run; ACR189 is the first project summarized further down.
np.median(reclass_credit["ACR189"]["delta_arbocs"])

In [None]:
def summarize_project(project):
    """Print the crediting error of one project with its 5th-95th percentile range.

    Looks up the project's ``delta_arbocs`` values in the global
    ``reclass_credit`` mapping, takes the 5th, 50th and 95th percentiles
    (rounded to one decimal), and prints them both as absolute values and as a
    fraction of ``project["arbocs"]["calculated"]`` (rounded to three decimals).

    Parameters
    ----------
    project : dict
        Project record providing ``opr_id`` and ``arbocs.calculated``.

    Returns
    -------
    None
    """
    project_id = project["opr_id"]
    deltas = reclass_credit[project_id]["delta_arbocs"]
    percentiles = np.percentile(deltas, q=[5, 50, 95]).round(1)
    # Fractions are computed from the already-rounded percentiles.
    fractions = percentiles / project["arbocs"]["calculated"]

    low, mid, high = percentiles
    frac_low, frac_mid, frac_high = fractions
    print(f"{project_id} has a {mid} crediting error ({frac_mid.round(3)})")
    print(f"CI: {low}, {high}")
    print(f"% CI: {frac_low.round(3)}, {frac_high.round(3)}")

In [None]:
# Look up ACR189 in retro_json (IndexError if absent) and print its summary.
opr_id = "ACR189"
project = [x for x in retro_json if x["opr_id"] == opr_id][0]

summarize_project(project)

In [None]:
# Look up ACR361 in retro_json (IndexError if absent) and print its summary.
opr_id = "ACR361"
project = [x for x in retro_json if x["opr_id"] == opr_id][0]

summarize_project(project)

Old values: 839,636 tCO2e (95% CI: 155,996 to 1,482,684 tCO2e) or 35.3% of its total credits (95%
CI: 6.5% to 62.4%).


In [None]:
# Look up CAR1183 in retro_json (IndexError if absent) and print its summary.
opr_id = "CAR1183"
project = [x for x in retro_json if x["opr_id"] == opr_id][0]

summarize_project(project)

### Ecosection combos


In [None]:
# NOTE(review): path is relative to the user's home directory and will not
# exist on other machines.
data = pd.read_csv("~/rfia/processed_data/297_by_ecosection.csv")
# `.copy()` makes the filtered frame independent of the raw read, so columns
# added to it afterwards do not trigger SettingWithCopyWarning.
data = data[data["YEAR"] == 2010].copy()  # match CP calculation period

In [None]:
# ECOSECTION is ECOSUBCD with its last character dropped and surrounding
# whitespace stripped.
data["ECOSECTION"] = data["ECOSUBCD"].str[:-1].str.strip()


# Ecosections to compare; the trailing comment holds alternative lists left for reference.
lst = ["M261A", "M261B", "M261D"]  # ['263A']# ["M221A","M221B","M221C"] #['263A']#
# Keep rows with positive CARB_TOTAL that fall in the chosen ecosections.
subset = data[(data["CARB_TOTAL"] > 0) & (data["ECOSECTION"].isin(lst))].copy()

In [None]:
# Totals per (site, ECOSECTION), then CARB_TOTAL / AREA_TOTAL scaled by 44/12
# and 0.907185. NOTE: this rebinds `sums`, which held the Alaska totals earlier.
sums = subset.groupby(["site", "ECOSECTION"])[["CARB_TOTAL", "AREA_TOTAL"]].sum()
slag = sums["CARB_TOTAL"] / sums["AREA_TOTAL"] * 44 / 12 * 0.907185

In [None]:
# Same totals pooled across ecosections (grouped by site only).
ss_average = subset.groupby(["site"])[["CARB_TOTAL", "AREA_TOTAL"]].sum()

In [None]:
# Pooled CARB_TOTAL / AREA_TOTAL per site, with the same 44/12 and 0.907185 scaling.
ss_slag = ss_average["CARB_TOTAL"] / ss_average["AREA_TOTAL"] * 44 / 12 * 0.907185

In [None]:
# Wide view: rows are sites, one column per ECOSECTION, rounded to one decimal.
slag.unstack(1).round(1)

In [None]:
# Inspect the pooled per-site values.
ss_slag

In [None]:
# Relative difference between the pooled and the per-ecosection values.
# Relies on pandas aligning ss_slag to slag on the shared `site` level — TODO confirm.
((ss_slag - slag) / slag).unstack(0)

# Methods


## Classifier


In [None]:
# Read the classifier F-scores (JSON) from Azure blob storage.
# Requires BLOB_ACCOUNT_KEY in the environment.
with fsspec.open(
    "az://carbonplan-retro/reclassification/classifier_fscores.json",
    account_name="carbonplan",
    mode="r",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
) as f:
    fscores = json.load(f)

In [None]:
# Median over all entries of the first score stored for each one.
np.median([scores[0] for ss_name, scores in fscores.items()])

In [None]:
# One row per fscores entry: first score rounded to two decimals, sorted from
# highest to lowest, in a single column named "Weighted F-Score".
table = (
    pd.Series({ss_name: round(scores[0], 2) for ss_name, scores in fscores.items()})
    .sort_values(ascending=False)
    .rename("Weighted F-Score")
    .to_frame()
)

In [None]:
# Label the index; the name is rendered as the table's row header.
table.index = table.index.set_names("Supersection")

In [None]:
# Render the F-score table.
table

## Common Practice Stuff


In [None]:
# Peek at the first project record to recall its structure.
retro_json[0]

In [None]:
# Projects whose initial carbon stock value exceeds their common practice value.
projects = [
    x
    for x in retro_json
    if x["carbon"]["initial_carbon_stock"]["value"] > x["carbon"]["common_practice"]["value"]
]

In [None]:
# One row per selected project: average slag baseline and common practice values.
cp_df = pd.DataFrame(
    [
        (
            project["carbon"]["average_slag_baseline"]["value"],
            project["carbon"]["common_practice"]["value"],
        )
        for project in projects
    ],
    columns=["baseline", "cp"],
)

In [None]:
# Share of projects whose baseline is at most 5% above common practice.
# The numerator counts over all rows (comparisons with NaN are False); the
# denominator only counts rows without missing values.
sum((cp_df["baseline"] <= cp_df["cp"] * 1.05)) / len(cp_df.dropna())

In [None]:
from carbonplan_retro.analysis.project_overcrediting import get_slag_to_total_scalar

In [None]:
# Summary statistics of get_slag_to_total_scalar across the selected projects.
pd.DataFrame([get_slag_to_total_scalar(project) for project in projects]).describe()

In [None]:
# Fraction of selected projects whose rp_1 ifm_3 value equals their baseline ifm_3 value.
sum([x["rp_1"]["ifm_3"] == x["baseline"]["ifm_3"] for x in projects]) / len(projects)

In [None]:
# Read the common-practice verification results (JSON) from the
# carbonplan-scratch container. Requires BLOB_ACCOUNT_KEY in the environment.
with fsspec.open(
    "az://carbonplan-scratch/results/common-practice-verification.json",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
    account_name="carbonplan",
    mode="r",
) as f:
    cp_verification = json.load(f)

In [None]:
# opr_id -> "recalculated" value from the verification results.
cp_recalc = {x["opr_id"]: x["recalculated"] for x in cp_verification["projects"]}

In [None]:
# Read the reclassification crediting-error results (JSON).
# NOTE(review): this is the same blob path loaded earlier into `reclass_credit`.
with fsspec.open(
    "az://carbonplan-retro/results/reclassification-crediting-error.json",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
    account_name="carbonplan",
    mode="r",
) as f:

    crediting_error = json.load(f)

In [None]:
# One column per key of crediting_error; rows are that key's delta_arbocs entries.
crediting_df = pd.DataFrame({k: v["delta_arbocs"] for k, v in crediting_error.items()})

In [None]:
# Median delta_arbocs per key of crediting_error.
median_crediting_error = {k: np.median(v["delta_arbocs"]) for k, v in crediting_error.items()}

In [None]:
# Side-by-side frame of median crediting error and recalculated common
# practice, aligned on their shared index (keys missing from one side become NaN).
tp = pd.concat(
    [
        pd.Series(median_crediting_error).rename("crediting_error"),
        pd.Series(cp_recalc).rename("cp"),
    ],
    axis=1,
)

In [None]:
# Correlation between the two columns.
tp.corr()

In [None]:
import seaborn as sns

# Joint distribution of recalculated common practice vs. median crediting error.
# x and y are passed by keyword: in current seaborn the first positional
# argument is `data`, so the positional form raises a TypeError.
sns.jointplot(data=tp, x="cp", y="crediting_error")

In [None]:
# NOTE(review): hardcoded path under the home directory; the file's provenance
# is not recorded and the cell will not run on other machines.
nconds_df = pd.read_csv("~/lost+found/cleaned_olaf.csv")

In [None]:
# Inspect the cond_prop_group column.
nconds_df["cond_prop_group"]

In [None]:
from collections import Counter

In [None]:
# For each project, find the assessment area code holding the most acreage.
# Entries with code 999 are skipped (presumably a placeholder code — TODO confirm).
largest_aa = {}
for project in retro_json:
    try:
        acreage_counter = Counter()
        for assessment_area in project["assessment_areas"]:
            if assessment_area["code"] != 999:
                acreage_counter[assessment_area["code"]] += assessment_area["acreage"]
        largest_aa[project["opr_id"]] = max(acreage_counter, key=acreage_counter.get)
    except Exception:
        # Best effort: report projects that cannot be processed (e.g. no usable
        # assessment areas, so max() sees an empty counter) and continue.
        # `Exception` rather than a bare except, so Ctrl-C still interrupts.
        print(project["opr_id"])

# Interquartile range of the crediting error per project. This runs
# unconditionally after the loop, so arboc_range exists even when no project
# fails above.
arboc_range = crediting_df.quantile(q=[0.25, 0.75]).T
arboc_range["norm_range"] = (arboc_range[0.75] - arboc_range[0.25]) / (
    arboc_range[0.25] + arboc_range[0.75]
)
arboc_range["abs_range"] = np.abs(arboc_range[0.75] - arboc_range[0.25])

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Correlation of every column with nconds.
# NOTE(review): on a top-to-bottom run `subset` here is still the ecosection
# frame built earlier; the frame that carries an `nconds` column is only created
# in a later cell, so this cell likely needs to move after it — confirm.
subset.corr()["nconds"]

In [None]:
# Total cond_prop_group per assessment area code.
arb_nconds = nconds_df.groupby("aa_code")["cond_prop_group"].sum()

nconds_df["cov"] = nconds_df["slag_co2e_std"] / nconds_df["slag_co2e_acre"]

# Attach a condition count to each project: project -> its largest assessment
# area -> that area's condition count. The previous lookup fed row positions
# (0..n-1) to `arb_nconds`, which is keyed by assessment area code.
# Assumes `largest_aa` codes and `aa_code` use the same coding — TODO confirm.
project_aa = pd.Series(largest_aa).reindex(arboc_range.index)
arboc_range["nconds"] = project_aa.map(arb_nconds)

# Drop projects without a count and those at or above 1500 conditions.
subset = arboc_range[arboc_range["nconds"] < 1500]

plt.scatter(subset["nconds"], subset["norm_range"], c=".3", alpha=0.55, s=125)
plt.xlabel("ARB Reported\nFIA Condition Count")
plt.ylabel("Normalized\nOvercrediting IQR")

In [None]:
# Calculated minus issued ARBOCs for ACR257 (empty list if it is not among `projects`).
[x["arbocs"]["calculated"] - x["arbocs"]["issuance"] for x in projects if x["opr_id"] == "ACR257"]