In [None]:
import numpy as np
import pandas as pd

from carbonplan_retro.load.issuance import load_issuance_table, ifm_opr_ids
from carbonplan_retro.load.project_db import load_project_db

In [None]:
from carbonplan_retro.data import cat

## Issuance Summary Statistics

This notebook generates summary statistics from the 2020-09-09 ARB Issuance Table. The numbers
generated here provide context to over-crediting calculations.


In [None]:
# Load the issuance table used throughout the notebook. forest_only=False presumably keeps
# non-forest project types as well (later cells filter on project_type == "forest") -- TODO confirm.
df = load_issuance_table(forest_only=False)


def subset_stats(criteria, data=None):
    """Summarize the rows of an issuance table selected by ``criteria``.

    Parameters
    ----------
    criteria : boolean mask (Series or array)
        Row selector, applied as ``data[criteria]``.
    data : pandas.DataFrame, optional
        Table to summarize; needs ``opr_id`` and ``allocation`` columns.
        Defaults to the notebook-level ``df`` so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    tuple
        ``(number of unique opr_id values, sum of allocation)`` over the
        selected rows.
    """
    table = df if data is None else data
    selected = table[criteria]  # filter once and reuse for both statistics
    return (selected["opr_id"].nunique(), selected["allocation"].sum())


# Read the light retro database JSON from the carbonplan_retro catalog.
retro_json = cat.retro_db_light_json.read()

# opr_ids of projects whose initial carbon stock is above common practice.
upfront_opr_ids = [
    entry["opr_id"]
    for entry in retro_json
    if entry["carbon"]["initial_carbon_stock"]["value"]
    > entry["carbon"]["common_practice"]["value"]
]
# opr_ids of every project present in the retro database.
compliance_opr_ids = [entry["opr_id"] for entry in retro_json]

# Boolean row masks over df, keyed by the name of the subset they select.
cop_rows = df["Early Action/ Compliance"] == "COP"
subsets = {
    "all": np.full(len(df), True),
    "all_forest": df["project_type"] == "forest",
    "compliance_ifm": df["opr_id"].isin(ifm_opr_ids) & cop_rows,
    "non_graduated_compliance_ifms": df["opr_id"].isin(compliance_opr_ids) & cop_rows,
    "upfront_ifm": df["opr_id"].isin(upfront_opr_ids) & df["arb_rp_id"].isin(["A"]),
}
# (unique project count, total allocation) per subset -- shown as the cell output.
{name: subset_stats(mask) for name, mask in subsets.items()}

In [None]:
# Sum of `allocation` over every row of the issuance table; used as the denominator for the shares below.
total_issued_arbocs = df["allocation"].sum()

In [None]:
# Rows whose opr_id is listed in ifm_opr_ids. The selection is by opr_id only;
# no early-action / compliance filter is applied here.
ifms = df[df["opr_id"].isin(ifm_opr_ids)]

In [None]:
# Number of distinct opr_ids among rows with project_type == "forest".
df[df["project_type"] == "forest"].opr_id.nunique()

In [None]:
# Number of entries in ifm_opr_ids (imported from carbonplan_retro.load.issuance).
len(ifm_opr_ids)

Early Action ARBOCs are actually a fairly sizable fraction of the overall program -- weighing in at
12.5 percent. I actually find this a little surprising... EA projects also have a similar skew toward
forests.


In [None]:
# Row masks shared by the sums below.
ea_rows = df["is_ea"] == 1
forest_rows = df["project_type"] == "forest"

# Allocation totals for all EA rows and for EA forest rows.
total_ea_arbocs = df.loc[ea_rows, "allocation"].sum()
forest_ea_arbocs = df.loc[ea_rows & forest_rows, "allocation"].sum()

# Shares relative to all issued ARBOCs, and forest share within EA.
fraction_arboc_ea = total_ea_arbocs / total_issued_arbocs
fraction_arboc_forest_ea = forest_ea_arbocs / total_issued_arbocs
forest_ea_as_frac_of_ea = forest_ea_arbocs / total_ea_arbocs

display(f"{fraction_arboc_ea* 100:0.2f} percent ARBOCs in the issuance table come from EA period")
display(
    f"And EA forest projects represent {fraction_arboc_forest_ea*100:.2f} percent of all issued ARBOCs"
)
display(
    f"This means EA forests represent a little over half of all EA ARBOCs ({forest_ea_as_frac_of_ea * 100:.2f} percent to be exact)"
)

## Graduated projects


In [None]:
# NOTE(review): this is an absolute, machine-specific path; a later cell loads the same version by
# the bare name "Forest-Offset-Projects-v0.3" -- confirm which form should be used.
project_db = load_project_db(
    "/home/jovyan/lost+found/Forest-Offset-Projects-v0.3.json",
    use_cache=True,
    save=False,
)

# Map project_db index -> early_action id, for projects whose early_action id starts with "CAR".
early_action_ids = project_db["project"]["early_action"]
graduated_projects = early_action_ids[early_action_ids.str.startswith("CAR")].to_dict()

# Manual addition: one AC project that converted over.
graduated_projects["CAR1109"] = "CAR749"
graduated_projects

In [None]:
# opr_ids of early-action forest rows that are not the early-action id of any graduated project.
# The EA test is written as `== 1`, matching how is_ea is compared in the EA-share cell above,
# so the mask is an explicit boolean regardless of the column's dtype.
ea_forest_rows = (df["project_type"] == "forest") & (df["is_ea"] == 1)
not_graduated_rows = ~df["opr_id"].isin(list(graduated_projects.values()))
non_grads = df.loc[ea_forest_rows & not_graduated_rows, "opr_id"].unique().tolist()

In [None]:
# Both sides of each graduated pair: the early-action ids (dict values) followed by the dict keys.
graduated_list = [*graduated_projects.values(), *graduated_projects.keys()]

In [None]:
# Start the category column empty but with object dtype: later cells write string labels into it,
# and assigning strings into a float64 (NaN) column is a deprecated silent upcast in pandas >= 2.1.
df["project_category"] = pd.Series(np.nan, index=df.index, dtype="object")

In [None]:
# Project counts per category, keyed by project type. These are literals typed into the notebook,
# not values derived from df. Totals: 9 non-graduated + 20 graduated + 81 compliance = 110.
non_graduated_counts = {"ifm": 3, "ac": 6}
graduated_counts = {"ifm": 19, "ac": 1}
compliance_counts = {"ifm": 74, "ac": 5, "refor": 2}

In [None]:
# Compliance plus graduated project counts from the hard-coded dicts (81 + 20).
sum(compliance_counts.values()) + sum(graduated_counts.values())

In [None]:
# Label forest rows. Order matters: every forest row starts as "compliance", then the
# non-graduated and graduated opr_ids overwrite that default (graduated is applied last).
forest_rows = df["project_type"] == "forest"
df.loc[forest_rows, "project_category"] = "compliance"
df.loc[forest_rows & df["opr_id"].isin(non_grads), "project_category"] = "non-graduated"
df.loc[forest_rows & df["opr_id"].isin(graduated_list), "project_category"] = "graduated"

In [None]:
# guid equals opr_id, except that opr_ids which are keys of graduated_projects are replaced by
# their mapped early-action id, so both halves of a graduated project share one identifier.
mapped_ids = df["opr_id"].map(graduated_projects)
is_graduated_key = df["opr_id"].isin(graduated_projects.keys())
df["guid"] = mapped_ids.where(is_graduated_key, df["opr_id"])

In [None]:
# Share of total allocation held by forest (True) versus non-forest (False) rows.
allocation_by_forest_flag = df.groupby(df["project_type"] == "forest")["allocation"].sum()
forest_share_total = (allocation_by_forest_flag / df["allocation"].sum()).rename(
    "forest_share_total"
)

## Total forests


In [None]:
# Forest-only rows of the issuance table, including the project_category and guid columns added above.
forests = df[df["project_type"] == "forest"]

In [None]:
# `forests` is the forest-only frame built in the previous cell. Counting `guid` rather than
# `opr_id` counts a graduated project once, because guid maps the keys of graduated_projects
# onto their early-action ids.
display(f"Across EA and COP there are {forests['guid'].nunique()} unique forest projects")

We can break those 110 projects down into three categories of projects: compliance, graduated, and
non-graduated projects


In [None]:
def danny_table_stats(subset, total_arbocs):
    """Build one row of the summary table for ``subset``.

    Parameters
    ----------
    subset : pandas.DataFrame
        Rows to summarize; needs ``guid`` and ``allocation`` columns.
    total_arbocs : number
        Denominator for the share calculation.

    Returns
    -------
    dict
        ``n_proj`` (distinct guid count), ``allocation`` (sum of allocation)
        and ``frac_total_arbocs`` (allocation / total_arbocs, rounded to 3
        decimal places).
    """
    allocation_total = subset["allocation"].sum()
    project_count = subset["guid"].nunique()
    share = round(allocation_total / total_arbocs, 3)
    return {
        "n_proj": project_count,
        "allocation": allocation_total,
        "frac_total_arbocs": share,
    }

In [None]:
# Forest rows whose arb_rp_id is "A" or "(" -- used below as the "first RP" rows of each project.
initial_forests = df[
    (df["project_type"] == "forest") & (df["arb_rp_id"].isin(["A", "("]))
]  # '(' catches the two reforestation projects

In [None]:
from carbonplan_retro.data import cat

In [None]:
# NOTE(review): this repeats, token for token, the retro_json / upfront_opr_ids computation from
# the issuance-summary cell near the top of the notebook; it re-reads the catalog entry.
retro_json = cat.retro_db_light_json.read()
# opr_ids of projects whose initial carbon stock is above common practice.
upfront_opr_ids = [
    project["opr_id"]
    for project in retro_json
    if project["carbon"]["initial_carbon_stock"]["value"]
    > project["carbon"]["common_practice"]["value"]
]

In [None]:
# Index labels of project_db rows whose baseline initial carbon stock exceeds common practice,
# leaving out any label that is a key of graduated_projects.
baseline = project_db["baseline"]
ics_exceeds_cp = baseline["initial_carbon_stock"] > baseline["common_practice"]
ics_gt_cp_lst = [
    project_id
    for project_id in project_db[ics_exceeds_cp].index.tolist()
    if project_id not in graduated_projects
]

len(ics_gt_cp_lst)

In [None]:
# Row subsets of initial_forests to tabulate. This rebinds `subsets`, which earlier in the
# notebook held boolean masks; here each value is a DataFrame.
is_compliance = initial_forests["project_category"] == "compliance"
is_ifm = initial_forests["opr_id"].isin(ifm_opr_ids)
has_ics_gt_cp = initial_forests["opr_id"].isin(ics_gt_cp_lst)

subsets = {
    "all_forest": initial_forests,
    "compliance_ifm": initial_forests[is_compliance & is_ifm],
    "ifm_ics_gt_cp": initial_forests[has_ics_gt_cp],
}

In [None]:
# One row per subset (hence the transpose), with the columns produced by danny_table_stats.
# Shares are taken relative to total_issued_arbocs.
initial_forest_table = pd.DataFrame(
    {k: danny_table_stats(v, total_issued_arbocs) for k, v in subsets.items()}
).T

In [None]:
# Display-only rename: the result is not assigned, so initial_forest_table keeps its original
# column names.
initial_forest_table.rename(
    columns={
        "n_proj": "Project Count",
        "allocation": "First RP ARBOCs",
        "frac_total_arbocs": "Fraction of Total Issued ARBOCs",
    }
)

In [None]:
# Distinct guid count per project_category.
df.groupby("project_category")["guid"].nunique()

In [None]:
# Derive the share from the count dicts instead of repeating their totals as literals:
# compliance = 81 and compliance + graduated + non-graduated = 110, so the output is unchanged.
n_compliance_forest = sum(compliance_counts.values())
n_forest_total = (
    n_compliance_forest + sum(graduated_counts.values()) + sum(non_graduated_counts.values())
)
display(
    f"Compliance Forest projects represent {n_compliance_forest / n_forest_total * 100:.2f} percent of all forest projects"
)

In [None]:
# NOTE(review): 93 is a hard-coded denominator. It equals compliance_counts["ifm"] +
# graduated_counts["ifm"] (74 + 19) -- confirm that this is the intended total.
display(f"{compliance_counts['ifm']/93.}")

In [None]:
# len() counts the keys of the graduated_projects dict, including the manually added CAR1109 entry.
display(f"There are {len(graduated_projects)} graduated projects")

In [None]:
# Allocation on rows whose opr_id is a key of graduated_projects (isin on a dict tests its keys),
# expressed as a share of all issued ARBOCs.
graduated_rows = df["opr_id"].isin(graduated_projects)
graduated_arbocs = df.loc[graduated_rows, "allocation"].sum()
fraction_arbocs_from_graduated = graduated_arbocs / total_issued_arbocs

graduated_message = f"Graudated projects represent just {fraction_arbocs_from_graduated *100:0.2f} percent of all issued arbocs"
display(graduated_message)

So if graduated projects are 2.92 percent of all ARBOCs and EA forests are 6.88 percent of all
issued ARBOCs, EA+graduated projects represent less than 10 percent of all ARBOCs issued to date.


In [None]:
# Sum of allocation over all rows of `ifms` (every reporting period).
total_ifm_issued_arbocs = ifms["allocation"].sum()

In [None]:
# Sum of allocation over `ifms` rows with arb_rp_id == "A", treated below as the initial issuance.
initial_ifm_arbocs = ifms[ifms["arb_rp_id"] == "A"]["allocation"].sum()

In [None]:
# NOTE(review): the message says "EA excluded", but `ifms` is selected by opr_id only and
# total_issued_arbocs sums the whole issuance table -- confirm that EA rows are really excluded
# from both numerator and denominator.
display(
    f"IFMs represent {total_ifm_issued_arbocs/total_issued_arbocs * 100:.2f} percent of all compliance (EA excluded) ARBOCS"
)

In [None]:
# Initial ("A") IFM allocation as a share of all IFM allocation, and as a share of all issued ARBOCs.
initial_frac_total_ifm = initial_ifm_arbocs / total_ifm_issued_arbocs
initial_frac_total_issued = initial_ifm_arbocs / total_issued_arbocs

In [None]:
# Denominator is total_ifm_issued_arbocs, i.e. all rows of `ifms`.
# NOTE(review): `ifms` is not filtered to compliance rows in this notebook -- confirm the wording.
display(
    f"Initial IFM ARBOCs represent {initial_frac_total_ifm * 100:.2f} percent of all ARBOCs issued to compliance IFM projects"
)

In [None]:
# Denominator is total_issued_arbocs, i.e. the whole issuance table.
display(
    f"Initial IFMs ARBOCs represent {initial_frac_total_issued * 100:.2f} percent of all issued ARBOCS"
)

## Some other stuff


In [None]:
# Reload the project database, this time by bare version name and without the cache.
retro_db = load_project_db("Forest-Offset-Projects-v0.3", use_cache=False, save=False)

# NOTE(review): this rebinds graduated_projects from the dict built earlier to a plain list of
# opr_ids (those whose early_action id starts with "CAR"); the manual CAR1109 entry is not re-added.
early_action_by_opr_id = retro_db["project"].set_index("opr_id")["early_action"].to_dict()
graduated_projects = [
    opr_id for opr_id, ea_id in early_action_by_opr_id.items() if ea_id.startswith("CAR")
]

In [None]:
def initial_delta(data):
    """Ratio of a project's initial issuance to its average later issuance.

    ``data`` holds the issuance rows of one project (``arb_rp_id`` and
    ``allocation`` columns). If no row has ``arb_rp_id == "B"`` the project is
    treated as having a single reporting period and ``None`` is returned.
    Otherwise the result is the summed allocation of the "A" rows divided by
    the mean of the per-reporting-period allocation totals of all other rows.
    """
    rp_ids = data["arb_rp_id"]
    if not (rp_ids == "B").any():
        return None

    initial = data.loc[rp_ids == "A", "allocation"].sum()
    # Total each reporting period before averaging: issuances can be rolling, so one
    # reporting period may span several rows (rare -- only one known case).
    per_rp_totals = data.loc[rp_ids != "A"].groupby("arb_rp_id")["allocation"].sum()
    return initial / per_rp_totals.mean()

In [None]:
# Distribution of the initial-vs-subsequent issuance ratio across IFM projects that are not in
# graduated_projects; projects for which initial_delta returns None are dropped.
non_graduated_ifms = ifms[~ifms.opr_id.isin(graduated_projects)]
initial_ratios = non_graduated_ifms.groupby(["opr_id"]).apply(initial_delta)
initial_ratios.sort_values().dropna().describe()