In [None]:
import os
import json
import fsspec

import pandas as pd

# Fetch the precomputed overcrediting results from blob storage. The storage
# key is taken from the BLOB_ACCOUNT_KEY environment variable, so an unset
# variable raises KeyError right here.
with fsspec.open(
    "az://carbonplan-scratch/overcredited_arbocs.json",
    mode="r",
    account_name="carbonplan",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
) as f:
    # `d` is keyed by project id (e.g. "ACR282"); later cells read the
    # "alt_slag" and "delta_arbocs" entries of each project.
    d = json.loads(f.read())

import seaborn as sns
from carbonplan import styles
import matplotlib.pyplot as plt

# Apply the "carbonplan_light" matplotlib theme (fonts scaled by 1.5) to the
# figures in this notebook, and keep the `light` color set available.
styles.mpl.set_theme(font_scale=1.5, style="carbonplan_light")
colors = styles.colors.light


import numpy as np

from carbonplan_retro.data import cat

# Read the retro project database through the carbonplan_retro data catalog.
# Cells below iterate over it as a sequence of per-project dicts and read the
# "opr_id", "assessment_areas" and "arbocs" keys.
retro_json = cat.retro_db_light_json.read()

In [None]:
# One column per project holding that project's "alt_slag" values; project
# ACR360 is deliberately left out.
# NOTE(review): rows appear to be individual draws (see the "Draw Count" axis
# label further down) — confirm against the code that produced the JSON.
df = pd.DataFrame(
    {
        opr_id: results["alt_slag"]
        for opr_id, results in d.items()
        if opr_id != "ACR360"
    }
)

In [None]:
# Peek at which result fields are stored for a single project.
d["ACR282"].keys()

In [None]:
# Summary statistics of the "delta_arbocs" values for project ACR189,
# reporting the 5th and 95th percentiles (plus the median) and rounded to
# whole numbers.
(
    pd.Series(d["ACR189"]["delta_arbocs"])
    .describe(percentiles=[0.05, 0.95])
    .round(0)
)

In [None]:
# Quick look at the distribution of "delta_arbocs" for project ACR282
# (pandas' default of 10 bins, spelled out).
pd.Series(d["ACR282"]["delta_arbocs"]).hist(bins=10)

In [None]:
# Sum every project column within each row, then summarise those row totals
# with the 5th/95th percentiles, rounded to one decimal place.
(
    df.sum(axis="columns")
    .describe(percentiles=[0.05, 0.95])
    .round(1)
)

In [None]:
# Histogram of the row totals of `df` (sum over all project columns),
# expressed in millions, written out as a PDF figure.
ax = df.sum(axis=1).div(1_000_000).hist(bins=25, alpha=0.55, color=".3")
ax.grid(False)
ax.set_xlabel("Overcrediting (Million ARBOCs)")
ax.set_ylabel("Draw Count")
# NOTE(review): absolute, machine-specific output path — the directory must
# already exist on the machine running the notebook.
ax.figure.savefig(
    "/home/jovyan/pub-figs/distribution_overcrediting.pdf",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
from collections import Counter

In [None]:
# Project ids that have a column in `df`, i.e. the projects with results.
analyzed = list(df.columns)

In [None]:
# How many projects in the retro database have no column in `df`.
sum(1 for p in retro_json if p["opr_id"] not in analyzed)

In [None]:
# For every project, record the assessment-area code that accounts for the
# most acreage. Acreage is accumulated per code because a project can list the
# same code more than once; entries with code 999 are skipped.
# NOTE(review): 999 looks like a placeholder/"unknown" code — confirm.
largest_aa = {}
for project in retro_json:
    acreage_counter = Counter()
    try:
        for assessment_area in project["assessment_areas"]:
            if assessment_area["code"] != 999:
                acreage_counter[assessment_area["code"]] += assessment_area["acreage"]
        # max() raises ValueError when no usable assessment area was found.
        largest_aa[project["opr_id"]] = max(acreage_counter, key=acreage_counter.get)
    except (KeyError, TypeError, ValueError) as err:
        # Only the expected data problems are tolerated (missing keys, null
        # values, no assessment areas); anything else, including a keyboard
        # interrupt, now propagates instead of being silently swallowed.
        print(f'{project["opr_id"]}: {type(err).__name__}: {err}')

In [None]:
# Load the per-assessment-area statistics table. Cells below use its
# "aa_code", "cond_prop_group", "slag_co2e_std" and "slag_co2e_acre" columns.
# NOTE(review): absolute, machine-specific path — the notebook cannot be
# re-run elsewhere unless this file is present at the same location.
arb_stats = pd.read_csv("/home/jovyan/lost+found/cleaned_olaf.csv")

In [None]:
# Lower and upper quartile of every project column, transposed so that each
# row is a project and the columns are labelled 0.25 and 0.75.
arboc_range = df.quantile([0.25, 0.75]).transpose()

In [None]:
# Interquartile range divided by the sum of the two quartiles, giving a
# scale-free measure of spread per project.
arboc_range["norm_range"] = (
    arboc_range[0.75]
    .sub(arboc_range[0.25])
    .div(arboc_range[0.25].add(arboc_range[0.75]))
)

In [None]:
# Total of "cond_prop_group" per assessment-area code; the result is a Series
# indexed by "aa_code".
arb_nconds = arb_stats["cond_prop_group"].groupby(arb_stats["aa_code"]).sum()

In [None]:
# Ratio of "slag_co2e_std" to "slag_co2e_acre" for every row of the table.
# NOTE(review): the column name suggests a coefficient of variation — confirm
# that "slag_co2e_acre" is the matching mean.
arb_stats["cov"] = arb_stats["slag_co2e_std"].div(arb_stats["slag_co2e_acre"])

In [None]:
# Attach the condition count of each project's largest assessment area.
# `arboc_range` is indexed by project id, so the lookup goes
# project id -> largest assessment-area code (`largest_aa`) -> count
# (`arb_nconds`, indexed by "aa_code"). The previous version mapped the
# positional 0..n-1 index produced by reset_index() straight into
# `arb_nconds`, which looked counts up by row position rather than by the
# project's assessment area. Projects without an entry in `largest_aa`, or
# whose code is absent from `arb_nconds`, end up as NaN.
# NOTE(review): assumes the codes in `largest_aa` and "aa_code" share a dtype.
arboc_range["nconds"] = arboc_range.index.map(largest_aa).map(arb_nconds)

In [None]:
# Keep only the projects whose condition count is below 1500 (rows with a
# missing count drop out as well, since NaN < 1500 is False).
subset = arboc_range.loc[arboc_range["nconds"].lt(1500)]

In [None]:
# Normalized overcrediting IQR against condition count, restricted to the
# `subset` of projects selected above.
ax = plt.gca()
ax.scatter(subset["nconds"], subset["norm_range"], s=125, alpha=0.55, c=".3")
ax.set_xlabel("ARB Reported\nFIA Condition Count")
ax.set_ylabel("Normalized\nOvercrediting IQR")

In [None]:
# `nf_opr_ids` has to exist before it is used to select columns below. It was
# previously defined only in the following cell, so a fresh
# Restart & Run All failed here with a NameError.
# NOTE(review): `nf_proj` is not defined in any cell visible in this part of
# the notebook — confirm it is created earlier, otherwise add that cell.
nf_opr_ids = [project["opr_id"] for project in nf_proj]

# Same relationship as the previous figure, without the condition-count cutoff.
plt.scatter(arboc_range["nconds"], arboc_range["norm_range"])

# Median of each selected project's column, rounded to whole numbers.
df.loc[:, nf_opr_ids].median(axis=0).round(0)

In [None]:
# Issuance figure recorded for every project in the retro database, as a
# Series named "issuance" and indexed by project id.
issued_arbocs = pd.Series(
    {p["opr_id"]: p["arbocs"]["issuance"] for p in retro_json},
    name="issuance",
)

In [None]:
# Per-project table with the median of each project column ("overcredit")
# next to its "issuance"; projects missing either value are dropped.
# NOTE(review): this rebinds `df`, so the cell is not idempotent — the frame
# built from the JSON results must be recreated before running it again.
df = pd.concat(
    [
        df.quantile(0.5, axis="index").rename("overcredit"),
        issued_arbocs,
    ],
    axis="columns",
).dropna()