In [None]:
import os
import fsspec
import json
import pandas as pd
import numpy as np

## Load the data


We exclude two projects (ACR360 and CAR1102) from all further analysis due to insufficient underlying FIA data.


In [None]:
# Projects dropped from every downstream calculation because their underlying
# FIA data is insufficient.
EXCLUDED_PROJECT_IDS = ("ACR360", "CAR1102")

# Over-crediting results, keyed by project ID.
with fsspec.open(
    "https://carbonplan.blob.core.windows.net/carbonplan-scratch/overcredited_arbocs.json", "r"
) as f:
    data = json.load(f)
for project_id in EXCLUDED_PROJECT_IDS:
    # `del` (rather than `pop(..., None)`) so a missing or renamed ID fails loudly.
    del data[project_id]

# Project database: a list of project records, each carrying an "id".
with fsspec.open(
    "https://carbonplan.blob.core.windows.net/carbonplan-retro/projects/retro-db-light-v1.0.json",
    "r",
) as f:
    db = json.load(f)
# Use boolean membership here, not `~`/`&`: on Python bools `~True == -2` and
# `~False == -1` are both truthy, so a bitwise filter never removes anything.
db = [project for project in db if project["id"] not in EXCLUDED_PROJECT_IDS]

In [None]:
# Report how many project records the database holds.
project_count = len(db)
print(f"Number of projects: {project_count}")

In [None]:
# Sum the issued ARBOCs over every project that has over-crediting data.
issuance_per_project = []
for key in data.keys():
    matching_projects = [x for x in db if x["id"] == key]
    issuance_per_project.append(matching_projects[0]["arbocs"]["issuance"])
total_issuance = np.sum(issuance_per_project)
print(f"Total ARBOCs: {total_issuance}")

## Calculate over-crediting


We write two small helper functions for formatting: one for numbers with SI suffixes and one for percentages.


In [None]:
def format_si(num, precision=1, suffixes=("", "K", "M", "G", "T", "P"), hide_suffix=False):
    """Format a number with an SI-style magnitude suffix, e.g. ``1500`` -> ``"1.5K"``.

    Parameters
    ----------
    num : number
        Value to format. The sign is preserved; the magnitude picks the suffix.
    precision : int
        Number of digits after the decimal point.
    suffixes : sequence of str
        Suffix for each successive power of 1000, starting with 1000**0.
        Values beyond the last suffix keep the last suffix.
    hide_suffix : bool
        If True, return only the scaled number without the suffix.

    Returns
    -------
    str
        The scaled number, followed by the suffix unless ``hide_suffix`` is set.
    """
    max_exponent = len(suffixes) - 1
    # Count how many powers of 1000 the magnitude reaches.
    m = sum(abs(num / 1000.0**x) >= 1 for x in range(1, max_exponent + 1))
    scaled = f"{num / 1000.0**m:.{precision}f}"
    # Rounding can carry the mantissa up to 1000 (999_999 -> "1000.0K");
    # step up to the next suffix when one is available so it reads "1.0M".
    if m < max_exponent and abs(float(scaled)) >= 1000:
        m += 1
        scaled = f"{num / 1000.0**m:.{precision}f}"
    if hide_suffix:
        return scaled
    return f"{scaled}{suffixes[m]}"

In [None]:
def format_pt(num, precision=1):
    """Render a fraction as a percentage string, e.g. ``0.25`` -> ``"25.0%"``.

    ``num`` is multiplied by 100 and shown with ``precision`` digits after the
    decimal point, followed by a percent sign.
    """
    scaled = num * 100
    return format(scaled, f".{precision}f") + "%"

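We write a function to calculate over-crediting (both as a percentage of issued credits and in ARBOCs),
optionally restricted to projects that satisfy a filter condition. It reports the median together with the 5th and 95th percentiles.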


In [None]:
def get_overcrediting(condition=None, percentage=True, display=False, n_samples=1000):
    """Summarize over-crediting across a (possibly filtered) set of projects.

    For each of the first ``n_samples`` entries of every selected project's
    ``delta_arbocs`` list, the per-project values are summed (NaNs ignored).
    The 5th, 50th and 95th percentiles of those sums are reported in ARBOCs
    and as a fraction of the selected projects' total issuance.

    Parameters
    ----------
    condition : callable or None
        Predicate applied to each record in ``db``; only records for which it
        is truthy are kept. ``None`` keeps every record. Projects without an
        entry in ``data`` are always dropped.
    percentage : bool
        Accepted for backward compatibility; it is not read by the function.
    display : bool
        If True, print the summary and return ``None`` instead of the dict.
    n_samples : int
        Number of leading ``delta_arbocs`` entries to aggregate per project.

    Returns
    -------
    dict or None
        When ``display`` is False: ``{"count", "percent", "arbocs"}``, where
        ``percent`` and ``arbocs`` are arrays ordered (5th, 50th, 95th).
    """
    selected = db if condition is None else filter(condition, db)
    keys = [project["id"] for project in selected if project["id"] in data]

    # Index records by ID once instead of rescanning `db` for every key.
    # `setdefault` keeps the first record for an ID, like `[...][0]` would.
    project_by_id = {}
    for project in db:
        project_by_id.setdefault(project["id"], project)
    total_issuance = np.sum([project_by_id[key]["arbocs"]["issuance"] for key in keys])

    total = [
        np.nansum([data[key]["delta_arbocs"][i] for key in keys]) for i in range(n_samples)
    ]

    # Percentiles are computed once, after all samples have been aggregated.
    total_arbocs = np.percentile(total, [5, 50, 95])
    total_percentage = total_arbocs / total_issuance

    if display:
        # Adjacent string literals keep source indentation out of the output
        # (a backslash continuation inside the string would embed it).
        print(
            "Over-crediting in ARBOCs: "
            f"{format_si(total_arbocs[1])}, "
            f"({format_si(total_arbocs[0])}, {format_si(total_arbocs[2])})"
        )
        print(
            "Over-crediting as %: "
            f"{format_pt(total_percentage[1])} "
            f"({format_pt(total_percentage[0])}, {format_pt(total_percentage[2])})"
        )
        print(f"Projects included: {len(keys)}")
    else:
        return {"count": len(keys), "percent": total_percentage, "arbocs": total_arbocs}

## Examples of over-crediting


Now we compute over-crediting for some example conditions


In [None]:
# Baseline case: no filter, so every project with over-crediting data is included.
condition = None
get_overcrediting(
    condition=condition,
    percentage=True,
    display=True,
)

In [None]:
def condition(x):
    """Keep projects whose developers or owners entry contains "New Forests"."""
    return any("New Forests" in x[role] for role in ("developers", "owners"))


get_overcrediting(
    condition=condition,
    percentage=True,
    display=True,
)

In [None]:
def condition(x):
    """Keep projects whose developers or owners entry contains "Finite Carbon"."""
    return any("Finite Carbon" in x[role] for role in ("developers", "owners"))


get_overcrediting(
    condition=condition,
    percentage=True,
    display=True,
)

In [None]:
def condition(x):
    """Keep only the project whose ID is ACR189."""
    return x["id"] == "ACR189"


get_overcrediting(
    condition=condition,
    percentage=True,
    display=True,
)

## Table of over-crediting by developer


First we get unique developers


In [None]:
# Collect every developer named on a project that has over-crediting data.
# `dict.fromkeys` de-duplicates while keeping first-seen order, so the list (and
# any table built from it) comes out in the same order on every run; a `set` of
# strings iterates in an order that can change between interpreter sessions.
developers = list(
    dict.fromkeys(
        developer for x in db if x["id"] in data for developer in x["developers"]
    )
)

In [None]:
# One get_overcrediting summary per developer, in the same order as `developers`.
results = []
for developer in developers:
    results.append(
        get_overcrediting(
            condition=lambda x, developer=developer: (
                developer in x["developers"] or developer in x["owners"]
            ),
            percentage=True,
        )
    )

# Assemble the table in one step; dict insertion order fixes the column order.
df = pd.DataFrame(
    {
        "Developer": developers,
        "Count": [r["count"] for r in results],
        "Over-crediting as %": [format_pt(r["percent"][1]) for r in results],
        "Range for %": [
            f'({format_pt(r["percent"][0])}, {format_pt(r["percent"][2])})' for r in results
        ],
        "Over-crediting as ARBOCs": [format_si(r["arbocs"][1]) for r in results],
        "Range for ARBOCs": [
            f'({format_si(r["arbocs"][0])}, {format_si(r["arbocs"][2])})' for r in results
        ],
    }
)

In [None]:
# Show only the developers whose project count exceeds five.
df.loc[df["Count"] > 5]