In [None]:
from collections import defaultdict, Counter
import fsspec
import json
import os

import pandas as pd

from carbonplan_forest_offsets.data import cat
from carbonplan_forest_offsets.utils import aa_code_to_ss_code

In [None]:
def get_top_species_by_aa(project):
    """Collect species fractions for each assessment area of a project.

    Parameters
    ----------
    project : dict
        Must provide ``"assessment_areas"``, an iterable of dicts with a
        ``"code"`` and a ``"species"`` list (each entry having ``"code"`` and
        ``"fraction"``). Areas whose code is not 999 must also carry
        ``"site_class"``. ``"supersection_ids"`` is read only when an area
        with code 999 is present.

    Returns
    -------
    dict
        Normally a ``defaultdict(Counter)`` keyed by
        ``(ss, assessment area code, site class)`` whose values sum the
        fractions per species code; ``ss`` is whatever ``aa_code_to_ss_code()``
        maps the area code to (``None`` when the code is unknown).
        As soon as an area with code 999 is met, the function returns instead
        a plain dict with one ``(ss, 999, "all")`` key per entry of
        ``project["supersection_ids"]``, every key sharing the same
        ``{species code: fraction}`` dict built from that area alone.

    Notes
    -----
    Despite the name, no ranking or filtering is done here: every species
    listed for an area is returned.
    """
    code_to_ss = aa_code_to_ss_code()
    fractions_by_area = defaultdict(Counter)

    for area in project["assessment_areas"]:
        if area["code"] == 999:
            # Code 999 short-circuits: anything accumulated so far is dropped
            # and the area's species are reported for every supersection id.
            pooled = dict((entry["code"], entry["fraction"]) for entry in area["species"])
            return {(ss_id, 999, "all"): pooled for ss_id in project["supersection_ids"]}

        ss_code = code_to_ss.get(area["code"])
        for entry in area["species"]:
            # Repeated (area, site class) keys and repeated species codes add up.
            area_key = (ss_code, area["code"], area["site_class"])
            fractions_by_area[area_key][entry["code"]] += entry["fraction"]

    return fractions_by_area

In [None]:
# Storage options forwarded to fsspec; the key comes from the environment so
# no credential is stored in the notebook.
storage_options = {
    "account_key": os.environ["BLOB_ACCOUNT_KEY"],
    "account_name": "carbonplan",
}

# Read the classifications JSON; later cells index it by project id and then
# by a "(ss_id, aa_id)" string key.
classifications_file = fsspec.open(
    "az://carbonplan-retro/classifications.json",
    mode="r",
    **storage_options,
)
with classifications_file as f:
    reclassification_weights = json.load(f)

# Module-level copy of the mapping returned by aa_code_to_ss_code().
aa_to_ss_d = aa_code_to_ss_code()

In [None]:
# Read the three inputs used below from the package catalog `cat`.
# project_db is iterated as a sequence of project dicts in the records cell.
project_db = cat.project_db_json.read()
# Only element 0 of each read() result is kept; both objects are used through
# `.get(str_code, default)` later — presumably code -> name mappings (TODO confirm).
spcd_to_name = cat.fia_species_names.read()[0]
fia_fortyp_names = cat.fia_fortyp_to_common_name.read()[0]

In [None]:
# Display names to set (or override) for specific forest type codes.
# NOTE: this mutates the mapping loaded in the previous cell in place.
fortyp_name_overrides = {
    "104": "Eastern white pine / eastern hemlock",
    "266": "Engelmann spruce / subalpine fir",
    "124": "Red spruce / balsam fir",
    "371": "California mixed conifer",
    "401": "Eastern white pine / northern red oak / white ash",
    "403": "Longleaf pine / oak",
    "404": "Shortleaf pine / oak",
    "405": "Virginia pine / southern red oak",
    "516": "Cherry / white ash / yellow-poplar",
}
for fortyp_code, common_name in fortyp_name_overrides.items():
    fia_fortyp_names[fortyp_code] = common_name

In [None]:
# Build one label record per (project, supersection, assessment area).
#
# Each record carries the project id, the two area identifiers, the
# classification names with their weights, and the species names with their
# fractions. Entries whose weight/fraction is not strictly greater than
# MIN_LABEL_FRACTION are left out.
MIN_LABEL_FRACTION = 0.1

records = []
for project in project_db:
    opr_id = project["opr_id"]
    if opr_id == "CAR1":
        # CAR1 is explicitly excluded from the labels.
        continue

    try:
        top_species = get_top_species_by_aa(project)
        for (ss_id, aa_id, _site_class), species_fraction in top_species.items():
            # The weights are keyed by the string form "(ss_id, aa_id)".
            classification_key = f"({ss_id}, {aa_id})"
            proj_class = reclassification_weights[opr_id][classification_key]

            # Keys are normalised via float -> int -> str before the name
            # lookup (so e.g. "104.0" resolves as "104"); unknown codes get an
            # "error-<key>" placeholder instead of failing.
            classifications_arr = [
                (fia_fortyp_names.get(str(int(float(fortyp))), f"error-{fortyp}"), weight)
                for fortyp, weight in proj_class.items()
                if weight > MIN_LABEL_FRACTION
            ]
            # Unknown species codes get a "none-<code>" placeholder.
            species_arr = [
                {"name": spcd_to_name.get(str(spcd), f"none-{spcd}"), "fraction": fraction}
                for spcd, fraction in species_fraction.items()
                if fraction > MIN_LABEL_FRACTION
            ]

            records.append(
                {
                    "id": opr_id,
                    "aa_id": aa_id,
                    "ss_id": ss_id,
                    "classification": classifications_arr,
                    "species": species_arr,
                }
            )
    except Exception as err:
        # Best effort: a project that cannot be labelled is reported and
        # skipped (records appended before the failure are kept). Catching
        # Exception rather than using a bare `except:` lets KeyboardInterrupt
        # and SystemExit propagate, so the cell can still be interrupted.
        print(f"{opr_id}: {type(err).__name__}: {err}")

In [None]:
def _as_percent_lines(pairs):
    """Render (label, fraction) pairs as newline-joined 'Label : 12.3%' lines."""
    return "\n".join(
        ["%s : %.1f%%" % (str(label).capitalize(), fraction * 100) for label, fraction in pairs]
    )


# Tabular view of the records: one row per record, with the species and
# classification lists flattened to multi-line strings.
df = pd.DataFrame()
df["Project"] = [rec["id"] for rec in records]
df["Supersection"] = [rec["ss_id"] for rec in records]
df["Assessment Area"] = [rec["aa_id"] for rec in records]
df["Species"] = [
    _as_percent_lines((sp["name"], sp["fraction"]) for sp in rec["species"]) for rec in records
]
df["Classification"] = [
    _as_percent_lines((cl[0], cl[1]) for cl in rec["classification"]) for rec in records
]

In [None]:
# Storage options forwarded to fsspec; the key comes from the environment so
# no credential is stored in the notebook.
storage_options = {
    "account_key": os.environ["BLOB_ACCOUNT_KEY"],
    "account_name": "carbonplan",
}

# Serialise the label records as JSON to the results location.
labels_file = fsspec.open(
    "az://carbonplan-retro/results/reclassification-labels.json",
    mode="w",
    **storage_options,
)
with labels_file as f:
    json.dump(records, f)