In [1]:
from collections import defaultdict
import json
import os

from dask.distributed import Client
import fsspec
import numpy as np
import pandas as pd
from shapely.ops import cascaded_union
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import RadiusNeighborsClassifier

## And a bunch of carbonplan dependencies
from carbonplan_data import cat as core_cat

from carbonplan_retro.data import cat
from carbonplan_retro.analysis.assign_project_fldtypcd import load_classification_data
from carbonplan_retro.load.geometry import (
    get_overlapping_states,
    load_supersections,
)
from carbonplan_retro.load.project_db import load_project_db

In [2]:
def get_aoi(ss_ids):
    """Build one buffered area-of-interest geometry for a group of supersections.

    The supersections whose ``ss_id`` is in ``ss_ids`` are reprojected to the CRS
    of the CONUS NLCD raster, dissolved into a single feature, buffered by
    150,000 CRS units (presumably metres -- confirm against the NLCD CRS) and
    finally reprojected to EPSG:4326.

    Parameters
    ----------
    ss_ids : list-like
        Supersection ids to select from ``load_supersections()``.

    Returns
    -------
    The single geometry remaining after dissolve/buffer/reproject, extracted
    with ``.item()``.
    """
    nlcd = core_cat.nlcd.raster(region="conus").to_dask()
    native_crs = nlcd.attrs["crs"]

    all_supersections = load_supersections().to_crs(native_crs)
    selected = all_supersections[all_supersections["ss_id"].isin(ss_ids)].copy()
    # A constant key makes dissolve() collapse every selected row into one feature.
    selected.loc[:, "dissolve_all"] = 1

    merged = selected.dissolve(by="dissolve_all")
    buffered = merged.buffer(150_000)
    return buffered.to_crs("epsg:4326").item()

In [3]:
def species_array_to_d(species_array):
    """Turn a sequence of species records into a ``{code: fraction}`` mapping.

    Each record is indexed by ``"code"`` and ``"fraction"``. Codes are converted
    to strings and fractions are rounded to four decimal places. When a code
    occurs more than once, the last record wins.
    """
    fractions_by_code = {}
    for record in species_array:
        fractions_by_code[str(record["code"])] = round(record["fraction"], 4)
    return fractions_by_code


def prepare_regional_classifier(ss_ids, prefit_radius=None):
    """Fit a radius-neighbors classifier for a group of supersections.

    Returns the trained classifier and the data vectorizer so both can be
    applied to multiple opr_ids.

    Parameters
    ----------
    ss_ids : sequence of int
        Supersection ids. A single id greater than 200 loads the ``"ak"``
        classification data directly; any other combination loads data for the
        states overlapping the buffered area of interest from ``get_aoi``.
    prefit_radius : float, optional
        When truthy, a ``RadiusNeighborsClassifier`` is fit with this radius.
        Otherwise the radius is chosen by a 5-fold ``GridSearchCV`` over
        ``np.arange(0.15, 0.651, 0.025)``.

    Returns
    -------
    tuple
        ``(clf, vectorizer)`` where ``clf`` is the fitted classifier (or fitted
        ``GridSearchCV``) and ``vectorizer`` is ``data["dictvectorizer"]``.

    Raises
    ------
    ValueError
        If ``ss_ids`` is empty.
    """
    if len(ss_ids) == 0:
        raise ValueError("ss_ids must contain at least one supersection id")

    # Logical `and` short-circuits; the previous bitwise `&` always evaluated
    # `ss_ids[0]`, even when the length check had already failed.
    if len(ss_ids) == 1 and ss_ids[0] > 200:
        data = load_classification_data(["ak"])
    else:
        # Same dissolve/buffer/reproject steps that used to be duplicated here.
        aoi = get_aoi(ss_ids)

        postal_codes = get_overlapping_states(aoi)
        print(f"preparing to load: {list(postal_codes)}")
        data = load_classification_data(postal_codes, aoi=aoi)

    print("fitting classifier ")
    if prefit_radius:
        print(f"using cached radius: {prefit_radius}")
        clf = RadiusNeighborsClassifier(
            weights="distance", algorithm="brute", outlier_label=-999, radius=prefit_radius
        )
    else:
        # JJH: check algorithm
        base_clf = RadiusNeighborsClassifier(
            weights="distance", algorithm="brute", outlier_label=-999
        )
        param_grid = [
            {"radius": np.arange(0.15, 0.651, 0.025)}
        ]  # initial testing never yielded a case where we went above 0.5

        # Use half of the available cores, but never fewer than one:
        # os.cpu_count() may return None, and n_jobs=0 is rejected by joblib.
        n_jobs = max(1, (os.cpu_count() or 1) // 2)

        # we can parallelize this further
        # dask_ml.model_selection.GridSearchCV
        clf = GridSearchCV(base_clf, param_grid, n_jobs=n_jobs, cv=5, refit=True, verbose=10)

    clf.fit(data["features"], data["targets"])
    return clf, data["dictvectorizer"]

In [4]:
# Create a Dask distributed client with default arguments; the bare `client`
# expression renders its summary as the cell output.
client = Client()
client

0,1
Client  Scheduler: tcp://127.0.0.1:36525  Dashboard: /user/jhamman/proxy/8787/status,Cluster  Workers: 4  Cores: 8  Memory: 53.69 GB


In [5]:
project_db = load_project_db("Forest-Offset-Projects-v0.3", save=False)

# Drop projects whose `early_action` value starts with "CAR".
projects = project_db[~project_db["project"]["early_action"].str.startswith("CAR")]
# Build the mask from `projects` itself. Masking with a Series taken from the
# unfiltered `project_db` forces pandas to reindex the boolean key and warn.
projects = projects[projects["project"]["species"].notnull()]
# Drop projects whose species entry contains "all".
projects = projects[~projects["project"]["species"].apply(lambda x: "all" in x)]

loading load Forest-Offset-Projects-v0.3 from /home/jovyan/retro/data
failed to load from disk -- grabbing Forest-Offset-Projects-v0.3 from google


  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
# Read the `retro_db_light_json` entry from the carbonplan_retro catalog.
# NOTE(review): `retro_json` is not referenced by any other cell visible here.
retro_json = cat.retro_db_light_json.read()

In [7]:
# Intended to hold fitted (classifier, vectorizer) pairs by key; the store cell
# reads element [0] of each value. Kept in its own cell so the fitting cell can
# be re-run after a failure without resetting what was already fitted.
clf_cache = {}

In [11]:
# Scratch inspection of one project's supersection ids.
# NOTE(review): `project` is not assigned by any cell visible here (execution
# counts jump from 7 to 11), so this cell fails on a fresh kernel -- define it
# explicitly or drop the cell.
project["project"]["supersection_ids"].item()

[55, 79]

In [17]:
USE_CACHE = False

if USE_CACHE:
    # Reuse radii stored by an earlier run instead of repeating the grid search.
    with fsspec.open(
        "az://carbonplan-scratch/radius_neighbor_params.json",
        account_name="carbonplan",
        mode="r",
        account_key=os.environ["BLOB_ACCOUNT_KEY"],
    ) as f:
        radius_params = json.load(f)
else:
    radius_params = {}

# Unique combinations of supersection ids across the retained projects
# (tuples so they can live in a set).
supersections = set(
    tuple(project["project"]["supersection_ids"].item()) for _, project in projects.iterrows()
)

for ssid in supersections:
    print(ssid)
    # String form of the id list, e.g. "[55, 79]". Presumably matches the keys
    # of radius_neighbor_params.json, which the earlier per-project code derived
    # via `.astype(str).item()` -- confirm before relying on USE_CACHE.
    cache_key = str(list(ssid))
    # Fit each combination once; a re-run of this cell after a failure picks up
    # where it stopped instead of refitting everything.
    if cache_key not in clf_cache:
        clf_cache[cache_key] = prepare_regional_classifier(ssid, radius_params.get(cache_key))
    clf, data_encoder = clf_cache[cache_key]

# TODO(review): the per-project classification step is still disabled, so
# `classifications` (written out by the store cell) is never created. The
# disabled logic, kept for reference:
#
# classifications = defaultdict(dict)
#     if opr_id in ["CAR1094", "CAR1032"]:
#         print(f"skipping {opr_id} -- discuss w group")
#         continue
#     for aa_id, species_arr in project_db["project"]["species"][opr_id].items():
#         feat_dict = species_array_to_d(species_arr)
#         feats = data_encoder.transform(feat_dict)
#         classification = pd.Series(clf.predict_proba(feats).flatten(), index=clf.classes_)
#         classifications[opr_id][aa_id] = classification[classification > 0].sort_values().to_dict()

(32,)
preparing to load: ['ar', 'la', 'tx', 'al', 'fl', 'ga', 'ms']


ValueError: cannot reindex from a duplicate axis

distributed.client - ERROR - Failed to reconnect to scheduler after 600.00 seconds, closing client
distributed.utils - ERROR - Timed out trying to connect to 'tcp://127.0.0.1:36525' after 600 s: Timed out trying to connect to 'tcp://127.0.0.1:36525' after 600 s: in <distributed.comm.tcp.TCPConnector object at 0x7f017fe6e390>: ConnectionRefusedError: [Errno 111] Connection refused
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.7/site-packages/distributed/comm/tcp.py", line 186, in read
    n_frames = await stream.read_bytes(8)
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.7/site-packages/distributed/client.py", line 1223, in _handle_report
    msgs = await self.scheduler_comm.comm.read()
  File "/srv/conda/envs/notebook/lib/python3.7/site-packages/distributed/comm/tcp.py", line 201, in read
    conv

In [None]:
# NOTE(review): leftover post-mortem debugging of the ValueError raised by the
# previous cell; remove this cell (and its transcript) before sharing.
%debug

> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py[0m(3476)[0;36m_can_reindex[0;34m()[0m
[0;32m   3474 [0;31m        [0;31m# trying to reindex on an axis with duplicates[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3475 [0;31m        [0;32mif[0m [0;32mnot[0m [0mself[0m[0;34m.[0m[0m_index_as_unique[0m [0;32mand[0m [0mlen[0m[0;34m([0m[0mindexer[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3476 [0;31m            [0;32mraise[0m [0mValueError[0m[0;34m([0m[0;34m"cannot reindex from a duplicate axis"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3477 [0;31m[0;34m[0m[0m
[0m[0;32m   3478 [0;31m    [0;32mdef[0m [0mreindex[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mtarget[0m[0;34m,[0m [0mmethod[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mlevel[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mlimit[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mtolerance[0m[0;34m=[0

ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/internals/managers.py[0m(1301)[0;36mreindex_indexer[0;34m()[0m
[0;32m   1299 [0;31m        [0;31m# some axes don't allow reindexing with dups[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1300 [0;31m        [0;32mif[0m [0;32mnot[0m [0mallow_dups[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1301 [0;31m            [0mself[0m[0;34m.[0m[0maxes[0m[0;34m[[0m[0maxis[0m[0;34m][0m[0;34m.[0m[0m_can_reindex[0m[0;34m([0m[0mindexer[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1302 [0;31m[0;34m[0m[0m
[0m[0;32m   1303 [0;31m        [0;32mif[0m [0maxis[0m [0;34m>=[0m [0mself[0m[0;34m.[0m[0mndim[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/generic.py[0m(4880)[0;36m_reindex_with_indexers[0;34m()[0m
[0;32m   4878 [0;31m                [0mfill_value[0m[0;34m=[0m[0mfill_value[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4879 [0;31m                [0mallow_dups[0m[0;34m=[0m[0mallow_dups[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 4880 [0;31m                [0mcopy[0m[0;34m=[0m[0mcopy[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4881 [0;31m            )
[0m[0;32m   4882 [0;31m            [0;31m# If we've made a copy once, no need to make another one[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/generic.py[0m(4833)[0;36m_reindex_axes[0;34m()[0m
[0;32m   4831 [0;31m                [0mfill_value[0m[0;34m=[0m[0mfill_value[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4832 [0;31m                [0mcopy[0m[0;34m=[0m[0mcopy[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 4833 [0;31m                [0mallow_dups[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4834 [0;31m            )
[0m[0;32m   4835 [0;31m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/generic.py[0m(4809)[0;36mreindex[0;34m()[0m
[0;32m   4807 [0;31m        [0;31m# perform the reindex on the axes[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4808 [0;31m        return self._reindex_axes(
[0m[0;32m-> 4809 [0;31m            [0maxes[0m[0;34m,[0m [0mlevel[0m[0;34m,[0m [0mlimit[0m[0;34m,[0m [0mtolerance[0m[0;34m,[0m [0mmethod[0m[0;34m,[0m [0mfill_value[0m[0;34m,[0m [0mcopy[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4810 [0;31m        ).__finalize__(self, method="reindex")
[0m[0;32m   4811 [0;31m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/series.py[0m(4315)[0;36mreindex[0;34m()[0m
[0;32m   4313 [0;31m    )
[0m[0;32m   4314 [0;31m    [0;32mdef[0m [0mreindex[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mindex[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 4315 [0;31m        [0;32mreturn[0m [0msuper[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0mreindex[0m[0;34m([0m[0mindex[0m[0;34m=[0m[0mindex[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   4316 [0;31m[0;34m[0m[0m
[0m[0;32m   4317 [0;31m    def drop(
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/frame.py[0m(3859)[0;36mreindexer[0;34m()[0m
[0;32m   3857 [0;31m                [0;31m# GH 4107[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3858 [0;31m                [0;32mtry[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3859 [0;31m                    [0mvalue[0m [0;34m=[0m [0mvalue[0m[0;34m.[0m[0mreindex[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mindex[0m[0;34m)[0m[0;34m.[0m[0m_values[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3860 [0;31m                [0;32mexcept[0m [0mValueError[0m [0;32mas[0m [0merr[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3861 [0;31m                    [0;31m# raised in MultiIndex.from_tuples, see test_insert_error_msmgs[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/frame.py[0m(3864)[0;36mreindexer[0;34m()[0m
[0;32m   3862 [0;31m                    [0;32mif[0m [0;32mnot[0m [0mvalue[0m[0;34m.[0m[0mindex[0m[0;34m.[0m[0mis_unique[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3863 [0;31m                        [0;31m# duplicate axis[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3864 [0;31m                        [0;32mraise[0m [0merr[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3865 [0;31m[0;34m[0m[0m
[0m[0;32m   3866 [0;31m                    [0;31m# other[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/frame.py[0m(3873)[0;36m_sanitize_column[0;34m()[0m
[0;32m   3871 [0;31m[0;34m[0m[0m
[0m[0;32m   3872 [0;31m        [0;32mif[0m [0misinstance[0m[0;34m([0m[0mvalue[0m[0;34m,[0m [0mSeries[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3873 [0;31m            [0mvalue[0m [0;34m=[0m [0mreindexer[0m[0;34m([0m[0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3874 [0;31m[0;34m[0m[0m
[0m[0;32m   3875 [0;31m        [0;32melif[0m [0misinstance[0m[0;34m([0m[0mvalue[0m[0;34m,[0m [0mDataFrame[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/frame.py[0m(3239)[0;36m_set_item[0;34m()[0m
[0;32m   3237 [0;31m        """
[0m[0;32m   3238 [0;31m        [0mself[0m[0;34m.[0m[0m_ensure_valid_index[0m[0;34m([0m[0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3239 [0;31m        [0mvalue[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_sanitize_column[0m[0;34m([0m[0mkey[0m[0;34m,[0m [0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3240 [0;31m        [0mNDFrame[0m[0;34m.[0m[0m_set_item[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mkey[0m[0;34m,[0m [0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3241 [0;31m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/pandas/core/frame.py[0m(3163)[0;36m__setitem__[0;34m()[0m
[0;32m   3161 [0;31m        [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3162 [0;31m            [0;31m# set column[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 3163 [0;31m            [0mself[0m[0;34m.[0m[0m_set_item[0m[0;34m([0m[0mkey[0m[0;34m,[0m [0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   3164 [0;31m[0;34m[0m[0m
[0m[0;32m   3165 [0;31m    [0;32mdef[0m [0m_setitem_slice[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mkey[0m[0;34m:[0m [0mslice[0m[0;34m,[0m [0mvalue[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/geopandas/geodataframe.py[0m(853)[0;36m__setitem__[0;34m()[0m
[0;32m    851 [0;31m            [0;32mexcept[0m [0mTypeError[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 853 [0;31m        [0msuper[0m[0;34m([0m[0mGeoDataFrame[0m[0;34m,[0m [0mself[0m[0;34m)[0m[0;34m.[0m[0m__setitem__[0m[0;34m([0m[0mkey[0m[0;34m,[0m [0mvalue[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    854 [0;31m[0;34m[0m[0m
[0m[0;32m    855 [0;31m    [0;31m#[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/srv/conda/envs/notebook/lib/python3.7/site-packages/geopandas/tools/clip.py[0m(242)[0;36mclip[0;34m()[0m
[0;32m    240 [0;31m    [0;31m# Preserve the original order of the input[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    241 [0;31m    [0;32mif[0m [0misinstance[0m[0;34m([0m[0mconcat[0m[0;34m,[0m [0mGeoDataFrame[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 242 [0;31m        [0mconcat[0m[0;34m[[0m[0;34m"_order"[0m[0;34m][0m [0;34m=[0m [0morder[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    243 [0;31m        [0;32mreturn[0m [0mconcat[0m[0;34m.[0m[0msort_values[0m[0;34m([0m[0mby[0m[0;34m=[0m[0;34m"_order"[0m[0;34m)[0m[0;34m.[0m[0mdrop[0m[0;34m([0m[0mcolumns[0m[0;34m=[0m[0;34m"_order"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    244 [0;31m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/home/jovyan/retro/carbonplan_retro/analysis/assign_project_fldtypcd.py[0m(103)[0;36mload_classification_data[0;34m()[0m
[0;32m    101 [0;31m        [0mdata[0m[0;34m[[0m[0;34m"fraction_species"[0m[0;34m][0m[0;34m.[0m[0mvalues[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    102 [0;31m    )  # .toarray() explodes the sparse array returned from DictVectorizer() out into a dense array
[0m[0;32m--> 103 [0;31m    [0my[0m [0;34m=[0m [0mdata[0m[0;34m[[0m[0mtarget_var[0m[0;34m][0m[0;34m.[0m[0mvalues[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    104 [0;31m    [0;31m# idx = ~np.isnan(X).any(axis=1)[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    105 [0;31m[0;34m[0m[0m
[0m


ipdb>  target_var


'FORTYPCD'


ipdb>  data


*** NameError: name 'data' is not defined


ipdb>  data["fraction_species"]


*** NameError: name 'data' is not defined


ipdb>  X


*** NameError: name 'X' is not defined


ipdb>  target_var


'FORTYPCD'


ipdb>  y


*** NameError: name 'y' is not defined


ipdb>  postal_codes


['ar', 'la', 'tx', 'al', 'fl', 'ga', 'ms']


ipdb>  conds


                     CN           PLT_CN  CONDID  OWNCD  FORTYPCD  FLDTYPCD  \
index                                                                         
107237  259578802010854  249672510010854       2   46.0     503.0     503.0   
107238  259577851010854  249672511010854       1   46.0     406.0     161.0   
107239  259577852010854  249672511010854       2   46.0     161.0     161.0   
107240  259574166010854  249672512010854       1   46.0     406.0     706.0   
107241  259574167010854  249672512010854       2   46.0     406.0     161.0   
...                 ...              ...     ...    ...       ...       ...   
95942    53219663020004  259119059010854       1   11.0     141.0     141.0   
95943    53219721020004  259119060010854       1   11.0     161.0     406.0   
95944    53219814020004  259119061010854       1   11.0     161.0     161.0   
95945    53219915020004  259119062010854       1   11.0     161.0     161.0   
95946    53220006020004  259119063010854       1   1

ipdb>  tree_features


PLT_CN           CONDID
17479078010478   1         {'491': 0.0214, '611': 0.2605, '721': 0.261, '...
17479844010478   1         {'461': 0.1769, '544': 0.2474, '641': 0.0756, ...
17479968010478   2             {'131': 0.8397, '611': 0.1298, '831': 0.0305}
17480533010478   1         {'391': 0.0932, '611': 0.3546, '68': 0.115, '8...
17481234010478   1             {'110': 0.2316, '131': 0.7588, '824': 0.0096}
                                                 ...                        
549073062126144  1         {'316': 0.0634, '401': 0.0195, '407': 0.1066, ...
549073097126144  1         {'221': 0.4001, '544': 0.0554, '551': 0.0276, ...
549073150126144  1         {'316': 0.034, '408': 0.1531, '409': 0.0916, '...
549073158126144  1         {'131': 0.1723, '521': 0.0139, '552': 0.0268, ...
558391170126144  1         {'131': 0.0831, '316': 0.1004, '403': 0.1726, ...
Name: fraction_species, Length: 202146, dtype: object


ipdb>  tree_features.index


MultiIndex([( 17479078010478, 1),
            ( 17479844010478, 1),
            ( 17479968010478, 2),
            ( 17480533010478, 1),
            ( 17481234010478, 1),
            ( 17481349010478, 1),
            ( 17481713010478, 2),
            ( 17481793010478, 1),
            ( 17481964010478, 1),
            ( 17482868010478, 1),
            ...
            (549073022126144, 1),
            (549073023126144, 1),
            (549073036126144, 1),
            (549073037126144, 1),
            (549073040126144, 1),
            (549073062126144, 1),
            (549073097126144, 1),
            (549073150126144, 1),
            (549073158126144, 1),
            (558391170126144, 1)],
           names=['PLT_CN', 'CONDID'], length=202146)


## Store some outputs

Store the radius selected by 5-fold cross-validation for each fitted classifier, as well as the
project classifications. We are unlikely to need the radii again, but they are expensive to
compute, so we store them for good measure.


In [None]:
# Only grid-searched classifiers expose `best_params_`. Classifiers built from a
# cached radius are plain RadiusNeighborsClassifier objects and are skipped
# rather than raising AttributeError.
fit_radii = {
    key: float(fitted[0].best_params_["radius"])
    for key, fitted in clf_cache.items()
    if hasattr(fitted[0], "best_params_")
}

# Skip the write when nothing was fitted, so previously stored radii are not
# replaced by an empty mapping.
if not USE_CACHE and fit_radii:
    # Serialize before opening the remote file so a serialization error cannot
    # leave a truncated blob behind.
    radii_payload = json.dumps(fit_radii, indent=2)
    with fsspec.open(
        "az://carbonplan-scratch/radius_neighbor_params.json",
        account_name="carbonplan",
        mode="w",
        account_key=os.environ["BLOB_ACCOUNT_KEY"],
    ) as f:
        f.write(radii_payload)

# NOTE(review): `classifications` is only created in commented-out code in the
# visible notebook. Building the payload first makes a missing or unserializable
# value fail before the remote file is opened for writing.
classifications_payload = json.dumps(classifications, indent=2)
with fsspec.open(
    "az://carbonplan-scratch/project_radius_classification.json",
    account_name="carbonplan",
    mode="w",
    account_key=os.environ["BLOB_ACCOUNT_KEY"],
) as f:
    f.write(classifications_payload)