In [None]:
import ast
import json
import pathlib

import gspread
import numpy as np
from oauth2client.service_account import ServiceAccountCredentials
from pandas import DataFrame, MultiIndex, to_datetime, to_numeric, read_json

In [None]:
def get_sheet(sheet, doc, keyfile="carbonplan-03794eb9a308.json"):
    """
    helper function to open a specific google sheet

    Parameters
    ----------
    sheet : str
        name of the worksheet (tab) to open inside the document
    doc : str
        name of the spreadsheet document to open
    keyfile : str, optional
        path to the service-account JSON key used to authenticate; defaults
        to the key file this notebook has always used, so existing
        two-argument calls behave exactly as before

    Returns
    -------
    the worksheet object returned by gspread for ``sheet``
    """
    scope = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]

    credentials = ServiceAccountCredentials.from_json_keyfile_name(keyfile, scope)

    client = gspread.authorize(credentials)
    workbook = client.open(doc)
    # Return directly instead of rebinding the ``sheet`` argument, which made
    # the name mean "title" on one line and "worksheet object" on the next.
    return workbook.worksheet(sheet)

In [None]:
# Worksheet holding the project table; get_df() reads it via this notebook-level name.
sheet = get_sheet(sheet="Sheet2", doc="Forest-Offset-Projects")

In [None]:
def get_df(worksheet=None):
    """
    load the worksheet into a table of projects plus a per-field type map

    Parameters
    ----------
    worksheet : object with a ``get_all_values()`` method, optional
        source of the raw cell values (list of rows, header row first).
        Defaults to the notebook-level ``sheet`` so existing ``get_df()``
        calls keep working.

    Returns
    -------
    df : DataFrame
        the sheet transposed: one column per sheet row, labelled by a
        three-level MultiIndex built from the ``level0``/``level1``/``level2``
        columns; one row per remaining sheet column (minus the first one)
    types : Series
        the sheet's ``type`` column, indexed by the same MultiIndex
    """
    if worksheet is None:
        # Backward-compatible fallback to the worksheet opened earlier in the notebook.
        worksheet = sheet

    data = np.asarray(worksheet.get_all_values())
    raw = DataFrame(data[1:], columns=data[0])

    levels = ["level0", "level1", "level2"]
    outer = levels[:2]
    left = raw[levels].copy()
    # Blank level0/level1 cells inherit the nearest non-blank value above them;
    # level2 is left exactly as entered.
    left[outer] = left[outer].mask(left[outer] == "", None).ffill()
    index = MultiIndex.from_frame(left)

    types = raw["type"].copy()
    types.index = index

    raw.index = index
    df = raw.drop(columns=levels + ["type"]).transpose()
    # NOTE(review): the first remaining sheet column is discarded here --
    # presumably a non-project column (e.g. a description); confirm against the sheet.
    df = df.iloc[1:]

    return df, types

In [None]:
def json_loads(v):
    """
    decode one JSON cell, treating cells that contain "SEE NOTE" as missing

    Returns None for a "SEE NOTE" cell and the decoded value otherwise. If
    anything goes wrong the offending value is printed and the error is
    re-raised unchanged.
    """
    try:
        return None if "SEE NOTE" in v else json.loads(v)
    except BaseException:
        # Echo the raw cell so the bad entry is easy to locate, then propagate.
        print(v)
        raise


# Spellings (stripped, lower-cased) that mean False in a "bool" column.
_FALSE_STRINGS = frozenset({"", "0", "false", "f", "no", "n"})


def _parse_bool(value):
    """Interpret one cell as a boolean; text is matched case-insensitively."""
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def _strip_thousands(col):
    """Return ``col`` as text with comma thousands separators removed."""
    return col.astype(str).str.replace(",", "", regex=False)


def cast_col(col, type_str):
    """
    convert one column of raw cell values to the type named by ``type_str``

    Parameters
    ----------
    col : Series
        raw values for a single field (text, as read from the sheet)
    type_str : str
        type tag for the field

    Returns
    -------
    Series or list
        * "YYYY-MM-DD" and "[(is_intentional, size)]": ``col`` unchanged
        * "str" / "str:previous_project_id": ``col`` as text
        * "bool": boolean Series; blank, "0", "false", "f", "no" and "n"
          (any case) are False, every other text is True
        * "int" / "float": numeric Series with commas removed; values that
          cannot be parsed become NaN
        * "[lon:float, lat:float]" / "[int]": list of decoded JSON values,
          ``[]`` for blank cells
        * anything else: list of decoded JSON values, ``""`` for blank cells

    Raises
    ------
    Whatever ``json_loads`` raises for a cell that is not valid JSON.
    """
    if type_str == "YYYY-MM-DD":
        # Dates are currently passed through unparsed (a to_datetime
        # conversion with errors="coerce" was sketched here but left disabled).
        return col
    if type_str in ("str", "str:previous_project_id"):
        return col.astype(str)
    if type_str == "bool":
        # astype(bool) alone is truthiness, which turns the text "FALSE" into True.
        return col.map(_parse_bool).astype(bool)
    if type_str == "int":
        # Strip separators here too, matching the float branch; otherwise
        # "1,234" is silently coerced to NaN.
        return to_numeric(_strip_thousands(col), errors="coerce", downcast="integer")
    if type_str == "float":
        return to_numeric(_strip_thousands(col), errors="coerce", downcast="float")
    if type_str in ("[lon:float, lat:float]", "[int]"):
        return [json_loads(v) if v else [] for v in col]
    if type_str == "[(is_intentional, size)]":
        return col  # TODO
    try:
        return [json_loads(v) if v else "" for v in col]
    except Exception:
        # Show the whole column for context before propagating the error.
        print(col)
        raise


# Pull the table from the worksheet, then coerce every field column to the
# type declared for it in the sheet.
df, types = get_df()

# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column label, Series) pairs and also exists in older pandas versions.
for index, col in df.items():
    type_str = types[index]
    df[index] = cast_col(col, type_str)

In [None]:
# Quick look at the first rows of the type-cast table.
df.head()

In [None]:
# Export the table keyed by row label; any datetimes are written as ISO
# strings at second precision.
df.to_json(
    "retro.json",
    orient="index",
    date_format="iso",
    date_unit="s",
    indent=2,
)

In [None]:
def strip(x):
    """Return ``x`` without leading or trailing whitespace."""
    return x.strip()


def str_to_tuple(s):
    """
    turn the text form of a tuple, e.g. "('a', 'b', '')", back into a tuple of strings

    The text is parsed as a Python literal first, so labels that contain
    commas or apostrophes, and one-element tuples, survive intact. Text that
    is not a valid tuple literal (for example unquoted names) falls back to
    the original approach: drop the outer brackets and all single quotes,
    split on commas, and strip each piece.

    Every element of the result is a string, as before.
    """
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        parsed = None

    if isinstance(parsed, tuple):
        return tuple(item if isinstance(item, str) else str(item) for item in parsed)

    return tuple(map(strip, s[1:-1].replace("'", "").split(",")))

In [None]:
# Reload the exported file and rebuild the multi-level column labels from
# their stringified-tuple form.
df2 = read_json("retro.json", orient="index", convert_dates=True)
df2.columns = MultiIndex.from_tuples([str_to_tuple(label) for label in df2.columns])

In [None]:
# df.compare(df2)

In [None]:
def make_project(name):
    """
    build an empty project record for ``name``

    The record carries the fixed "Offset-Project" type tag, the given name,
    empty dict sections for documents / project / live_carbon / baseline, and
    an empty list of reporting periods under "rp". Every call returns fresh
    containers.
    """
    template = {"type": "Offset-Project", "name": name}
    for section in ("documents", "project", "live_carbon", "baseline"):
        template[section] = {}
    template["rp"] = []
    return template

In [None]:
# Reporting-period sections present in the table ("rp[0]", "rp[1]", ...), in
# numeric order. They are discovered from the column labels instead of
# assuming exactly seven, so a sheet with fewer sections no longer raises
# KeyError and sections beyond the seventh are no longer silently dropped.
rp_keys = sorted(
    (
        label
        for label in df.columns.get_level_values(0).unique()
        if isinstance(label, str)
        and label.startswith("rp[")
        and label.endswith("]")
        and label[3:-1].isdecimal()
    ),
    key=lambda label: int(label[3:-1]),
)

# Build one nested record per table row (the row label becomes the project name).
projects = []
for key, col in df.iterrows():
    d = make_project(key)

    # documents: one nested dict per second-level label
    keys = col["documents"].index.get_level_values(0).unique()
    d["documents"].update({k: col[("documents", k)].to_dict() for k in keys})

    # project: drop the innermost label level so the second-level labels become the keys
    temp = col["project"]
    temp.index = temp.index.droplevel(1)
    d["project"].update(temp.to_dict())

    # live_carbon
    d["live_carbon"].update({"components": col[("live_carbon", "components")].to_dict()})
    d["live_carbon"]["notes"] = col[("live_carbon", "notes", "")]

    # baseline: flatten as for "project", then replace the flattened
    # "components" / "economics" entries with their full nested dicts
    temp = col["baseline"]
    temp.index = temp.index.droplevel(1)
    d["baseline"].update(temp.to_dict())
    d["baseline"]["components"] = col[("baseline", "components")].to_dict()
    d["baseline"]["economics"] = col[("baseline", "economics")].to_dict()

    # rp[0-?]: a distinct name is used for the section label so the loop's
    # ``key`` (the project name) is not overwritten
    for rp_key in rp_keys:
        # A section with an empty date_submitted is treated as absent.
        if not col[(rp_key, "date_submitted", "")]:
            continue

        temp = col[rp_key]
        temp.index = temp.index.droplevel(1)
        rp = temp.to_dict()
        rp["components"] = col[(rp_key, "components")].to_dict()
        rp["attestation"] = col[(rp_key, "attestation")].to_dict()
        d["rp"].append(rp)
    projects.append(d)

In [None]:
# Serialise the project records straight to disk with a two-space indent.
with open("retro_projects.json", "w") as f:
    json.dump(projects, f, indent=2)