In [1]:
import json

import dotenv
import polars as pl
from utils import get_ids

# Load environment variables from the .env file two directories up.
# NOTE(review): presumably these are needed by `get_ids` — confirm.
dotenv.load_dotenv(dotenv_path="../../.env")

# Excel workbook that the cells below read their sheets from.
FILENAME = "../data/PwC_CCSA Map - Datasets for Vizzuality map.xlsx"

## Categories

In [2]:
# Read the "categories" sheet and export its "Categories" column as a
# pretty-printed, row-oriented JSON list of {"name": ...} records.
categories = pl.read_excel(FILENAME, sheet_name="categories")
category_names = categories.select(name=pl.col("Categories"))
category_names.write_json("categories.json", row_oriented=True, pretty=True)

## Datasets 

Each dataset record written to `datasets.json` must have the following structure:

```json
{
    "name": "name",
    "description": "description",
    "category": "category_id",
    "unit": "units",
    "datum": [
        {
            "iso3": "AFG",
            "value": 0.1
        },
        ...
    ]
}
```

In [3]:
# Lookup used below to translate each dataset's "Category" value into the value
# stored under "category" in the exported records (e.g. 4).
# NOTE(review): `get_ids` comes from the local `utils` module; presumably it returns
# the ids of categories that already exist elsewhere — confirm where it reads them from.
category_ids = get_ids("categories")

In [4]:
def _read_nonempty_rows(sheet_name: str) -> pl.DataFrame:
    """Read one sheet of the workbook, dropping rows in which every cell is null."""
    sheet = pl.read_excel(FILENAME, sheet_name=sheet_name)
    row_is_blank = pl.all_horizontal(pl.all().is_null())
    return sheet.filter(~row_is_blank)


# One row of metadata per dataset (name, ID, description, Type, Unit, Category).
datasets_info = _read_nonempty_rows("datasets info")
# Values sheet: read per dataset ID column in `make_datum`.
data = _read_nonempty_rows("data")

name,ID,description,Type,Unit,Category
str,str,str,str,str,str
"""CARICOM Member…","""CARICOM""","""Country is a m…","""category""",,"""Governance / A…"
"""OECS Members""","""OECS""","""Country is a m…","""category""",,"""Governance / A…"
"""Overseas Terri…","""Overseas Terri…","""Country is an …","""category""",,"""Governance / A…"
"""Climate Impact…","""Climate Impact…","""Climate Impact…","""category""",,"""Cliamte Risk /…"
"""CO2 emissions …","""CO2 emissions …","""CO2 emissions …","""continuous""","""t CO2 / capita…","""Cliamte Risk /…"
"""CO2 emissions …","""CO2 emissions …","""CO2 emissions …","""category""",,"""Cliamte Risk /…"
"""Climate Readin…","""Climate Adapta…","""Climate Adapta…","""continuous""","""$""","""Climate Readin…"
"""Climate Readin…","""Climate Mitiga…","""Climate Mitiga…","""continuous""","""$""","""Climate Readin…"
"""Loss and Damag…","""Total Damage C…","""Total Damage c…","""continuous""","""$""","""Climate Impact…"
"""Lives and Live…","""People Affecte…","""People Affecte…","""continuous""","""$""","""Climate Impact…"


In [38]:
def make_datum(dataset_id: str):
    """Build the list of ``{"iso3": ..., "value": ...}`` records for one dataset.

    Reads the module-level ``data`` frame: its "Abbreviation" column becomes
    ``iso3`` and the column named ``dataset_id`` becomes ``value``. Rows are
    returned as a list of dicts, one per row of ``data``, nulls included.
    """
    iso3 = pl.col("Abbreviation").alias("iso3")
    value = pl.col(dataset_id).alias("value")
    records = data.select(iso3, value).to_struct(name=dataset_id)
    return records.to_list()


# One dict per dataset. "datum" temporarily holds the dataset ID; a later cell
# swaps it for the actual records produced by `make_datum`.
# NOTE(review): categories absent from `category_ids` are mapped to null by
# `map_dict` rather than raising — confirm every "Category" spelling in the
# sheet matches a key of `category_ids`.
datasets = datasets_info.select(
    "name",
    "description",
    category=pl.col("Category").map_dict(category_ids),
    unit=pl.col("Unit"),
    datum=pl.col("ID"),
).to_dicts()

In [39]:
# Replace each dataset's placeholder ID (stored under "datum") with its records.
for ds in datasets:
    # Once resolved, "datum" is a list; skipping those keeps the cell idempotent,
    # so re-running it does not feed a list of records back into `make_datum`.
    if not isinstance(ds["datum"], list):
        ds["datum"] = make_datum(ds["datum"])

[{'name': 'CARICOM Members',
  'description': 'Country is a member of CARICOM',
  'category': 4,
  'unit': None,
  'datum': [{'iso3': 'ABW', 'value': None},
   {'iso3': 'AIA', 'value': 'yes'},
   {'iso3': 'ATG', 'value': 'yes'},
   {'iso3': 'BES', 'value': None},
   {'iso3': 'BHS', 'value': 'yes'},
   {'iso3': 'BLZ', 'value': 'yes'},
   {'iso3': 'BMU', 'value': 'yes'},
   {'iso3': 'BRB', 'value': 'yes'},
   {'iso3': 'CRI', 'value': None},
   {'iso3': 'CUW', 'value': None},
   {'iso3': 'CYM', 'value': None},
   {'iso3': 'DMA', 'value': 'yes'},
   {'iso3': 'DOM', 'value': None},
   {'iso3': 'GLP', 'value': None},
   {'iso3': 'GRD', 'value': 'yes'},
   {'iso3': 'GUY', 'value': 'yes'},
   {'iso3': 'HND', 'value': None},
   {'iso3': 'HTI', 'value': 'yes'},
   {'iso3': 'JAM', 'value': 'yes'},
   {'iso3': 'KNA', 'value': 'yes'},
   {'iso3': 'LCA', 'value': 'yes'},
   {'iso3': 'MEX', 'value': None},
   {'iso3': 'MSR', 'value': 'yes'},
   {'iso3': 'PAN', 'value': None},
   {'iso3': 'SUR', 'valu

In [40]:
# Serialise all dataset records and write them to datasets.json.
with open("datasets.json", "w") as out_file:
    out_file.write(json.dumps(datasets))