# Parse Projects `xlsx`

In [None]:
import json
import os

import dotenv
import httpx
import polars as pl

# Load environment variables from ../../.env so that os.getenv("STRAPI_TOKEN")
# (read by get_ids below) can pick up the API token.
dotenv.load_dotenv("../../.env")

In [None]:
# Read the projects CSV and, in the same step, convert the "Total Amount"
# strings to decimals: drop the leading "$", remove every ",", then parse.
df = pl.read_csv("../data/CLEAN-Climate-Smart Map2023-11-07-16-59-55.csv").with_columns(
    pl.col("Total Amount").str.strip_prefix("$").str.replace_all(",", "").str.to_decimal()
)

## SDGs

In [None]:
# Distinct SDG names: each cell holds one or more names separated by "; ".
sdgs = (
    df.select(pl.col("SDG").str.split("; ").list.explode())
    .unique()
    .to_series()
    .to_list()
)
# Order by the integer parsed from characters 4-5 of each name.
sdgs.sort(key=lambda name: int(name[4:6]))
# Print a JSON list of {"name": ...} objects, one per SDG.
print(json.dumps([{"name": name} for name in sdgs]))

## Pillars

In [None]:
# Distinct values of the "Pillar" column, kept as a one-column DataFrame.
pilars = df.select("Pillar").unique()
# Print a JSON list of {"name": ...} objects, one per pillar.
print(json.dumps([{"name": name} for name in pilars.to_numpy().ravel()]))

## Countries

In [None]:
# Distinct country names: each cell holds one or more names separated by "; ".
countries = (
    df.select(pl.col("Country").str.split("; ").list.explode())
    .unique()
    .to_numpy()
    .ravel()
    .tolist()
)

## get `IDs`

The SDG and pillar JSON printed in the cells above must be imported into Strapi before running the cells below, otherwise their IDs will not exist yet.

In [None]:
def get_ids(plural_api_id: str) -> dict[str, int]:
    """Return a ``name -> id`` mapping for every entry of a Strapi collection.

    Strapi paginates collection responses (25 entries per page by default),
    so a single request only returns the first page. This walks every page
    reported in ``meta.pagination.pageCount`` and merges the results.

    Args:
        plural_api_id: Plural API ID of the collection, e.g. ``"countries"``.

    Returns:
        Mapping from each entry's ``attributes.name`` to its ``id``.

    Raises:
        httpx.HTTPStatusError: If any page request returns a 4xx/5xx status.
    """
    url = f"https://staging.ccsa.dev-vizzuality.com/cms/api/{plural_api_id}"
    headers = {"Authorization": f"bearer {os.getenv('STRAPI_TOKEN')}"}

    ids: dict[str, int] = {}
    page = 1
    while True:
        res = httpx.get(
            url,
            headers=headers,
            # Ask for large pages to keep the number of round trips low.
            params={"pagination[page]": page, "pagination[pageSize]": 100},
        )
        res.raise_for_status()
        payload = res.json()
        ids.update({e["attributes"]["name"]: e["id"] for e in payload["data"]})

        # If the response carries no pagination metadata, treat it as a
        # single page so the loop always terminates.
        pagination = (payload.get("meta") or {}).get("pagination") or {}
        if page >= pagination.get("pageCount", 1):
            break
        page += 1
    return ids

In [None]:
# Fetch the name -> id mapping for the "sdgs" collection and display it.
sdg_ids = get_ids("sdgs")
print(sdg_ids)

In [None]:
# Fetch the name -> id mapping for the "pillars" collection and display it.
pillar_ids = get_ids("pillars")
print(pillar_ids)

In [None]:
# Fetch the name -> id mapping for the "countries" collection.
country_ids = get_ids("countries")
# Show the (name, id) pairs ordered by name.
print(
    sorted(
        country_ids.items(),
        key=lambda name_and_id: name_and_id[0],
    )
)

```json
{
    "US Virgin Islands": "Virgin Islands, U.S.",
    "St. Kitts & Nevis": "Saint Kitts and Nevis",
    "Regional": "",
    "Puerto Rico": "Puerto Rico",
    "Curacao": "Curaçao",
    "Trinidad & Tobago": "Trinidad and Tobago"
}
```

Names without a mapping yet:

- Japan
- Cayman
- Suriname
- Bonaire
- Mexico
- St. Vincent & the Grenadines
- Turks & Caicos Islands

In [None]:
# Print every country name from the CSV that has no matching entry in Strapi.
for country in countries:
    if country in country_ids:
        continue
    print(country)