# Parse Projects `xlsx`

In [40]:
import json

import dotenv
import polars as pl

from .utils import get_ids

dotenv.load_dotenv("../../.env")

True

In [41]:
df = pl.read_csv("../data/CLEAN-Climate-Smart Map2023-11-07-16-59-55.csv")
# parse money
df = df.with_columns(
    pl.col("Total Amount").str.strip_prefix("$").str.replace_all(",", "").cast(pl.Float32)
)

## SDGs

In [42]:
sdgs = df.select(pl.col("SDG").str.split("; ").list.explode()).unique().to_numpy().ravel()
sdgs = sorted(sdgs, key=lambda x: int(x[4:6]))
print(json.dumps([{"name": e} for e in sdgs]))

[{"name": "SDG 7 - Affordable and clean energy"}, {"name": "SDG 8 - Decent work and economic growth"}, {"name": "SDG 9 - Industry Innovation and Infrastructure"}, {"name": "SDG 11 - Sustainable Cities and Communities"}, {"name": "SDG 12 - Responsible production and consumption"}, {"name": "SDG 13 - Climate Action"}, {"name": "SDG 14 - Life below water"}, {"name": "SDG 15 - Life on land"}, {"name": "SDG 17 - Partnership for the goals"}]


## Pillars

In [43]:
pilars = df.select(pl.col("Pillar")).unique()
print(json.dumps([{"name": e} for e in list(pilars.to_numpy().ravel())]))

[{"name": "Climate Smart Map"}, {"name": "1.5% New Green Jobs for Physical & Economic Resilience"}, {"name": "30 x 30 Nature Based Solutions"}, {"name": "90% Renewable Energy for All"}]


## Countries

In [44]:
countries = list(
    df.select(pl.col("Country").str.split("; ").list.explode()).unique().to_numpy().ravel()
)

## get `IDs`

Previous SDG and pillar JSONs must be loaded first into Strapi

In [46]:
sdg_ids = get_ids("sdgs")
print(sdg_ids)

{'SDG 7 - Affordable and clean energy': 1, 'SDG 8 - Decent work and economic growth': 2, 'SDG 9 - Industry Innovation and Infrastructure': 3, 'SDG 11 - Sustainable Cities and Communities': 4, 'SDG 12 - Responsible production and consumption': 5, 'SDG 13 - Climate Action': 6, 'SDG 14 - Life below water': 7, 'SDG 15 - Life on land': 8, 'SDG 17 - Partnership for the goals': 9}


In [47]:
pillar_ids = get_ids("pillars")
print(pillar_ids)

{'1.5% New Green Jobs for Physical & Economic Resilience': 1, 'Climate Smart Map': 2, '90% Renewable Energy for All': 3, '30 x 30 Nature Based Solutions': 4}


In [48]:
country_ids = get_ids("countries")
print(sorted(country_ids.items(), key=lambda x: x[0]))

[('Anguilla', 919), ('Antigua and Barbuda', 920), ('Aruba', 918), ('Bahamas', 922), ('Barbados', 925), ('Belize', 923), ('Bermuda', 924), ('Bonaire, Sint Eustatius and Saba', 921), ('British Virgin Islands', 946), ('Cayman Islands', 928), ('Costa Rica', 926), ('Curaçao', 927), ('Dominica', 929), ('Dominican Republic', 930), ('Grenada', 932), ('Guadeloupe', 931), ('Guyana', 933), ('Haiti', 935), ('Honduras', 934), ('Jamaica', 936), ('Montserrat', 940), ('México', 939), ('Panama', 941), ('Puerto Rico', 948), ('Saint Kitts and Nevis', 937), ('Saint Lucia', 938), ('Saint Vincent and the Grenadines', 945), ('Suriname', 942), ('Trinidad and Tobago', 944), ('Turks and Caicos Islands', 943), ('Virgin Islands, U.S.', 947)]


## Clean Data and make JSON

In [49]:
# Countries that are not in the countries table in Strapi

(
    set(df.select(pl.col("Country").str.split("; ").list.explode()).unique().to_numpy().ravel())
    - country_ids.keys()
)

{'Bonaire',
 'Cayman',
 'Curacao',
 'Japan',
 'Mexico',
 'Regional',
 'St. Kitts & Nevis',
 'St. Vincent & the Grenadines',
 'Trinidad & Tobago',
 'Turks & Caicos Islands',
 'US Virgin Islands'}

In [50]:
countries_fix = {
    "Bonaire": "Bonaire, Sint Eustatius and Saba",
    "Cayman": "Cayman Islands",
    "Curacao": "Curaçao",
    "Japan": "",
    "Mexico": "México",
    "Regional": "",
    "St. Kitts & Nevis": "Saint Kitts and Nevis",
    "St. Vincent & the Grenadines": "Saint Vincent and the Grenadines",
    "Trinidad & Tobago": "Trinidad and Tobago",
    "Turks & Caicos Islands": "Turks and Caicos Islands",
    "US Virgin Islands": "Virgin Islands, U.S.",
}

df = df.with_columns(
    pl.col("Country")
    .str.split("; ")
    .list.eval(
        pl.when(pl.element().is_in(countries_fix.keys()))
        .then(pl.element().map_dict(countries_fix))
        .otherwise(pl.element())
    ),
)

The predicate 'col("").is_in([Series])' in 'when->then->otherwise' is not a valid aggregation and might produce a different number of rows than the group_by operation would. This behavior is experimental and may be subject to change


Pillar,Opportunity Name,Project Highlight,Account Name,Total Amount,SDG,Country
str,str,str,str,f32,str,list[str]
"""1.5% New Green…","""Greening infra…","""Solving the p…","""CRDC Global""",1.8e6,"""SDG 8 - Decent…","[""Barbados"", ""Jamaica"", ""Trinidad and Tobago""]"
"""1.5% New Green…","""Resilient Fish…",,"""Organisation o…",2.8e6,"""SDG 7 - Afford…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Resilient Boat…",,"""Organisation o…",2e6,"""SDG 11 - Susta…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Climate Smart …",,,625000.0,"""SDG 8 - Decent…","[""Jamaica""]"
"""1.5% New Green…","""Sustainable To…",,"""Organisation o…",3e7,"""SDG 8 - Decent…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Village Touris…",,"""Organisation o…",3.5e6,"""SDG 8 - Decent…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Kalinago Artis…",,"""Organisation o…",2e6,"""SDG 8 - Decent…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Fisheries Cent…",,"""Organisation o…",5.4e6,"""SDG 8 - Decent…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Hillsborough F…",,"""Organisation o…",2.81e7,"""SDG 8 - Decent…","[""Anguilla"", ""Antigua and Barbuda"", … ""Saint Vincent and the Grenadines""]"
"""1.5% New Green…","""Green Energy J…",,"""Bermuda""",0.0,"""SDG 7 - Afford…","[""Bermuda""]"


In [54]:
# count the nulls
print(df.filter(pl.col("Country").list.eval(pl.element().is_null()).list.any()))

shape: (0, 7)
┌────────┬──────────────────┬───────────────────┬──────────────┬──────────────┬─────┬───────────┐
│ Pillar ┆ Opportunity Name ┆ Project Highlight ┆ Account Name ┆ Total Amount ┆ SDG ┆ Country   │
│ ---    ┆ ---              ┆ ---               ┆ ---          ┆ ---          ┆ --- ┆ ---       │
│ str    ┆ str              ┆ str               ┆ str          ┆ f32          ┆ str ┆ list[str] │
╞════════╪══════════════════╪═══════════════════╪══════════════╪══════════════╪═════╪═══════════╡
└────────┴──────────────────┴───────────────────┴──────────────┴──────────────┴─────┴───────────┘


In [55]:
# map pillar, SDG and countries to corresponding IDs
df_with_ids = df.with_columns(
    pl.col("SDG").str.split("; ").list.eval(pl.element().map_dict(sdg_ids)),
    pl.col("Pillar").map_dict(pillar_ids),
    pl.col("Country").list.eval(pl.element().map_dict(country_ids)),
)

column_names = {
    "Pillar": "pillar",
    "Opportunity Name": "name",
    "Project Highlight": "highlight",
    "Account Name": "account",
    "Total Amount": "amount",
    "SDG": "sdgs",
    "Country": "countries",
}
# count the nulls
print(df_with_ids.filter(pl.col("Country").list.eval(pl.element().is_null()).list.any()))

shape: (3, 7)
┌────────┬───────────────┬──────────────┬──────────────┬──────────────┬─────────────┬──────────────┐
│ Pillar ┆ Opportunity   ┆ Project      ┆ Account Name ┆ Total Amount ┆ SDG         ┆ Country      │
│ ---    ┆ Name          ┆ Highlight    ┆ ---          ┆ ---          ┆ ---         ┆ ---          │
│ i64    ┆ ---           ┆ ---          ┆ str          ┆ f32          ┆ list[i64]   ┆ list[i64]    │
│        ┆ str           ┆ str          ┆              ┆              ┆             ┆              │
╞════════╪═══════════════╪══════════════╪══════════════╪══════════════╪═════════════╪══════════════╡
│ 3      ┆ Phase 2:      ┆ A regional   ┆ Caribbean    ┆ 2.03e8       ┆ [1, 2, … 9] ┆ [919, 920, … │
│        ┆ Build         ┆ blended      ┆ Development  ┆              ┆             ┆ 947]         │
│        ┆ Caribbean     ┆ financial    ┆ Bank         ┆              ┆             ┆              │
│        ┆ Blended…      ┆ fun…         ┆              ┆              ┆      

In [53]:
df_with_ids.rename(column_names).write_json("projects.json", pretty=True, row_oriented=True)