In [22]:
import csv

import geopandas as gpd
import pandas as pd
import numpy as np
import requests
from psycopg2.pool import ThreadedConnectionPool
from shapely.geometry import shape

In [8]:
env_path = ".env"
# Minimal KEY=VALUE parser for the local .env file.
# No quote handling or variable expansion is performed; values are kept as
# plain strings in the `env` dict.
env = {}
with open(env_path) as f:
    for line in f:
        entry = line.strip()  # also drops a trailing "\r" from CRLF files
        # Blank lines and comments carry no setting. Without this guard a blank
        # line would make the KEY=VALUE split fail with ValueError.
        if not entry or entry.startswith("#"):
            continue
        env_key, separator, _val = entry.partition("=")
        if not separator:
            # Not a KEY=VALUE line; ignore it instead of crashing the cell.
            continue
        env[env_key.strip()] = _val.strip()

# Thread-safe psycopg2 connection pool holding between 1 and 50 connections.
# All connection settings come from the `env` mapping parsed from the .env file;
# a missing key raises KeyError here rather than at first query time.
postgres_thread_pool = ThreadedConnectionPool(
    minconn=1,
    maxconn=50,
    host=env["API_POSTGRES_HOST"],
    port=env["API_POSTGRES_PORT"],
    user=env["API_POSTGRES_USERNAME"],
    password=env["API_POSTGRES_PASSWORD"],
)

In [9]:
# Source CSV that the download cell fetches (UNDP HDR site, 2020 data folder).
CSV_URL = "https://hdr.undp.org/sites/default/files/data/2020/IHDI_HDR2020_040722.csv"
# Year used to pick the `hdi_<YEAR>` column out of the CSV.
YEAR = 2019

In [10]:
# Download the raw CSV. Fail loudly on an HTTP error so that an error page is
# never parsed as data, and bound the wait so the cell cannot hang forever.
with requests.Session() as s:
    download = s.get(CSV_URL, timeout=60)
    download.raise_for_status()
    decoded_content = download.content.decode("utf-8")

# csv.reader yields every cell as a string, so missing values arrive as empty
# strings; numeric conversion is done in a later cell.
csv_rows = list(csv.reader(decoded_content.splitlines(), delimiter=","))
df = pd.DataFrame(csv_rows[1:], columns=csv_rows[0])  # first row is the header
df.head()

Unnamed: 0,iso3,country,hdicode,region,hdi_2010,hdi_2011,hdi_2012,hdi_2013,hdi_2014,hdi_2015,...,ineq_inc_2010,ineq_inc_2011,ineq_inc_2012,ineq_inc_2013,ineq_inc_2014,ineq_inc_2015,ineq_inc_2016,ineq_inc_2017,ineq_inc_2018,ineq_inc_2019
0,AFG,Afghanistan,Low,SA,0.472,0.477,0.489,0.496,0.5,0.5,...,,,,10.8,10.8,10.8,10.8,10.8,,
1,AGO,Angola,Medium,SSA,0.517,0.533,0.544,0.555,0.565,0.572,...,36.4,50.0,50.0,50.0,28.9,28.9,28.9,28.9,28.9,28.9
2,ALB,Albania,High,ECA,0.745,0.764,0.775,0.782,0.787,0.788,...,14.4,18.3,18.3,18.3,18.3,18.3,18.3,12.2,13.2,13.17898
3,AND,Andorra,Very High,,0.837,0.836,0.858,0.856,0.863,0.862,...,,,,,,,,,,
4,ARE,United Arab Emirates,Very High,AS,0.82,0.826,0.832,0.838,0.847,0.859,...,,,,,,,,,,


In [11]:
# Column holding the HDI value for the configured YEAR. Derive the name once so
# that changing YEAR cannot leave a stale hard-coded column name behind.
hdi_col = f"hdi_{YEAR}"
cols = ["iso3", "country", "hdicode", "region", hdi_col]
# .copy() gives an independent frame, so the assignment below never triggers
# pandas' SettingWithCopyWarning.
df = df.loc[:, cols].copy()  # filter columns
# Values were read as strings; empty cells become NaN after conversion.
df[hdi_col] = pd.to_numeric(df[hdi_col])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 206 entries, 0 to 205
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   iso3      206 non-null    object 
 1   country   206 non-null    object 
 2   hdicode   206 non-null    object 
 3   region    206 non-null    object 
 4   hdi_2019  200 non-null    float64
dtypes: float64(1), object(4)
memory usage: 8.2+ KB


In [30]:
# Keep only rows that have an HDI value for the configured YEAR. The column name
# is derived from YEAR so it stays in sync with the column-selection cell.
df = df.dropna(subset=[f"hdi_{YEAR}"])

In [31]:
# Keep only rows whose iso3 code is a non-empty string.
has_iso3 = df["iso3"].ne("")
df = df.loc[has_iso3]

## Values for the metadata.json

### minimum value

In [32]:
# Smallest HDI value for the configured YEAR (column name derived from YEAR
# instead of being hard-coded).
df[f"hdi_{YEAR}"].min()

0.394


### `min` and `max` for every hdi code (low -> very high) to set as bins for the legend

In [34]:
# One "min - max" string per HDI category, for the configured YEAR.
# groupby sorts the categories alphabetically, not from low to very high.
df.groupby("hdicode")[f"hdi_{YEAR}"].apply(
    lambda scores: f"{scores.min()} - {scores.max()}"
)

hdicode
High         0.703 - 0.796
Low          0.394 - 0.546
Medium       0.554 - 0.697
Very High    0.804 - 0.957
Name: hdi_2019, dtype: object