In [17]:
import pandas as pd
import numpy as np
import json
import geopandas as gpd
import requests
import pycountry

# The Map

In [32]:
# Copy each feature's `iso_a3` property into a new `ISO3` property and save
# the result as world.json.
# NOTE(review): the output path is an absolute, machine-specific location —
# consider a configurable directory relative to the repository.
world_src_path = "world_excl_ant.json"
world_out_path = "/Users/finn/Documents/GitHub/courses/courses/datasets/advanced/world.json"

# JSON text is UTF-8; state it explicitly so the read does not depend on the
# platform's default locale encoding.
with open(world_src_path, encoding="utf-8") as f:
    world = json.load(f)

for feature in world['features']:
    props = feature['properties']
    # Raises KeyError if a feature has no `iso_a3` property.
    props['ISO3'] = props['iso_a3']

with open(world_out_path, "w", encoding="utf-8") as f:
    json.dump(world, f)

# The Existing Map

In [58]:
# Fetch the 2021 GDP-per-capita table, give its three columns the short
# names (country / ISO3 / value), and write a local copy without the index.
gdp_source_url = "https://raw.githubusercontent.com/EconomicsObservatory/courses/main/datasets/advanced/global_gdp_perCapita_2021.csv"
gdp_output_csv = "/Users/finn/Documents/GitHub/courses/courses/datasets/advanced/global_gdp_perCapita_2021.csv"
gdp_column_names = ["country", "ISO3", "value"]

df = pd.read_csv(gdp_source_url)
df.columns = gdp_column_names
df.to_csv(gdp_output_csv, index=False)

In [57]:
# Inspect the GDP-per-capita frame.
# NOTE(review): the saved output below still shows the source column names
# (Country / ISO3 / GDP per capita (PPP)), so it appears to have been captured
# before the rename cell ran — re-run for a current view.
df

Unnamed: 0,Country,ISO3,GDP per capita (PPP)
0,Aruba,ABW,42698.359870
1,Afghanistan,AFG,1665.805842
2,Angola,AGO,6491.125578
3,Albania,ALB,15709.251260
4,Andorra,AND,
...,...,...,...
211,Kosovo,XKX,13055.900780
212,Yemen,YEM,
213,South Africa,ZAF,14624.415170
214,Zambia,ZMB,3555.920229


# Data:

In [62]:
# Build one CSV per World Bank indicator holding, for every country, the most
# recent non-missing observation (kept only if it is newer than
# LATEST_YEAR_CUTOFF).

# Observations dated in or before this year are discarded as too old.
LATEST_YEAR_CUTOFF = 2019

# Each export is written to both of these directories.
# NOTE(review): absolute, machine-specific paths — consider a configurable
# directory relative to the repository.
WB_OUTPUT_DIRS = [
    "/Users/finn/Documents/GitHub/courses/courses/datasets/simple",
    "/Users/finn/Documents/GitHub/courses/courses/4/map_data",
]

# Stack the two World Development Indicators extracts into one long table.
wb_data = pd.concat([
    pd.read_csv("P_Data_Extract_From_World_Development_Indicators/d7f0eeed-2f3d-4758-9b3d-79008b6e522c_Data.csv"),
    pd.read_csv("P_Data_Extract_From_World_Development_Indicators/b86289ea-d79f-42ec-8371-4f12b6adc35a_Data.csv")
])

wb_data = wb_data.rename(columns={"Country Name": "Country", "Country Code": "ISO3", "Series Name": "series", "Time": "date", "Value": "value"})

# Descending sort puts the newest year first within each (series, Country),
# so `keep="first"` further down retains the latest observation.
wb_data = wb_data.sort_values(by=["series", "Country", "date"], ascending=False)

# Non-numeric placeholders become NaN and are dropped *before* de-duplicating,
# so the "latest" row is the latest one that actually has a value.
wb_data["value"] = pd.to_numeric(wb_data["value"], errors='coerce')
wb_data = wb_data.dropna(subset=["value"])
wb_data = wb_data.drop_duplicates(subset=["Country", "series"], keep="first")

# .copy() so the column assignment below operates on an independent frame
# rather than on a filtered view (avoids SettingWithCopyWarning).
wb_data = wb_data[wb_data["date"] > LATEST_YEAR_CUTOFF].copy()

# The year column is float (e.g. 2021.0) in the displayed outputs; a float
# stringifies as "2021.0", which does not match "%Y", so normalise to an
# integer year string before parsing. Result is "YYYY-01-01".
wb_data["date"] = (
    pd.to_datetime(wb_data["date"].astype(int).astype(str), format='%Y')
    .dt.strftime('%Y-%m-%d')
)

# Indicator name -> output file stem. Series present in wb_data but missing
# from this mapping (e.g. 'Gini index') are not exported.
series_file_names = {
    'Population growth (annual %)': 'world_population_growth_wb',
    'Life expectancy at birth, total (years)': 'world_life_expectancy_wb',
    'GDP per capita, PPP (current international $)': 'world_gdp_per_capita_PPP_wb',
    'CO2 emissions (metric tons per capita)': 'world_co2_emissions_wb',
    'Access to electricity (% of population)': 'world_access_to_electricity_wb',
    'Urban population (% of total population)': 'world_urban_population_pct_wb',
    'Fertility rate, total (births per woman)' : 'world_fertility_rate_wb'
}

for series, file_name in series_file_names.items():
    data = wb_data.loc[wb_data["series"] == series, ["Country", "ISO3", "value", "date"]]
    for output_dir in WB_OUTPUT_DIRS:
        data.to_csv(f"{output_dir}/{file_name}.csv", index=False)

In [61]:
# List the distinct indicator names currently held in wb_data.
wb_data["series"].unique()

array(['Urban population (% of total population)',
       'Population growth (annual %)',
       'Life expectancy at birth, total (years)', 'Gini index',
       'GDP per capita, PPP (current international $)',
       'Fertility rate, total (births per woman)',
       'CO2 emissions (metric tons per capita)',
       'Access to electricity (% of population)'], dtype=object)

In [42]:
# Count how many rows fall on each observation date.
wb_data["date"].value_counts()

date
2021.0    486
2022.0    425
2015.0    223
2020.0    201
2019.0     19
2018.0     11
2017.0      9
2016.0      7
2014.0      7
2011.0      6
2012.0      5
2013.0      5
2010.0      4
2005.0      2
2023.0      2
2006.0      1
2009.0      1
Name: count, dtype: int64

In [40]:
# Count how many rows each indicator contributes.
wb_data["series"].value_counts()

series
Access to electricity (% of population)                         4340
CO2 emissions (metric tons per capita)                          4340
GDP per capita, PPP (current international $)                   4340
Life expectancy at birth, total (years)                         4340
Gini index                                                      4340
Population growth (annual %)                                    4340
Renewable electricity output (% of total electricity output)    4340
Name: count, dtype: int64

In [34]:
# Inspect wb_data.
# NOTE(review): the saved output below shows the raw World Bank column names
# and blank footer rows, so it appears to predate the cleaning cell — re-run
# to refresh.
wb_data

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,Time,Time Code,Value
0,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,2004.0,YR2004,23.814182
1,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,2005.0,YR2005,28.669672
2,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,2006.0,YR2006,33.544418
3,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,2007.0,YR2007,38.440002
4,Afghanistan,AFG,Access to electricity (% of population),EG.ELC.ACCS.ZS,2008.0,YR2008,42.400000
...,...,...,...,...,...,...,...
30380,,,,,,,
30381,,,,,,,
30382,,,,,,,
30383,Data from database: World Development Indicators,,,,,,
