In [479]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

# Make Plotly's "plotly_dark" template the default, so figures created later in
# the notebook use it without each one having to pass `template=` explicitly.
pio.templates.default = "plotly_dark"

In [480]:
# Read the semicolon-separated countries file and preview its first two rows.
countries_path = '../data/WID_countries.csv'
countries_df = pd.read_csv(countries_path, sep=';')
countries_df.head(n=2)

Unnamed: 0,alpha2,titlename,shortname,region,region2
0,AD,Andorra,Andorra,Europe,Western Europe
1,AE,the United Arab Emirates,United Arab Emirates,Asia,West Asia


In [481]:
# Read the semicolon-separated data file for country code AF and preview it.
af_path = '../data/WID_data_AF.csv'
af = pd.read_csv(af_path, sep=';')
af.head(n=2)

Unnamed: 0,country,variable,percentile,year,value,age,pop
0,AF,ehfghgi999,p0p100,1980,13.085614,999,i
1,AF,ehfghgi999,p0p100,1981,12.898053,999,i


In [482]:
# Read the matching semicolon-separated metadata file and preview it.
af_meta_path = '../data/WID_metadata_AF.csv'
af_meta = pd.read_csv(filepath_or_buffer=af_meta_path, sep=';')
af_meta.head(n=2)

Unnamed: 0,country,variable,age,pop,countryname,shortname,simpledes,technicaldes,shorttype,longtype,shortpop,longpop,shortage,longage,unit,source,method,extrapolation,data_points
0,AF,acitgri992,992,i,Afghanistan,Corporate income tax,,,Average,Average income or wealth between two percentil...,individuals,The base unit is the individual (rather than t...,Adults,The population is comprised of individuals ove...,AFN,,,,
1,AF,acitgri999,999,i,Afghanistan,Corporate income tax,,,Average,Average income or wealth between two percentil...,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,AFN,,,,


In [483]:
# Report the size of both tables and compare how many distinct `variable`
# codes each of them contains.
data_rows, data_cols = af.shape
meta_rows, meta_cols = af_meta.shape
data_vars = af['variable'].nunique()
meta_vars = af_meta['variable'].nunique()

print(f"The data file contains {data_cols} columns and {data_rows} rows")
print(f"The metadata file contains {meta_cols} columns and {meta_rows} rows")
print(f"There are {data_vars} and {meta_vars} unique values for 'variable' in the data and metadata files respectively.")

The data file contains 7 columns and 191381 rows
The metadata file contains 19 columns and 552 rows
There are 552 and 552 unique values for 'variable' in the data and metadata files respectively.


In [484]:
# Per-column overview of the metadata table: how many values are missing and
# how many distinct (non-null) values each column holds.
cols = af_meta.columns
nulls = af_meta.isna().sum().tolist()
unique_vals = af_meta.nunique().tolist()

summary_dict = {'cols': cols, 'no_of_nulls': nulls, 'no_of_unique': unique_vals}

# Index the overview by column name so each row reads as "column -> counts".
summary = pd.DataFrame(summary_dict).set_index('cols')
summary

Unnamed: 0_level_0,no_of_nulls,no_of_unique
cols,Unnamed: 1_level_1,Unnamed: 2_level_1
country,0,1
variable,0,552
age,0,38
pop,0,4
countryname,0,1
shortname,0,136
simpledes,342,32
technicaldes,414,35
shorttype,0,14
longtype,0,14


In [485]:
# Attach the metadata description to every data row.
# Each (country, variable) pair is expected to occur at most once in the
# metadata; `validate` makes pandas raise a MergeError instead of silently
# duplicating data rows (and inflating every later aggregate) if it does not.
# Both frames also carry `age` and `pop`, which are not join keys here, so they
# come out suffixed: `age_x`/`pop_x` from the data, `age_y`/`pop_y` from the metadata.
df = pd.merge(
    left=af,
    right=af_meta,
    how='left',
    on=['country', 'variable'],
    validate='many_to_one',
)

In [486]:
# GDP rows whose unit is a share of national income, reduced to the descriptive columns.
gdp_cols = ['shortname', 'year', 'value', 'shortage', 'shorttype', 'longtype', 'unit']
is_gdp_share = (df['shortname'] == 'Gross domestic product') & (df['unit'] == '% of national income')
gdp = df.loc[is_gdp_share, gdp_cols].copy()

# Population rows counted per individual and covering all ages.
is_total_pop = (
    (df['shortname'] == 'Population')
    & (df['shortpop'] == 'individuals')
    & (df['shortage'] == 'All Ages')
)
total_pop = df.loc[is_total_pop, ['year', 'value', 'shortname']]
total_pop.head(1)

Unnamed: 0,year,value,shortname
143943,1950,7480461.0,Population


In [487]:
# Metadata `shortname` values of the series to select.
shortname_list = [
    'Defense',
    'Economic affairs',
    'Education',
    'Environmental protection',
    'Health',
    'Housing and community amenities',
    'Public order and safety',
    'Recreation and culture',
    'Social protection',
]

cols_to_keep = ['year', 'value', 'shortname']

# Rows for the listed series, restricted to the 'Total' short type.
is_listed_total = df['shortname'].isin(shortname_list) & df['shorttype'].eq('Total')
a = df.loc[is_listed_total, cols_to_keep]

# Population rows counted per individual and covering all ages (year + value only).
is_total_pop = (
    df['shortname'].eq('Population')
    & df['shortpop'].eq('individuals')
    & df['shortage'].eq('All Ages')
)
total_pop = df.loc[is_total_pop, ['year', 'value']]

# PPP conversion factor per year; `value` is renamed so it survives later merges unambiguously.
is_ppp_factor = df['shortname'].eq('PPP conversion factor, LCU per USD')
fx = df.loc[is_ppp_factor, ['year', 'value']].rename(columns={'value': 'local_currency_per_usd'})

In [488]:
fig = go.Figure()

# One trace per series: the groupby key is the series' `shortname`, so it is
# used directly as the trace name instead of being re-read from the merged rows
# (which raised IndexError whenever the merges below left no rows).
for name, group in a.groupby('shortname'):
    # Keep only the years for which a population figure and a conversion factor both exist.
    data = (
        group
        .merge(total_pop, how='inner', on='year', suffixes=('', '_pop'))
        .merge(fx, how='inner', on='year')
    )
    if data.empty:
        # No overlapping years for this series: nothing to draw.
        continue

    # NOTE(review): assumes `value` is expressed in local currency - confirm against the metadata `unit`.
    data['per_capita_local'] = data['value'] / data['value_pop']
    data['per_capita_usd'] = data['per_capita_local'] / data['local_currency_per_usd']

    fig.add_trace(
        go.Scatter(
            x=data['year'],
            y=data['per_capita_usd'],
            name=name,
            mode='lines+markers',
        )
    )

# Label the axes so the figure can be read on its own.
fig.update_layout(
    title='Per capita public spending',
    xaxis_title='Year',
    yaxis_title='Spending per capita (USD, PPP-converted)',
)

fig.show()

In [489]:
# Show the distinct type and method descriptions recorded for the market exchange rate series.
cols = ['shorttype', 'longtype', 'method']
is_market_rate = df['shortname'] == 'Market exchange rate, LCU per USD'
info = df.loc[is_market_rate, cols]
for col, column_values in info.items():
    print(column_values.unique())

['Exchange rates']
['Exchange rate series']
[nan]


In [491]:
# All PPP conversion-factor rows (every column kept), then the distinct units they use.
is_ppp_factor = df['shortname'].eq('PPP conversion factor, LCU per USD')
x = df.loc[is_ppp_factor]
x['unit'].unique()

array(['local currency per foreign currency'], dtype=object)

In [477]:
# Preview the first rows only, rather than rendering the whole frame.
x.head(10)

Unnamed: 0,country,variable,percentile,year,value,age_x,pop_x,age_y,pop_y,countryname,...,longtype,shortpop,longpop,shortage,longage,unit,source,method,extrapolation,data_points
147001,AF,xlcuspi999,p0p100,1970,0.008475,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147002,AF,xlcuspi999,p0p100,1971,0.008066,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147003,AF,xlcuspi999,p0p100,1972,0.008071,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147004,AF,xlcuspi999,p0p100,1973,0.008057,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147005,AF,xlcuspi999,p0p100,1974,0.008524,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147006,AF,xlcuspi999,p0p100,1975,0.008284,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147007,AF,xlcuspi999,p0p100,1976,0.008056,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147008,AF,xlcuspi999,p0p100,1977,0.008915,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147009,AF,xlcuspi999,p0p100,1978,0.008942,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
147010,AF,xlcuspi999,p0p100,1979,0.009093,999,i,999,i,Afghanistan,...,Exchange rate series,individuals,The base unit is the individual (rather than t...,All Ages,The population is comprised of individuals of ...,local currency per foreign currency,[URL][URL_LINK]http://data.worldbank.org/[/URL...,We extrapolate the PPP from the latest ICP (20...,,
