In [63]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import os

# Make "plotly_dark" the default Plotly template so figures built after this
# point pick up the dark theme without passing `template=` on each one.
pio.templates.default = "plotly_dark"

In [64]:
# Locate the project's data folders relative to the notebook's working directory.
notebook_path = os.getcwd()
data_path = os.path.abspath(os.path.join(notebook_path, os.pardir, "data"))
processed_path = os.path.join(data_path, "processed")
reference_path = os.path.join(data_path, "reference")
var_desc_path = os.path.join(reference_path, "variable_descriptions.csv")

# Read the variable-description lookup table and show it as the cell output.
var_df = pd.read_csv(var_desc_path, sep=",")
var_df

Unnamed: 0,variable,shortname,pop,shortpop,shortage,unit,shorttype,longtype
0,adefgei999,Defense,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
1,aecogei999,Economic affairs,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
2,aedpgei999,Education: Primary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
3,aedsgei999,Education: Secondary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
4,aedtgei999,Education: Tertiary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
...,...,...,...,...,...,...,...,...
61,wsakgei999,Social protection: social assistance in kind,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
62,wsopgei999,Social protection,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
63,wspigei999,Social protection: social insurance,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
64,xlcuspi999,"PPP conversion factor, LCU per USD",i,individuals,All Ages,local currency per foreign currency,Exchange rates,Exchange rate series


In [65]:
# For every short name, list the distinct variable codes, units, value types
# and populations that share it (printed in that order, one per line).
summary_cols = ('variable', 'unit', 'shorttype', 'shortpop')
for shortname, rows in var_df.groupby('shortname'):
    distinct_values = [rows[col].unique() for col in summary_cols]
    # sep=' \n ' reproduces the "space, newline, space" layout between fields.
    print(shortname, *distinct_values, sep=' \n ')

Defense 
 ['adefgei999' 'mdefgei999' 'wdefgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Economic affairs 
 ['aecogei999' 'mecogei999' 'wecogei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education 
 ['aedugei999' 'medugei999' 'wedugei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Primary 
 ['aedpgei999' 'medpgei999' 'wedpgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Secondary 
 ['aedsgei999' 'medsgei999' 'wedsgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Tertiary 
 ['aedtgei999' 'medtgei999' 'wedtgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Environmental protection 
 ['aenvgei999' 'menvgei999' 'wenvgei999'] 
 ['AUD' 

### Topics to analyse:
- Total per capita public spending
- Total per capita private wealth
- Total per capita private debt
- Per capita public spending breakdown across categories (health, education, etc.)
- GDP per capita and total
- CO2 and carbon footprint: totals, per capita, per unit of GDP, etc.
- Correlations between variables: debt vs. wealth, population vs. public spending, population vs. national income, etc.
### Other questions to ask:
- Which countries spend the most on defense?
- What is the trend in environmental protection spending and how does it correlate with CO2 / carbon footprint?
- Which regions spend more per capita on the various public spending categories?
### Process:
- Aggregate each CSV by 'variable'
### Other notes:
- All currency-based variables where 'shorttype' == 'Average' refer to per capita values
- All currency-based variables where 'shorttype' == 'Wealth-income ratio' refer to the ratio of the variable's value to net national income

In [84]:
# Which value types exist for the three defense-spending variable codes?
defense_vars = ['adefgei999', 'mdefgei999', 'wdefgei999']
is_defense = var_df['variable'].isin(defense_vars)
var_df.loc[is_defense, 'shorttype'].unique()

array(['Average', 'Total', 'Wealth-income ratio'], dtype=object)

In [92]:
# Load one country's processed dataset and compare how the three defense
# variables (average, total, wealth-income ratio) are populated.
country = 'US'
# Build the path with os.path.join, matching how the other data paths are built.
df = pd.read_csv(os.path.join(processed_path, f"{country}.csv"))

# Columns shown for each variable's first record.
preview_cols = ['shorttype', 'value', 'value_usd', 'value_usd_per_capita', 'longtype']

# One loop instead of three copy-pasted display lines; .loc selects rows and
# columns in a single step rather than chaining two indexing operations.
for defense_var in ('adefgei999', 'mdefgei999', 'wdefgei999'):
    display(df.loc[df['variable'] == defense_var, preview_cols].head(1))

# Show the ratio variable's full 'longtype' text, which the table view truncates.
df.loc[df['variable'] == 'wdefgei999', 'longtype'].unique()

Unnamed: 0,shorttype,value,value_usd,value_usd_per_capita,longtype
144,Average,2050.5,2050.5,,Average income or wealth between two percentil...


Unnamed: 0,shorttype,value,value_usd,value_usd_per_capita,longtype
1130,Total,466956400000.0,466956400000.0,2050.518546,Macroeconomic variable (i.e. corresponding to ...


Unnamed: 0,shorttype,value,value_usd,value_usd_per_capita,longtype
2029,Wealth-income ratio,0.061035,,,Ratio of net wealth (of a given sector) to net...


array(['Ratio of net wealth (of a given sector) to net national income'],
      dtype=object)