In [188]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

# Make "plotly_dark" the default Plotly template, so figures that do not
# specify their own template are rendered with the dark theme.
pio.templates.default = "plotly_dark"

In [189]:
# Resolve the data directories relative to the directory the notebook runs from:
#   <notebook dir>/../data/processed   and   <notebook dir>/../data/reference
notebook_path = os.getcwd()
data_path = os.path.abspath(os.path.join(notebook_path, "../data"))
processed_path = os.path.join(data_path, "processed")

reference_path = os.path.join(data_path, 'reference')
var_desc_path = os.path.join(reference_path, 'variable_descriptions.csv')

# Variable catalogue: one row per variable code with its name, unit and type.
var_df = pd.read_csv(var_desc_path, sep=',')
var_df

Unnamed: 0,variable,shortname,pop,shortpop,shortage,unit,shorttype,longtype
0,adefgei999,Defense,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
1,adefgoi999,Defense,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
2,aecogei999,Economic affairs,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
3,aecogoi999,Economic affairs,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
4,aedpgei999,Education: Primary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
...,...,...,...,...,...,...,...,...
74,wsakgei999,Social protection: social assistance in kind,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
75,wsopgei999,Social protection,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
76,wspigei999,Social protection: social insurance,i,individuals,All Ages,% of national income,Wealth-income ratio,Ratio of net wealth (of a given sector) to net...
77,xlcuspi999,"PPP conversion factor, LCU per USD",i,individuals,All Ages,local currency per foreign currency,Exchange rates,Exchange rate series


In [190]:
# Split the variable catalogue by how each variable's value is expressed.
# The boolean masks are built once and reused, and the row selection uses
# `.loc[mask, column]` instead of chained indexing.
is_aud = var_df['unit'] == 'AUD'

# Per capita
currency_vars_avg = var_df.loc[is_aud & (var_df['shorttype'] == 'Average'), 'variable']

# Total
currency_vars_total = var_df.loc[is_aud & (var_df['shorttype'] == 'Total'), 'variable']

# % of national income
# NOTE(review): "ration" looks like a typo for "ratio"; the name is kept as-is so
# that any cell already referencing it keeps working.
ration_vars_wealth_income = var_df.loc[
    (var_df['unit'] == '% of national income')
    & (var_df['shorttype'] == 'Wealth-income ratio'),
    'variable',
]

# All others
# Totals first, then averages; pandas can concatenate the two Series
# directly, so no numpy round-trip is needed.
currency_vars_avg_total_list = pd.concat([currency_vars_total, currency_vars_avg]).tolist()
is_currency = var_df['variable'].isin(currency_vars_avg_total_list)
non_currency = var_df[~is_currency]
currency = var_df[is_currency]

# Preview only the first rows rather than rendering the whole frame.
currency.head(10)

Unnamed: 0,variable,shortname,pop,shortpop,shortage,unit,shorttype,longtype
0,adefgei999,Defense,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
1,adefgoi999,Defense,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
2,aecogei999,Economic affairs,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
3,aecogoi999,Economic affairs,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
4,aedpgei999,Education: Primary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
5,aedsgei999,Education: Secondary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
6,aedtgei999,Education: Tertiary,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
7,aedugei999,Education,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
8,aedugoi999,Education,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...
9,aenvgei999,Environmental protection,i,individuals,All Ages,AUD,Average,Average income or wealth between two percentil...


In [191]:
# Scratch check: a hand-picked subset of category names and a membership test.
currency_names = ['Defense', 'Economic affairs', 'Education: Primary']

has_defense = 'Defense' in currency_names
if has_defense:
    print(True)

True


In [192]:
# Variable codes reported as AUD totals. This recomputes the same selection
# that the currency-split cell above already assigns to this name.
currency_vars_total = var_df.loc[
    var_df['unit'].eq('AUD') & var_df['shorttype'].eq('Total'),
    'variable',
]

In [193]:
# For every `shortname`, print the variable codes that share it, followed by the
# distinct units, value types and populations those variables are reported in.
summary_columns = ('variable', 'unit', 'shorttype', 'shortpop')

for name, group in var_df.groupby('shortname'):
    # Put a newline marker in front of each column's unique values so that
    # every column is printed on its own line under the name.
    fields = []
    for column in summary_columns:
        fields += ['\n', group[column].unique()]
    print(name, *fields)

Defense 
 ['adefgei999' 'adefgoi999' 'mdefgei999' 'mdefgoi999' 'wdefgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Economic affairs 
 ['aecogei999' 'aecogoi999' 'mecogei999' 'mecogoi999' 'wecogei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education 
 ['aedugei999' 'aedugoi999' 'medugei999' 'medugoi999' 'wedugei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Primary 
 ['aedpgei999' 'medpgei999' 'wedpgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Secondary 
 ['aedsgei999' 'medsgei999' 'wedsgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']
Education: Tertiary 
 ['aedtgei999' 'medtgei999' 'wedtgei999'] 
 ['AUD' '% of national income'] 
 ['Average' 'Total' 'Wealth-income ratio'] 
 ['individuals']

### Topics to analyse:
- Total per capita public spending
- Total per capita private wealth
- Total per capita private debt
- Per capita public spending breakdown across categories - health, education etc.
- GDP per capita and total
- CO2 and carbon footprint: totals, per capita, per unit of GDP, etc.
- Correlations between variables - debt vs. wealth, population vs. public spending, population vs. national income, etc.
### Other questions to ask:
- Which countries spend the most on defense?
- What is the trend in environmental protection spending and how does it correlate with CO2 / carbon footprint?
- Which regions spend more per capita on the various public spending categories?
### Process:
- Aggregate each CSV by 'variable'
### Other notes:
- All currency based variables where 'shorttype' == 'Average' refer to per capita values
- All currency-based variables where 'shorttype' == 'Wealth-income ratio' refer to the ratio of the variable's value to net national income

In [194]:
# Spot check: the value types of three defense variable codes
# (the "a", "m" and "w" prefixed variants of `defgei999`).
l = [
    'adefgei999',
    'mdefgei999',
    'wdefgei999',
]
in_selection = var_df['variable'].isin(l)
var_df.loc[in_selection, 'shorttype'].unique()

array(['Average', 'Total', 'Wealth-income ratio'], dtype=object)

In [202]:
# Load the processed dataset for one country; the file is named after the country code.
country = 'AU'
country_csv = f"{processed_path}/{country}.csv"
df = pd.read_csv(country_csv)

# Inspect the rows of a single variable (total defense spending) next to its
# per-capita USD value.
is_defense_total = df['variable'] == 'mdefgoi999'
df.loc[is_defense_total, ['shorttype', 'value', 'value_usd_per_capita', 'longtype']]

Unnamed: 0,shorttype,value,value_usd_per_capita,longtype
1494,Total,14863160000.0,1331.00819,Macroeconomic variable (i.e. corresponding to ...
1495,Total,14502010000.0,1257.321132,Macroeconomic variable (i.e. corresponding to ...
1496,Total,13952680000.0,1250.51003,Macroeconomic variable (i.e. corresponding to ...
1497,Total,15310840000.0,1610.25084,Macroeconomic variable (i.e. corresponding to ...
1498,Total,12561550000.0,1313.97277,Macroeconomic variable (i.e. corresponding to ...
1499,Total,12327310000.0,1161.583182,Macroeconomic variable (i.e. corresponding to ...
1500,Total,13383980000.0,1165.487642,Macroeconomic variable (i.e. corresponding to ...
1501,Total,13616230000.0,1063.842751,Macroeconomic variable (i.e. corresponding to ...
1502,Total,14692590000.0,1171.165985,Macroeconomic variable (i.e. corresponding to ...
1503,Total,14867640000.0,1144.816847,Macroeconomic variable (i.e. corresponding to ...
