In [70]:
import requests

url = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/spr_exp_ffa?format=JSON&unit=MIO_EUR&unit=MEUR_KP15&unit=MEUR_KP10&unit=EUR_HAB_KP15&unit=EUR_HAB_KP10&unit=MIO_NAC&unit=MNAC_KP15&unit=MNAC_KP10&unit=MIO_PPS&unit=PPS_HAB&unit=PC_GDP&unit=PC_FUNC&spscheme=TOTAL&spdep=SPR&spdep=CASH&spdep=CASH_P&spdep=CASH_P_MNTCHB&spdep=CASH_P_PARL&spdep=CASH_P_FAM_ALL&spdep=CASH_P_OTH&spdep=CASH_LUMP&spdep=CASH_LUMP_BRTH&spdep=CASH_LUMP_PARL&spdep=CASH_LUMP_OTH&spdep=KND&spdep=KND_CHDC&spdep=KND_ACCOM&spdep=KND_HOM&spdep=KND_OTH&spdepm=TOTAL&spdepm=MT&spdepm=NMT&lang=en"

response = requests.get(url)
data = response.json()
print(list(data.keys()))


['version', 'class', 'label', 'source', 'updated', 'value', 'status', 'id', 'size', 'dimension', 'extension']


In [76]:
import numpy as np
import pandas as pd

dimension_order = data['id']
dimension_sizes = data['size']
dimensions = data['dimension']

label_maps = {}
for dim in dimension_order:
    raw = dimensions[dim]['category']['index']
    label_maps[dim] = {v: k for k, v in raw.items()}  # numeric index → label

def decode_index(flat_index, sizes):
    indices = []
    for size in reversed(sizes):
        indices.insert(0, flat_index % size)
        flat_index //= size
    return indices

eu_countries = {
    'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES',
    'FI', 'FR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT',
    'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK'
}

cleaned_data = []
for flat_key, value in data['value'].items():
    flat_index = int(flat_key)
    dim_indices = decode_index(flat_index, dimension_sizes)

    record = {}
    for i, dim in enumerate(dimension_order):
        record[dim] = label_maps[dim].get(dim_indices[i])

    if record['geo'] in eu_countries and int(record['time']) >= 2015:
        record['value'] = value
        cleaned_data.append(record)

df = pd.DataFrame(cleaned_data)
df.columns = [col.lower() for col in df.columns]
unit_map = {
    'MIO_EUR': 'Million Euros',
    'MEUR_KP15': 'Million Euros (2015 prices)',
    'MEUR_KP10': 'Million Euros (2010 prices)',
    'EUR_HAB_KP15': '€ per person (2015 prices)',
    'EUR_HAB_KP10': '€ per person (2010 prices)',
    'MIO_NAC': 'Million NAC',
    'MNAC_KP15': 'Million NAC (2015 prices)',
    'MNAC_KP10': 'Million NAC (2010 prices)',
    'MIO_PPS': 'Million PPS',
    'PPS_HAB': 'PPS per person',
    'PC_GDP': '% of GDP',
    'PC_FUNC': '% of function-based spending',
}

spdepm_map = {
    'MT': 'Mothers only',
    'NMT': 'Non-mothers',
    'TOTAL': 'All parents',
}

spdep_map = {
    'CASH': 'Cash benefits',
    'KND': 'Benefits in kind',
    'SPR': 'Social protection reporting',
    'CASH_P': 'Cash periodic',
    'CASH_LUMP': 'Lump-sum cash',
    'CASH_LUMP_BRTH': 'Birth grants',
    'CASH_LUMP_PARL': 'Parental leave (lump-sum)',
    'CASH_LUMP_OTH': 'Other lump-sum benefits',
    'CASH_P_MNTCHB': 'Maternity/childbirth benefits',
    'CASH_P_PARL': 'Parental leave (periodic)',
    'CASH_P_FAM_ALL': 'Family allowance',
    'CASH_P_OTH': 'Other cash periodic',
    'KND_CHDC': 'Childcare services',
    'KND_ACCOM': 'Housing/accommodation',
    'KND_HOM': 'Home-based support',
    'KND_OTH': 'Other benefits in kind'

}

df['unit'] = df['unit'].replace(unit_map)
df['spdepm'] = df['spdepm'].replace(spdepm_map)
df['spdep'] = df['spdep'].replace(spdep_map)


df.rename(columns={
    'geo': 'country',
    'time': 'year',
    'value': 'expenditure',
    'spscheme': 'scheme',
    'spdep': 'benefit_type',
    'spdepm': 'target_group'
}, inplace=True)



df['year'] = df['year'].astype(int)
df['expenditure'] = pd.to_numeric(df['expenditure'], errors='coerce')
df = df.fillna(0)

df['freq'] = df['freq'].replace({'A': 'Annual'})


df.to_csv("eu_family_expenditure_full.csv", index=False)

print(df.head())


     freq scheme   benefit_type  target_group                        unit  \
0  Annual  TOTAL  Cash benefits  Mothers only  € per person (2010 prices)   
1  Annual  TOTAL  Cash benefits  Mothers only  € per person (2010 prices)   
2  Annual  TOTAL  Cash benefits  Mothers only  € per person (2010 prices)   
3  Annual  TOTAL  Cash benefits  Mothers only  € per person (2010 prices)   
4  Annual  TOTAL  Cash benefits  Mothers only  € per person (2010 prices)   

  country  year  expenditure  
0      AT  2015        57.15  
1      AT  2016        55.13  
2      AT  2017        61.15  
3      AT  2018        64.05  
4      AT  2019        61.58  


In [75]:
summary_dict = summary.to_dict(orient='records')

for row in summary_dict[:50]:
    print(row)


{'country': 'AT', 'year': 2015, 'expenditure': 500912.5, 'change_vs_prev_year': nan}
{'country': 'AT', 'year': 2016, 'expenditure': 511022.59, 'change_vs_prev_year': 10110.090000000026}
{'country': 'AT', 'year': 2017, 'expenditure': 513609.39, 'change_vs_prev_year': 2586.7999999999884}
{'country': 'AT', 'year': 2018, 'expenditure': 518220.19, 'change_vs_prev_year': 4610.799999999988}
{'country': 'AT', 'year': 2019, 'expenditure': 518536.59, 'change_vs_prev_year': 316.4000000000233}
{'country': 'AT', 'year': 2020, 'expenditure': 562473.85, 'change_vs_prev_year': 43937.25999999995}
{'country': 'AT', 'year': 2021, 'expenditure': 536009.77, 'change_vs_prev_year': -26464.079999999958}
{'country': 'AT', 'year': 2022, 'expenditure': 563087.56, 'change_vs_prev_year': 27077.790000000037}
{'country': 'BE', 'year': 2015, 'expenditure': 469448.06, 'change_vs_prev_year': nan}
{'country': 'BE', 'year': 2016, 'expenditure': 473081.85, 'change_vs_prev_year': 3633.789999999979}
{'country': 'BE', 'year'