### Get macroeconomic variables for selected countries

In [None]:
import wbdata
import pandas as pd
from datetime import datetime
import eurostat

In [24]:
# --- Configuration ---
wb_indicators = {
    'SL.UEM.TOTL.ZS': 'Unemployment Rate (%)',
    'NY.GDP.MKTP.KD.ZG': 'GDP Growth (%)',
    'FS.AST.PRVT.GD.ZS': 'Household Debt-to-GDP (%)', # Check if this is the correct indicator for household debt
    'SL.EMP.TOTL.SP.ZS': 'Employment Rate (%)',
    'NE.CON.PRVT.ZS': 'Household Consumption (% of GDP)',
}

data_date = (datetime(2013, 1, 1), datetime(2023, 1, 1))

# ISO3 codes for World Bank (Matches the desired final country names)
european_countries_wb_iso3 = ['AUT','BEL','BGR','HRV','CYP','CZE','DNK','EST','FIN','FRA',
                              'DEU','GRC','HUN','IRL','ITA','LVA','LTU','LUX','MLT','NLD',
                              'POL','PRT','ROU','SVK','SVN','ESP','SWE']

# Eurostat codes (will be mapped) - Only include codes you intend to map
# Exclude aggregates like EU27_2020, EA unless you handle them separately
european_countries_eurostat_codes = [
    'BE', 'BG', 'CZ', 'DK', 'DE', 'EE', 'IE', 'EL', 'ES', # EL is Greece
    'FR', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT', 'NL', 'AT', 'PL',
    'PT', 'RO', 'SI', 'SK', 'FI', 'SE'
    # Add others like 'IS', 'NO', 'CH', 'UK', 'TR' if needed and map them below
]

# Mapping from Eurostat codes to World Bank Full Country Names
# We derive the WB names automatically later, but define the code mapping here
eurostat_to_wb_map = {
    'AT': 'Austria', 'BE': 'Belgium', 'BG': 'Bulgaria', 'HR': 'Croatia', 'CY': 'Cyprus',
    'CZ': 'Czech Republic', 'DK': 'Denmark', 'EE': 'Estonia', 'FI': 'Finland', 'FR': 'France',
    'DE': 'Germany', 'EL': 'Greece', 'HU': 'Hungary', 'IE': 'Ireland', 'IT': 'Italy',
    'LV': 'Latvia', 'LT': 'Lithuania', 'LU': 'Luxembourg', 'MT': 'Malta', 'NL': 'Netherlands',
    'PL': 'Poland', 'PT': 'Portugal', 'RO': 'Romania', 'SK': 'Slovak Republic',
    'SI': 'Slovenia', 'ES': 'Spain', 'SE': 'Sweden'
    # Add mappings for IS, NO, CH, UK, TR if included above and desired
    # 'CH': 'Switzerland', 'NO': 'Norway', 'IS': 'Iceland', 'TR': 'Turkiye', 'UK': 'United Kingdom'
}

# --- Retrieve World Bank Data ---
print("Retrieving World Bank data...")
# Use ISO3 codes, wbdata returns full names which we will use as the standard
wb_df = wbdata.get_dataframe(wb_indicators, country=european_countries_wb_iso3, date=data_date)
wb_df = wb_df.reset_index()
wb_df.rename(columns={'country':'Country', 'date':'Year'}, inplace=True)

# Extract the list of actual country names returned by World Bank for consistency
wb_country_names = wb_df['Country'].unique().tolist()

# Melt WB data
wb_df = wb_df.melt(id_vars=['Country', 'Year'], var_name='Indicator', value_name='Value')

# --- Retrieve Eurostat Data ---
print("Retrieving Eurostat data...")
dataset_code = 'prc_hpi_a' # Annual House Price Index
try:
    hpi_data = eurostat.get_data_df(dataset_code, flags=False)

    if hpi_data is None or hpi_data.empty:
        print(f"Error: Could not retrieve Eurostat data for '{dataset_code}'")
    else:
        # Verify required columns exist
        required_cols = ['purchase', 'unit', 'geo\\TIME_PERIOD']
        if not all(col in hpi_data.columns for col in required_cols):
             print(f"Error: Raw Eurostat data missing required columns. Found: {hpi_data.columns}")
        else:
            # Filter for the desired HPI metric (Annual % Change for Total Purchases)
            idx_1 = hpi_data['purchase'] == 'TOTAL'
            idx_2 = hpi_data['unit'] == 'RCH_A_AVG' # Rate of Change, Annual Average

            eurostat_df = hpi_data[idx_1 & idx_2].copy()

            if not eurostat_df.empty:
                # Rename the geo column BEFORE filtering by country code
                eurostat_df.rename(columns={'geo\\TIME_PERIOD': 'geo_code'}, inplace=True)

                # Filter Eurostat data for the countries we can map
                eurostat_df = eurostat_df[eurostat_df['geo_code'].isin(eurostat_to_wb_map.keys())]

                # *** Apply the country name mapping ***
                eurostat_df['Country'] = eurostat_df['geo_code'].map(eurostat_to_wb_map)

                # Keep only countries that are also in the WB results for consistency
                eurostat_df = eurostat_df[eurostat_df['Country'].isin(wb_country_names)]

                # Melt Eurostat data
                id_vars_melt = ['Country', 'freq', 'purchase', 'unit', 'geo_code'] # Include all non-year ID columns
                value_vars_melt = [col for col in eurostat_df.columns if col not in id_vars_melt]

                eurostat_df = eurostat_df.melt(
                    id_vars=id_vars_melt,
                    value_vars=value_vars_melt,
                    var_name='Year',
                    value_name='Value'
                )

                # Clean up Eurostat data
                eurostat_df = eurostat_df.drop(columns=['freq', 'purchase', 'unit', 'geo_code']) # Drop original codes/units
                eurostat_df['Indicator'] = 'House Price Index Annual Change (%)' # Assign indicator name

                # Convert Year and Value, drop NaNs
                eurostat_df = eurostat_df[pd.to_numeric(eurostat_df['Year'], errors='coerce').notna()] # Ensure Year is numeric string
                eurostat_df['Year'] = eurostat_df['Year'].astype(int)
                eurostat_df['Value'] = pd.to_numeric(eurostat_df['Value'], errors='coerce')
                eurostat_df.dropna(subset=['Value'], inplace=True)

            else:
                print("Warning: Eurostat filtering resulted in empty DataFrame. No HPI data to add.")
                eurostat_df = pd.DataFrame() # Ensure it's an empty DF if no data found


except Exception as e:
    print(f"An error occurred during Eurostat processing: {e}")
    eurostat_df = pd.DataFrame() # Create empty DF on error

combined_df = pd.concat([wb_df, eurostat_df], ignore_index=True)
print("Done!")

combined_df.sort_values(['Country', 'Year', 'Indicator'], inplace=True)

Retrieving World Bank data...
Retrieving Eurostat data...
Done!


In [25]:
# Review the results
combined_df.pivot_table(index='Country', columns='Indicator', values='Year', aggfunc='count')

Indicator,Employment Rate (%),GDP Growth (%),House Price Index Annual Change (%),Household Consumption (% of GDP),Household Debt-to-GDP (%),Unemployment Rate (%)
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Austria,11.0,11.0,13.0,11.0,11.0,11.0
Belgium,11.0,11.0,18.0,11.0,11.0,11.0
Bulgaria,11.0,11.0,18.0,11.0,11.0,11.0
Croatia,11.0,11.0,15.0,11.0,11.0,11.0
Cyprus,11.0,11.0,18.0,11.0,11.0,11.0
Czechia,11.0,11.0,,11.0,11.0,11.0
Denmark,11.0,11.0,18.0,11.0,11.0,11.0
Estonia,11.0,11.0,18.0,11.0,11.0,11.0
Finland,11.0,11.0,18.0,11.0,11.0,11.0
France,11.0,11.0,18.0,11.0,11.0,11.0


In [26]:
combined_df.head(10)

Unnamed: 0,Country,Year,Indicator,Value
1575,Austria,2011,House Price Index Annual Change (%),5.4
1600,Austria,2012,House Price Index Annual Change (%),6.6
1625,Austria,2013,House Price Index Annual Change (%),5.0
1650,Austria,2014,House Price Index Annual Change (%),3.8
1675,Austria,2015,House Price Index Annual Change (%),5.6
1700,Austria,2016,House Price Index Annual Change (%),6.7
1725,Austria,2017,House Price Index Annual Change (%),5.1
1750,Austria,2018,House Price Index Annual Change (%),6.0
1775,Austria,2019,House Price Index Annual Change (%),6.0
1800,Austria,2020,House Price Index Annual Change (%),7.6
