In [12]:
import os, sys
sys.path.insert(1, os.path.abspath('..'))

from eustats import *
import seaborn as sns
import matplotlib.pyplot as plt

In [13]:
# Lookup from two-letter country code to its broad European region.
# Written region-by-region and flattened, so every member of a group is
# visible on one line; insertion order follows the listing below.

eu_regions = {
    country_code: region_label
    for region_label, country_codes in (
        ('Western Europe',
         ('AT', 'BE', 'FR', 'DE', 'IE', 'LU', 'NL')),
        ('Southern Europe',
         ('CY', 'EL', 'IT', 'MT', 'PT', 'ES')),
        ('Northern Europe',
         ('DK', 'EE', 'FI', 'LV', 'LT', 'SE')),
        ('Central and Eastern Europe',
         ('BG', 'HR', 'CZ', 'RO', 'SK', 'SI', 'PL', 'HU')),
    )
    for country_code in country_codes
}

# Flatten the per-country entries of `codes` into one `regions` dict.
# `countries` and `codes` are not defined in this notebook; presumably they
# come from the `from eustats import *` star import — TODO confirm.
# Downstream cells pass the keys of `regions` as the `geo` filter and use
# the values as region names.
regions = {}
for item in countries.values():
    # Merge this entry's mapping; on duplicate keys the later one wins.
    regions.update(codes[item])
    


In [14]:
# Get Regional GDP
def get_gdp_region(year=2023):
    """Fetch regional GDP together with country and EU-region labels.

    Queries dataset 'nama_10r_2gdp' through the module-level ``client`` with
    unit 'MIO_EUR', restricted to the geo codes that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with columns
        'Country' (first two characters of the geo code), 'EU Region'
        (looked up in ``eu_regions``) and 'GDP' (the fetched value divided
        by 1000). Rows containing any missing value are dropped.

    Raises
    ------
    KeyError
        If a geo code is missing from ``regions`` or its two-letter prefix
        is missing from ``eu_regions``.
    """
    query = {'unit': 'MIO_EUR', 'geo': list(regions.keys()), 'time': year}
    raw = client.get_dataset('nama_10r_2gdp', query).to_dataframe()

    # Work on new frames rather than mutating the client's frame in place.
    gdp = raw.dropna().rename(columns={'values': 'GDP'})
    country = gdp['geo'].str[:2]
    gdp = gdp.assign(**{
        'region_name': gdp['geo'].map(lambda code: regions[code]),
        'Country': country,
        'EU Region': country.map(lambda code: eu_regions[code]),
        # Values are requested in 'MIO_EUR'; dividing by 1000 rescales them
        # (to billions, assuming MIO_EUR means million euro).
        'GDP': gdp['GDP'] / 1000,
    })

    return gdp.set_index('region_name')[['Country', 'EU Region', 'GDP']]


In [15]:
# Get regional GDP per capita
def get_gdp_capita_region(year=2023):
    """Fetch regional GDP per capita as a one-column DataFrame.

    Queries dataset 'nama_10r_2gdp' through the module-level ``client`` with
    unit 'EUR_HAB', restricted to the geo codes that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'GDP per Capita'. Rows containing any missing value are
        dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    query = {'unit': 'EUR_HAB', 'time': year,
             'geo': list(regions.keys())}
    raw = client.get_dataset('nama_10r_2gdp', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.set_index('region_name').rename(columns={'values': 'GDP per Capita'})

    return tidy[['GDP per Capita']]


In [16]:
# Get regional Unemployment
def get_unemployment_region(year=2023):
    """Fetch the regional unemployment rate as a one-column DataFrame.

    Queries dataset 'lfst_r_lfu3rt' through the module-level ``client`` with
    sex 'T', age 'Y15-74' and isced11 'TOTAL', restricted to the geo codes
    that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Unemployment %'. Rows containing any missing value are
        dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    query = {'sex': 'T', 'geo': list(regions.keys()), 'time': year,
             'age': 'Y15-74', 'isced11': 'TOTAL'}
    raw = client.get_dataset('lfst_r_lfu3rt', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': 'Unemployment %'}).set_index('region_name')

    return tidy[['Unemployment %']]


In [17]:
# Get life expectancy
def get_life_expectancy(year=2023):
    """Fetch regional life expectancy as a one-column DataFrame.

    Queries dataset 'tgs00101' through the module-level ``client`` with
    sex 'T', restricted to the geo codes that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Life Expectancy'. Rows containing any missing value are
        dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    query = {'sex': 'T', 'geo': list(regions.keys()), 'time': year}
    raw = client.get_dataset('tgs00101', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': 'Life Expectancy'}).set_index('region_name')

    return tidy[['Life Expectancy']]

In [18]:
# Tertiary Educational attainment

def get_tertiary_education(year=2023):
    """Fetch regional tertiary educational attainment as a one-column DataFrame.

    Queries dataset 'tgs00109' through the module-level ``client`` with
    sex 'T', restricted to the geo codes that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Tertiary Educational Attainment %'. Rows containing any
        missing value are dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    column = 'Tertiary Educational Attainment %'
    query = {'sex': 'T', 'geo': list(regions.keys()), 'time': year}
    raw = client.get_dataset('tgs00109', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': column}).set_index('region_name')

    return tidy[[column]]

In [19]:
# Get Population Density

def get_population_density(year=2023):
    """Fetch regional population density as a one-column DataFrame.

    Queries dataset 'tgs00024' through the module-level ``client``,
    restricted to the geo codes that are keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Population Density'. Rows containing any missing value are
        dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    query = {'geo': list(regions.keys()), 'time': year}
    raw = client.get_dataset('tgs00024', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': 'Population Density'}).set_index('region_name')

    return tidy[['Population Density']]


In [20]:
# Get Poverty Risk

def get_poverty_risk(year=2023):
    """Fetch the regional poverty-risk share as a one-column DataFrame.

    Queries dataset 'ilc_peps11n' through the module-level ``client``,
    restricted to the geo codes that are keys of ``regions``.

    NOTE(review): the dataset code looks like the "at risk of poverty or
    social exclusion" series, which is broader than the column label
    suggests — confirm before relying on the label.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'People at Risk of Poverty %'. Rows containing any missing
        value are dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    column = 'People at Risk of Poverty %'
    query = {'geo': list(regions.keys()), 'time': year}
    raw = client.get_dataset('ilc_peps11n', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': column}).set_index('region_name')

    return tidy[[column]]


In [21]:
# Get regional availability of doctors

def get_doctors(year=2020):
    """Fetch the regional density of doctors as a one-column DataFrame.

    Queries dataset 'hlth_rs_prsrg' through the module-level ``client`` with
    unit 'P_HTHAB' and isco08 'OC221', restricted to the geo codes that are
    keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2020, the value that
        used to be hard-coded (note it differs from the 2023 default used by
        most other indicators in this notebook).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Doctors per 100000'. Rows containing any missing value are
        dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    query = {'geo': list(regions.keys()), 'time': year, 'unit': 'P_HTHAB',
             'isco08': 'OC221'}
    raw = client.get_dataset('hlth_rs_prsrg', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': 'Doctors per 100000'}).set_index('region_name')

    return tidy[['Doctors per 100000']]


In [22]:
# Deaths in road accidents

def get_fatal_road_accidents(year=2022):
    """Fetch regional road-accident fatalities as a one-column DataFrame.

    Queries dataset 'tran_r_acci' through the module-level ``client`` with
    victim 'KIL' and unit 'P_MHAB', restricted to the geo codes that are
    keys of ``regions``.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2022, the value that
        used to be hard-coded (note it differs from the 2023 default used by
        most other indicators in this notebook).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Fatal Road Accidents per Million'. Rows containing any
        missing value are dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    column = 'Fatal Road Accidents per Million'
    query = {'victim': 'KIL', 'geo': list(regions.keys()), 'time': year,
             'unit': 'P_MHAB'}
    raw = client.get_dataset('tran_r_acci', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': column}).set_index('region_name')

    return tidy[[column]]


In [23]:
# Get Regular Internet Users

def get_regular_internet_users(year=2023):
    """Fetch the regional share of internet users as a one-column DataFrame.

    Queries dataset 'isoc_r_iuse_i' through the module-level ``client`` with
    indic_is 'I_IDAY' and unit 'PC_IND', restricted to the geo codes that
    are keys of ``regions``.

    NOTE(review): confirm that indicator 'I_IDAY' matches the "Regular"
    wording of the column label.

    Parameters
    ----------
    year : int, optional
        Value sent as the ``time`` filter. Defaults to 2023, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region_name`` (looked up in ``regions``) with the single
        column 'Regular Internet Users %'. Rows containing any missing value
        are dropped.

    Raises
    ------
    KeyError
        If a returned geo code is not a key of ``regions``.
    """
    column = 'Regular Internet Users %'
    query = {'indic_is': 'I_IDAY', 'geo': list(regions.keys()), 'time': year,
             'unit': 'PC_IND'}
    raw = client.get_dataset('isoc_r_iuse_i', query).to_dataframe()

    # Build new frames instead of mutating the client's frame in place.
    tidy = raw.dropna()
    tidy = tidy.assign(region_name=tidy['geo'].map(lambda code: regions[code]))
    tidy = tidy.rename(columns={'values': column}).set_index('region_name')

    return tidy[[column]]


In [24]:
# Start from the GDP table and join every other indicator onto it by index;
# the fetch calls run in the order they are listed.
df = (
    get_gdp_region()
    .join(get_gdp_capita_region())
    .join(get_unemployment_region())
    .join(get_life_expectancy())
    # get_doctors() is currently left out of the merge.
    .join(get_fatal_road_accidents())
    .join(get_tertiary_education())
    .join(get_population_density())
    .join(get_poverty_risk())
    .join(get_regular_internet_users())
    # Keep only rows that have at least 4 non-missing values.
    .dropna(thresh=4)
)


In [25]:
# Print the merged table's column dtypes and non-null counts to spot sparsely populated indicators.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 237 entries, Région de Bruxelles-Capitale/ Brussels Hoofdstedelijk Gewest to Övre Norrland
Data columns (total 11 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Country                            237 non-null    object 
 1   EU Region                          237 non-null    object 
 2   GDP                                237 non-null    float64
 3   GDP per Capita                     237 non-null    float64
 4   Unemployment %                     232 non-null    float64
 5   Life Expectancy                    237 non-null    float64
 6   Fatal Road Accidents per Million   236 non-null    float64
 7   Tertiary Educational Attainment %  236 non-null    float64
 8   Population Density                 235 non-null    float64
 9   People at Risk of Poverty %        225 non-null    float64
 10  Regular Internet Users %           181 non-null    float64

In [26]:
# Preview the first rows of the merged table (index is region_name).
df.head()

Unnamed: 0_level_0,Country,EU Region,GDP,GDP per Capita,Unemployment %,Life Expectancy,Fatal Road Accidents per Million,Tertiary Educational Attainment %,Population Density,People at Risk of Poverty %,Regular Internet Users %
region_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Région de Bruxelles-Capitale/ Brussels Hoofdstedelijk Gewest,BE,Western Europe,10.99274,82100.0,10.6,82.1,20.0,33.2,7770.2,37.6,91.1
Prov. Antwerpen,BE,Western Europe,73.20549,59400.0,3.6,83.5,34.0,36.0,686.3,14.9,90.81
Prov. Limburg (BE),BE,Western Europe,118.67971,40700.0,3.2,83.6,56.0,46.3,377.0,11.4,91.27
Prov. Oost-Vlaanderen,BE,Western Europe,26.63891,47400.0,2.9,82.9,39.0,33.8,528.3,11.5,91.56
Prov. Vlaams-Brabant,BE,Western Europe,61.22394,58300.0,4.2,84.0,33.0,33.1,564.3,10.8,94.14


In [27]:
# Persist the merged regional table; floats are written with two decimals.
df.to_csv(
    '../data/eu_regional_data.csv',
    float_format='%.2f',
    encoding='utf-8',
)