In [1]:
#Import Modules
import datetime
import logging

import cbsodata
import pandas as pd
import requests

from logger import setup_custom_logger

In [2]:
# Setup Logger
# Creates the notebook-wide `logger` used by every later cell.
try:
    logger = setup_custom_logger("E007_Consumer_Confidence")
    logger.info('starting')
except Exception:
    # If setup_custom_logger() itself failed, `logger` was never bound, so calling
    # logger.exception() here would raise NameError and hide the real error.
    # Report through the standard-library logging module instead, then re-raise.
    logging.getLogger("E007_Consumer_Confidence").exception('logger could not be loaded')
    raise

2021-04-21 11:37:38 INFO     E007_Consumer_Confidence starting


INFO:E007_Consumer_Confidence:starting


In [3]:
try:
    # Take one timestamp and derive the year window from it.
    # (Besides .year, the object also exposes .month, .day, .hour and .minute.)
    now = datetime.datetime.now()

    logger.info('datetime loaded')
    # Window covers the current year and the three years before it.
    yearMin, yearMax = now.year - 3, now.year
except:
    logger.exception('datetime could not be loaded')
    logger.info('set yearmin to a default value')
    # Fallback window.
    # NOTE(review): the error is re-raised right after these assignments, so the
    # defaults only matter if execution is continued manually after the failure.
    yearMin, yearMax = 2010, 2030
    raise

2021-04-21 11:37:38 INFO     E007_Consumer_Confidence datetime loaded


INFO:E007_Consumer_Confidence:datetime loaded


In [4]:
# Identifier of the CBS StatLine table that the download cell requests.
dataset_id = '83693ned'
# Table definitions (Dutch field names kept verbatim, English gloss in parentheses).
# NOTE(review): the notes below mention sex, age and labour-force measures, while this
# notebook's logger and output file are named "Consumer_Confidence" — confirm that
# they really describe table 83693ned and were not carried over from another notebook.

# Geslacht (sex): T001038 = Totaal (man/vrouw) (total, male/female)
# Leeftijd (age): 52052   = 15 tot 75 jaar (15 to 75 years)
# Perioden (periods): YYYY%%MM
# Seizoengecorrigeerd_2 = x1000_Beroepsbevolking_Seizoengecorrigeerd (labour force, seasonally adjusted, x1000)
# Seizoengecorrigeerd_4 = x1000_Werkzame_Beroepsbevolking_Seizoengecorrigeerd (employed labour force, seasonally adjusted, x1000)
# Seizoengecorrigeerd_6 = x1000_Werkloze_Beroepsbevolking_Seizoengecorrigeerd (unemployed labour force, seasonally adjusted, x1000)
# Seizoengecorrigeerd_8 = Werkloosheidspeercentage_Seizoengecorrigeerd_procenten (unemployment rate, seasonally adjusted, percent)
# Seizoengecorrigeerd_12 = Bruto_Arbeitsparticipatie_Seizoengecorrigeerd_procenten (gross labour participation, seasonally adjusted, percent)
# Seizoengecorrigeerd_14 = Netto_Arbeitsparticipatie_Seizoengecorrigeerd_procenten (net labour participation, seasonally adjusted, percent)

In [5]:
try:
    logger.info(f'Retrieve data from dataset {dataset_id}')
    # Filter expression sent with the request: keep rows whose first four
    # characters of Perioden compare greater-or-equal to yearMin.
    period_filter = f"substring(Perioden,0,4) ge '{yearMin}'"
    records = cbsodata.get_data(dataset_id, filters=period_filter)
    # Wrap the downloaded records in a DataFrame for the cleaning cells below.
    data = pd.DataFrame(records)
except:
    logger.exception('error loading data from CBS Statline')
    raise

2021-04-21 11:37:38 INFO     E007_Consumer_Confidence Retrieve data from dataset 83693ned


INFO:E007_Consumer_Confidence:Retrieve data from dataset 83693ned


In [None]:
# Remove quarterly and yearly data
try:
    period_labels = data["Perioden"]
    # Quarterly labels contain the word "kwartaal". na=True flags missing labels as
    # well, so they are dropped exactly as the former `== False` comparison dropped them.
    is_quarterly = period_labels.str.contains("kwartaal", regex=False, na=True)
    # Labels of four characters or fewer are dropped (presumably bare years — confirm).
    is_longer_than_year = period_labels.str.len() > 4
    # .copy() hands later cells an independent frame, so assigning columns on it
    # does not trigger pandas' SettingWithCopyWarning.
    data = data[~is_quarterly & is_longer_than_year].copy()
except Exception:
    logger.exception('Perioden filter could not be applied')
    raise

In [None]:
# Convert the Dutch month names in Perioden to month numbers, then to date objects
try:
    # Dutch month name (including its leading space) -> "-MM" suffix;
    # e.g. a label such as "2021 januari" would become "2021-01".
    month_suffixes = {
        ' januari': '-01',
        ' februari': '-02',
        ' maart': '-03',
        ' april': '-04',
        ' mei': '-05',
        ' juni': '-06',
        ' juli': '-07',
        ' augustus': '-08',
        ' september': '-09',
        ' oktober': '-10',
        ' november': '-11',
        ' december': '-12',
    }
    converted = data['Perioden']
    for month_name, suffix in month_suffixes.items():
        # regex=False forces a literal match; the default of `regex` is not the
        # same across pandas versions, so state it explicitly.
        converted = converted.str.replace(month_name, suffix, regex=False)
    # Write the column back once, after parsing succeeded, so a failure does not
    # leave `data` with a half-converted Perioden column.
    data['Perioden'] = pd.to_datetime(converted).dt.date
except Exception:
    logger.exception('Columns could not be changed to monthly numbers or formatted to different date')
    raise

In [8]:
# Export dataFrame to a CSV file in the output folder
try:
    data.to_csv("output/E007_Consumer_Confidence.csv")
except Exception:
    logger.exception('dataFrame could not be exported to output folder')
    # Re-raise, as every other cell does, so a failed export is not mistaken
    # for a successful run.
    raise