In [1]:
# Import modules
import datetime
import logging

import cbsodata
import pandas as pd
import requests

from logger import setup_custom_logger

In [2]:
# Set up the notebook-wide logger used by every later cell.
try:
    logger = setup_custom_logger("E008_Producer_Confidence")
except Exception:
    # `logger` is not bound when setup itself fails, so report the failure
    # through the standard library instead of the (missing) custom logger.
    logging.getLogger("E008_Producer_Confidence").exception('logger could not be loaded')
    raise

logger.info('starting')

2021-04-01 11:44:26 INFO     E008_Producer_Confidence starting


INFO:E008_Producer_Confidence:starting


In [3]:
# Derive the year window from the current date:
#   yearMin = current year minus 3 (used in the CBS period filter of the retrieval cell)
#   yearMax = current year (not referenced by the other cells visible in this notebook)
try:
    now = datetime.datetime.now()
    logger.info('datetime loaded')

    # `now` also exposes .month, .day, .hour and .minute if finer granularity is needed.
    yearMin = now.year - 3
    yearMax = now.year
except Exception:
    # Catch Exception rather than everything, so a kernel interrupt is not
    # logged as a datetime failure.
    logger.exception('datetime could not be loaded')
    logger.info('set yearmin to a default value')
    # Fallback values. The re-raise below still fails this cell; the defaults
    # only take effect if later cells are executed manually afterwards, because
    # the names stay bound in the kernel namespace.
    yearMin = 2010
    yearMax = 2030
    raise



2021-04-01 11:44:26 INFO     E008_Producer_Confidence datetime loaded


INFO:E008_Producer_Confidence:datetime loaded


In [4]:
# CBS StatLine table id requested by the retrieval cell below.
# NOTE(review): this header used to read "Dataset 80590ned", which does not
# match the id that is actually assigned here.
dataset_id = '81234ned'

# Table definitions
#
# Columns present in the retrieved frame (see the preview output further down):
#   BedrijfstakkenBranchesSBI2008 = branch of industry, e.g. "C Industrie"
#   Perioden                      = period label, e.g. "2018 januari"
#   Producentenvertrouwen_1       = producer confidence
#   VerwachteBedrijvigheid_2      = expected activity
#   OordeelOrderpositie_3         = opinion on order position
#   OordeelVoorraden_4            = opinion on stocks
#
# NOTE(review): the notes that used to be here described Geslacht (T001038),
# Leeftijd (52052) and the Seizoengecorrigeerd_2/4/6/8/12/14 labour-force
# fields. None of those columns appear in this table's output, so they were
# presumably copied from another notebook - confirm before relying on them.


In [5]:
# Download the table from CBS StatLine, restricted to periods from yearMin onwards.
try:
    logger.info(f'Retrieve data from dataset {dataset_id}')
    # OData filter: compare the first four characters of Perioden (the year)
    # with yearMin; `ge` means "greater than or equal".
    period_filter = f"substring(Perioden,0,4) ge '{yearMin}'"
    data = pd.DataFrame(cbsodata.get_data(dataset_id, filters=period_filter))
except Exception:
    logger.exception('error loading data from CBS Statline')
    raise

2021-04-01 11:44:26 INFO     E008_Producer_Confidence Retrieve data from dataset 81234ned


INFO:E008_Producer_Confidence:Retrieve data from dataset 81234ned


In [6]:
# Preview the retrieved table; the bare expression is rendered as the cell output.
data

Unnamed: 0,ID,BedrijfstakkenBranchesSBI2008,Perioden,Producentenvertrouwen_1,VerwachteBedrijvigheid_2,OordeelOrderpositie_3,OordeelVoorraden_4
0,396,C Industrie,2018 januari,10.3,13.3,16.3,1.4
1,397,C Industrie,2018 februari,10.9,16.0,15.6,1.2
2,398,C Industrie,2018 maart,9.5,12.6,14.6,1.3
3,399,C Industrie,2018 april,8.2,9.0,15.3,0.2
4,400,C Industrie,2018 mei,9.8,13.3,16.3,-0.3
...,...,...,...,...,...,...,...
346,3910,29-30 Transportmiddelenindustrie,2020 november,3.1,-1.6,4.4,6.4
347,3911,29-30 Transportmiddelenindustrie,2020 december,7.0,18.3,5.3,-2.5
348,3912,29-30 Transportmiddelenindustrie,2021 januari,2.5,6.6,-12.4,13.3
349,3913,29-30 Transportmiddelenindustrie,2021 februari,7.1,7.4,6.1,7.6


In [7]:
# Remove quarterly and yearly rows so only monthly periods remain.
try:
    perioden_raw = data["Perioden"]
    # Rows whose period label mentions a quarter ("kwartaal"). Missing labels are
    # flagged as well so they are dropped, exactly as the `== False` test did.
    is_quarter = perioden_raw.str.contains("kwartaal", regex=False, na=True)
    # Labels of four characters or fewer - presumably bare years such as "2018".
    is_year_only = perioden_raw.str.len() <= 4
    # .copy() makes the result an independent frame, so later column assignments
    # do not trigger pandas' SettingWithCopyWarning.
    data = data[~is_quarter & ~is_year_only].copy()
except Exception:
    logger.exception('Perioden filter could not be applied')
    raise

In [8]:
# Convert period labels such as "2018 januari" into dates (first day of the month).

# Dutch month name -> two-digit month number.
MONTH_NUMBERS = {
    'januari': '01',
    'februari': '02',
    'maart': '03',
    'april': '04',
    'mei': '05',
    'juni': '06',
    'juli': '07',
    'augustus': '08',
    'september': '09',
    'oktober': '10',
    'november': '11',
    'december': '12',
}

try:
    perioden = data['Perioden']
    for month_name, month_number in MONTH_NUMBERS.items():
        # Literal replacement of " <month>" by "-<MM>"; regex=False keeps the
        # behaviour independent of the pandas version's default.
        perioden = perioden.str.replace(f' {month_name}', f'-{month_number}', regex=False)
    # An explicit format avoids per-value format guessing; the labels are "YYYY-MM" here.
    # assign() returns a new frame instead of writing into a possibly sliced one.
    data = data.assign(Perioden=pd.to_datetime(perioden, format='%Y-%m').dt.date)
except Exception:
    logger.exception('Columns could not be changed to monthly numbers or formatted to different date')
    raise
data

Unnamed: 0,ID,BedrijfstakkenBranchesSBI2008,Perioden,Producentenvertrouwen_1,VerwachteBedrijvigheid_2,OordeelOrderpositie_3,OordeelVoorraden_4
0,396,C Industrie,2018-01-01,10.3,13.3,16.3,1.4
1,397,C Industrie,2018-02-01,10.9,16.0,15.6,1.2
2,398,C Industrie,2018-03-01,9.5,12.6,14.6,1.3
3,399,C Industrie,2018-04-01,8.2,9.0,15.3,0.2
4,400,C Industrie,2018-05-01,9.8,13.3,16.3,-0.3
...,...,...,...,...,...,...,...
346,3910,29-30 Transportmiddelenindustrie,2020-11-01,3.1,-1.6,4.4,6.4
347,3911,29-30 Transportmiddelenindustrie,2020-12-01,7.0,18.3,5.3,-2.5
348,3912,29-30 Transportmiddelenindustrie,2021-01-01,2.5,6.6,-12.4,13.3
349,3913,29-30 Transportmiddelenindustrie,2021-02-01,7.1,7.4,6.1,7.6


In [9]:
# Export the DataFrame to a CSV file in the output folder.
try:
    data.to_csv("output/E008_Producer_Confidence.csv")
except Exception:
    logger.exception('dataFrame could not be exported to output folder')
    # Re-raise like every other cell: the CSV is this notebook's product, so a
    # failed export must not leave the run looking successful.
    raise