In [1]:
# Import modules
import datetime
import logging
import sys
from pathlib import Path

import cbsodata
import pandas as pd

from logger import setup_custom_logger

In [2]:
# Setup of logger
# Create the notebook-wide logger and abort the run if it cannot be configured.
try:
    logger = setup_custom_logger("E004_Households_Consumption")
    logger.info('starting')
except Exception:
    # `logger` is still unbound when setup_custom_logger itself is what failed,
    # so report through the standard library rather than through `logger`
    # (which would raise a NameError on top of the real error).
    logging.getLogger("E004_Households_Consumption").exception('logger could not be loaded')
    raise

2021-03-26 12:07:04 INFO     E004_Households_Consumption starting


INFO:E004_Households_Consumption:starting


In [3]:
# Derive the year window from today's date:
#   yearMin     - six years before the current year (lower bound of the CBS query)
#   currentYear - the year before the current one
try:
    # Get current date information
    now = datetime.datetime.now()
    logger.info('datetime loaded')

    # `now` also exposes .month, .day, .hour and .minute if finer granularity is needed.
    yearMin = now.year - 6
    currentYear = now.year - 1
except Exception:
    logger.exception('datetime could not be loaded')
    logger.info('set yearmin to a default value')
    # Set default values for fallback. The error is still re-raised below, but in
    # an interactive kernel these names stay bound for manual continuation.
    yearMin = 2010
    yearMax = 2030
    # NOTE(review): the fallback used to define only `yearMax`, while the normal
    # path defines `currentYear`. Both names are now bound here so either path
    # leaves the same variables available - confirm the intended default value.
    currentYear = yearMax
    raise

2021-03-26 12:07:04 INFO     E004_Households_Consumption datetime loaded


INFO:E004_Households_Consumption:datetime loaded


In [4]:
# CBS StatLine table queried by this notebook.
dataset_id = "84106NED"

# Key reference for table 84106NED
# --------------------------------
# Measure column:
#   Huishoudens_10 = Consumptieve Huishoudens (household consumption)
#
# SoortMutaties codes (type of change):
#   A045303 = Prijs_tov_zelfde_periode_vorig_jaar   (price, vs. same period previous year)
#   A045299 = Volume_tov_zelfde_periode_vorig_jaar  (volume, vs. same period previous year)
#   A045300 = Volume_tov_voorgaande_periode         (volume, vs. preceding period)
#   A045301 = Waarde_tov_zelfde_periode_vorig_jaar  (value, vs. same period previous year)
#   A045302 = Waarde_tov_voorgaande_periode         (value, vs. preceding period)




In [5]:
# Download the price (A045303) and volume (A045299) year-on-year changes of
# Huishoudens_10 from CBS StatLine, limited to periods from yearMin onwards.
try:
    logger.info(f'Retrieve data from dataset {dataset_id}')

    # OData evaluates `and` before `or`. Without the parentheses the filter reads
    # "(year >= yearMin and A045303) or A045299", i.e. the year restriction would
    # not apply to A045299 at all.
    odata_filter = (
        f"substring(Perioden,0,4) ge '{yearMin}' and "
        "(SoortMutaties eq 'A045303' or SoortMutaties eq 'A045299')"
    )

    df = pd.DataFrame(
        cbsodata.get_data(
            dataset_id,
            filters=odata_filter,
            select=["Perioden", "SoortMutaties", "Huishoudens_10"],
        )
    )
except Exception:
    logger.exception('error loading data from CBS Statline')
    raise
    

2021-03-26 12:07:04 INFO     E004_Households_Consumption Retrieve data from dataset 84106NED


INFO:E004_Households_Consumption:Retrieve data from dataset 84106NED


In [6]:
# Remove yearly data
# Rows whose Perioden label is at most 4 characters long (presumably a bare
# year such as "2015" - verify against the raw download) are dropped.
try:
    is_sub_yearly = df['Perioden'].map(len) > 4
    # Take an explicit copy: the next step assigns to the Perioden column, and
    # doing that on a filtered slice triggers pandas' SettingWithCopyWarning.
    df = df[is_sub_yearly].copy()
except Exception:
    logger.exception('Perioden filter could not be applied')
    raise

In [7]:
# Date formatting and quarter format
# Each Dutch quarter label is replaced by a "-MM-01" suffix (quarter 1 -> March,
# 2 -> June, 3 -> September, 4 -> December) and the result is parsed to a date.
try:
    quarter_suffixes = {
        " 1e kwartaal": "-03-01",
        " 2e kwartaal": "-06-01",
        " 3e kwartaal": "-09-01",
        " 4e kwartaal": "-12-01",
    }

    perioden = df["Perioden"]
    for quarter_label, month_day in quarter_suffixes.items():
        # regex=False: the labels are plain text, and the default of `regex`
        # differs between pandas versions.
        perioden = perioden.str.replace(quarter_label, month_day, regex=False)

    # Assign once at the end so a failure above leaves the column untouched.
    df["Perioden"] = pd.to_datetime(perioden).dt.date
except Exception:
    logger.exception('Columns could not be changed to monthly numbers or formatted to different date')
    raise

In [8]:
# Preview the first rows of the cleaned data instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Perioden,SoortMutaties,Huishoudens_10
0,2015-03-01,"Volume, t.o.v. zelfde periode vorig jaar",1.9
1,2015-06-01,"Volume, t.o.v. zelfde periode vorig jaar",2.2
2,2015-09-01,"Volume, t.o.v. zelfde periode vorig jaar",2.3
3,2015-12-01,"Volume, t.o.v. zelfde periode vorig jaar",1.5
5,2016-03-01,"Volume, t.o.v. zelfde periode vorig jaar",0.9
6,2016-06-01,"Volume, t.o.v. zelfde periode vorig jaar",0.5
7,2016-09-01,"Volume, t.o.v. zelfde periode vorig jaar",1.2
8,2016-12-01,"Volume, t.o.v. zelfde periode vorig jaar",1.9
10,2017-03-01,"Volume, t.o.v. zelfde periode vorig jaar",1.9
11,2017-06-01,"Volume, t.o.v. zelfde periode vorig jaar",2.6


In [9]:
# Export the cleaned data to CSV.
try:
    output_path = Path("output/E004_Households_Consumption.csv")
    # Create the target folder if needed so a fresh checkout does not fail here.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # The DataFrame index is still written as the first (unnamed) column, so the
    # file layout is unchanged for existing consumers.
    df.to_csv(output_path)
except Exception:
    logger.exception('Exporting failed')
    raise