In [1]:
from datetime import date, datetime, timedelta
from pytz import timezone
import pytz
# Timezone handles: UTC and the Belgian local zone.
utc = pytz.utc
brussels = timezone('Europe/Brussels')

# Calendar date as seen in Brussels, which can differ from the machine's
# local date around midnight.
today = datetime.now(tz=brussels).date()

# Show the date ten days before `today`.
today - timedelta(days=10)


datetime.date(2021, 11, 4)

In [2]:
import os
import sys
# Absolute path of the parent of the current working directory; added to
# sys.path once, presumably so the `app.*` imports in later cells resolve.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


In [3]:
from datetime import datetime, date
import pandas as pd
import numpy as np

In [4]:
from app.utils import get_db_engine

In [5]:
from app.etl.pipeline import Pipeline
from app.etl.pipeline import Transformer
from app.models import models
from app.models.metadata import ETL_Metadata

In [6]:
def _zero_pad_step(column):
    """Build an `update_value` step applying the "{0:0>5}" string format to `column`.

    A fresh dict is returned on every call so the steps never share state.
    """
    return {
        "type": "update_value",
        "data": {
            "column": column,
            "update": {
                "type": "string",
                "format": "{0:0>5}",
            },
        },
    }


# Declarative description of the NIS-code ETL job: where the data comes from,
# which model it targets, and the ordered list of transform steps.
pl = {
    "source": "https://statbel.fgov.be/sites/default/files/files/opendata/REFNIS%20code/TU_COM_REFNIS.zip",
    "model": "NIS_Code",
    # NOTE: the key is spelled "tranforms"; it is read under exactly this name
    # when the Transformer is built, so the spelling must stay in sync.
    "tranforms": [
        # 1. Map the source column headers onto the model's field names.
        {
            "type": "rename_columns",
            "data": {
                "columns": {
                    "LVL_REFNIS": "level",
                    "CD_REFNIS": "nis",
                    "CD_SUP_REFNIS": "parent_nis",
                    "TX_REFNIS_NL": "text_nl",
                    "TX_REFNIS_FR": "text_fr",
                    "TX_REFNIS_DE": "text_de",
                    "DT_VLDT_START": "valid_from",
                    "DT_VLDT_END": "valid_till",
                },
            },
        },
        # 2. Format the NIS code with the five-wide zero-fill pattern.
        _zero_pad_step("nis"),
        # 3. Replace the "-" placeholder in parent_nis with an empty string.
        {
            "type": "update_value",
            "data": {
                "column": "parent_nis",
                "current_value": "-",
                "value_if_true": {
                    "type": "string",
                    "value": "",
                },
            },
        },
        # 4. Apply the same five-wide zero-fill pattern to parent_nis.
        _zero_pad_step("parent_nis"),
        # 5. valid_from: the "01/01/1970" sentinel maps to "date.min"; any
        #    other value is handled with the "%d/%m/%Y" date format.
        {
            "type": "update_value",
            "data": {
                "column": "valid_from",
                "current_value": "01/01/1970",
                "value_if_true": {
                    "type": "date",
                    "value": "date.min",
                },
                "value_if_false": {
                    "type": "date",
                    "format": "%d/%m/%Y",
                },
            },
        },
    ],
}

In [7]:
# Look up the model class named in the config, then wire the pipeline
# together from the config's source location and transform steps.
model_class = getattr(models, pl["model"])
source_location = pl["source"]
pipeline = Pipeline(
    data_class=model_class,
    path=source_location,
    transformer=Transformer(pl["tranforms"]),
)

In [8]:
# Extract stage: presumably fetches the file at pl["source"] and returns it
# as a tabular frame (the recorded output shows the original REFNIS columns).
data_frame = pipeline.extract()

In [9]:
# Display the extracted frame as the cell's output, before any transforms.
data_frame

Unnamed: 0,LVL_REFNIS,CD_REFNIS,CD_SUP_REFNIS,TX_REFNIS_DE,TX_REFNIS_FR,TX_REFNIS_NL,DT_VLDT_START,DT_VLDT_END
0,1,2000,-,Flämische Region,Région flamande,Vlaams Gewest,01/01/1970,31/12/9999
1,1,3000,-,Wallonische Region,Région wallonne,Waals Gewest,01/01/1970,31/12/9999
2,1,4000,-,Region Brüssel-Hauptstadt,Région de Bruxelles-Capitale,Brussels Hoofdstedelijk Gewest,01/01/1970,31/12/9999
3,2,10000,02000,Provinz Antwerpen,Province d'Anvers,Provincie Antwerpen,01/01/1970,31/12/9999
4,2,20000,-,Provinz Brabant,Province de Brabant,Provincie Brabant,01/01/1970,31/12/1994
...,...,...,...,...,...,...,...,...
2764,4,93086,93000,Vodelée,Vodelée,Vodelée,01/01/1970,31/12/1976
2765,4,93087,93000,Vogenée,Vogenée,Vogenée,01/01/1970,31/12/1976
2766,4,93088,93000,Walcourt,Walcourt,Walcourt,01/01/1970,31/12/9999
2767,4,93089,93000,Yves-Gomezée,Yves-Gomezée,Yves-Gomezée,01/01/1970,31/12/1976


In [10]:
# Transform stage: presumably applies the steps from pl["tranforms"].
# NOTE: this rebinds `data_frame`, so the extracted frame is no longer
# reachable under this name after the cell runs.
data_frame = pipeline.transform(data_frame)

In [11]:
# Display the transformed frame: the recorded output shows renamed columns
# and zero-padded nis/parent_nis values; valid_till is still a raw string.
data_frame

Unnamed: 0,level,nis,parent_nis,text_de,text_fr,text_nl,valid_from,valid_till
0,1,02000,00000,Flämische Region,Région flamande,Vlaams Gewest,0001-01-01,31/12/9999
1,1,03000,00000,Wallonische Region,Région wallonne,Waals Gewest,0001-01-01,31/12/9999
2,1,04000,00000,Region Brüssel-Hauptstadt,Région de Bruxelles-Capitale,Brussels Hoofdstedelijk Gewest,0001-01-01,31/12/9999
3,2,10000,02000,Provinz Antwerpen,Province d'Anvers,Provincie Antwerpen,0001-01-01,31/12/9999
4,2,20000,00000,Provinz Brabant,Province de Brabant,Provincie Brabant,0001-01-01,31/12/1994
...,...,...,...,...,...,...,...,...
2764,4,93086,93000,Vodelée,Vodelée,Vodelée,0001-01-01,31/12/1976
2765,4,93087,93000,Vogenée,Vogenée,Vogenée,0001-01-01,31/12/1976
2766,4,93088,93000,Walcourt,Walcourt,Walcourt,0001-01-01,31/12/9999
2767,4,93089,93000,Yves-Gomezée,Yves-Gomezée,Yves-Gomezée,0001-01-01,31/12/1976


In [12]:
# Metadata stage. NOTE(review): the recorded run of this cell failed with
# sqlite3 OperationalError "no such table: etl_metadata"; presumably the
# metadata table must be created in the target database first (TODO confirm).
data_frame = pipeline.handle_metadata(data_frame)

OperationalError: (sqlite3.OperationalError) no such table: etl_metadata
[SQL: SELECT etl_metadata."table" AS etl_metadata_table, etl_metadata.last_date_processed AS etl_metadata_last_date_processed 
FROM etl_metadata 
WHERE etl_metadata."table" = ?
 LIMIT ? OFFSET ?]
[parameters: ('dim_nis_codes', 1, 0)]
(Background on this error at: https://sqlalche.me/e/14/e3q8)

In [None]:
# Display the frame returned by the metadata step (this cell has no recorded
# execution).
data_frame

In [None]:
# Summary statistics for the frame's columns (assumes a pandas DataFrame).
data_frame.describe()

In [None]:
# Column dtypes, non-null counts and memory usage (assumes a pandas DataFrame).
data_frame.info()

In [None]:
# Count missing values per column.
data_frame.isnull().sum()

In [None]:
# Drop every row that contains a missing value. The result is rebound
# rather than using inplace=True, so the cell does not mutate a frame that
# other objects may still reference.
data_frame = data_frame.dropna()

In [None]:
# Re-count missing values per column; every count is expected to be zero
# once rows with missing values have been dropped.
data_frame.isnull().sum()

In [None]:
# Scratch check: build a one-entry mapping and show its type.
test = dict(hello="workd")
type(test)

In [13]:
# Read the clock once so both bounds always belong to the same year; two
# separate now() calls could straddle the New Year rollover.
# NOTE: this is the machine's local (naive) date, not a timezone-aware one.
current_date = datetime.now().date()
starting_day_of_current_year = current_date.replace(month=1, day=1)
ending_day_of_current_year = current_date.replace(month=12, day=31)
starting_day_of_current_year

datetime.date(2021, 1, 1)

In [16]:
frequency = "daily"
# Compare by value: `is not` tests object identity, which for string
# literals depends on interning and raises a SyntaxWarning.
frequency != "daily"

  frequency is not "daily"


False