In [1]:
from datetime import date, datetime, timedelta
from pytz import timezone
import pytz
# Timezone handles: UTC and the Europe/Brussels zone.
utc = pytz.utc
brussels = pytz.timezone('Europe/Brussels')
# Calendar date of "now" as observed in the Brussels timezone, rather than the
# machine-local date that date.today() would give.
today = datetime.now(tz=brussels).date()


In [2]:
import os
import sys
# Absolute path of the parent of the current working directory, appended to the
# import path at most once (presumably so the `app.*` imports further down
# resolve when the notebook runs from a sub-directory of the project).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


In [3]:
from datetime import datetime, date
import pandas as pd
import numpy as np

In [4]:
from app.utils import get_db_engine

In [5]:
from app.etl.pipeline import Pipeline
from app.etl.pipeline import Transformer
from app.models import models
from app.models.metadata import ETL_Metadata

In [9]:
def _boolean_flag_update(column):
    """Return an ``update_value`` transform spec for the flag column ``column``.

    The spec uses ``1`` as ``current_value`` and carries the boolean
    replacements ``True`` (``value_if_true``) and ``False`` (``value_if_false``).
    A fresh dict is built on every call, so specs never share nested objects.
    """
    return {
        "type": "update_value",
        "data": {
            "column": column,
            "current_value": 1,
            "value_if_true": {"type": "boolean", "value": True},
            "value_if_false": {"type": "boolean", "value": False},
        },
    }


# Declarative description of one ETL run: where the CSV comes from, the name of
# the model class to load into, metadata-handling options, and the ordered list
# of transform specs.
# NOTE(review): the key "tranforms" (sic) is kept as-is because the cell that
# builds the Pipeline reads it under exactly this spelling; rename both together.
pl = {
    "source": "https://www.laatjevaccineren.be/vaccination-info/get/vaccinaties.csv",
    "model": "VaccinationsByNISCodeDailyUpdated",
    "metadata_handler": {"frequency": "daily", "full_refresh": True},
    "tranforms": [
        # 1. Drop the descriptive location columns.
        {
            "type": "drop_columns",
            "data": {
                "columns": ["MUNICIPALITY", "PROVINCE", "REGION", "EERSTELIJNSZONE"],
            },
        },
        # 2. Map the source column names onto the lower-case target names.
        {
            "type": "rename_columns",
            "data": {
                "columns": {
                    "NIS_CD": "nis_code",
                    "GENDER_CD": "sex",
                    "AGE_CD": "agegroup",
                    "ADULT_FL(18+)": "plus18",
                    "SENIOR_FL(65+)": "plus65",
                    "FULLY_VACCINATED_AMT": "vaccinated_fully_total",
                    "PARTLY_VACCINATED_AMT": "vaccinated_partly_total",
                    "FULLY_VACCINATED_AZ_AMT": "vaccinated_fully_astrazeneca",
                    "PARTLY_VACCINATED_AZ_AMT": "vaccinated_partly_astrazeneca",
                    "FULLY_VACCINATED_PF_AMT": "vaccinated_fully_pfizer",
                    "PARTLY_VACCINATED_PF_AMT": "vaccinated_partly_pfizer",
                    "FULLY_VACCINATED_MO_AMT": "vaccinated_fully_moderna",
                    "PARTLY_VACCINATED_MO_AMT": "vaccinated_partly_moderna",
                    "FULLY_VACCINATED_JJ_AMT": "vaccinated_fully_johnsonandjohnson",
                    "FULLY_VACCINATED_OTHER_AMT": "vaccinated_fully_other",
                    "PARTLY_VACCINATED_OTHER_AMT": "vaccinated_partly_other",
                    "POPULATION_NBR": "population_by_agecategory_and_municipality",
                },
            },
        },
        # 3./4. Flag-column specs for the two (already renamed) age flags.
        _boolean_flag_update("plus18"),
        _boolean_flag_update("plus65"),
    ],
}

In [10]:
# Assemble the pipeline from the `pl` description: the model class is looked up
# by name on the `models` module, the source URL is passed as the path, and the
# transform specs are wrapped in a Transformer.
pipeline_kwargs = {
    "data_class": getattr(models, pl["model"]),
    "path": pl["source"],
    "transformer": Transformer(pl["tranforms"]),
}
pipeline = Pipeline(**pipeline_kwargs)

In [11]:
# Run the pipeline's extract step and keep its result as the working frame.
data_frame = pipeline.extract()

In [12]:
# Display the frame as returned by extract() (source column names still in place).
data_frame

Unnamed: 0,NIS_CD,GENDER_CD,AGE_CD,ADULT_FL(18+),SENIOR_FL(65+),MUNICIPALITY,PROVINCE,REGION,EERSTELIJNSZONE,FULLY_VACCINATED_AMT,...,FULLY_VACCINATED_AZ_AMT,PARTLY_VACCINATED_AZ_AMT,FULLY_VACCINATED_PF_AMT,PARTLY_VACCINATED_PF_AMT,FULLY_VACCINATED_MO_AMT,PARTLY_VACCINATED_MO_AMT,FULLY_VACCINATED_JJ_AMT,FULLY_VACCINATED_OTHER_AMT,PARTLY_VACCINATED_OTHER_AMT,POPULATION_NBR
0,32011,V,60-69,1,1,Kortemark,WEST-VLAANDEREN,VLAAMS GEWEST,Houtland en Polder,341,...,50,2,287,0,0,0,4,0,0,351
1,23016,V,40-49,1,0,Dilbeek,VLAAMS-BRABANT,VLAAMS GEWEST,Pajottenland,2371,...,294,3,1603,36,23,0,451,0,0,2829
2,37015,M,30-39,1,0,Tielt,WEST-VLAANDEREN,VLAAMS GEWEST,RITS,1097,...,23,0,1046,8,7,0,20,1,0,1278
3,44085,M,12-17,0,0,Lovendegem,OOST-VLAANDEREN,VLAAMS GEWEST,West-Meetjesland,796,...,0,0,789,8,2,0,5,0,0,865
4,11039,V,50-59,1,0,Schilde,ANTWERPEN,VLAAMS GEWEST,Voorkempen,1484,...,338,4,962,5,34,0,150,0,0,1588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6593,12014,V,12-17,0,0,Heist-Op-Den-Berg,ANTWERPEN,VLAAMS GEWEST,Bonstato,1189,...,0,0,1173,5,0,0,16,0,0,1290
6594,13021,M,0-11,0,0,Meerhout,ANTWERPEN,VLAAMS GEWEST,Zuiderkempen,31,...,0,0,31,0,0,0,0,0,0,636
6595,45065,M,90-100+,1,1,Rozebeke,OOST-VLAANDEREN,VLAAMS GEWEST,Vlaamse Ardennen,21,...,18,0,3,0,0,0,0,0,0,24
6596,73028,V,60-69,1,0,Herstappe,LIMBURG,VLAAMS GEWEST,ZOLim,5,...,0,0,5,0,0,0,0,0,0,5


In [13]:
# Apply the pipeline's transform step to the extracted frame.
# NOTE(review): the result is bound back to `data_frame`, so re-running this
# cell passes an already-transformed frame to transform() again — presumably it
# should only be re-run after re-running the extract cell; confirm.
data_frame = pipeline.transform(data_frame)

In [14]:
# Display the frame after the transform step.
data_frame

Unnamed: 0,nis_code,sex,agegroup,plus18,plus65,vaccinated_fully_total,vaccinated_partly_total,vaccinated_fully_astrazeneca,vaccinated_partly_astrazeneca,vaccinated_fully_pfizer,vaccinated_partly_pfizer,vaccinated_fully_moderna,vaccinated_partly_moderna,vaccinated_fully_johnsonandjohnson,vaccinated_fully_other,vaccinated_partly_other,population_by_agecategory_and_municipality
0,32011,V,60-69,True,True,341,2,50,2,287,0,0,0,4,0,0,351
1,23016,V,40-49,True,False,2371,39,294,3,1603,36,23,0,451,0,0,2829
2,37015,M,30-39,True,False,1097,8,23,0,1046,8,7,0,20,1,0,1278
3,44085,M,12-17,False,False,796,8,0,0,789,8,2,0,5,0,0,865
4,11039,V,50-59,True,False,1484,9,338,4,962,5,34,0,150,0,0,1588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6593,12014,V,12-17,False,False,1189,5,0,0,1173,5,0,0,16,0,0,1290
6594,13021,M,0-11,False,False,31,0,0,0,31,0,0,0,0,0,0,636
6595,45065,M,90-100+,True,True,21,0,18,0,3,0,0,0,0,0,0,24
6596,73028,V,60-69,True,False,5,0,0,0,5,0,0,0,0,0,0,5


In [None]:
list = [
    pipeline.data_class(**kwargs) for kwargs in data_frame.to_dict(orient="records")
]

In [None]:
list

In [None]:
# Pass the frame through the pipeline's metadata handling and keep the returned frame.
# NOTE(review): what handle_metadata changes is not visible in this notebook —
# presumably driven by the "metadata_handler" settings in `pl`; confirm.
data_frame = pipeline.handle_metadata(data_frame)

In [None]:
# Display the frame returned by handle_metadata().
data_frame

In [None]:
# Collapse rows that share the same key combination by summing the remaining
# columns, then turn the group keys back into ordinary columns.
# NOTE(review): none of these key columns appear in the vaccination frame shown
# after the transform step — this cell looks carried over from a different
# dataset; confirm before running.
population_keys = [
    'year',
    'nis',
    'sex',
    'nationality_code',
    'nationality_text_nl',
    'nationality_text_fr',
    'marital_status_code',
    'marital_status_text_nl',
    'marital_status_text_fr',
    'age',
]
summed_by_key = data_frame.groupby(population_keys).sum()
data_frame = summed_by_key.reset_index()
data_frame

In [None]:
# For every row, the number of rows that share its key combination
# (computed on the 'population' column, aligned with the frame's index).
size_keys = ['year', 'nis', 'sex', 'nationality_code', 'marital_status_code', 'age']
rows_per_key = data_frame.groupby(size_keys)['population'].transform('size')
rows_per_key

In [None]:

# Show only the rows whose key combination occurs more than once.
duplicate_keys = ['year', 'nis', 'sex', 'nationality_code', 'marital_status_code', 'age']
occurs_more_than_once = data_frame.groupby(duplicate_keys)['nis'].transform('size') > 1
data_frame[occurs_more_than_once]


In [34]:
# Summary statistics (count, mean, std, quartiles, min/max) of the frame.
# NOTE(review): the saved output (execution count 34) lists nis_district and
# number_of_deaths, which are not columns of the vaccination frame displayed
# earlier — it looks left over from another dataset; re-run or remove.
data_frame.describe()

Unnamed: 0,nis_district,sex,number_of_deaths
count,769390.0,769390.0,769390.0
mean,45619.111244,1.476951,1.822042
std,22528.129442,0.499469,1.389211
min,11000.0,1.0,1.0
25%,25000.0,1.0,1.0
50%,44000.0,1.0,1.0
75%,62000.0,2.0,2.0
max,93000.0,2.0,33.0


In [11]:
# Print the per-column dtype / non-null overview of the frame.
# NOTE(review): the saved output still lists the source column names (NIS_CD, ...)
# and carries execution count 11, so it appears to have been captured before the
# rename transform — re-run to refresh.
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6598 entries, 0 to 6597
Data columns (total 21 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   NIS_CD                       6598 non-null   int64 
 1   GENDER_CD                    6598 non-null   object
 2   AGE_CD                       6598 non-null   object
 3   ADULT_FL(18+)                6598 non-null   int64 
 4   SENIOR_FL(65+)               6598 non-null   int64 
 5   MUNICIPALITY                 6598 non-null   object
 6   PROVINCE                     6598 non-null   object
 7   REGION                       6598 non-null   object
 8   EERSTELIJNSZONE              6576 non-null   object
 9   FULLY_VACCINATED_AMT         6598 non-null   int64 
 10  PARTLY_VACCINATED_AMT        6598 non-null   int64 
 11  FULLY_VACCINATED_AZ_AMT      6598 non-null   int64 
 12  PARTLY_VACCINATED_AZ_AMT     6598 non-null   int64 
 13  FULLY_VACCINATED_PF_AMT      6598

In [12]:
# Count missing values per column.
# NOTE(review): in the saved output the only column with nulls (22) is
# EERSTELIJNSZONE, which the drop_columns transform removes.
data_frame.isnull().sum()

NIS_CD                          0
GENDER_CD                       0
AGE_CD                          0
ADULT_FL(18+)                   0
SENIOR_FL(65+)                  0
MUNICIPALITY                    0
PROVINCE                        0
REGION                          0
EERSTELIJNSZONE                22
FULLY_VACCINATED_AMT            0
PARTLY_VACCINATED_AMT           0
FULLY_VACCINATED_AZ_AMT         0
PARTLY_VACCINATED_AZ_AMT        0
FULLY_VACCINATED_PF_AMT         0
PARTLY_VACCINATED_PF_AMT        0
FULLY_VACCINATED_MO_AMT         0
PARTLY_VACCINATED_MO_AMT        0
FULLY_VACCINATED_JJ_AMT         0
FULLY_VACCINATED_OTHER_AMT      0
PARTLY_VACCINATED_OTHER_AMT     0
POPULATION_NBR                  0
dtype: int64

In [None]:
# Sum the remaining columns within each key combination and turn the group
# keys back into ordinary columns.
# NOTE(review): 'year', 'week', 'nis_district' and 'date' are not columns of the
# vaccination frame shown after the transform step — presumably this cell
# belongs to a different dataset; confirm before running.
weekly_keys = ['year', 'week', 'nis_district', 'sex', 'agegroup', 'date']
summed_per_week = data_frame.groupby(weekly_keys).sum()
data_frame = summed_per_week.reset_index()
data_frame

In [None]:
# Discard every row that contains at least one missing value.
# Rebinding the name (instead of inplace=True) leaves the previously displayed
# frame object untouched and keeps the step chainable.
data_frame = data_frame.dropna()

In [None]:
# Re-count missing values per column — presumably to confirm that the
# preceding dropna cell left none.
data_frame.isnull().sum()

In [None]:
# Scratch check: show the Python type of an integer literal such as a year.
test = 2021
type(test)

In [None]:
# Take ONE snapshot of the current date and derive both boundaries from it, so
# the first and last day always belong to the same year (sampling the clock
# twice could straddle midnight on New Year's Eve).
current_date = datetime.now().date()
starting_day_of_current_year = current_date.replace(month=1, day=1)
ending_day_of_current_year = current_date.replace(month=12, day=31)
starting_day_of_current_year

In [None]:
# Check whether the frequency differs from "daily".
# Strings must be compared by value (!=); `is not` compares object identity,
# whose result depends on interning and triggers a SyntaxWarning on Python 3.8+.
frequency = "daily"
frequency != "daily"