In [1]:
# Python packages
import sys
sys.path.append('../')
from datetime import datetime
import numpy as np
import pandas as pd
import pickle
# Added (new in developing predict)
from itertools import product


from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold, GridSearchCV, cross_validate, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, LassoCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor

from xgboost import XGBRegressor, XGBClassifier

# Custom functions
import src.settings as settings
import src.mapper_cols as mapper_cols
from src.run_all.main_get_data import get_data, get_data_predict
from src.run_all.main_preprocess import preprocess_data, preprocess_data_predict
from src.utilities.utilities import get_latest_file, list_filenames

# Adjust the pandas display settings used when rendering frames in this notebook.
pd.options.display.max_rows = 500       # render up to 500 rows
pd.options.display.max_columns = 500    # render up to 500 columns
pd.options.display.width = 1000         # display width in characters
# NOTE(review): a float_format is set right below, so this precision setting
# presumably has no visible effect on float columns - confirm which one is intended.
pd.options.display.precision = 2        # decimal places for float display
pd.options.display.float_format = '{:.15f}'.format  # render floats with 15 decimals

# Load dataframe to extend features for the future

In [2]:
# Location and name of the previously saved WMO dataset (parquet file).
datapath = '../data/'
filename = 'df_get_data_WMO_WIJK_HUISHOUDENS_BEVOLKING_HEFFING_202104042111.parquet.gzip'
# Read the file into a DataFrame; the path is the folder and file name concatenated.
df_get_data_WMO = pd.read_parquet(f'{datapath}{filename}')

In [3]:
# Work on a deep copy so the loaded frame stays untouched. The existing index
# (codering_regio, interval) is kept as-is; calling reset_index() before the copy
# would turn those index levels into regular columns instead.
df_get_data = df_get_data_WMO.copy(deep=True)
# Preview the first five rows.
df_get_data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,perioden,typemaatwerkarrangement,wmoclienten,wmoclientenper1000inwoners,aantalinkomensontvangers,aantalinwoners,actieven1575jaar,afstandtotgrotesupermarkt,afstandtothuisartsenpraktijk,afstandtotkinderdagverblijf,afstandtotschool,alandbouwbosbouwenvisserij,bedrijfsmotorvoertuigen,bedrijfsvestigingentotaal,bevolkingsdichtheid,bfnijverheidenenergie,bouwjaarvanaf2000,bouwjaarvoor2000,dekkingspercentage,eenpersoonshuishoudens,eigendomonbekend,geboorterelatief,geboortetotaal,gehuwd,gemeentenaam,gemelectriciteitsverbruikappartement,gemelectriciteitsverbruikeigenwoning,gemelectriciteitsverbruikhoekwoning,gemelectriciteitsverbruikhuurwoning,gemelectriciteitsverbruiktussenwoning,gemelectriciteitsverbruiktweeondereenkapwoning,gemelectriciteitsverbruikvrijstaandewoning,gemgasverbruikappartement,gemgasverbruikeigenwoning,gemgasverbruikhoekwoning,gemgasverbruikhuurwoning,gemgasverbruiktussenwoning,gemgasverbruiktweeondereenkapwoning,gemgasverbruikvrijstaandewoning,gemgestandaardiseerdinkomenvanhuish,gemiddeldaardgasverbruiktotaal,gemiddeldehuishoudensgrootte,gemiddeldelektriciteitsverbruiktotaal,gemiddeldewoningwaarde,gemiddeldinkomenperinkomensontvanger,gemiddeldinkomenperinwoner,gescheiden,geweldsenseksuelemisdrijven,gihandelenhoreca,hjvervoerinformatieencommunicatie,huishonderofrondsociaalminimum,huishoudensmeteenlaaginkomen,huishoudensmetkinderen,huishoudenstot110vansociaalminimum,huishoudenstot120vansociaalminimum,huishoudenstotaal,huishoudenszonderkinderen,huurwoningentotaal,inbezitoverigeverhuurders,inbezitwoningcorporatie,jongerenmetjeugdzorginnatura,k0tot15jaar,k15tot25jaar,k20huishoudensmethoogsteinkomen,k20personenmethoogsteinkomen,k25tot45jaar,k40huishoudensmetlaagsteinkomen,k40personenmetlaagsteinkomen,k45tot65jaar,k65jaarofouder,klfinancieledienstenonroerendgoed,koopwoningen,mannen,marokko,matevanstedelijkheid,mediaanvermogenvanparticulierehuish,meestvoorkomendepostcode,mnzakelijkedienstverlening,motorfietsen,nederlandseantillen
enaruba,nettoarbeidsparticipatie,nietwesterstotaal,omgevingsadressendichtheid,ongehuwd,opleidingsniveauhoog,opleidingsniveaulaag,opleidingsniveaumiddelbaar,oppervlakteland,oppervlaktetotaal,oppervlaktewater,overignietwesters,percentagebewoond,percentageeengezinswoning,percentagejongerenmetjeugdzorg,percentagemeergezinswoning,percentageonbewoond,percentagewerknemers,percentagewoningenmetstadsverwarming,percentagezelfstandigen,personenautos6jaarenouder,personenautosbrandstofbenzine,personenautosjongerdan6jaar,personenautosnaaroppervlakte,personenautosoverigebrandstof,personenautosperhuishouden,personenautostotaal,personenpersoortuitkeringao,personenpersoortuitkeringaow,personenpersoortuitkeringbijstand,personenpersoortuitkeringww,rucultuurrecreatieoverigediensten,scholenbinnen3km,sterfterelatief,sterftetotaal,suriname,totaaldiefstaluitwoningschuured,turkije,vernielingmisdrijftegenopenbareorde,verweduwd,vrouwen,westerstotaal,woningvoorraad,alleenstaande_mannen,alleenstaande_totaal_mannen_en_vrouwen,alleenstaande_vrouwen,ouder_in_eenouderhuishouden_mannen,ouder_in_eenouderhuishouden_totaal_mannen_en_vrouwen,ouder_in_eenouderhuishouden_vrouwen,overig_lid_huishouden_mannen,overig_lid_huishouden_totaal_mannen_en_vrouwen,overig_lid_huishouden_vrouwen,"partner_in_paar,_geen_kind(eren)_mannen","partner_in_paar,_geen_kind(eren)_totaal_mannen_en_vrouwen","partner_in_paar,_geen_kind(eren)_vrouwen",partner_in_paar_met_kind(eren)_mannen,partner_in_paar_met_kind(eren)_totaal_mannen_en_vrouwen,partner_in_paar_met_kind(eren)_vrouwen,persoon_in_institutioneel_huishouden_mannen,persoon_in_institutioneel_huishouden_totaal_mannen_en_vrouwen,persoon_in_institutioneel_huishouden_vrouwen,persoon_in_particulier_huishouden_mannen,persoon_in_particulier_huishouden_totaal_mannen_en_vrouwen,persoon_in_particulier_huishouden_vrouwen,thuiswonend_kind_mannen,thuiswonend_kind_totaal_mannen_en_vrouwen,thuiswonend_kind_vrouwen,totaal_personen_mannen,totaal_personen_totaal_mannen_en_vrouwen,totaal_pers
onen_vrouwen,popaantalbasisonderwijsscholenbinnen3km,popaantalbioscopenbinnen10km,popaantalgrotesupermarktenbinnen3km,popaantalhuisartsenpraktijkenbinnen3km,popaantalkinderdagverblijvenbinnen3km,popaantalrestaurantsbinnen3km,popaantalscholenhavovwobinnen5km,popaantalscholenvmbobinnen5km,popaantalziekenhuizenbinnen20km,popafstandtotbibliotheek,popafstandtotbioscoop,popafstandtotgrotesupermarkt,popafstandtothuisartsenpost,popafstandtothuisartsenpraktijk,popafstandtotkinderdagverblijf,popafstandtotopenbaargroen,popafstandtotoprithoofdverkeersweg,popafstandtotrestaurant,popafstandtotschoolbasisonderwijs,popafstandtotschoolhavovwo,popafstandtotschoolvmbo,popafstandtotsportterrein,popafstandtottreinstation,popafstandtotziekenhuis,popafstandtotzwembad,popagrarischterreinopp,popagrarischterreinperc,popagrarischterreinperinwoner,popakkerbouw,popalandbouwbosbouwenvisserij,popalandbouwbosbouwenvisserijbanen,popalandbouwbosbouwenvisserijbanenrelatief,popaow,poparbeidsongeschiktheidtotaal,popbasisonderwijs,popbebouwdterreinopp,popbebouwdterreinperc,popbebouwdterreinperinwoner,popbedrijfsmotorvoertuigen,popbedrijfsvestigingentotaal,popberoepsbegeleidendeleerweg,popberoepsopleidendeleerweg,popbevolkingsdichtheid,popbevolkingsgroei,popbevolkingsgroeirelatief,popbfnijverheidenenergie,popbfnijverheidenenergiebanen,popbfnijverheidenenergiebanenrelatief,popbijstandgerelateerdtotaowleeftijd,popbijstandgerelateerdvanafaowleeftijd,popbijstandtotdeaowleeftijd,popbinnenlandsmigratiesaldo,popbinnenlandsmigratiesaldorelatief,popbinnenwater,popblijvendgrasland,popbosenopennatuurlijkterreinopp,popbosenopennatuurlijkterreinperc,popbosenopennatuurlijkterreinperinwoner,popbroninkomenalswerknemergembestinkomen,popbroninkomenalswerknemergemgestandaardinkomen,popbroninkomenalswerknemermediaaninkomen,popbroninkomenalszelfstandigegembestinkomen,popbroninkomenalszelfstandigegemgestandaardinkomen,popbroninkomenalszelfstandigemediaaninkomen,popbronoverdrachtsinkomengembestinkomen,popbronoverdrachtsinkomen
gemgestandaardinkomen,popbronoverdrachtsinkomenmediaaninkomen,popbuitenwater,popbuurten,popcodea,popcodeb,popcodec,popcoded,popcodee,popcodef,popcodeg,popcodeh,popcodei,popcodej,popcodek,popcodel,popcodem,popcoden,popcodeo,popcodep,popcodeq,popcoder,popdunnemest,popedelpelsdieren,popeenpersoonshuishoudens,popeenpersoonshuishoudensrelatief,popeigendomonbekend,popemigratie,popfosfaatuitscheiding,popgeboorte,popgeboorteoverschot,popgeboorteoverschotrelatief,popgeboorterelatief,popgehuwdouderdan,popgehuwdtot,popgeiten,popgemeentelijkeenwaterschapswegen,popgemeenten,popgemiddeldaantalinwoners,popgemiddeldehuishoudensgrootte,popgemiddeldewoningwaarde,popgescheidenouderdan,popgescheidentot,popgftafval,popgihandelenhoreca,popgncommercieledienstverleningbanen,popgncommercieledienstverleningbanenrelatief,popgrijzedruk,popgroenedruk,popgroenvoedergewassen,popgrofhuishoudelijkrestafval,pophjvervoerinformatieencommunicatie,pophogerberoepsonderwijs,pophogerberoepsonderwijsbachelor,pophuishoudelijkrestafval,pophuishoudensmetkinderen,pophuishoudensmetkinderenrelatief,pophuishoudenszonderkinderen,pophuishoudenszonderkinderenrelatief,pophuurwoningen,popid,popimmigratie,popinwoners15jaarofouder,popinwonersop31december,popjongerdan5jaarleeftijdsgroep,popjongerdan5jaarrelatieveleeftijdsgroep,popk10tot15jaarleeftijdsgroep,popk10tot15jaarrelatieveleeftijdsgroep,popk15tot20jaarleeftijdsgroep,popk15tot20jaarrelatieveleeftijdsgroep,popk20tot25jaarleeftijdsgroep,popk20tot25jaarrelatieveleeftijdsgroep,popk25tot45jaarleeftijdsgroep,popk25tot45jaarrelatieveleeftijdsgroep,popk45tot65jaarleeftijdsgroep,popk45tot65jaarrelatieveleeftijdsgroep,popk5tot10jaarleeftijdsgroep,popk5tot10jaarrelatieveleeftijdsgroep,popk65tot80jaarleeftijdsgroep,popk65tot80jaarrelatieveleeftijdsgroep,popk80jaarofouderleeftijdsgroep,popk80jaarofouderrelatieveleeftijdsgroep,popkaliuitscheiding,popkalkoenen,popkippen,popkleinchemischafval,popklfinancieledienstenonroerendgoed,popkonijnen,popkoopwoningen,popkoppelvariabeleregioc
ode,popland,popmannen,popmarokko,popmarokkorelatief,popmatigstedelijk,popmiddelbaarberoepsonderwijs,popmigratiesaldo,popmigratiesaldorelatief,popmnzakelijkedienstverlening,popmotorfietsen,popmotorfietsenrelatief,popnaama,popnaamb,popnaamc,popnaamd,popnaame,popnaamf,popnaamg,popnaamh,popnaami,popnaamj,popnaamk,popnaaml,popnaamm,popnaamn,popnaamo,popnaamp,popnaamq,popnaamr,popnatuurlijkgrasland,popnederlandseachtergrond,popnederlandseachtergrondrelatief,popnietstedelijk,popnietwoningen,popnieuwbouwwoningen,popnieuwvormingen,popomgevingsadressendichtheid,popongehuwdouderdan,popongehuwdtot,popoudpapierenkarton,popounietcommercieledienstverleningbanen,popounietcommercieledienstverleningbanenrelatief,popoverigedoodsoorzaken,popoverighuishoudelijkafval,popoverignietwestersemigratieachtergrond,popoverignietwestersemigratieachtergrondrelatief,popoverigpluimvee,poppaardenenponys,popparticulierehuishoudensexclstudenten,popparticulierehuishoudensexclstudentengembestinkomen,popparticulierehuishoudensexclstudentengemgestandaardinkomen,popparticulierehuishoudensexclstudentenmediaaninkomen,poppercvoertuigenmetbromfietskenteken,poppersonenautos,poppersonenautosparticulieren,poppersonenautosparticulierenrelatief,poppersonenautosrelatief,popprovincialewegen,poprecreatieterreinopp,poprecreatieterreinperc,poprecreatieterreinperinwoner,poprijkswegen,poprucultuurrecreatieoverigediensten,poprundvee,popsaldovermeerderingwoningenrelatief,popschapen,popsemibebouwdterreinopp,popsemibebouwdterreinperc,popsemibebouwdterreinperinwoner,popslachteenden,popspeciaalbasisonderwijs,popspecialescholen,popsterfte,popsterfterelatief,popsterkstedelijk,popstikstofuitscheiding,popsuriname,popsurinamerelatief,poptextiel,poptijdelijkgrasland,poptotaalaantalbanen,poptotaalaantalparticulierehuishoudens,poptotaalhuishoudelijkafval,poptotaalmetmigratieachtergrond,poptotaalmetmigratieachtergrondrelatief,poptotaalnietwestersemigratieachtergrond,poptotaalnietwestersemigratieachtergrondrelatief,poptotalebevolking,popt
otaledruk,poptotaleoppervlakte,poptotaleoppervlaktecultuurgrond,poptotaleweglengte,poptotdeaowleeftijd,poptuinbouwonderglas,poptuinbouwopengrond,popturkije,popturkijerelatief,poptypeeenoudergezingembestinkomen,poptypeeenoudergezingemgestandaardinkomen,poptypeeenoudergezinmediaaninkomen,poptypeeenpersoonshuishoudengembestinkomen,poptypeeenpersoonshuishoudengemgestandaardinkomen,poptypeeenpersoonshuishoudenmediaaninkomen,poptypepaarmetkinderengembestinkomen,poptypepaarmetkinderengemgestandaardinkomen,poptypepaarmetkinderenmediaaninkomen,poptypepaarzonderkindgembestinkomen,poptypepaarzonderkindgemgestandaardinkomen,poptypepaarzonderkindmediaaninkomen,popuitkeringsontvangerstotaalmediaaninkomen,popuitwendigedoodsoorzaken,popvanafdeaowleeftijd,popvarkens,popvastemest,popverhuismobiliteitrelatief,popverkeersterreinopp,popverkeersterreinperc,popverkeersterreinperinwoner,popverpakkingsglas,popvertreknaaranderegemeente,popverweduwdouderdan,popverweduwdtot,popvestiginguitanderegemeente,popvoertuigenmetbromfietskenteken,popvoormaligenederlandseantillenaruba,popvoormaligenederlandseantillenarubarelatief,popvoorraadop1januari,popvoortgezetonderwijs,popvoortgezetonderwijsdiploma,popvrouwen,popwajonguitkering,popwaouitkering,popwatertotaal,popweinigstedelijk,popwerkloosheid,popwestersemigratieachtergrond,popwestersemigratieachtergrondrelatief,popwetenschappelijkonderwijs,popwiauitkeringwgaregeling,popwijken,popwomasterdoctoraal,popwoningbeziteigenwoninggembestinkomen,popwoningbeziteigenwoninggemgestandaardinkomen,popwoningbeziteigenwoningmediaaninkomen,popwoningbezithuurwoninggembestinkomen,popwoningbezithuurwoninggemgestandaardinkomen,popwoningbezithuurwoningmediaaninkomen,popwoningdichtheid,popwoningen,popzeersterkstedelijk,popziektenvanademhalingsstelsel,popziektenvanhartenvaatstelsel,begraafplaatsrechten_gemeenteheffingeuroinwoner,precariobelasting_gemeenteheffingeuroinwoner,reinigingsrechten_en_afvalstoffenheffing_gemeenteheffingeuroinwoner,rioolheffing_gemeenteheffingeuroinw
oner,secretarieleges_burgerzaken_gemeenteheffingeuroinwoner,toeristenbelasting_gemeenteheffingeuroinwoner,totaal_onroerendezaakbelasting_gemeenteheffingeuroinwoner,begraafplaatsrechten_gemeenteheffing1000euro,precariobelasting_gemeenteheffing1000euro,reinigingsrechten_en_afvalstoffenheffing_gemeenteheffing1000euro,rioolheffing_gemeenteheffing1000euro,secretarieleges_burgerzaken_gemeenteheffing1000euro,toeristenbelasting_gemeenteheffing1000euro,totaal_onroerendezaakbelasting_gemeenteheffing1000euro
codering_regio,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 
385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1
GM1680,2019,2019,Hulp bij het huishouden,280.0,11.0,,25386,,2.1,2.4,1.2,1.2,290.0,,2330.0,92.0,335.0,11.0,89.0,,3254,0.0,7,176,11825,Aa en Hunze,1860.0,3320.0,2510.0,2260.0,2220.0,2880.0,3530.0,910.0,1890.0,1430.0,1380.0,1260.0,1520.0,2130.0,,1740.0,2.2,3030.0,246.0,,,1844,,475.0,145.0,,,3562,,,11152,4336,26.0,6.0,20.0,605.0,3462,2610,,,4347,,,8354,6613,190.0,74.0,12659,13,5.0,,.,570.0,1760,33,67.0,632,280.0,10096,5790.0,4810.0,8650.0,27609,27887,279,506,95.0,92.0,10.5,8.0,5.0,80.0,,20.0,,11080,,53.0,3565,1.3,14645,1090.0,6130.0,400.0,380.0,325.0,1.8,11,280,65,,15,,1621,12727,1075,11257,2735.0,6170.0,3430.0,310.0,1175.0,850.0,325.0,665.0,330.0,8035.0,16155.0,8130.0,5545.0,11120.0,5575.0,345.0,635.0,280.0,23290.0,46845.0,23560.0,6340.0,11560.0,5245.0,23635.0,47480.0,23840.0,1.8,0.3,1.5,1.3,3.1,5.1,0.0,0.4,1.4,2.5,11.6,2.1,11.1,2.4,1.2,,1.6,1.1,1.2,11.3,5.2,,12.9,11.1,7.7,,,,48.6,290.0,0.1,1.0,6200.0,1090.0,1723.0,,,,2020.0,2330.0,203.0,631.0,92.0,59.0,2.3,335.0,0.6,10.0,440.0,30.0,380.0,106.0,4.2,2.79,16.9,,,,,,,,,,,,,0.0,54.0,AM01,AR05,CR07,CP0700,CS070,GG0706,JZ04,KK42,LB2205,LG02,LD01,PV22,RE01,RA0102,RT12,TR12,VR03,ZK06,278.0,0.0,3254.0,29.2,0.2,44.0,33.0,176.0,-104.0,-4.1,6.9,53.9,11825.0,128.0,543.0,,25415.5,2.23,246.0,8.4,1844.0,5486.0,475.0,2.8,45.0,48.2,36.7,8.0,602.0,145.0,510.0,,3795.0,3562.0,31.9,4336.0,38.9,25.7,1563.0,101.0,21924.0,25445.0,922.0,3.6,1366.0,5.4,1579.0,6.2,1031.0,4.1,4347.0,17.1,8354.0,32.9,1174.0,4.6,5107.0,20.1,1506.0,5.9,,0.0,468470.0,46.0,190.0,0.0,74.09999999999998,GM1680,276.09,12659.0,13.0,0.1,0.0,,57.0,2.2,570.0,1758.0,69.0,Groningen ...,Noord-Nederland ...,Noord-Drenthe ...,Noord-Drenthe ...,Noord-Drenthe ...,GGD Drenthe ...,Drenthe ...,Noord ...,Drentse Veenkoloniën en Hondsrug ...,Veenkoloniën en Oldambt ...,Noord-Nederland ...,Drenthe ...,Noord-Nederland ...,Drenthe ...,Gerechtshof Arnhem-Leeuwarden ...,"Groningse, Friese en Drentse Zandgronden ...",Drenthe ...,Drenthe 
...,14.0,23679.0,93.3,21250.0,387.0,32.0,94.0,280.0,30.3,10096.0,1569.0,2.7,44.0,68.0,4119.0,506.0,2.0,0.0,867.0,,,,,59.0,14645.0,14373.0,566.0,577.0,57.0,,,,26.0,325.0,13723.0,-6.9,3242.0,,,,0.0,0.0,0.0,280.0,11.0,0.0,113.0,65.0,0.3,127.0,12.1,6.1,11152.0,16289.0,1707.0,6.7,632.0,2.5,25386.0,84.9,278.87,1487562.0,627.0,1820.0,0.0,0.4,15.0,0.1,,,,,,,,,,,,,8020.0,14.0,6200.0,0.0,16.0,94.6,,,,545.0,1305.0,7.4,1621.0,1411.0,1499.0,33.0,0.1,11257.0,1406.0,,12727.0,420.0,310.0,2.79,4140.0,310.0,1075.0,4.2,145.0,170.0,21.0,,,,,,,,41.0,2649.0,0.0,18.0,86.0,5.0,0.0,81.0,82.0,12.0,57.0,169.0,130.0,0.0,2065.0,2072.0,310.0,1447.0,4301.0
GM0197,2019,2019,Hulp bij het huishouden,1080.0,40.0,,27011,,1.4,1.3,0.8,0.8,340.0,,2170.0,280.0,435.0,12.0,88.0,,3370,0.0,9,240,11825,Aalten,1870.0,3190.0,2630.0,2180.0,2560.0,3180.0,3710.0,810.0,1500.0,1270.0,1120.0,1150.0,1450.0,1920.0,,1380.0,2.3,2900.0,206.0,,,1503,,470.0,85.0,,,4090,,,11399,3939,29.0,10.0,19.0,605.0,4294,3030,,,5415,,,8308,5964,185.0,71.0,13559,17,4.0,,.,390.0,1435,33,73.0,898,782.0,11993,4240.0,6250.0,9740.0,9654,9705,52,496,96.0,82.0,8.9,18.0,4.0,85.0,,15.0,,12255,,147.0,1940,1.2,14195,1130.0,5540.0,310.0,270.0,265.0,4.6,9,256,39,,313,,1690,13452,2133,11971,11175.0,24040.0,12795.0,1075.0,4275.0,3260.0,990.0,2080.0,1090.0,27895.0,55665.0,27780.0,24800.0,49650.0,24860.0,1275.0,3010.0,1785.0,95695.0,190255.0,94570.0,29760.0,54545.0,24785.0,96970.0,193265.0,96355.0,4.6,0.1,2.2,2.7,3.8,7.2,1.3,1.6,0.8,1.6,14.5,1.4,14.4,1.3,0.8,,1.0,1.0,0.8,4.3,1.8,,4.3,14.4,5.3,,,,15.3,340.0,0.2,3.0,5600.0,1140.0,2270.0,,,,1870.0,2170.0,238.0,668.0,280.0,110.0,4.1,435.0,2.6,27.0,330.0,30.0,290.0,102.0,3.8,0.52,42.2,,,,,,,,,,,,,0.0,29.0,AM09,AR03,CR14,CP1400,CS140,GG1413,JZ25,KK43,LB2508,LG04,LD02,PV25,RE02,RA0207,RT12,TR10,VR06,ZK04,374.0,0.0,3370.0,29.6,0.2,98.0,117.0,240.0,-16.0,-0.6,8.9,52.1,11825.0,69.0,390.0,,27066.0,2.33,206.0,6.6,1503.0,4478.0,470.0,3.8,40.0,39.6,39.7,17.9,506.0,85.0,593.0,,1232.0,4090.0,35.9,3939.0,34.6,29.0,1644.0,123.0,22717.0,27121.0,1197.0,4.4,1682.0,6.2,1685.0,6.2,1345.0,5.0,5415.0,20.0,8308.0,30.8,1415.0,5.2,4501.0,16.7,1463.0,5.4,,0.0,225344.0,35.0,185.0,0.0,70.8,GM0197,96.54,13559.0,17.0,0.1,8540.0,,25.0,0.9,390.0,1433.0,53.0,Achterhoek ...,Gelderland ...,Achterhoek ...,Achterhoek ...,Achterhoek ...,GGD Noord- en Oost-Gelderland ...,Achterhoek ...,Oost ...,Achterhoek ...,Oostelijk Veehouderijgebied ...,Oost-Nederland ...,Gelderland ...,Oost-Nederland ...,Achterhoek ...,Gerechtshof Arnhem-Leeuwarden ...,Achterhoek ...,Noord- en Oost-Gelderland ...,Arnhem 
...,2.3,23980.0,88.79999999999998,7890.0,509.0,75.0,85.0,782.0,33.9,11993.0,1592.0,2.9,30.0,64.0,2871.0,496.0,1.8,0.0,227.0,,,,,52.0,14193.0,13633.0,505.0,525.0,31.0,,,,0.0,265.0,15506.0,0.4,1774.0,,,,50954.0,82.0,0.0,256.0,9.5,910.0,360.0,39.0,0.1,223.0,20.6,9.5,11399.0,11585.0,3031.0,11.2,898.0,3.3,27011.0,79.29999999999998,97.05,673973.0,421.0,1680.0,0.0,1.7,313.0,1.2,,,,,,,,,,,,,7280.0,10.0,5600.0,81861.0,9.0,84.09999999999998,,,,648.0,725.0,7.4,1690.0,827.0,1399.0,33.0,0.1,11971.0,1616.0,,13452.0,310.0,310.0,0.52,9690.0,230.0,2133.0,7.9,94.0,270.0,4.0,,,,,,,,124.0,2415.0,0.0,31.0,66.0,8.0,0.0,42.0,69.0,11.0,3.0,170.0,214.0,0.0,1142.0,1855.0,287.0,75.0,4584.0
GM0059,2019,2019,Hulp bij het huishouden,900.0,32.0,,27852,,1.6,1.6,0.8,0.7,210.0,,2450.0,272.0,655.0,11.0,89.0,,3532,0.0,10,288,11868,Achtkarspelen,1600.0,3060.0,2340.0,2060.0,2240.0,2790.0,3210.0,880.0,1640.0,1420.0,1260.0,1300.0,1380.0,1800.0,,1500.0,2.4,2690.0,181.0,,,1809,,640.0,145.0,,,4499,,,11717,3686,36.0,5.0,31.0,710.0,4936,3360,,,6190,,,7908,5458,155.0,64.0,14167,16,5.0,,.,365.0,1230,27,67.0,609,427.0,12607,3350.0,7600.0,9800.0,10223,10398,175,531,97.0,88.0,9.2,12.0,3.0,82.0,,18.0,,10660,,145.0,4215,1.3,14875,1200.0,5030.0,650.0,440.0,285.0,3.5,8,234,27,,8,,1568,13685,633,12118,3530.0,7035.0,3495.0,410.0,1650.0,1240.0,320.0,620.0,305.0,7260.0,14520.0,7250.0,7295.0,14550.0,7275.0,165.0,475.0,300.0,27975.0,54910.0,26925.0,9160.0,16535.0,7360.0,28140.0,55385.0,27225.0,3.5,0.1,2.7,1.8,2.8,3.3,0.3,1.1,0.7,3.0,14.7,1.6,14.0,1.6,0.8,,1.3,1.1,0.7,6.8,3.4,,7.2,15.7,6.2,,,,1.7,210.0,0.2,2.0,5120.0,1170.0,2530.0,,,,2835.0,2450.0,322.0,807.0,272.0,-9.0,-0.3,655.0,2.0,26.0,750.0,30.0,600.0,-50.0,-1.8,1.75,81.9,,,,,,,,,,,,,0.0,24.0,AM02,AR05,CR04,CP0400,CS040,GG7206,JZ02,KK42,LB2103,LG03,LD01,PV21,RE01,RA0103,RT12,TR17,VR02,ZK08,278.0,10200.0,3532.0,30.1,0.1,65.0,115.0,288.0,54.0,1.9,10.3,51.8,11868.0,69.0,312.0,,27847.5,2.36,181.0,7.9,1809.0,4648.0,640.0,3.3,43.0,35.1,43.9,6.9,896.0,145.0,602.0,,5488.0,4499.0,38.4,3686.0,31.5,35.8,1752.0,51.0,22916.0,27843.0,1423.0,5.1,1877.0,6.7,1892.0,6.8,1468.0,5.3,6190.0,22.2,7908.0,28.4,1636.0,5.9,4252.0,15.3,1206.0,4.3,,0.0,928087.0,56.0,155.0,0.0,64.0,GM0059,102.23,14167.0,16.0,0.1,0.0,,-14.0,-0.5,365.0,1231.0,44.0,Friesland ...,Noord-Nederland ...,Noord-Friesland ...,Noord-Friesland ...,Noord-Friesland ...,GGD Fryslân ...,Friesland (Fryslân) ...,Noord ...,De Wouden ...,Noordelijk Weidegebied ...,Noord-Nederland ...,Fryslân ...,Noord-Nederland ...,Friesland ...,Gerechtshof Arnhem-Leeuwarden ...,Overig Nederland ...,Fryslân ...,Friesland 
...,5.2,26610.0,95.5,15640.0,417.0,40.0,72.0,427.0,33.5,12607.0,1564.0,2.2,28.0,57.0,9281.0,530.0,1.9,0.0,380.0,,,,,102.0,14873.0,14472.0,520.0,534.0,36.0,,,,0.0,285.0,13617.0,0.9,11327.0,,,,0.0,0.0,0.0,234.0,8.4,0.0,369.0,27.0,0.1,158.0,4.0,7.7,11717.0,22511.0,1242.0,4.5,609.0,2.2,27852.0,78.9,103.98,591573.0,348.0,2340.0,0.0,0.2,8.0,0.0,,,,,,,,,,,,,7450.0,13.0,5120.0,18744.0,18.0,82.79999999999998,,,,421.0,1089.0,6.8,1568.0,1039.0,2835.0,28.0,0.1,12118.0,1717.0,,13685.0,400.0,400.0,1.75,12210.0,450.0,633.0,2.3,119.0,170.0,3.0,,,,,,,,119.0,2106.0,0.0,25.0,67.0,0.0,17.0,117.0,66.0,12.0,0.0,190.0,0.0,466.0,3262.0,1841.0,334.0,0.0,5299.0
GM0482,2019,2019,Hulp bij het huishouden,590.0,29.0,,20069,,0.6,0.8,0.4,0.5,15.0,,1745.0,2286.0,370.0,18.0,82.0,,2442,0.0,11,226,8962,Alblasserdam,1940.0,3160.0,3030.0,2070.0,2890.0,3540.0,4260.0,740.0,1200.0,1290.0,890.0,1060.0,1400.0,2000.0,,1080.0,2.4,2740.0,217.0,,,1257,,415.0,175.0,,,3222,,,8166,2502,39.0,6.0,33.0,605.0,3884,2475,,,4535,,,5215,3960,205.0,61.0,9853,200,2.0,,.,410.0,560,141,70.0,1562,1512.0,8732,3010.0,4570.0,6810.0,878,1006,128,571,97.0,68.0,10.3,32.0,3.0,86.0,,14.0,,7820,,1040.0,1305,1.1,9130,700.0,3710.0,370.0,220.0,160.0,6.0,10,193,103,,547,,1118,10216,1391,8324,8865.0,20970.0,12140.0,880.0,4825.0,3990.0,750.0,1500.0,810.0,21055.0,42270.0,21105.0,21655.0,43505.0,21820.0,490.0,1430.0,990.0,81665.0,166795.0,85100.0,28460.0,53725.0,25235.0,82155.0,168225.0,86090.0,6.0,1.5,4.0,2.4,10.0,10.2,0.9,1.1,9.4,1.1,1.3,0.6,11.2,0.8,0.4,,2.3,0.6,0.5,5.1,4.8,,8.5,8.1,1.5,,,,36.0,15.0,,,3720.0,710.0,2066.0,,,,1606.0,1745.0,243.0,422.0,2286.0,96.0,4.8,370.0,2.1,23.0,400.0,30.0,360.0,25.0,1.2,1.28,30.4,,,,,,,,,,,,,0.0,29.0,AM23,AR08,CR30,CP3002,CS300,GG4816,JZ31,KK45,LB2807,LG09,LD03,PV28,RE07,RA1121,RT13,TR17,VR18,ZK24,4.0,0.0,2442.0,29.9,0.1,40.0,45.0,226.0,33.0,1.6,11.2,55.4,8962.0,0.0,78.0,,20117.0,2.44,217.0,7.8,1257.0,1542.0,415.0,5.8,64.0,36.3,47.5,4.9,,175.0,427.0,,3654.0,3222.0,39.5,2502.0,30.6,38.6,1806.0,78.0,16185.0,20165.0,1273.0,6.3,1293.0,6.4,1305.0,6.5,1170.0,5.8,4535.0,22.6,5215.0,26.0,1318.0,6.6,2906.0,14.5,1054.0,5.3,,0.0,0.0,31.0,205.0,0.0,61.3,GM0482,8.78,9853.0,200.0,1.0,4900.0,,38.0,1.9,410.0,559.0,28.0,Drechtsteden ...,Rotterdam ...,Zuidoost-Zuid-Holland ...,Overig Zuidoost-Zuid-Holland ...,Zuidoost-Zuid-Holland ...,Dienst Gezondheid & Jeugd ZHZ ...,Zuid-Holland Zuid ...,Zuidwest ...,Alblasserwaard en Vijfherenlanden ...,Hollands/Utrechts Weidegebied ...,West-Nederland ...,Zuid-Holland ...,Rotterdam ...,Zuid-Holland Zuid ...,Gerechtshof Den Haag ...,Overig Nederland ...,Zuid-Holland-Zuid ...,Waardenland 
...,23.5,17116.0,85.29999999999998,210.0,480.0,33.0,64.0,1512.0,30.0,8732.0,961.0,,,52.0,3121.0,570.0,2.8,0.0,51.0,,,,,55.0,9129.0,8285.0,413.0,455.0,0.0,,,,7.0,160.0,229.0,3.5,56.0,,,,0.0,0.0,0.0,193.0,9.6,11380.0,143.0,103.0,0.5,62.0,4.2,9.1,8166.0,10277.0,2953.0,14.7,1562.0,7.8,20069.0,83.79999999999998,10.06,19245.0,85.0,1280.0,0.0,1.0,547.0,2.7,,,,,,,,,,,,,5000.0,6.0,3720.0,1145.0,1.0,76.0,,,,325.0,745.0,6.9,1118.0,770.0,1101.0,142.0,0.7,8324.0,1212.0,,10216.0,190.0,200.0,1.28,3590.0,190.0,1391.0,6.9,125.0,200.0,6.0,,,,,,,,948.0,1819.0,0.0,18.0,53.0,26.0,32.0,129.0,83.0,11.0,2.0,221.0,522.0,632.0,2580.0,1656.0,222.0,32.0,4441.0
GM0613,2019,2019,Hulp bij het huishouden,480.0,19.0,,25271,,0.9,0.9,0.7,0.8,30.0,,2065.0,1165.0,260.0,37.0,63.0,,2661,0.0,8,209,10930,Albrandswaard,1950.0,3380.0,3060.0,2150.0,2940.0,3610.0,4500.0,670.0,1280.0,1290.0,930.0,1060.0,1540.0,2040.0,,1180.0,2.4,3010.0,287.0,,,1838,,340.0,270.0,,,4422,,,10180,3097,31.0,5.0,26.0,410.0,4568,2788,,,5718,,,7861,4336,300.0,69.0,12518,291,3.0,,.,630.0,955,324,72.0,3282,1022.0,11398,5930.0,4700.0,8530.0,2169,2376,207,1164,97.0,81.0,5.9,19.0,3.0,85.0,,15.0,,10730,,579.0,1820,1.2,12550,750.0,3990.0,300.0,300.0,235.0,7.0,6,157,1051,,452,,1105,12753,1975,10347,7935.0,19120.0,11165.0,940.0,5220.0,4280.0,765.0,1655.0,865.0,20715.0,41140.0,20470.0,22460.0,45040.0,22625.0,755.0,1700.0,870.0,77770.0,159490.0,81790.0,24955.0,47315.0,22385.0,78525.0,161190.0,82660.0,7.0,4.2,5.1,5.6,13.3,9.4,1.9,4.7,10.6,3.6,7.1,0.9,7.3,0.9,0.7,,1.6,0.8,0.8,3.9,3.7,,8.9,6.2,2.7,,,,66.6,30.0,,,4090.0,740.0,2482.0,,,,1033.0,2065.0,127.0,568.0,1165.0,319.0,12.6,260.0,,,400.0,30.0,300.0,237.0,9.3,2.07,23.5,,,,,,,,,,,,,0.0,26.0,AM22,AR08,CR29,CP2910,CS291,GG4607,JZ29,KK45,LB2802,LG07,LD03,PV28,RE07,RA0919,RT13,TR04,VR17,ZK29,3.0,0.0,2661.0,26.1,0.1,66.0,10.0,209.0,52.0,2.0,8.2,52.8,10930.0,1.0,140.0,,25430.5,2.43,287.0,8.9,1838.0,2181.0,340.0,5.5,68.0,29.4,41.9,2.3,599.0,270.0,671.0,,4948.0,4422.0,43.4,3097.0,30.4,31.0,1833.0,97.0,20703.0,25590.0,1283.0,5.1,1714.0,6.8,1611.0,6.4,1177.0,4.7,5718.0,22.6,7861.0,31.1,1571.0,6.2,3346.0,13.2,990.0,3.9,,0.0,0.0,12.0,300.0,0.0,68.9,GM0613,21.69,12518.0,291.0,1.2,13660.0,,31.0,1.2,630.0,956.0,38.0,Rijnmond ...,Rotterdam ...,Groot-Rijnmond ...,Rijnmond ...,Rijnmond ...,GGD Rotterdam-Rijnmond ...,Rijnmond ...,Zuidwest ...,Rotterdam en omgeving ...,Westelijk Holland ...,West-Nederland ...,Zuid-Holland ...,Rotterdam ...,Rijnmond ...,Gerechtshof Den Haag ...,Deltagebied ...,Rotterdam-Rijnmond ...,Zuid-Hollandse Eilanden 
...,0.2,20014.0,79.2,850.0,437.0,175.0,49.0,1022.0,33.0,11398.0,1313.0,2.1,26.0,40.0,2970.0,1162.0,4.6,0.0,114.0,,,,,71.0,12552.0,11572.0,458.0,497.0,4.0,,,,5.0,235.0,136.0,16.4,195.0,,,,0.0,0.0,0.0,157.0,6.2,270.0,33.0,1051.0,4.2,116.0,3.6,8.1,10180.0,12603.0,5257.0,20.8,3282.0,13.0,25271.0,71.3,23.76,67570.0,150.0,1430.0,0.4,3.3,452.0,1.8,,,,,,,,,,,,,5530.0,7.0,4100.0,10.0,0.0,85.5,,,,464.0,1284.0,5.3,1105.0,1521.0,1792.0,326.0,1.3,10347.0,1662.0,,12753.0,240.0,180.0,2.07,10500.0,300.0,1975.0,7.8,213.0,190.0,7.0,,,,,,,,477.0,2981.0,0.0,10.0,51.0,10.0,0.0,91.0,115.0,18.0,0.0,253.0,257.0,0.0,2301.0,2902.0,457.0,0.0,6407.0


In [4]:
# datapath = '../data/'
# filename = 'df_preprocessed_202104042151_Boerenverstand_Maikel.parquet.gzip'
# df_preprocessed = pd.read_parquet(datapath + filename)

In [5]:
# df_preprocessed

In [6]:
# print(list(df_preprocessed.reset_index().columns))

# Main_predict

In [7]:
# Settings
# Years to predict. Passed explicitly to get_data_predict so that editing this list actually
# changes which prognosis periods are collected (same call shape as the "Get data" cell).
periods = [2020,2021,2022]
# Fitted model loaded from the newest 'best_model_' pickle in datapath; it is used further down
# through trained_model.predict(...).
trained_model = get_latest_file(filename_str_contains='best_model_', datapath=datapath, filetype='pickle')
df_prognoses = get_data_predict(periods=periods, save_all=False, personal_note="")

Get 'regio-indeling'
Number of tables to collect: 1
Pythonic iteration all for table 83859NED
Get 'prognose huishoudens' tables
Number of tables to collect: 1
Pythonic iteration all for table 84526NED


  return func(self, *args, **kwargs)


Get 'prognose bevolking' tables
Number of tables to collect: 1
Pythonic iteration all for table 84528NED
Merge tables
Shape of df_prognoses = (2235, 4)


In [8]:
df_prognoses.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,gemeentenaam,eenpersoonshuishoudens,poptotaalaantalparticulierehuishoudens,aantalinwoners
codering_regio,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GM1680,2020,Aa en Hunze,3300.0,11300.0,
GM1680,2021,Aa en Hunze,3300.0,11300.0,
GM1680,2022,Aa en Hunze,3400.0,11400.0,
GM1680,2023,Aa en Hunze,3400.0,11300.0,
GM1680,2024,Aa en Hunze,3400.0,11300.0,
GM1680,2025,Aa en Hunze,3400.0,11300.0,24600.0
GM0738,2020,Aalburg,1400.0,5200.0,
GM0738,2021,Aalburg,1400.0,5300.0,
GM0738,2022,Aalburg,1400.0,5400.0,
GM0738,2023,Aalburg,1500.0,5400.0,


In [9]:
df = df_prognoses.reset_index().copy()

In [10]:
df.head()

Unnamed: 0,codering_regio,interval,gemeentenaam,eenpersoonshuishoudens,poptotaalaantalparticulierehuishoudens,aantalinwoners
0,GM1680,2020,Aa en Hunze,3300.0,11300.0,
1,GM1680,2021,Aa en Hunze,3300.0,11300.0,
2,GM1680,2022,Aa en Hunze,3400.0,11400.0,
3,GM1680,2023,Aa en Hunze,3400.0,11300.0,
4,GM1680,2024,Aa en Hunze,3400.0,11300.0,


In [12]:
lijst = list(df['interval'].unique())

In [18]:
print("Strategy 2: GroupInterpolateImputer to interpolate for columns:\n", "* With values in the future \n",
          "* With constant values \n", "* That are not used in the trained model")

Strategy 2: GroupInterpolateImputer to interpolate for columns:
 * With values in the future 
 * With constant values 
 * That are not used in the trained model


In [None]:
# datetime_now = datetime.now()
# filename = 'df_get_data_predict_' + datetime.strftime(datetime_now, format='%Y%m%d%H%M')

In [None]:
# df_prognoses.to_csv(datapath+filename+'.csv')

In [None]:
# df_prognoses.to_parquet(datapath + filename + '.parquet.gzip', compression='gzip')

In [None]:
df_get_data.empty

In [None]:
## Get data
# Only hit the CBS API again when the frames loaded earlier are empty.
if df_get_data.empty:
    df_get_data_WMO = get_data(save=True)
    # The guard tests df_get_data and the following cells read df_get_data, so the working copy
    # has to be refreshed as well; otherwise it would stay empty after the download.
    df_get_data = df_get_data_WMO.copy()
if df_prognoses.empty:
    df_prognoses = get_data_predict(periods=periods, save_all=True, personal_note="")

In [None]:
## Preprocess
# Preprocess predict
df_preprocessed_predict = preprocess_data_predict(df_get_data, df_prognoses, save_all=True, personal_note="")

In [None]:
df_preprocessed_predict

In [None]:
# General preprocessing of the extended frame; the target columns are removed afterwards so
# that only feature columns are handed to the model.
df_preprocessed = preprocess_data(
    df=df_preprocessed_predict,
    save_all=False,
    personal_note='predict',
).drop(columns=settings.Y_TARGET_COLS)

In [None]:
## Predict
y_preds = trained_model.predict(df_preprocessed)

In [None]:
y_preds

## Extend strategy

In [None]:
# import src.settings as settings
# from src.preprocess.preprocess import get_and_combine_cbs_tables, rename_and_subset_cols, \
#     get_region_period_spec_val_subtable, downcast_variables_dataframe


# Load the pickled model that is used for predicting
loaded_model = get_latest_file(filename_str_contains='best_model_', datapath=datapath, filetype='pickle')

In [None]:
from os import listdir
from os.path import isfile, join

filename_str_contains = 'best_model_'
datapath = '../data/'

# Names of the regular files directly inside datapath, in alphabetical order.
onlyfiles = sorted(filter(lambda name: isfile(join(datapath, name)), listdir(datapath)))
# Of the names containing the search string, take the one that sorts last.
filename = list(filter(lambda name: filename_str_contains in name, onlyfiles))[-1]
filename

In [None]:
from src.run_all.main_predict import predict_data
periods = [2020,2021,2022]

In [None]:
%time df = predict_data(df_get_data=df_get_data, periods=periods, trained_model=loaded_model)
df

In [None]:
df_get_data.shape

In [None]:
loaded_model

In [None]:
2235+936

In [None]:
periods_2 = [2020,2021,2022]

In [None]:
[str(x) for x in periods_2]

In [None]:
# settings.preprocess['MISSING_BOUNDARY'] = 0.99
df_preprocessed = preprocess_data(df=df, save_all=False, personal_note='test')

In [None]:
settings.preprocess

In [None]:
df_preprocessed

In [None]:
## Get data (for extending get data with future)
# The notebook's import cell does not bring these helpers into scope, so import them here;
# otherwise this cell only works if a later cell happened to be run first.
from src.preprocess.preprocess import get_and_combine_cbs_tables, rename_and_subset_cols


def _is_municipality(names):
    """Return a boolean mask that is True for rows that should be kept as municipalities.

    Rows whose name contains the literal text '(CR)' or '(PV)' and the national total
    'Nederland' are excluded. The markers are matched literally (regex=False): as a regex,
    '(CR)' is a capture group that matches any 'CR' and makes pandas emit a warning.
    Rows with a missing name are excluded too, as they were by the previous `== False` test.
    """
    return (~names.str.contains('(CR)', regex=False, na=True)
            & ~names.str.contains('(PV)', regex=False, na=True)
            & (names != 'Nederland'))


# Determine boundaries for get prognose data: round the last requested year up to the next
# multiple of five (2022 -> 2025).
roundedto5periods = max(periods) + (5 - max(periods)) % 5
total_periods = list(range(min(periods), roundedto5periods+1, 1))

print("Get 'regio-indeling'")
df_regioindeling = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_REGIOINDELING'],
                                              double_trouble_colnames=settings.predict[
                                                  'DICT_DOUBLETROUBLECOLNAMES_REGIOINDELING'],
                                              url=settings.get_data['CBS_OPEN_URL'])
df_regioindeling = rename_and_subset_cols(df=df_regioindeling,
                                          dict_rename=settings.predict['DICT_COLS_RENAMED_REGIOINDELING'],
                                          list_cols=settings.predict['LIST_COLS_SUBSET_REGIOINDELING'])
# Strip surrounding whitespace so the names can be used as merge keys below.
df_regioindeling[settings.predict['LIST_STR_STRIP_COLS_REGIOINDELING']] = df_regioindeling[
    settings.predict['LIST_STR_STRIP_COLS_REGIOINDELING']].apply(lambda x: x.str.strip())

print("Get 'prognose huishoudens' tables")
df_huishouden_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_HUISHOUDEN'],
                                                    url=settings.get_data['CBS_OPEN_URL'])
df_huishouden_prognose['interval'] = df_huishouden_prognose['perioden']
df_huishouden_prognose = df_huishouden_prognose.rename(columns=settings.predict['DICT_COLS_RENAMED_HUISHOUDEN'])
# Keep the point prognosis only.
df_huishouden_prognose = df_huishouden_prognose[df_huishouden_prognose['prognoseinterval'] == 'Prognose']
df_huishouden_prognose = df_huishouden_prognose[
    _is_municipality(df_huishouden_prognose['gemeentenaam'])].copy()
# Scale by 1000 and store as whole numbers (the source values are presumably in thousands).
df_huishouden_prognose['particulierehuishoudens'] = (
    df_huishouden_prognose['particulierehuishoudens'] * 1000).round().astype(int)
# One row per municipality and year, one column per household composition.
df_huishouden_prognose_pivot = pd.pivot_table(data=df_huishouden_prognose, values='particulierehuishoudens',
                                              index=['gemeentenaam', 'interval'],
                                              columns=['samenstellingvanhethuishouden'],
                                              aggfunc='sum').reset_index()
df_huishouden_prognose_pivot = df_huishouden_prognose_pivot[
    df_huishouden_prognose_pivot['interval'].astype(int) <= roundedto5periods]
df_huishouden_prognose_pivot = rename_and_subset_cols(df=df_huishouden_prognose_pivot,
                                                      dict_rename=settings.predict[
                                                          'DICT_COLS_RENAMED_HUISHOUDEN_PIVOT'],
                                                      list_cols=settings.predict[
                                                          'LIST_COLS_SUBSET_HUISHOUDING_PIVOT'])

print("Get 'prognose bevolking' tables")
df_population_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_BEVOLKING'],
                                                    url=settings.get_data['CBS_OPEN_URL'])
df_population_prognose = rename_and_subset_cols(df=df_population_prognose,
                                                dict_rename=settings.predict['DICT_COLS_RENAMED_BEVOLKING'],
                                                list_cols=settings.predict['LIST_COLS_SUBSET_BEVOLKING'])
# The year is the last whitespace-separated token of 'perioden'.
df_population_prognose['interval'] = df_population_prognose['perioden'].apply(lambda x: x.split(' ')[-1])
df_population_prognose = df_population_prognose[
    _is_municipality(df_population_prognose['gemeentenaam'])].copy()
df_population_prognose = df_population_prognose[df_population_prognose['interval'].astype(int) <= roundedto5periods]
df_population_prognose['aantalinwoners'] = (
    df_population_prognose['aantalinwoners'] * 1000).round().astype(int)
df_population_prognose = df_population_prognose.drop(['perioden'], axis=1)

# Merge all dataframes
df_prognoses = pd.merge(df_regioindeling, df_huishouden_prognose_pivot, how='left',
                        left_on=['gemeentenaam'], right_on=['gemeentenaam'])
df_prognoses = pd.merge(df_prognoses, df_population_prognose, how='left',
                        left_on=['gemeentenaam', 'interval'],
                        right_on=['gemeentenaam', 'interval'])

# Concat with original 'get data' dataframe (incl. drop municipalities that don't occur in original dataset)
# NOTE(review): this assumes 'codering_regio' and 'interval' are regular columns of df_get_data
# (i.e. its index has been reset) - confirm against the cell that builds df_get_data.
list_unchanged_multiplicacities = df_get_data.loc[
    df_get_data['interval'] == df_get_data['interval'].max(), 'codering_regio'].unique()
df_prognoses = df_prognoses[df_prognoses['codering_regio'].isin(list_unchanged_multiplicacities)]
df_future = pd.concat([df_get_data, df_prognoses], axis=0)
df_future = df_future.sort_values(['codering_regio', 'interval']).reset_index(drop=True)




In [None]:
df_future

In [None]:
# list_cols_prognoses

In [None]:
## Extend dataframe for blanks
# Determine columns for each imputing strategy.
# The lists are built with order-preserving filters instead of list(set(...)): they contain the
# same columns, but in the same order on every run (the iteration order of a set of strings
# changes between kernel sessions).
list_cols_prognoses = df_prognoses.columns
# list_cols_prognoses_str = [x for x in list(df_prognoses.loc[:, df_prognoses.dtypes == object].columns) if x!='codering_regio']
list_cols_prognoses_num = list(df_prognoses.loc[:, df_prognoses.dtypes != object].columns)
list_all_columns = list(df_future.columns)
_set_cols_prognoses = set(list_cols_prognoses)

# Object (string) columns that do not come from the prognosis tables.
list_cols_str = list(dict.fromkeys(
    c for c in df_future.loc[:, df_future.dtypes == object].columns if c not in _set_cols_prognoses))

# Columns the trained model uses, with the 'relative_' marker removed and duplicates dropped.
list_cols_trained_model = list(dict.fromkeys(
    x.replace('relative_', '') for x in settings.predict['LIST_COLS_TRAINED_MODEL']))

# Model columns that are neither listed as invariable nor present in the prognosis data.
_set_cols_invariably = set(settings.predict['LIST_COLS_TRAINED_MODEL_INVARIABLY'])
list_cols_relate_imputer = [c for c in list_cols_trained_model
                            if c not in _set_cols_invariably and c not in _set_cols_prognoses]

# Everything else is handled by the group interpolate imputer.
_set_cols_not_grouped = set(list_cols_str) | set(list_cols_relate_imputer)
list_cols_group_imputer = list(dict.fromkeys(
    c for c in list_all_columns if c not in _set_cols_not_grouped))

In [None]:
df_future_cop = df_future.copy()

In [None]:
# Forward-fill the string columns within each region. A plain ffill over the whole frame would
# copy the previous region's value into a region whose first row is missing.
df_future_cop.loc[:, list_cols_str] = df_future_cop.groupby('codering_regio')[list_cols_str].ffill()

In [None]:
from src.utilities.transformers import ColumnSelector, GroupInterpolateImputer, RelativeColumnScaler, \
    CustomScaler, CustomImputer

In [None]:
# Group imputer for available future / invariably columns / columns not used in trained model
GII = GroupInterpolateImputer(groupcols=settings.predict['GROUP_INTERPOLATE_IMPUTER_GROUPCOLS'],
                        interpolate_method=settings.predict['GROUP_INTERPOLATE_IMPUTER_METHOD'],
                        cols=list_cols_group_imputer)
df_future_cop = GII.fit_transform(df_future_cop)

In [None]:
df_future_cop

In [None]:
# Relational imputer for other columns in trained model
list_cols_relate_imputer

In [None]:
# Column that the related columns are expressed relative to.
base_col = 'aantalinwoners'
# Prognosis years, as strings because 'interval' is compared against strings here.
future_years = [str(y) for y in range(2020, 2026)]
all_relate_cols_necessary = [
    *settings.predict['LIST_COLS_GROUPER_RELATE_IMPUTER'],
    *list_cols_relate_imputer,
    base_col,
]

# Rows of the reference year 2019, restricted to the columns needed for the ratios.
df_base_year = df_future_cop.loc[df_future_cop['interval'] == '2019', all_relate_cols_necessary]

In [None]:
df_base_year.loc[:, list_cols_relate_imputer] = df_base_year.loc[:, list_cols_relate_imputer].div(df_base_year['aantalinwoners'], axis=0)

In [None]:
df_base_year

In [None]:
df_base_year = df_base_year[df_base_year['codering_regio'].isin(df_future_cop[df_future_cop['interval']=='2025'].codering_regio.unique())]

In [None]:
# df_base_year.set_index('codering_regio')[col]

In [None]:
# df_future_2 = df_future_cop.copy()
# df_future_2 = df_future_2.set_index('codering_regio')

In [None]:
# df_future_2[df_future_2['interval']=='2021'][base_col]

In [None]:
# df_future_2[df_future_2['interval']=='2021'].loc[:,col] = df_future_2[df_future_2['interval']=='2021'].loc[:,base_col] * df_base_year.set_index('codering_regio')[col]

In [None]:
# df_future_2[df_future_2['interval']=='2021'].loc[:,col]
# NOTE(review): in the visible notebook `df_future_2` and `col` are first assigned in later
# cells and `year` is never assigned, so this cell raises NameError on Restart & Run All.
df_future_2[df_future_2['interval']==year].loc[:,col]

In [None]:
# NOTE(review): `df_future_2` is first assigned in a later cell and `year` is never assigned in
# the visible notebook; this inspection cell fails on a fresh kernel.
df_future_2[df_future_2['interval']==year].loc[:,base_col]

In [None]:
# NOTE(review): `col` is only assigned by the loops in later cells; this inspection cell fails
# on a fresh kernel.
df_base_year.set_index('codering_regio')[col]

In [None]:
df_future_cop[df_future_cop['interval'].isin(future_years)].loc[:,['codering_regio']+list_cols_relate_imputer+[base_col]]

In [None]:
# Relational imputer: estimate every related column as population * base-year ratio.
df_future_2 = df_future_cop.copy()
df_future_2 = df_future_2.set_index('codering_regio')
# df_base_year is expected to hold the 2019 values already divided by 'aantalinwoners' (the
# `.div(...)` cell); index it by region once so the multiplication below aligns on the region.
perc_by_region = df_base_year.set_index('codering_regio')
# NOTE(review): this overwrites the columns for every row, historical years included, not only
# for the future years - confirm that is intended.
for col in list_cols_relate_imputer:
    perc_col_series = perc_by_region[col]
    df_future_2.loc[:, col] = df_future_2.loc[:, base_col] * perc_col_series
    


In [None]:
0.507697108383607*9528.333333333333940

In [None]:
df_future_2[~df_future_2['interval'].isin(future_years)].loc[:,list_cols_relate_imputer]

In [None]:
# df_future_cop[df_future_cop['interval'].isin(future_years)].loc[:,col]

In [None]:
# Toy fixture for developing the relational imputer.
# df_hist_perc : base-year ratio of each column relative to 'aantalinwoners', per region.
# df_future    : per region the base year with values, followed by years where only
#                'aantalinwoners' is known.
# df_uitkomst  : expected result, i.e. aantalinwoners * ratio for every row of df_future.
# NOTE(review): this reuses the name df_future and therefore replaces the real frame built
# earlier in the notebook. The key 'allenstaande_vrouwen' is spelled as in the cell that
# consumes this fixture.
df_hist_perc = pd.DataFrame({'code_regio': ['AB01', 'AB02', 'AB03'],
                            'interval': ['2019', '2019', '2019'],
                            'allenstaande_vrouwen': [0.4, 0.15, 0.2],
                            'alleenstaande_mannen': [0.3, 0.1, 0.3]})
df_future = pd.DataFrame({'code_regio': ['AB01', 'AB01','AB01','AB02','AB02','AB02', 'AB03','AB03','AB03'],
                            'interval': ['2019', '2020', '2021','2019', '2020', '2021', '2019', '2020', '2021'],
                            'allenstaande_vrouwen': [4, np.nan, np.nan,15, np.nan, np.nan,5, np.nan, np.nan],
                            'alleenstaande_mannen': [3, np.nan, np.nan,11.5, np.nan, np.nan,15, np.nan, np.nan],
                            'aantalinwoners': [10,20,30, 100,115,130, 25,50,75]})
# The expected rows correspond one-to-one to the rows of df_future (same 'aantalinwoners'), so
# they carry the same interval labels; they were previously shifted by one year.
df_uitkomst = pd.DataFrame({'code_regio': ['AB01', 'AB01','AB01','AB02','AB02','AB02', 'AB03','AB03','AB03'],
                            'interval': ['2019', '2020', '2021','2019', '2020', '2021', '2019', '2020', '2021'],
                            'allenstaande_vrouwen': [4, 8, 12, 15,17.25,19.5, 5,10,15],
                            'alleenstaande_mannen': [3,6,9, 10,11.5,13, 7.5,15,22.5],
                            'aantalinwoners': [10,20,30, 100,115,130, 25,50,75]})

In [None]:
df_hist_perc

In [None]:
df_future

In [None]:
df_uitkomst

In [None]:
# df_hist_perc = df_base_year.copy()
# df_future = df_future_cop[df_future_cop['interval'].isin(future_years)].copy()

In [None]:
# Apply the relational imputer to the toy fixture: every column becomes
# aantalinwoners * (base-year ratio of that column), aligned on the region code.
df_uitkomst_test = df_future.copy().set_index('code_regio')
for col in ('allenstaande_vrouwen', 'alleenstaande_mannen'):
    # Variant for the real data: iterate over list_cols_relate_imputer and use
    # base_col / 'codering_regio' instead of the toy column names.
    df_uitkomst_test.loc[:, col] = df_uitkomst_test['aantalinwoners'].mul(
        df_hist_perc.set_index('code_regio')[col])

In [None]:
df_uitkomst_test

In [None]:
df_hist_perc.set_index('code_regio')['alleenstaande_mannen']

In [None]:
list(df_prognoses.loc[:, df_prognoses.dtypes == object].columns)

In [None]:
list_cols_prognoses_num

In [None]:
df_future_cop[df_future_cop['interval'].isin(['2017', '2018', '2019'])][settings.predict['LIST_COLS_GROUPER_RELATE_IMPUTER']+list_cols_prognoses_num+list_cols_relate_imputer]


In [None]:
df_future_cop[df_future_cop['interval'].isin(['2017', '2018', '2019'])][settings.predict['LIST_COLS_GROUPER_RELATE_IMPUTER']+list_cols_prognoses_num+list_cols_relate_imputer].dtypes

In [None]:
# NOTE(review): list_cols_relate is only assigned in the next cell; this display fails on a
# fresh kernel.
list_cols_relate

In [None]:
list_past_period = ['2017', '2018', '2019']
list_cols_relate = settings.predict['LIST_COLS_GROUPER_RELATE_IMPUTER']+list_cols_relate_imputer
df_var = df_future_cop[df_future_cop['interval'].isin(list_past_period)][list_cols_relate+['aantalinwoners']].copy()

# for basecol in list_cols_prognoses_num:
#     print(basecol)
#     df_var.loc[:, list_cols_relate_imputer] = df_var.loc[:, list_cols_relate_imputer] / df_var[basecol]
    
# df_var.loc[:, list_cols_relate_imputer] = df_var.loc[:, list_cols_relate_imputer].div(df_var['aantalinwoners'], axis=0)
# df_var_mean = df_var.groupby(['codering_regio']).mean().drop(['aantalinwoners'], axis=1)
    

In [None]:
df_var

In [None]:
df_var['aantalinwoners']

In [None]:
df_var[df_var['codering_regio'].isin(['GM0085', 'GM0017'])]

In [None]:
df_future_cop[df_future_cop['codering_regio'].isin(['GM0085', 'GM0017'])][['alleenstaande_mannen', 'alleenstaande_vrouwen', 'aantalinwoners', 'gemeentenaam']]

In [None]:
import statistics
# statistics.pvariance

In [None]:
df_var.loc[3]

In [None]:
# Group the historical values per region. The result is bound to `gb`, which this cell displays
# and the next cell uses; df_var itself is left untouched so the cell can be re-run.
# NOTE(review): `* 1` looks like a no-op for numeric and string columns - confirm whether it was
# meant to cast boolean columns.
gb = (df_var.drop(['interval'], axis=1) * 1).groupby(['codering_regio'])
gb

In [None]:
# Population variance (ddof=0) of every column per region. statistics.pvariance cannot be applied
# to a whole group: iterating a DataFrame yields its column labels, not its values.
gb.var(ddof=0)

In [None]:
list(df_prognoses.loc[:, df_prognoses.dtypes != object].columns)

In [None]:
df_future[df_future['interval'].isin(['2017', '2018', '2019'])]

In [None]:
df_future[df_future['interval'].isin(['2020', '2021', '2022', '2023'])]

In [None]:
import src.settings as settings
from src.preprocess.preprocess import get_and_combine_cbs_tables, rename_and_subset_cols, \
    get_region_period_spec_val_subtable, downcast_variables_dataframe

In [None]:
periods = [2020, 2021, 2022]

In [None]:
round(max(periods), 5)

In [None]:
roundedto5periods = max(periods) + (5 - max(periods)) % 5

In [None]:
total_periods = list(range(min(periods), roundedto5periods+1, 1))

In [None]:
total_periods

In [None]:
print("Get 'prognose bevolking' tables")
df_population_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_BEVOLKING'],
                                                    url=settings.get_data['CBS_OPEN_URL'])
df_population_prognose = rename_and_subset_cols(df=df_population_prognose,
                                                dict_rename=settings.predict['DICT_COLS_RENAMED_BEVOLKING'],
                                                list_cols=settings.predict['LIST_COLS_SUBSET_BEVOLKING'])
# The year is the last whitespace-separated token of 'perioden'.
df_population_prognose['interval'] = df_population_prognose['perioden'].apply(lambda x: x.split(' ')[-1])
# Keep municipalities only. The '(CR)' / '(PV)' markers are matched literally (regex=False): as a
# regex, '(CR)' is a capture group that matches any 'CR' and triggers a pandas warning.
# na=True keeps dropping rows without a name, as the previous `== False` comparison did.
df_population_prognose = df_population_prognose[
    ~df_population_prognose['gemeentenaam'].str.contains('(CR)', regex=False, na=True) &
    ~df_population_prognose['gemeentenaam'].str.contains('(PV)', regex=False, na=True) &
    (df_population_prognose['gemeentenaam'] != 'Nederland')].copy()
df_population_prognose = df_population_prognose[df_population_prognose['interval'].astype(int)<=roundedto5periods]
# Scale by 1000 before rounding, as the combined "Get data" cell does; rounding the unscaled
# values to whole numbers would throw away the fractional part.
df_population_prognose['aantalinwoners'] = df_population_prognose['aantalinwoners'] * 1000
df_population_prognose['aantalinwoners'] = df_population_prognose['aantalinwoners'].round().astype(int)

In [None]:
df_population_prognose

In [None]:
print("Get 'prognose huishoudens' tables")
df_huishouden_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_HUISHOUDEN'],
                                                    url=settings.get_data['CBS_OPEN_URL'])
df_huishouden_prognose['interval'] = df_huishouden_prognose['perioden']
df_huishouden_prognose = df_huishouden_prognose.rename(columns=settings.predict['DICT_COLS_RENAMED_HUISHOUDEN'])
# Keep the point prognosis only.
df_huishouden_prognose = df_huishouden_prognose[df_huishouden_prognose['prognoseinterval']=='Prognose']
# Keep municipalities only. The '(CR)' / '(PV)' markers are matched literally (regex=False): as a
# regex, '(CR)' is a capture group that matches any 'CR' and triggers a pandas warning.
# na=True keeps dropping rows without a name, as the previous `== False` comparison did.
df_huishouden_prognose = df_huishouden_prognose[
    ~df_huishouden_prognose['gemeentenaam'].str.contains('(CR)', regex=False, na=True) &
    ~df_huishouden_prognose['gemeentenaam'].str.contains('(PV)', regex=False, na=True) &
    (df_huishouden_prognose['gemeentenaam']!='Nederland')].copy()
# Scale by 1000 before rounding, as the combined "Get data" cell does; rounding the unscaled
# values to whole numbers would throw away the fractional part.
df_huishouden_prognose['particulierehuishoudens'] = df_huishouden_prognose['particulierehuishoudens'] * 1000
df_huishouden_prognose['particulierehuishoudens'] = df_huishouden_prognose['particulierehuishoudens'].round().astype(int)
# One row per municipality and year, one column per household composition.
df_huishouden_prognose_pivot = pd.pivot_table(data=df_huishouden_prognose, values='particulierehuishoudens',
                                         index=['gemeentenaam', 'interval'],
                                         columns=['samenstellingvanhethuishouden'], aggfunc='sum').reset_index()
df_huishouden_prognose_pivot = df_huishouden_prognose_pivot[df_huishouden_prognose_pivot['interval'].astype(int) <= roundedto5periods]
df_huishouden_prognose_pivot = rename_and_subset_cols(df=df_huishouden_prognose_pivot,
                                                dict_rename=settings.predict['DICT_COLS_RENAMED_HUISHOUDEN_PIVOT'],
                                                list_cols=settings.predict['LIST_COLS_SUBSET_HUISHOUDING_PIVOT'])

In [None]:
df_huishouden_prognose_pivot

In [None]:
print("Get 'regio-indeling'")
# Download the region classification table and immediately rename / subset its columns.
df_regioindeling = rename_and_subset_cols(
    df=get_and_combine_cbs_tables(
        dict_tables=settings.predict['DICT_TABLES_REGIOINDELING'],
        double_trouble_colnames=settings.predict['DICT_DOUBLETROUBLECOLNAMES_REGIOINDELING'],
        url=settings.get_data['CBS_OPEN_URL']),
    dict_rename=settings.predict['DICT_COLS_RENAMED_REGIOINDELING'],
    list_cols=settings.predict['LIST_COLS_SUBSET_REGIOINDELING'])
# Remove leading / trailing whitespace from the configured text columns.
df_regioindeling = df_regioindeling.assign(**{
    name: df_regioindeling[name].str.strip()
    for name in settings.predict['LIST_STR_STRIP_COLS_REGIOINDELING']})

In [None]:
df_regioindeling

In [None]:
# Combine the three tables: households are attached to every region row by municipality name,
# population is attached by municipality name and year.
df_prognoses = (
    df_regioindeling
    .merge(df_huishouden_prognose_pivot, how='left', on=['gemeentenaam'])
    .merge(df_population_prognose, how='left', on=['gemeentenaam', 'interval'])
)

In [None]:
df_prognoses

In [None]:
import cbsodata
dict_tables=settings.predict['DICT_TABLES_REGIOINDELING']
url=settings.get_data['CBS_OPEN_URL']

In [None]:
print(f"Number of tables to collect: {len(dict_tables)}")

In [None]:
df = pd.DataFrame()
for interval, table in dict_tables.items():
    print(f"Pythonic iteration {interval} for table {table}")
    df_sub = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))


In [None]:
{i:i for i in df_sub.columns}

In [None]:
{'Code_1': 'Code_1gemeente',
 'Naam_2': 'Naam_2gemeente',
 'SorteringNaam_3': 'SorteringNaam_3gemeente',
 'Code_4': 'Code_4arbeidsmarktregio',
 'Naam_5': 'Naam_5arbeidsmarktregio',
 'Code_6': 'Code_6arrondissementenrechtsgebieden',
 'Naam_7': 'Naam_7arrondissementenrechtsgebieden',
 'Code_8': 'Code_8corop',
 'Naam_9': 'Naam_9corop',
 'Code_10': 'Code_10coropsub',
 'Naam_11': 'Naam_11coropsub',
 'Code_12': 'Code_12coropplus',
 'Naam_13': 'Naam_13coropplus',
 'Code_14': 'Code_14ggdregio',
 'Naam_15': 'Naam_15ggdregio',
 'Code_16': 'Code_16jeugdzorgregio',
 'Naam_17': 'Naam_17jeugdzorgregio',
 'Code_18': 'Code_18kvk',
 'Naam_19': 'Naam_19jkvk',
 'Code_20': 'Code_20landbouwgebieden',
 'Naam_21': 'Naam_21landbouwgebieden',
 'Code_22': 'Code_22landbouwgebiedengroepen',
 'Naam_23': 'Naam_23landbouwgebiedengroepen',
 'Code_24': 'Code_24landsdelen',
 'Naam_25': 'Naam_25landsdelen',
 'Code_26': 'Code_26nutseen',
 'Naam_27': 'Naam_27nutseen',
 'Code_28': 'Code_28nutstwee',
 'Naam_29': 'Naam_29nutstwee',
 'Code_30': 'Code_30nutsdrie',
 'Naam_31': 'Naam_31nutsdrie',
 'Code_32': 'Code_32provincies',
 'Naam_33': 'Naam_33provincies',
 'Code_34': 'Code_34regionaleeenheden',
 'Naam_35': 'Naam_35regionaleeenheden',
 'Code_36': 'Code_36regionaleenergiestrategieregios',
 'Naam_37': 'Naam_37regionaleenergiestrategieregios',
 'Code_38': 'Code_38regionalemeldencoordinatiepunten',
 'Naam_39': 'Naam_39regionalemeldencoordinatiepunten',
 'Code_40': 'Code_40regioplusarbeidsmarktregios',
 'Naam_41': 'Naam_41regioplusarbeidsmarktregios',
 'Code_42': 'Code_42ressortenrechtsgebieden',
 'Naam_43': 'Naam_43ressortenrechtsgebieden',
 'Code_44': 'Code_44subresregios',
 'Naam_45': 'Naam_45subresregios',
 'Code_46': 'Code_46toeristengebieden',
 'Naam_47': 'Naam_47toeristengebieden',
 'Code_48': 'Code_48veiligheidsregios',
 'Naam_49': 'Naam_49veiligheidsregios',
 'Code_50': 'Code_50zorgkantoorregios',
 'Naam_51': 'Naam_51zorgkantoorregios',
 'Code_52': 'Code_52gemeentegrootte',
 'Omschrijving_53': 'Omschrijving_53gemeentegrootte',
 'Code_54': 'Code_54stedelijksheidsklase',
 'Omschrijving_55': 'Omschrijving_55stedelijkheidsklasse',
 'Inwonertal_56': 'Inwonertal_56',
 'Omgevingsadressendichtheid_57': 'Omgevingsadressendichtheid_57'}

In [None]:
# Unfinished draft of a shorter rename mapping. Only the first two columns were ever given a new
# name; the draft mixed set and dict syntax and had an empty value, so the cell did not parse.
# It is reduced to the finished entries, with the remaining column names kept as a comment.
{'Code_1': 'codegemeente',
 'Naam_2': 'naamgemeente'}
# Not mapped yet: 'ID', 'RegioS', 'SorteringNaam_3', 'Code_4', 'Naam_5', 'Code_6', 'Naam_7',
# 'Code_8', 'Naam_9', 'Code_10', 'Naam_11', 'Code_12', 'Naam_13', 'Code_14', 'Naam_15',
# 'Code_16', 'Naam_17', 'Code_18', 'Naam_19', 'Code_20', 'Naam_21', 'Code_22', 'Naam_23',
# 'Code_24', 'Naam_25', 'Code_26', 'Naam_27', 'Code_28', 'Naam_29', 'Code_30', 'Naam_31',
# 'Code_32', 'Naam_33', 'Code_34', 'Naam_35', 'Code_36', 'Naam_37', 'Code_38', 'Naam_39',
# 'Code_40', 'Naam_41', 'Code_42', 'Naam_43', 'Code_44', 'Naam_45', 'Code_46', 'Naam_47',
# 'Code_48', 'Naam_49', 'Code_50', 'Naam_51', 'Code_52', 'Omschrijving_53', 'Code_54',
# 'Omschrijving_55', 'Inwonertal_56', 'Omgevingsadressendichtheid_57'

In [None]:
# Cell-level copy of the table collection loop (download, rename, stack).
print(f"Number of tables to collect: {len(dict_tables)}")

# Rename mapping applied before the numeric suffixes are stripped, so that columns which would
# otherwise end up with the same name stay distinct. dict_tables holds the regio-indeling
# tables, so the matching mapping from the settings is used.
double_trouble_colnames = settings.predict['DICT_DOUBLETROUBLECOLNAMES_REGIOINDELING']

list_df_sub = []
for interval, table in dict_tables.items():
    print(f"Pythonic iteration {interval} for table {table}")
    try:
        df_sub = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))
    except Exception as error:
        # A table that cannot be downloaded is skipped, but no longer silently.
        print(f"Skipping table {table}: {error}")
        continue
    if double_trouble_colnames:
        df_sub = df_sub.rename(columns=double_trouble_colnames)
    # Normalise the column names: drop trailing digits and underscores, lower-case the rest.
    dict_wijk_cols_renamed = {col: col.rstrip('0123456789').replace("_", "").lower()
                              for col in df_sub.columns}
    df_sub = df_sub.rename(columns=dict_wijk_cols_renamed)
    df_sub['interval'] = interval
    list_df_sub.append(df_sub)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(list_df_sub, sort=True) if list_df_sub else pd.DataFrame()
# A cell cannot `return`; display the result instead.
df

In [None]:
df_huishouden_prognose_pivot = pd.pivot_table(data=df_huishouden_prognose, values='particulierehuishoudens',
                                         index=['regioindeling', 'interval'],
                                         columns=['samenstellingvanhethuishouden'], aggfunc=np.sum).reset_index()

In [None]:
df_huishouden_prognose_pivot

In [None]:
# Keep municipalities only. The '(CR)' / '(PV)' markers are matched literally (regex=False): as a
# regex, '(CR)' is a capture group that matches any 'CR' and triggers a pandas warning.
# na=True keeps dropping rows without a name, as the previous `== False` comparison did.
df_huishouden_prognose = df_huishouden_prognose[
    ~df_huishouden_prognose['regioindeling'].str.contains('(CR)', regex=False, na=True) &
    ~df_huishouden_prognose['regioindeling'].str.contains('(PV)', regex=False, na=True) &
    (df_huishouden_prognose['regioindeling']!='Nederland')].copy()

In [None]:
# NOTE(review): pasted example that cannot run as written: `'this'|'that'` applies `|` to two
# str objects (TypeError) and `and` between two Series raises ValueError. The working pattern is
# one regex alternation per column ('this|that') with the masks combined through `~` and `&`.
df[df["col1"].str.contains('this'|'that')==False and df["col2"].str.contains('foo'|'bar')==True]

In [None]:
df_population_prognose = rename_and_subset_cols(df=df_population_prognose,
                                    dict_rename=settings.get_data['DICT_COLS_RENAMED_WMO'],
                                    list_cols=settings.get_data['LIST_COLS_SUBSET_WMO'])

In [None]:
df_population_prognose['interval'] = df_population_prognose['perioden'].apply(lambda x: x.split(' ')[-1])

# Extend dataframe with future

In [None]:
# Regions that are present in the most recent interval of df.
list_unique_regions = list(
    df.loc[df['interval'] == df['interval'].max(), 'codering_regio'].unique())
list_future_years = [str(y) for y in (2020, 2021, 2022)]
# One empty row per (region, future year) combination, region-major.
df_future = pd.DataFrame(
    [(region, year_str) for region in list_unique_regions for year_str in list_future_years],
    columns=['codering_regio', 'interval'])
# Append the future rows, make 'interval' numeric and order the rows per region in time.
df_extended = (
    pd.concat([df, df_future])
    .astype({'interval': int})
    .sort_values(['codering_regio', 'interval'])
    .reset_index()
    .drop(['index'], axis=1)
)
df_extended

# Strategy one: Use GroupInterpolateImputer

In [None]:
from sklearn import preprocessing
from sklearn.pipeline import make_pipeline

import src.settings as settings
from src.preprocess.preprocess import make_df_missing
from src.utilities.transformers import ColumnSelector, GroupInterpolateImputer, RelativeColumnScaler, \
    CustomScaler, CustomImputer

In [None]:
# Work on a copy; reset_index() moves the current index into a regular column.
df_preprocess = df_extended.reset_index().copy()

# Columns with too many missing values: 'perc_missing' above 0.99 in the make_df_missing overview
df_missing = make_df_missing(df_preprocess)
list_drop_missing_cols = df_missing.loc[df_missing['perc_missing'] > 0.99].index.tolist()

# Columns that are not numeric but of object dtype
list_drop_object_cols = [col for col, dtype in df_preprocess.dtypes.items() if dtype == object]

# Columns handed to the first ColumnSelector: everything except the columns to drop,
# but the original index columns are always kept
drop_cols_total = [col for col in set(list_drop_missing_cols + list_drop_object_cols)
                   if col not in settings.preprocess['ORIGINAL_INDEX']]
list_column_selector_1 = [col for col in df_preprocess.columns if col not in drop_cols_total]

# Build the preprocessing pipeline: column selection followed by group-wise interpolation
pl_preprocess = make_pipeline(
    ColumnSelector(cols=list_column_selector_1),
    GroupInterpolateImputer(
        groupcols=settings.preprocess['GROUP_INTERPOLATE_IMPUTER_GROUPCOLS'],
        interpolate_method='values',
        cols=settings.preprocess['GROUP_INTERPOLATE_IMPUTER_COLS'],
    ),
)

In [None]:
# Fit the preprocessing pipeline on df_preprocess and transform it in the same step.
df_preprocessed = pl_preprocess.fit_transform(df_preprocess)

In [None]:
# Spot check: show the preprocessed rows of the region with code 'GM0197'.
df_preprocessed[df_preprocessed['codering_regio']=='GM0197']

In [None]:
# # Create sample set for developing visualisation
# df_test_set_for_visualise = df_preprocessed[['codering_regio', 'interval', 'wmoclientenper1000inwoners']].copy()

# mu, sigma = 0, 0.1 
# noise = np.random.normal(mu, sigma, len(df_test_set_for_visualise)) 

# df_test_set_for_visualise['wmoclientenper1000inwoners'] = df_test_set_for_visualise['wmoclientenper1000inwoners'] + noise
# df_test_set_for_visualise.to_csv('../data/sampleset_y_predict.csv')

In [None]:
# Introspection aid: co_varnames lists the local variable names of the function's code object,
# starting with its argument names.
# NOTE(review): if DataFrame.interpolate is wrapped by a decorator in the installed pandas version,
# this shows the wrapper's names instead — inspect.signature would be more reliable; confirm.
pd.DataFrame.interpolate.__code__.co_varnames

# Strategy 2: ffill + percentage
This includes devising a method to determine the percentage:
   
* Population growth percentage per type of region? Whole country?
* Fixed?
* Certain age?

# Strategy 3: Prognose CBS

Mogelijke tabellen: 
* 84525NED -> niet alle gemeenten
* 84528NED -> Slechts per 5 jaar
* 84526NED -> huishoudens

In [None]:
import cbsodata

In [None]:
# Download CBS table 84526NED through cbsodata, using the catalog URL from settings, into a DataFrame.
# NOTE(review): the table list in the notes above describes 84526NED as 'huishoudens' (households),
# while the result is stored under a '..._bevolking' (population) name — consider renaming.
table = '84526NED'
url = settings.get_data['CBS_OPEN_URL']
df_prognose_bevolking = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))

In [None]:
# Download CBS table 84528NED (described in the notes above as available only per 5 years) through
# cbsodata, using the catalog URL from settings, into a DataFrame.
# Note: this re-binds the notebook-level names `table` and `url` set by the previous download cell.
table = '84528NED'
url = settings.get_data['CBS_OPEN_URL']
df_prognose_bevolking2 = pd.DataFrame(cbsodata.get_data(table, catalog_url=url))

In [None]:
# Preview the first rows of the frame downloaded from table 84526NED.
df_prognose_bevolking.head()

In [None]:
# Preview the first rows of the frame downloaded from table 84528NED.
df_prognose_bevolking2.head()

In [None]:
# Distinct values of the 'SamenstellingVanHetHuishouden' column.
df_prognose_bevolking['SamenstellingVanHetHuishouden'].unique()

In [None]:
# Distinct values of the 'RegioIndeling2018' column.
df_prognose_bevolking['RegioIndeling2018'].unique()

# Load model, select columns

In [None]:
def _drop_aggregate_regions(df, region_col='gemeentenaam'):
    """Return a copy of ``df`` without aggregate-region rows.

    A row is removed when ``region_col`` contains the literal text '(CR)' or '(PV)'
    (presumably CBS COROP regions and provinces - TODO confirm), equals 'Nederland',
    or is missing.
    """
    names = df[region_col]
    # regex=False: read as a regular expression, '(CR)' is a capture group that matches a bare 'CR'
    # anywhere in the name (and triggers a pandas match-group warning).
    # na=True: rows without a region name count as a match and are dropped, which is what the
    # former `str.contains(...) == False` comparison did as well.
    is_aggregate = (names.str.contains('(CR)', regex=False, na=True)
                    | names.str.contains('(PV)', regex=False, na=True)
                    | (names == 'Nederland'))
    return df[~is_aggregate].copy()


def predict_data(df_get_data, periods, trained_model, save_all=False, personal_note="", base_year='2019'):
    """Extend the historical data with future periods and predict with a trained model.

    Steps: download the CBS region mapping and the household / population forecast tables,
    append them to ``df_get_data``, fill the remaining gaps (forward fill for text columns,
    group-wise interpolation, and scaling with 'aantalinwoners' for the remaining model
    columns), run ``preprocess_data`` and finally call ``trained_model.predict``.

    Parameters
    ----------
    df_get_data : pd.DataFrame
        Historical data. Must contain the columns 'codering_regio' and 'interval';
        'interval' is compared against year strings such as '2019'.
    periods : sequence of int
        Years to predict for. Values are converted with ``int``.
    trained_model : object
        Fitted estimator exposing ``predict``.
    save_all : bool, optional
        Currently unused; ``preprocess_data`` is always called with ``save_all=False``.
    personal_note : str, optional
        Currently unused; ``preprocess_data`` is always called with ``personal_note='predict'``.
    base_year : str, optional
        'interval' label of the year whose per-inhabitant ratios are used by the relational
        imputer. Defaults to '2019', the value that was previously hard-coded.

    Returns
    -------
    The result of ``trained_model.predict`` on the preprocessed frame.

    Raises
    ------
    ValueError
        If ``periods`` is empty.
    """
    periods = [int(p) for p in periods]
    if not periods:
        raise ValueError("periods must contain at least one year")

    ## Get data (for extending get data with future)
    # Determine boundaries for get prognose data: the last requested year is rounded up to a
    # multiple of 5 (presumably because a forecast table is only published per 5 years, so the
    # next 5-year value is needed as interpolation anchor - TODO confirm).
    roundedto5periods = max(periods) + (5 - max(periods)) % 5
    total_periods = list(range(min(periods), roundedto5periods + 1, 1))
    total_periods_str = [str(x) for x in total_periods]

    print("Get 'regio-indeling'")
    df_regioindeling = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_REGIOINDELING'],
                                                  double_trouble_colnames=settings.predict[
                                                      'DICT_DOUBLETROUBLECOLNAMES_REGIOINDELING'],
                                                  url=settings.get_data['CBS_OPEN_URL'])
    df_regioindeling = rename_and_subset_cols(df=df_regioindeling,
                                              dict_rename=settings.predict['DICT_COLS_RENAMED_REGIOINDELING'],
                                              list_cols=settings.predict['LIST_COLS_SUBSET_REGIOINDELING'])
    # Strip surrounding whitespace from the configured text columns
    df_regioindeling[settings.predict['LIST_STR_STRIP_COLS_REGIOINDELING']] = df_regioindeling[
        settings.predict['LIST_STR_STRIP_COLS_REGIOINDELING']].apply(lambda x: x.str.strip())

    print("Get 'prognose huishoudens' tables")
    df_huishouden_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_HUISHOUDEN'],
                                                        url=settings.get_data['CBS_OPEN_URL'])
    df_huishouden_prognose['interval'] = df_huishouden_prognose['perioden']
    df_huishouden_prognose = df_huishouden_prognose.rename(columns=settings.predict['DICT_COLS_RENAMED_HUISHOUDEN'])
    df_huishouden_prognose = df_huishouden_prognose[df_huishouden_prognose['prognoseinterval'] == 'Prognose']
    df_huishouden_prognose = _drop_aggregate_regions(df_huishouden_prognose, 'gemeentenaam')
    # Scale by 1000 and store as whole numbers (presumably the table is published in thousands)
    df_huishouden_prognose['particulierehuishoudens'] = df_huishouden_prognose['particulierehuishoudens'] * 1000
    df_huishouden_prognose['particulierehuishoudens'] = df_huishouden_prognose[
        'particulierehuishoudens'].round().astype(int)
    # One row per (gemeentenaam, interval), one column per household composition.
    # aggfunc is the string 'sum': recent pandas versions warn about the np.sum callable.
    df_huishouden_prognose_pivot = pd.pivot_table(data=df_huishouden_prognose, values='particulierehuishoudens',
                                                  index=['gemeentenaam', 'interval'],
                                                  columns=['samenstellingvanhethuishouden'],
                                                  aggfunc='sum').reset_index()
    df_huishouden_prognose_pivot = df_huishouden_prognose_pivot[
        df_huishouden_prognose_pivot['interval'].astype(int) <= roundedto5periods]
    df_huishouden_prognose_pivot = rename_and_subset_cols(df=df_huishouden_prognose_pivot,
                                                          dict_rename=settings.predict[
                                                              'DICT_COLS_RENAMED_HUISHOUDEN_PIVOT'],
                                                          list_cols=settings.predict[
                                                              'LIST_COLS_SUBSET_HUISHOUDING_PIVOT'])

    print("Get 'prognose bevolking' tables")
    df_population_prognose = get_and_combine_cbs_tables(dict_tables=settings.predict['DICT_TABLES_BEVOLKING'],
                                                        url=settings.get_data['CBS_OPEN_URL'])
    df_population_prognose = rename_and_subset_cols(df=df_population_prognose,
                                                    dict_rename=settings.predict['DICT_COLS_RENAMED_BEVOLKING'],
                                                    list_cols=settings.predict['LIST_COLS_SUBSET_BEVOLKING'])
    # 'interval' is the last space-separated token of 'perioden'
    df_population_prognose['interval'] = df_population_prognose['perioden'].apply(lambda x: x.split(' ')[-1])
    df_population_prognose = _drop_aggregate_regions(df_population_prognose, 'gemeentenaam')
    df_population_prognose = df_population_prognose[df_population_prognose['interval'].astype(int) <= roundedto5periods]
    # Scale by 1000 and store as whole numbers (presumably the table is published in thousands)
    df_population_prognose['aantalinwoners'] = df_population_prognose['aantalinwoners'] * 1000
    df_population_prognose['aantalinwoners'] = df_population_prognose['aantalinwoners'].round().astype(int)
    df_population_prognose = df_population_prognose.drop(['perioden'], axis=1)

    # Merge all dataframes
    df_prognoses = pd.merge(df_regioindeling, df_huishouden_prognose_pivot, how='left',
                            on=['gemeentenaam'])
    df_prognoses = pd.merge(df_prognoses, df_population_prognose, how='left',
                            on=['gemeentenaam', 'interval'])
    print(f"Shape of df_prognoses = {df_prognoses.shape}")

    # Concat with original 'get data' dataframe (incl. dropping municipalities that don't occur in
    # the most recent interval of the original dataset)
    list_unchanged_municipalities = df_get_data[df_get_data['interval'] == df_get_data['interval'].max()][
        'codering_regio'].unique()
    df_prognoses = df_prognoses[df_prognoses['codering_regio'].isin(list_unchanged_municipalities)]
    print(f"Shape of df_prognoses = {df_prognoses.shape}")
    df_future = pd.concat([df_get_data, df_prognoses], axis=0)
    # drop=True discards the old index directly instead of inserting and then removing an 'index' column
    df_future = df_future.sort_values(['codering_regio', 'interval']).reset_index(drop=True)
    print(f"Shape of df_future = {df_future.shape}")

    ## Extend dataframe for blanks
    print("Start extending blancs in DataFrame with future values")
    # Determine columns for each imputing strategy
    list_cols_prognoses = df_prognoses.columns
    list_all_columns = list(df_future.columns)
    # Text columns that do not come from the forecast tables -> forward fill
    list_cols_str = list(df_future.loc[:, df_future.dtypes == object].columns)
    list_cols_str = list(set(list_cols_str) - set(list_cols_prognoses))
    # Model columns (without the 'relative_' marker) that are neither invariable nor delivered by
    # the forecast tables -> relational imputer
    list_cols_trained_model = settings.predict['LIST_COLS_TRAINED_MODEL']
    list_cols_trained_model = list(set([x.replace('relative_', '') for x in list_cols_trained_model]))
    list_cols_relate_imputer = list(
        set(list_cols_trained_model) - set(settings.predict['LIST_COLS_TRAINED_MODEL_INVARIABLY']) - set(
            list_cols_prognoses))
    # Everything else -> group interpolate imputer
    list_cols_group_imputer = list(set(list_all_columns) - set(list_cols_str) - set(list_cols_relate_imputer))

    # ffill for string columns
    print("ffill for string columns")
    df_future.loc[:, list_cols_str] = df_future.loc[:, list_cols_str].ffill()
    print(f"Shape of df_future = {df_future.shape}")

    # Group imputer for available future / invariably columns / columns not used in trained model
    print("Group imputer for available future / invariably columns / columns not used in trained model")
    GII = GroupInterpolateImputer(groupcols=settings.predict['GROUP_INTERPOLATE_IMPUTER_GROUPCOLS'],
                                  interpolate_method=settings.predict['GROUP_INTERPOLATE_IMPUTER_METHOD'],
                                  cols=list_cols_group_imputer)
    df_future = GII.fit_transform(df_future)
    print(f"Shape of df_future = {df_future.shape}")

    # Relational imputer for other columns in trained model:
    # future value = future 'aantalinwoners' * (base-year value / base-year 'aantalinwoners')
    print("Relational imputer for other columns in trained model")
    base_col = 'aantalinwoners'
    all_relate_cols_necessary = settings.predict['LIST_COLS_GROUPER_RELATE_IMPUTER'] + list_cols_relate_imputer + [
        base_col]
    # .copy(): the ratios are written into this frame, which must not be a view on df_future
    df_base_year = df_future[df_future['interval'] == base_year][all_relate_cols_necessary].copy()
    df_base_year.loc[:, list_cols_relate_imputer] = df_base_year.loc[:, list_cols_relate_imputer].div(
        df_base_year[base_col], axis=0)
    # Keep only regions that have a row in the last period. The comparison uses the string label:
    # 'interval' is treated as text everywhere else in this function (base year, final isin filter),
    # and comparing it with the integer year would never match and leave df_base_year empty.
    df_base_year = df_base_year[df_base_year['codering_regio'].isin(
        df_future[df_future['interval'] == total_periods_str[-1]].codering_regio.unique())]
    df_future = df_future.set_index('codering_regio')
    base_year_ratios = df_base_year.set_index('codering_regio')  # hoisted out of the loop
    for col in list_cols_relate_imputer:
        # Start from the population figure, then multiply by the region's base-year ratio
        # (the multiplication aligns on the 'codering_regio' index)
        df_future.loc[:, col] = df_future.loc[:, base_col]
        df_future.loc[:, col] = df_future.loc[:, col] * base_year_ratios[col]
    print(f"Shape of df_future = {df_future.shape}")
    df_future = df_future[df_future['interval'].isin(total_periods_str)].reset_index()
    df_future = df_future.set_index(['codering_regio', 'interval'])
    print(f"Shape of df_future = {df_future.shape}")

    ## Preprocess
    df_preprocessed = preprocess_data(df=df_future, save_all=False, personal_note='predict')
    df_preprocessed = df_preprocessed.drop(settings.Y_TARGET_COLS, axis=1)

    ## Predict
    y_preds = trained_model.predict(df_preprocessed)

    # Saving the predictions is not implemented yet (save_all / personal_note are unused)
    return y_preds