# Imports

In [1]:
# Python packages
import sys
sys.path.append('../../')
from datetime import datetime
import numpy as np
import pandas as pd
import pickle

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold, GridSearchCV, cross_validate, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, LassoCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor

from xgboost import XGBRegressor, XGBClassifier

# Custom functions
import src.settings as settings
import src.mapper_cols as mapper_cols
from src.run_all.main_get_data import get_data
from src.run_all.main_preprocess import preprocess_data
from src.utilities.utilities import get_latest_file, list_filenames

# Adjust the pandas display settings for this notebook
pd.options.display.max_rows = 500        # show up to 500 rows
pd.options.display.max_columns = 500     # show up to 500 columns
pd.options.display.width = 1000          # total display width in characters
pd.options.display.precision = 2         # decimal precision used for display
# Render floats with 15 decimals
pd.options.display.float_format = '{:.15f}'.format

### Settings

In [2]:
# Directory that holds all data files
datapath = "../../data/"
# Target columns; these are dropped from the X set
X_DROP_VALUES = settings.Y_TARGET_COLS
# Column that is used as the y value
Y_VALUE = ["wmoclientenper1000inwoners"]
# Test size for the train/test split
TEST_SIZE = 0.3
# Random state for the train/test split, e.g. 42 as a fixed seed for reproducibility
RANDOM_STATE = 42

## Model parameters
# Cross-validation strategy for the models, e.g. 10 or
# RepeatedKFold(n_splits=30, n_repeats=5, random_state=1)
CROSS_VALIDATE = 5
# Scoring method used by the models
MODEL_SCORING = "neg_mean_squared_error"
## Grid Search parameters

# Candidate values used in the grid search
ALPHA = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
NEIGHBORS = [3, 5, 11, 19]
# NOTE(review): recent scikit-learn releases removed the `normalize` argument
# from the linear models - confirm this list is still used.
NORMALIZE = [True, False]
# NOTE(review): 'precomputed' expects a precomputed square kernel matrix as
# input instead of raw features - confirm it belongs in this grid.
KERNEL = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
GAMMA = [0.5, 1, 1.5, 2, 5]
N_ESTIMATORS = [50, 100, 200]
# Fixed decimal-comma typo: `0,1` produced the two items 0 and 1 (a zero and a
# duplicate 1) instead of the intended 0.1.
C_REGULARIZATION = [0.001, 0.01, 0.1, 1]

### Functions

In [3]:
def drop_nan_from_specific_columns(df, columns_to_check):
    """
    Drops all rows with nan values in specific columns in a dataframe

    The dataframe is modified in place; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to clean (mutated in place).
    columns_to_check : label or list of labels
        Columns that are inspected; a row is dropped when any of them is nan.
    """
    # `thresh` is deliberately not passed: recent pandas versions raise a
    # TypeError when `how` and `thresh` are both given, even as thresh=None.
    df.dropna(
        axis=0,
        how='any',
        subset=columns_to_check,
        inplace=True
    )
    
def split_clf_and_params(best_estimator_clf):
    """
    Splits the string form of best estimator[clf] into a list of parts.

    Every ")" is removed from the text and the remainder is split on "(",
    so e.g. "Ridge(alpha=10)" becomes ["Ridge", "alpha=10"].
    """
    text_without_closing = str(best_estimator_clf).replace(")", "")
    return text_without_closing.split("(")

def rmse_from_neg_mean_squared_error(neg_mean_squared_error):
    """
    Converts a negative mean squared error into the RMSE.

    The sign is flipped to obtain the (positive) MSE, after which the
    square root is taken.
    """
    positive_mse = -neg_mean_squared_error
    return np.sqrt(positive_mse)

# Computes the RMSE of the grid search best estimator, so the model with the best RMSE can be selected
def rmse_from_gridsearch_best_estimator(grid_search, X_eval=None, y_eval=None):
    """
    Calculates RMSE from the grid search best estimator

    Parameters
    ----------
    grid_search : fitted search object
        Must expose `best_estimator_` with a `predict` method.
    X_eval : optional
        Features to predict on. When None, the notebook-level `X_test` is used.
    y_eval : optional
        True target values. When None, the notebook-level `y_test` is used.

    Returns
    -------
    The square root of the mean squared error between the true targets and
    the predictions of `grid_search.best_estimator_`.
    """
    # Fall back to the notebook globals only when no explicit data is given;
    # passing the data explicitly avoids a hidden dependency on cell order.
    features = X_test if X_eval is None else X_eval
    targets = y_test if y_eval is None else y_eval
    predictions = grid_search.best_estimator_.predict(features)
    return np.sqrt(mean_squared_error(targets, predictions))

# Get data
This step will load and combine several tables from CBS statline. 

Note: This step takes several minutes and, as long as the settings are unchanged, always produces the same result. Therefore the code is commented out and the previously saved dataset is loaded instead.

In [4]:
%%time 
# ## CREATE NEW DATASET
# df_get_data_WMO= get_data(save_all=True)

# ## HARDCODED
datapath = '../../data/'
filename = 'df_get_data_WMO_WIJK_HUISHOUDENS_BEVOLKING_HEFFING_202104042111.parquet.gzip'
df_get_data_WMO = pd.read_parquet(datapath + filename)

# ## SELECT LAST FILE
# datapath = '../../data/'
# df = get_latest_file(filename_str_contains='df_WMO_', datapath=datapath, filetype='parquet')

print(f"The shape of the dataframe from step 'Get Data': {df_get_data_WMO.shape}")
df_get_data_WMO.sample(5)

The shape of the dataframe from step 'Get Data': (936, 470)
CPU times: user 168 ms, sys: 60.8 ms, total: 229 ms
Wall time: 189 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,perioden,typemaatwerkarrangement,wmoclienten,wmoclientenper1000inwoners,aantalinkomensontvangers,aantalinwoners,actieven1575jaar,afstandtotgrotesupermarkt,afstandtothuisartsenpraktijk,afstandtotkinderdagverblijf,afstandtotschool,alandbouwbosbouwenvisserij,bedrijfsmotorvoertuigen,bedrijfsvestigingentotaal,bevolkingsdichtheid,bfnijverheidenenergie,bouwjaarvanaf2000,bouwjaarvoor2000,dekkingspercentage,eenpersoonshuishoudens,eigendomonbekend,geboorterelatief,geboortetotaal,gehuwd,gemeentenaam,gemelectriciteitsverbruikappartement,gemelectriciteitsverbruikeigenwoning,gemelectriciteitsverbruikhoekwoning,gemelectriciteitsverbruikhuurwoning,gemelectriciteitsverbruiktussenwoning,gemelectriciteitsverbruiktweeondereenkapwoning,gemelectriciteitsverbruikvrijstaandewoning,gemgasverbruikappartement,gemgasverbruikeigenwoning,gemgasverbruikhoekwoning,gemgasverbruikhuurwoning,gemgasverbruiktussenwoning,gemgasverbruiktweeondereenkapwoning,gemgasverbruikvrijstaandewoning,gemgestandaardiseerdinkomenvanhuish,gemiddeldaardgasverbruiktotaal,gemiddeldehuishoudensgrootte,gemiddeldelektriciteitsverbruiktotaal,gemiddeldewoningwaarde,gemiddeldinkomenperinkomensontvanger,gemiddeldinkomenperinwoner,gescheiden,geweldsenseksuelemisdrijven,gihandelenhoreca,hjvervoerinformatieencommunicatie,huishonderofrondsociaalminimum,huishoudensmeteenlaaginkomen,huishoudensmetkinderen,huishoudenstot110vansociaalminimum,huishoudenstot120vansociaalminimum,huishoudenstotaal,huishoudenszonderkinderen,huurwoningentotaal,inbezitoverigeverhuurders,inbezitwoningcorporatie,jongerenmetjeugdzorginnatura,k0tot15jaar,k15tot25jaar,k20huishoudensmethoogsteinkomen,k20personenmethoogsteinkomen,k25tot45jaar,k40huishoudensmetlaagsteinkomen,k40personenmetlaagsteinkomen,k45tot65jaar,k65jaarofouder,klfinancieledienstenonroerendgoed,koopwoningen,mannen,marokko,matevanstedelijkheid,mediaanvermogenvanparticulierehuish,meestvoorkomendepostcode,mnzakelijkedienstverlening,motorfietsen,nederlandseantillen
enaruba,nettoarbeidsparticipatie,nietwesterstotaal,omgevingsadressendichtheid,ongehuwd,opleidingsniveauhoog,opleidingsniveaulaag,opleidingsniveaumiddelbaar,oppervlakteland,oppervlaktetotaal,oppervlaktewater,overignietwesters,percentagebewoond,percentageeengezinswoning,percentagejongerenmetjeugdzorg,percentagemeergezinswoning,percentageonbewoond,percentagewerknemers,percentagewoningenmetstadsverwarming,percentagezelfstandigen,personenautos6jaarenouder,personenautosbrandstofbenzine,personenautosjongerdan6jaar,personenautosnaaroppervlakte,personenautosoverigebrandstof,personenautosperhuishouden,personenautostotaal,personenpersoortuitkeringao,personenpersoortuitkeringaow,personenpersoortuitkeringbijstand,personenpersoortuitkeringww,rucultuurrecreatieoverigediensten,scholenbinnen3km,sterfterelatief,sterftetotaal,suriname,totaaldiefstaluitwoningschuured,turkije,vernielingmisdrijftegenopenbareorde,verweduwd,vrouwen,westerstotaal,woningvoorraad,alleenstaande_mannen,alleenstaande_totaal_mannen_en_vrouwen,alleenstaande_vrouwen,ouder_in_eenouderhuishouden_mannen,ouder_in_eenouderhuishouden_totaal_mannen_en_vrouwen,ouder_in_eenouderhuishouden_vrouwen,overig_lid_huishouden_mannen,overig_lid_huishouden_totaal_mannen_en_vrouwen,overig_lid_huishouden_vrouwen,"partner_in_paar,_geen_kind(eren)_mannen","partner_in_paar,_geen_kind(eren)_totaal_mannen_en_vrouwen","partner_in_paar,_geen_kind(eren)_vrouwen",partner_in_paar_met_kind(eren)_mannen,partner_in_paar_met_kind(eren)_totaal_mannen_en_vrouwen,partner_in_paar_met_kind(eren)_vrouwen,persoon_in_institutioneel_huishouden_mannen,persoon_in_institutioneel_huishouden_totaal_mannen_en_vrouwen,persoon_in_institutioneel_huishouden_vrouwen,persoon_in_particulier_huishouden_mannen,persoon_in_particulier_huishouden_totaal_mannen_en_vrouwen,persoon_in_particulier_huishouden_vrouwen,thuiswonend_kind_mannen,thuiswonend_kind_totaal_mannen_en_vrouwen,thuiswonend_kind_vrouwen,totaal_personen_mannen,totaal_personen_totaal_mannen_en_vrouwen,totaal_pers
onen_vrouwen,popaantalbasisonderwijsscholenbinnen3km,popaantalbioscopenbinnen10km,popaantalgrotesupermarktenbinnen3km,popaantalhuisartsenpraktijkenbinnen3km,popaantalkinderdagverblijvenbinnen3km,popaantalrestaurantsbinnen3km,popaantalscholenhavovwobinnen5km,popaantalscholenvmbobinnen5km,popaantalziekenhuizenbinnen20km,popafstandtotbibliotheek,popafstandtotbioscoop,popafstandtotgrotesupermarkt,popafstandtothuisartsenpost,popafstandtothuisartsenpraktijk,popafstandtotkinderdagverblijf,popafstandtotopenbaargroen,popafstandtotoprithoofdverkeersweg,popafstandtotrestaurant,popafstandtotschoolbasisonderwijs,popafstandtotschoolhavovwo,popafstandtotschoolvmbo,popafstandtotsportterrein,popafstandtottreinstation,popafstandtotziekenhuis,popafstandtotzwembad,popagrarischterreinopp,popagrarischterreinperc,popagrarischterreinperinwoner,popakkerbouw,popalandbouwbosbouwenvisserij,popalandbouwbosbouwenvisserijbanen,popalandbouwbosbouwenvisserijbanenrelatief,popaow,poparbeidsongeschiktheidtotaal,popbasisonderwijs,popbebouwdterreinopp,popbebouwdterreinperc,popbebouwdterreinperinwoner,popbedrijfsmotorvoertuigen,popbedrijfsvestigingentotaal,popberoepsbegeleidendeleerweg,popberoepsopleidendeleerweg,popbevolkingsdichtheid,popbevolkingsgroei,popbevolkingsgroeirelatief,popbfnijverheidenenergie,popbfnijverheidenenergiebanen,popbfnijverheidenenergiebanenrelatief,popbijstandgerelateerdtotaowleeftijd,popbijstandgerelateerdvanafaowleeftijd,popbijstandtotdeaowleeftijd,popbinnenlandsmigratiesaldo,popbinnenlandsmigratiesaldorelatief,popbinnenwater,popblijvendgrasland,popbosenopennatuurlijkterreinopp,popbosenopennatuurlijkterreinperc,popbosenopennatuurlijkterreinperinwoner,popbroninkomenalswerknemergembestinkomen,popbroninkomenalswerknemergemgestandaardinkomen,popbroninkomenalswerknemermediaaninkomen,popbroninkomenalszelfstandigegembestinkomen,popbroninkomenalszelfstandigegemgestandaardinkomen,popbroninkomenalszelfstandigemediaaninkomen,popbronoverdrachtsinkomengembestinkomen,popbronoverdrachtsinkomen
gemgestandaardinkomen,popbronoverdrachtsinkomenmediaaninkomen,popbuitenwater,popbuurten,popcodea,popcodeb,popcodec,popcoded,popcodee,popcodef,popcodeg,popcodeh,popcodei,popcodej,popcodek,popcodel,popcodem,popcoden,popcodeo,popcodep,popcodeq,popcoder,popdunnemest,popedelpelsdieren,popeenpersoonshuishoudens,popeenpersoonshuishoudensrelatief,popeigendomonbekend,popemigratie,popfosfaatuitscheiding,popgeboorte,popgeboorteoverschot,popgeboorteoverschotrelatief,popgeboorterelatief,popgehuwdouderdan,popgehuwdtot,popgeiten,popgemeentelijkeenwaterschapswegen,popgemeenten,popgemiddeldaantalinwoners,popgemiddeldehuishoudensgrootte,popgemiddeldewoningwaarde,popgescheidenouderdan,popgescheidentot,popgftafval,popgihandelenhoreca,popgncommercieledienstverleningbanen,popgncommercieledienstverleningbanenrelatief,popgrijzedruk,popgroenedruk,popgroenvoedergewassen,popgrofhuishoudelijkrestafval,pophjvervoerinformatieencommunicatie,pophogerberoepsonderwijs,pophogerberoepsonderwijsbachelor,pophuishoudelijkrestafval,pophuishoudensmetkinderen,pophuishoudensmetkinderenrelatief,pophuishoudenszonderkinderen,pophuishoudenszonderkinderenrelatief,pophuurwoningen,popid,popimmigratie,popinwoners15jaarofouder,popinwonersop31december,popjongerdan5jaarleeftijdsgroep,popjongerdan5jaarrelatieveleeftijdsgroep,popk10tot15jaarleeftijdsgroep,popk10tot15jaarrelatieveleeftijdsgroep,popk15tot20jaarleeftijdsgroep,popk15tot20jaarrelatieveleeftijdsgroep,popk20tot25jaarleeftijdsgroep,popk20tot25jaarrelatieveleeftijdsgroep,popk25tot45jaarleeftijdsgroep,popk25tot45jaarrelatieveleeftijdsgroep,popk45tot65jaarleeftijdsgroep,popk45tot65jaarrelatieveleeftijdsgroep,popk5tot10jaarleeftijdsgroep,popk5tot10jaarrelatieveleeftijdsgroep,popk65tot80jaarleeftijdsgroep,popk65tot80jaarrelatieveleeftijdsgroep,popk80jaarofouderleeftijdsgroep,popk80jaarofouderrelatieveleeftijdsgroep,popkaliuitscheiding,popkalkoenen,popkippen,popkleinchemischafval,popklfinancieledienstenonroerendgoed,popkonijnen,popkoopwoningen,popkoppelvariabeleregioc
ode,popland,popmannen,popmarokko,popmarokkorelatief,popmatigstedelijk,popmiddelbaarberoepsonderwijs,popmigratiesaldo,popmigratiesaldorelatief,popmnzakelijkedienstverlening,popmotorfietsen,popmotorfietsenrelatief,popnaama,popnaamb,popnaamc,popnaamd,popnaame,popnaamf,popnaamg,popnaamh,popnaami,popnaamj,popnaamk,popnaaml,popnaamm,popnaamn,popnaamo,popnaamp,popnaamq,popnaamr,popnatuurlijkgrasland,popnederlandseachtergrond,popnederlandseachtergrondrelatief,popnietstedelijk,popnietwoningen,popnieuwbouwwoningen,popnieuwvormingen,popomgevingsadressendichtheid,popongehuwdouderdan,popongehuwdtot,popoudpapierenkarton,popounietcommercieledienstverleningbanen,popounietcommercieledienstverleningbanenrelatief,popoverigedoodsoorzaken,popoverighuishoudelijkafval,popoverignietwestersemigratieachtergrond,popoverignietwestersemigratieachtergrondrelatief,popoverigpluimvee,poppaardenenponys,popparticulierehuishoudensexclstudenten,popparticulierehuishoudensexclstudentengembestinkomen,popparticulierehuishoudensexclstudentengemgestandaardinkomen,popparticulierehuishoudensexclstudentenmediaaninkomen,poppercvoertuigenmetbromfietskenteken,poppersonenautos,poppersonenautosparticulieren,poppersonenautosparticulierenrelatief,poppersonenautosrelatief,popprovincialewegen,poprecreatieterreinopp,poprecreatieterreinperc,poprecreatieterreinperinwoner,poprijkswegen,poprucultuurrecreatieoverigediensten,poprundvee,popsaldovermeerderingwoningenrelatief,popschapen,popsemibebouwdterreinopp,popsemibebouwdterreinperc,popsemibebouwdterreinperinwoner,popslachteenden,popspeciaalbasisonderwijs,popspecialescholen,popsterfte,popsterfterelatief,popsterkstedelijk,popstikstofuitscheiding,popsuriname,popsurinamerelatief,poptextiel,poptijdelijkgrasland,poptotaalaantalbanen,poptotaalaantalparticulierehuishoudens,poptotaalhuishoudelijkafval,poptotaalmetmigratieachtergrond,poptotaalmetmigratieachtergrondrelatief,poptotaalnietwestersemigratieachtergrond,poptotaalnietwestersemigratieachtergrondrelatief,poptotalebevolking,popt
otaledruk,poptotaleoppervlakte,poptotaleoppervlaktecultuurgrond,poptotaleweglengte,poptotdeaowleeftijd,poptuinbouwonderglas,poptuinbouwopengrond,popturkije,popturkijerelatief,poptypeeenoudergezingembestinkomen,poptypeeenoudergezingemgestandaardinkomen,poptypeeenoudergezinmediaaninkomen,poptypeeenpersoonshuishoudengembestinkomen,poptypeeenpersoonshuishoudengemgestandaardinkomen,poptypeeenpersoonshuishoudenmediaaninkomen,poptypepaarmetkinderengembestinkomen,poptypepaarmetkinderengemgestandaardinkomen,poptypepaarmetkinderenmediaaninkomen,poptypepaarzonderkindgembestinkomen,poptypepaarzonderkindgemgestandaardinkomen,poptypepaarzonderkindmediaaninkomen,popuitkeringsontvangerstotaalmediaaninkomen,popuitwendigedoodsoorzaken,popvanafdeaowleeftijd,popvarkens,popvastemest,popverhuismobiliteitrelatief,popverkeersterreinopp,popverkeersterreinperc,popverkeersterreinperinwoner,popverpakkingsglas,popvertreknaaranderegemeente,popverweduwdouderdan,popverweduwdtot,popvestiginguitanderegemeente,popvoertuigenmetbromfietskenteken,popvoormaligenederlandseantillenaruba,popvoormaligenederlandseantillenarubarelatief,popvoorraadop1januari,popvoortgezetonderwijs,popvoortgezetonderwijsdiploma,popvrouwen,popwajonguitkering,popwaouitkering,popwatertotaal,popweinigstedelijk,popwerkloosheid,popwestersemigratieachtergrond,popwestersemigratieachtergrondrelatief,popwetenschappelijkonderwijs,popwiauitkeringwgaregeling,popwijken,popwomasterdoctoraal,popwoningbeziteigenwoninggembestinkomen,popwoningbeziteigenwoninggemgestandaardinkomen,popwoningbeziteigenwoningmediaaninkomen,popwoningbezithuurwoninggembestinkomen,popwoningbezithuurwoninggemgestandaardinkomen,popwoningbezithuurwoningmediaaninkomen,popwoningdichtheid,popwoningen,popzeersterkstedelijk,popziektenvanademhalingsstelsel,popziektenvanhartenvaatstelsel,begraafplaatsrechten_gemeenteheffingeuroinwoner,precariobelasting_gemeenteheffingeuroinwoner,reinigingsrechten_en_afvalstoffenheffing_gemeenteheffingeuroinwoner,rioolheffing_gemeenteheffingeuroinw
oner,secretarieleges_burgerzaken_gemeenteheffingeuroinwoner,toeristenbelasting_gemeenteheffingeuroinwoner,totaal_onroerendezaakbelasting_gemeenteheffingeuroinwoner,begraafplaatsrechten_gemeenteheffing1000euro,precariobelasting_gemeenteheffing1000euro,reinigingsrechten_en_afvalstoffenheffing_gemeenteheffing1000euro,rioolheffing_gemeenteheffing1000euro,secretarieleges_burgerzaken_gemeenteheffing1000euro,toeristenbelasting_gemeenteheffing1000euro,totaal_onroerendezaakbelasting_gemeenteheffing1000euro
codering_regio,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 
385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1
GM0275,2017,2017,Hulp bij het huishouden,385.0,9.0,34800.0,43645,54.2,0.8,1.1,0.6,0.5,60.0,,3180.0,534.0,455.0,7.0,93.0,,8374,3.0,8,390,17928,Rheden,1950.0,3170.0,2840.0,2260.0,2690.0,3110.0,3970.0,1010.0,1790.0,1600.0,1290.0,1380.0,1820.0,2810.0,,1540.0,2.0,2730.0,203.0,31.1,25.6,3738,5.0,725.0,235.0,6.8,8.1,5982,,,20863,6507,45.0,15.0,30.0,,6213,4705,16.8,18.1,8867,42.8,41.3,12675,11185,325.0,52.0,21341,246,3.0,,.,915.0,1755,166,,3028,1452.0,18679,,,,8177,8435,258,1403,95.0,74.0,,26.0,5.0,,,,15835.0,17765,5660.0,263.0,3735,1.0,21500,2390.0,10720.0,1160.0,910.0,465.0,8.3,13,610,173,5.0,1040,5.0,3300,22304,3932,21117,7445.0,16505.0,9045.0,520.0,2810.0,2290.0,580.0,1105.0,530.0,13495.0,26805.0,13280.0,9540.0,19125.0,9615.0,810.0,2315.0,1475.0,43100.0,87480.0,44390.0,11520.0,21130.0,9630.0,43910.0,89795.0,45865.0,8.3,2.9,5.7,2.4,10.2,12.6,3.0,3.1,3.1,1.2,6.2,0.8,,1.1,0.6,,0.8,0.6,0.5,2.1,2.0,,1.3,9.4,4.5,,,,5.6,60.0,0.1,1.0,10830.0,2370.0,3188.0,,,,1791.0,3180.0,223.0,961.0,534.0,-118.0,-2.7,455.0,2.3,14.0,1220.0,80.0,1100.0,-17.0,-0.4,2.58,65.2,,,,46.6,31.2,10.9,65.0,41.8,107.1,30.5,25.0,36.5,0.0,23.0,AM07,AR03,CR15,CP1500,CS150,GG1911,JZ28,KK43,LB2502,LG04,LD02,PV25,RE02,,RT12,TR08,VR07,ZK04,79.0,5706.0,8374.0,40.1,2.8,246.0,72.0,390.0,-220.0,-5.0,8.9,47.9,17928.0,35.0,292.0,,43586.0,2.04,203.0,10.0,3738.0,5790.0,725.0,8.5,51.0,47.2,36.9,15.5,1406.0,235.0,1153.0,160.0,7743.0,5982.0,28.7,6507.0,31.2,45.1,15115.0,372.0,37432.0,43527.0,1776.0,4.1,2341.0,5.4,2540.0,5.8,2165.0,5.0,8867.0,20.3,12675.0,29.0,2096.0,4.8,7827.0,17.9,3358.0,7.7,274.0,0.0,50.0,60.0,325.0,0.0,52.1,GM0275,81.76999999999998,21341.0,246.0,0.6,12780.0,380.0,126.0,2.9,915.0,1754.0,40.0,Midden-Gelderland ...,Gelderland ...,Arnhem/Nijmegen ...,Arnhem/Nijmegen ...,Arnhem/Nijmegen ...,Veiligheids- en Gezondheidsregio Gelderland-Mi...,Arnhem ...,Oost ...,IJsselstreek ...,Oostelijk Veehouderijgebied ...,Oost-Nederland ...,Gelderland ...,Oost-Nederland ...,...,Gerechtshof Arnhem-Leeuwarden ...,Veluwe en 
Veluwerand ...,Gelderland-Midden ...,Arnhem ...,5.5,36685.0,84.09999999999998,4450.0,557.0,40.0,176.0,1452.0,33.3,18679.0,2720.0,5.9,35.0,194.0,5201.0,1403.0,3.2,0.0,333.0,20.2,40.2,29.0,23.3,68.0,21499.0,20415.0,468.0,493.0,33.0,,,,1.0,465.0,3615.0,-1.4,435.0,,,,0.0,107.0,0.0,610.0,14.0,15570.0,243.0,173.0,0.4,197.0,8.1,16.8,20863.0,24085.0,6960.0,15.9,3028.0,6.9,43645.0,84.09999999999998,84.34999999999998,173447.0,327.0,4290.0,0.0,0.0,1040.0,2.4,33.3,22.8,3.8,24.2,24.2,11.4,61.4,33.1,23.0,45.2,33.1,96.3,15130.0,45.0,10840.0,7.0,2.0,110.1,,,,968.0,2721.0,8.8,3300.0,2704.0,2953.0,166.0,0.4,21117.0,2443.0,464.0,22304.0,1000.0,690.0,2.58,6990.0,760.0,3932.0,9.0,252.0,430.0,3.0,24.0,52.2,36.0,135.7,25.0,20.1,3.4,258.0,4113.0,3850.0,56.0,139.0,12.0,0.0,73.0,100.0,19.0,3.0,184.0,504.0,0.0,3197.0,4378.0,815.0,140.0,8016.0
GM0398,2018,2018,Hulp bij het huishouden,820.0,15.0,43100.0,55850,,0.8,1.0,0.6,0.6,125.0,,4180.0,1462.0,840.0,28.0,72.0,,6952,1.0,11,605,22736,Heerhugowaard,1930.0,3100.0,2870.0,2010.0,2830.0,3060.0,3880.0,610.0,1190.0,1260.0,880.0,1060.0,1310.0,1920.0,,1090.0,2.4,2760.0,211.0,31.4,24.9,4136,5.0,945.0,345.0,4.6,6.0,9229,,,23065,6884,30.0,9.0,21.0,1590.0,10311,6640,21.3,19.8,14307,30.3,37.4,14974,9618,440.0,69.0,28207,735,2.0,,.,970.0,2535,637,70.0,6828,1688.0,26776,,,,3821,3999,178,3390,98.0,78.0,10.1,22.0,2.0,87.0,,13.0,,22855,,786.0,7175,1.3,30030,3200.0,9080.0,890.0,800.0,525.0,10.4,7,387,844,2.0,1222,5.0,2202,27643,4923,23216,27440.0,52105.0,24655.0,2960.0,13780.0,10810.0,2800.0,5590.0,2790.0,50215.0,99965.0,49740.0,53540.0,107385.0,53875.0,4790.0,8550.0,3735.0,201875.0,400155.0,198270.0,64920.0,121330.0,56400.0,206665.0,408705.0,202005.0,10.4,2.8,8.3,6.4,17.7,17.9,1.9,5.3,2.0,2.2,2.4,0.8,6.6,1.0,0.6,,1.8,1.2,0.6,2.1,2.1,,2.7,9.3,2.5,,,,16.2,125.0,0.8,3.0,9310.0,3140.0,5363.0,,,,3601.0,4180.0,568.0,1346.0,1462.0,892.0,16.0,840.0,3.9,16.0,900.0,90.0,850.0,549.0,9.8,1.78,25.8,,,,49.5,31.9,36.8,,,138.8,31.2,24.8,95.0,0.0,39.0,AM15,AR02,CR19,CP1900,CS190,GG2707,JZ06,KK41,LB2705,LG07,LD03,PV27,RE04,RA0614,RT11,TR17,VR10,ZK18,48.0,0.0,6952.0,30.1,0.7,209.0,55.0,605.0,218.0,3.9,10.7,49.9,22736.0,796.0,278.0,,56296.0,2.37,211.0,9.1,4136.0,5009.0,945.0,10.2,43.0,29.9,43.7,4.6,2042.0,345.0,1172.0,138.0,11868.0,9229.0,40.0,6884.0,29.8,30.4,8528.0,341.0,45539.0,56742.0,3188.0,5.7,3594.0,6.4,3740.0,6.7,2900.0,5.2,14307.0,25.6,14974.0,26.8,3529.0,6.3,7883.0,14.1,1735.0,3.1,,0.0,178989.0,86.0,440.0,0.0,68.8,GM0398,38.21,28207.0,735.0,1.3,9770.0,584.0,132.0,2.4,970.0,2533.0,45.0,Noord-Holland Noord ...,Noord-Holland ...,Alkmaar en omgeving ...,Alkmaar en omgeving ...,Alkmaar en omgeving ...,GGD Hollands-Noorden ...,Alkmaar (Noord-Kennemerland) ...,Noordwest ...,West-Friesland en omgeving ...,Westelijk Holland ...,West-Nederland ...,Noord-Holland ...,Noord-Holland 
...,Noord-Holland Noord ...,Gerechtshof Amsterdam ...,Overig Nederland ...,Noord-Holland-Noord ...,Noord-Holland Noord ...,0.0,44099.0,79.0,4280.0,1122.0,252.0,142.0,1688.0,36.2,26776.0,2564.0,8.8,37.0,122.0,,3385.0,6.1,0.0,80.0,22.9,43.9,29.9,51.9,73.0,30029.0,24810.0,444.0,538.0,23.0,,,,0.0,525.0,2430.0,8.4,1628.0,,,,0.0,101.0,262.0,387.0,6.9,30690.0,195.0,844.0,1.5,222.0,20.6,23.6,23065.0,31167.0,11751.0,21.0,6828.0,12.2,55850.0,73.5,39.99,192611.0,301.0,4710.0,3.7,29.2,1222.0,2.2,,,10.0,24.5,24.6,19.2,61.3,32.8,55.1,45.6,33.3,133.6,14040.0,17.0,9320.0,2286.0,3.0,99.6,,,,1020.0,2798.0,4.8,2202.0,3347.0,4098.0,642.0,1.1,23216.0,3603.0,717.0,27643.0,1050.0,930.0,1.78,5660.0,730.0,4923.0,8.8,330.0,730.0,21.0,30.0,51.2,33.7,111.5,26.4,20.7,2.4,608.0,4868.0,5450.0,33.0,73.0,3.0,0.0,93.0,84.0,18.0,0.0,197.0,140.0,0.0,5217.0,4684.0,1024.0,0.0,11024.0
GM1959,2019,2019,Hulp bij het huishouden,1270.0,23.0,,55386,,1.0,1.0,0.8,0.7,400.0,,6090.0,276.0,1390.0,18.0,82.0,,5909,0.0,10,563,26124,Altena,2050.0,3640.0,2930.0,2350.0,2780.0,3450.0,4150.0,780.0,1530.0,1290.0,1100.0,1100.0,1480.0,1950.0,,1410.0,2.5,3240.0,254.0,,,2920,,1335.0,600.0,,,8826,,,22014,7279,31.0,6.0,25.0,1335.0,9833,6774,,,12372,,,15658,10749,545.0,68.0,27876,44,4.0,,.,1255.0,2235,87,73.0,1496,500.0,23531,8190.0,14880.0,18500.0,20063,22664,2602,1191,96.0,88.0,8.7,12.0,4.0,80.0,,20.0,,23190,,146.0,6170,1.3,29365,1700.0,9950.0,560.0,490.0,565.0,2.7,8,444,102,,72,,2811,27510,2386,22221,13820.0,29835.0,15965.0,1405.0,6005.0,4635.0,2250.0,4375.0,2205.0,34745.0,69245.0,34465.0,38290.0,76760.0,38470.0,1585.0,4245.0,2670.0,138790.0,275460.0,136655.0,48280.0,89240.0,40915.0,140375.0,279705.0,139325.0,2.7,1.0,1.8,1.5,2.9,3.2,0.4,0.9,2.1,1.6,7.9,1.0,11.7,1.0,0.8,,2.0,1.1,0.7,6.4,3.8,,12.9,11.7,7.8,,,,44.1,400.0,0.6,3.0,10160.0,1680.0,5199.0,,,,5840.0,6090.0,689.0,1106.0,276.0,581.0,10.5,1390.0,4.2,23.0,590.0,30.0,540.0,336.0,6.0,26.02,27.0,,,,,,,,,,,,,0.0,63.0,AM25,AR11,CR34,CP3400,CS340,GG5206,JZ34,KK44,LB3003,LG11,LD04,PV30,RE08,RA1323,RT14,TR04,VR20,ZK25,295.0,0.0,5909.0,26.8,0.5,179.0,40.0,563.0,119.0,2.1,10.1,57.3,26124.0,5127.0,631.0,,55676.5,2.48,254.0,6.4,2920.0,7006.0,1335.0,9.2,50.0,34.5,43.3,8.9,898.0,600.0,1132.0,,4690.0,8826.0,40.1,7279.0,33.1,31.1,2049.0,304.0,45553.0,55967.0,3040.0,5.5,3514.0,6.3,3659.0,6.6,3115.0,5.6,12372.0,22.3,15658.0,28.3,3279.0,5.9,8329.0,15.0,2420.0,4.4,,0.0,433942.0,88.0,545.0,0.0,68.5,GM1959,200.63,27876.0,44.0,0.1,5300.0,,125.0,2.3,1255.0,2237.0,40.0,West-Brabant ...,Zeeland-West-Brabant ...,Midden-Noord-Brabant ...,Midden-Noord-Brabant ...,Midden-Noord-Brabant ...,GGD West-Brabant ...,West Brabant Oost ...,Zuid ...,Biesbosch ...,Zuidwestelijk Akkerbouwgebied ...,Zuid-Nederland ...,Noord-Brabant ...,Zeeland-West-Brabant ...,West-Brabant ...,Gerechtshof 's-Hertogenbosch ...,Deltagebied ...,Midden- en West-Brabant 
...,West-Brabant ...,3.7,51504.0,93.0,32480.0,1052.0,184.0,113.0,500.0,30.1,23531.0,3466.0,4.5,24.0,139.0,8058.0,1190.0,2.1,0.0,380.0,,,,,78.0,29363.0,27495.0,496.0,530.0,36.0,,,,19.0,565.0,15953.0,8.0,8225.0,,,,0.0,53.0,0.0,444.0,8.0,0.0,140.0,102.0,0.2,326.0,13.5,18.5,22014.0,25513.0,3882.0,7.0,1496.0,2.7,55386.0,77.79999999999998,226.64,1195018.0,686.0,2710.0,0.2,2.6,72.0,0.1,,,,,,,,,,,,,12870.0,20.0,10160.0,5809.0,12.0,78.5,,,,982.0,1328.0,6.2,2811.0,1664.0,4306.0,88.0,0.2,22221.0,3375.0,,27510.0,440.0,540.0,26.02,17600.0,470.0,2386.0,4.3,341.0,400.0,19.0,,,,,,,,111.0,5512.0,0.0,46.0,126.0,14.0,0.0,86.0,101.0,16.0,3.0,162.0,796.0,0.0,4753.0,5597.0,907.0,144.0,8947.0
GM0576,2018,2018,Hulp bij het huishouden,295.0,18.0,13600.0,16605,,0.7,0.9,0.7,0.8,190.0,,1690.0,735.0,270.0,23.0,77.0,,2504,2.0,10,168,6803,Noordwijkerhout,2020.0,3380.0,3160.0,2310.0,3030.0,3560.0,4460.0,740.0,1450.0,1440.0,960.0,1170.0,1730.0,2350.0,,1290.0,2.2,3000.0,274.0,32.1,26.7,1246,3.0,425.0,95.0,4.6,5.2,2563,,,7331,2264,37.0,10.0,27.0,505.0,2509,1961,23.9,19.8,3836,34.5,38.8,4891,3408,145.0,61.0,8366,81,3.0,,.,390.0,745,45,73.0,745,1094.0,7706,,,,2259,2342,83,538,95.0,73.0,12.2,27.0,5.0,82.0,,18.0,,7160,,362.0,1015,1.1,8175,610.0,3210.0,180.0,240.0,175.0,2.8,8,140,66,2.0,15,3.0,850,8239,1893,7115,4505.0,9270.0,4745.0,355.0,1730.0,1380.0,365.0,695.0,345.0,7975.0,15985.0,8015.0,7415.0,14865.0,7465.0,360.0,825.0,460.0,29715.0,58905.0,29210.0,9100.0,16360.0,7260.0,30075.0,59730.0,29670.0,2.8,1.0,4.6,2.1,6.3,17.1,1.6,1.9,6.0,1.1,7.7,0.7,7.0,0.9,0.7,,1.1,0.5,0.8,2.7,2.3,,5.6,11.8,2.0,,,,7.3,190.0,0.8,12.0,3260.0,590.0,1351.0,,,,1507.0,1690.0,119.0,321.0,735.0,80.0,4.8,270.0,0.9,14.0,210.0,20.0,180.0,20.0,0.0,0.83,7.7,,,,49.9,32.8,41.2,,,260.9,31.5,25.5,126.4,0.0,5.0,AM19,AR07,CR25,CP2500,CS250,GG4506,JZ18,KK45,LB2808,LG07,LD03,PV28,RE06,RA0818,RT13,TR02,VR16,ZK28,3.0,0.0,2504.0,34.2,2.2,337.0,3.0,168.0,28.0,1.7,10.1,48.3,6803.0,3.0,85.0,,16645.0,2.24,274.0,8.8,1246.0,1645.0,425.0,3.3,49.0,35.6,37.7,0.0,507.0,95.0,394.0,63.0,3214.0,2563.0,35.0,2264.0,30.9,36.7,13523.0,370.0,14096.0,16685.0,765.0,4.6,903.0,5.4,1102.0,6.6,859.0,5.2,3836.0,23.1,4891.0,29.5,841.0,5.1,2628.0,15.8,780.0,4.7,,0.0,0.0,12.0,145.0,0.0,61.2,GM0576,22.59,8366.0,81.0,0.5,5150.0,121.0,33.0,2.0,390.0,747.0,45.0,Holland Rijnland ...,Den Haag ...,Agglomeratie Leiden en Bollenstreek ...,Agglomeratie Leiden en Bollenstreek ...,Agglomeratie Leiden en Bollenstreek ...,GGD Hollands-Midden ...,Holland Rijnland ...,Zuidwest ...,Bollenstreek ...,Westelijk Holland ...,West-Nederland ...,Zuid-Holland ...,Den Haag ...,Rijnstreek ...,Gerechtshof Den Haag ...,Noordzeebadplaatsen ...,Hollands-Midden 
...,Zuid-Holland Noord ...,1.2,13967.0,84.09999999999998,4010.0,379.0,36.0,50.0,1094.0,36.9,7706.0,688.0,1.7,26.0,41.0,1198.0,538.0,3.2,0.0,1.0,7.3,45.3,31.2,73.79999999999998,141.0,8176.0,7596.0,457.0,492.0,19.0,,,,0.0,175.0,161.0,4.1,419.0,,,,0.0,0.0,0.0,140.0,8.4,6120.0,12.0,66.0,0.4,86.0,0.7,6.7,7331.0,7802.0,2638.0,15.9,745.0,4.5,16605.0,73.2,23.42,150676.0,104.0,1000.0,0.7,82.4,15.0,0.1,,,20.2,,,20.8,,,114.0,,,168.1,4270.0,7.0,3270.0,0.0,0.0,0.0,,,,451.0,723.0,6.0,850.0,743.0,2346.0,45.0,0.3,7115.0,945.0,215.0,8239.0,190.0,200.0,0.83,1330.0,210.0,1893.0,11.4,155.0,120.0,1.0,12.0,56.0,37.0,223.1,27.9,21.8,5.3,315.0,1939.0,0.0,11.0,31.0,0.0,105.0,115.0,133.0,15.0,28.0,169.0,0.0,1748.0,1913.0,2214.0,253.0,458.0,2809.0
GM1735,2017,2017,Hulp bij het huishouden,1135.0,32.0,28400.0,35011,59.3,1.2,1.6,0.9,0.9,610.0,,3575.0,165.0,545.0,15.0,85.0,,4275,1.0,7,277,16060,Hof van Twente,1970.0,3720.0,2920.0,2410.0,2830.0,3420.0,4160.0,950.0,1840.0,1440.0,1310.0,1310.0,1700.0,2250.0,,1660.0,2.4,3280.0,225.0,30.3,24.9,1936,3.0,790.0,145.0,4.7,4.6,5325,,,14692,5092,33.0,12.0,20.0,,5446,3951,23.2,17.6,6823,32.7,41.3,10341,8450,390.0,66.0,17406,20,4.0,,.,695.0,1730,34,,1139,604.0,14513,,,,21261,21541,280,697,96.0,87.0,,13.0,4.0,,,,13670.0,15325,5195.0,89.0,3535,1.3,18865,1270.0,8140.0,480.0,690.0,395.0,3.2,11,407,48,2.0,340,3.0,2502,17605,1947,14969,19880.0,45105.0,25365.0,1970.0,8260.0,6210.0,2265.0,4860.0,2655.0,52190.0,103930.0,51755.0,44235.0,88735.0,44400.0,1870.0,5215.0,3275.0,172055.0,346265.0,174230.0,51515.0,95375.0,43845.0,173925.0,351480.0,177505.0,3.2,0.3,2.7,2.8,3.6,7.3,0.4,0.4,2.1,2.0,13.0,1.2,,1.6,0.9,,1.1,1.0,0.9,4.8,4.5,,3.2,12.9,6.0,,,,4.0,610.0,0.2,2.0,8240.0,1270.0,2779.0,,,,2855.0,3575.0,302.0,902.0,165.0,-81.0,-2.3,545.0,3.4,30.0,560.0,40.0,480.0,22.0,0.6,2.8,49.9,,,,50.4,31.7,47.1,74.59999999999998,44.5,260.1,33.9,26.7,137.3,0.0,48.0,AM05,AR06,CR12,CP1200,CS120,GG1106,JZ14,KK43,LB2303,LG04,LD02,PV23,RE02,,RT12,TR11,VR05,ZK22,923.0,0.0,4275.0,29.1,1.2,152.0,164.0,277.0,-130.0,-3.7,7.9,54.3,16060.0,9711.0,669.0,,34970.5,2.36,227.0,6.5,1936.0,4812.0,790.0,4.9,43.0,44.8,40.8,23.7,418.0,145.0,830.0,126.0,3101.0,5325.0,36.2,5092.0,34.7,32.7,9283.0,175.0,29565.0,34930.0,1418.0,4.1,2192.0,6.3,2252.0,6.4,1699.0,4.9,6823.0,19.5,10341.0,29.5,1836.0,5.2,6294.0,18.0,2156.0,6.2,464.0,0.0,1440135.0,61.0,390.0,8281.0,66.1,GM1735,212.61,17406.0,20.0,0.1,6220.0,384.0,23.0,0.7,695.0,1728.0,49.0,Twente ...,Overijssel ...,Twente ...,Twente ...,Twente ...,GGD Regio Twente ...,Twente ...,Oost ...,Twente ...,Oostelijk Veehouderijgebied ...,Oost-Nederland ...,Overijssel ...,Oost-Nederland ...,...,Gerechtshof Arnhem-Leeuwarden ...,"Twente, Salland en Vechtstreek ...",Twente ...,Twente 
...,0.3,31925.0,91.2,13930.0,835.0,76.0,135.0,604.0,30.7,14513.0,2787.0,3.0,26.0,127.0,4184.0,697.0,2.0,0.0,444.0,14.6,45.9,30.9,92.2,60.0,18863.0,17972.0,513.0,539.0,88.0,,,,1.0,395.0,47354.0,5.7,1907.0,,,,14945.0,0.0,0.0,407.0,11.6,0.0,486.0,48.0,0.1,62.0,21.9,11.4,14692.0,16341.0,3086.0,8.8,1139.0,3.3,35011.0,85.59999999999998,215.41,1301610.0,758.0,2410.0,0.0,0.1,340.0,1.0,39.0,26.5,18.2,24.2,24.2,25.2,66.9,35.6,98.8,44.7,32.7,155.9,10660.0,19.0,8250.0,194533.0,29.0,78.29999999999998,,,,916.0,1100.0,8.5,2502.0,1122.0,2113.0,34.0,0.1,14969.0,2224.0,414.0,17605.0,360.0,440.0,2.8,14860.0,620.0,1947.0,5.6,168.0,240.0,10.0,15.0,54.7,35.6,180.3,27.3,20.9,6.5,70.0,3273.0,0.0,24.0,102.0,7.0,0.0,96.0,140.0,17.0,12.0,186.0,247.0,0.0,3378.0,4885.0,587.0,408.0,6523.0


# Preprocess --> Create test sets
This step will transform (select columns, impute, scale) the dataframe to be used in train/predict. 

In [5]:
# %%time  (timing is disabled; as a cell magic it has to be the first line of the cell to take effect)
# Run the project's preprocessing step on the frame produced by the get-data step.
# NOTE(review): save_all=True presumably also writes the preprocessed dataset(s) to disk -- confirm in preprocess_data.
df_preprocessed = preprocess_data(df=df_get_data_WMO, save_all=True)

# Optional sanity checks, left disabled:
# print(f"The shape of the dataframe from step 'Preprocess': {df_preprocessed.shape}")
# df_preprocessed.sample(5)

In [6]:
# Display the preprocessed frame; pandas truncates the middle rows of a long frame (use .head() for a short preview)
df_preprocessed

Unnamed: 0_level_0,Unnamed: 1_level_0,wmoclienten,wmoclientenper1000inwoners,aantalinwoners,gemiddeldehuishoudensgrootte,gescheiden,verweduwd,alleenstaande_mannen,alleenstaande_vrouwen,ouder_in_eenouderhuishouden_mannen,ouder_in_eenouderhuishouden_vrouwen,popaantalrestaurantsbinnen3km,popafstandtothuisartsenpraktijk,poparbeidsongeschiktheidtotaal,popbevolkingsdichtheid,popeenpersoonshuishoudensrelatief,popk65tot80jaarrelatieveleeftijdsgroep,popk80jaarofouderrelatieveleeftijdsgroep,popomgevingsadressendichtheid,poppersonenautosrelatief,popwerkloosheid,relative_mannen,relative_vrouwen,relative_alleenstaande_mannen,relative_alleenstaande_vrouwen,relative_gescheiden,relative_ongehuwd,relative_ouder_in_eenouderhuishouden_vrouwen,relative_ouder_in_eenouderhuishouden_mannen,relative_eenpersoonshuishoudens,relative_huishoudenszonderkinderen,relative_huishoudensmetkinderen,relative_popaantalrestaurantsbinnen3km,relative_popafstandtothuisartsenpraktijk,relative_poparbeidsongeschiktheidtotaal,relative_popbevolkingsdichtheid,relative_popk65tot80jaarrelatieveleeftijdsgroep,relative_popk80jaarofouderrelatieveleeftijdsgroep,relative_popomgevingsadressendichtheid,relative_popsterkstedelijk,relative_popmatigstedelijk,relative_popweinigstedelijk,relative_popnietstedelijk,relative_poptotaleoppervlakte
codering_regio,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
GM1680,2019,280.000000000000000,11.000000000000000,0.028367823505597,0.352941176470588,0.023919649339823,0.056821469907407,0.002698602712018,0.003476586526890,0.007500604887491,0.003295025315438,0.010270149586961,0.833333333333333,0.031405928969768,0.012152166255724,0.238663484486873,0.908450704225352,0.602941176470588,0.025307912940779,0.296404275996113,0.018951358180670,0.498660679114473,0.501339320885527,0.107736547703459,0.135113842275270,0.072638462144489,0.397699519420153,0.033483022138186,0.012211455132750,0.291786226685796,0.388809182209469,0.319404591104735,0.000200898132829,0.000094540297802,0.042937051918380,0.003624044749074,0.000791774994091,0.000232411565430,0.011029701410226,0.000000000000000,0.000000000000000,0.163082013708343,0.837075553454660,0.010985188686678
GM0197,2019,1080.000000000000000,40.000000000000000,0.030252902151078,0.411764705882353,0.019334713744050,0.059317129629630,0.011408433262471,0.013494504318990,0.026010162109848,0.012979185083983,0.014958696137531,0.375000000000000,0.032873495744056,0.045262416343783,0.248210023866348,0.669014084507042,0.529411764705882,0.110005061582588,0.245869776482021,0.013897662665824,0.501980674540002,0.498019325459998,0.413720336159343,0.473695901669690,0.055643996890156,0.444004294546666,0.120691570101070,0.039798600570138,0.295639968418282,0.345556627774366,0.358803403807352,0.000266558068935,0.000048128540224,0.042205027581356,0.010366147125245,0.000618266632113,0.000199918551701,0.028951168042649,0.033689978157047,0.316167487319981,0.358742734441524,0.292103217207804,0.003592980637518
GM0059,2019,900.000000000000000,32.000000000000000,0.031228502853139,0.470588235294118,0.023449054777207,0.054904513888889,0.003519019215290,0.003546118257428,0.009920154851198,0.004862171502049,0.006251395400759,0.500000000000000,0.033754035808629,0.043853469531525,0.260143198090692,0.570422535211268,0.367647058823529,0.050109667622743,0.254616132167153,0.027795325331649,0.508652879505960,0.491347120494040,0.126741347120494,0.125484704868591,0.064950452391211,0.452642539135430,0.044521039781703,0.014720666379434,0.301442348724076,0.314585644789622,0.383972006486302,0.000118483412322,0.000057446502944,0.042007755277897,0.009765905500503,0.000549332184403,0.000154387476662,0.015331035473216,0.000000000000000,0.000000000000000,0.438388625592417,0.561539566278903,0.003733304610082
GM0482,2019,590.000000000000000,29.000000000000000,0.022199846177583,0.470588235294118,0.016027106246807,0.038628472222222,0.009024581535985,0.012793838418955,0.021292039680619,0.015912561279434,0.021656619781201,0.166666666666667,0.020252421485178,0.398555829517436,0.255369928400955,0.514084507042254,0.514705882352941,0.233170237894382,0.177842565597668,0.011370814908402,0.490956201106184,0.509043798893816,0.441726045144252,0.604913049977577,0.062633913000149,0.435098908764762,0.198814091384723,0.043848721909413,0.299044819985305,0.306392358559882,0.394562821454813,0.000508246549405,0.000039862474463,0.035377946086003,0.113907020778315,0.000722507349644,0.000264088893318,0.075340076735263,0.567043699237630,0.244157656086502,0.178882854153172,0.010463899546564,0.000501270616374
GM0613,2019,480.000000000000000,19.000000000000000,0.028234417939916,0.470588235294118,0.023838975986232,0.038158275462963,0.008064849022724,0.011750862460888,0.022743769658843,0.017077875110504,0.019870506809556,0.208333333333333,0.021132961549751,0.201127157449806,0.164677804295943,0.422535211267606,0.308823529411765,0.150497722287835,0.218658892128280,0.018319646241314,0.495350401646156,0.504649598353844,0.313996280321317,0.441810771239761,0.072731589569071,0.451030825847810,0.169364093229393,0.037196786830755,0.261394891944990,0.304223968565815,0.434381139489194,0.000371967868308,0.000035613944838,0.029282576866764,0.046100273040244,0.000522337857623,0.000154327094298,0.040441612915991,0.010684183451387,0.540540540540541,0.415496023109493,0.033635392346959,0.000940208143722
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM1892,2017,715.000000000000000,17.000000000000000,0.047023721829675,0.470588235294118,0.035375265549789,0.065682870370370,0.017068791149821,0.020656272564384,0.051536414226954,0.030780358434461,0.015628488501898,0.208333333333333,0.036102142647490,0.118351532229658,0.181384248210024,0.457746478873239,0.308823529411765,0.173949721612958,0.215743440233236,0.041061276058117,0.496334523005691,0.503665476994309,0.401755570560432,0.470000964599209,0.065013986688531,0.432984469952735,0.185444197935758,0.051364907880776,0.268310026072529,0.319506992178241,0.412182981749230,0.000180862351693,0.000021703482203,0.030143725282145,0.016759911256873,0.000330375229092,0.000094048422880,0.027997492042057,0.325552233047169,0.290826661522138,0.276839972991222,0.106829362399923,0.001544564483457
GM0879,2017,605.000000000000000,28.000000000000000,0.024042003032367,0.352941176470588,0.015045580444779,0.044921875000000,0.003813130791934,0.003374963228412,0.008105492378418,0.003053925902114,0.015628488501898,0.791666666666667,0.022894041678896,0.027474462839028,0.317422434367542,0.570422535211268,0.544117647058824,0.067150329002868,0.270165208940719,0.023373341756159,0.513506025765341,0.486493974234659,0.176155515537701,0.153991780948423,0.054670545320220,0.438103153714734,0.036477813178187,0.015468439765434,0.324891843410362,0.338820301783265,0.336287854806373,0.000346308352957,0.000106201228240,0.036939557648797,0.008265226023918,0.000706469040033,0.000253959458835,0.024380108048206,0.000000000000000,0.059103292238075,0.484369949669853,0.456665281433255,0.005596804728263
GM0301,2017,1470.000000000000000,31.000000000000000,0.053835526018145,0.294117647058824,0.060585688547073,0.087565104166667,0.017884047800871,0.021041371379670,0.037503024437455,0.026219561199068,0.067425764679616,0.208333333333333,0.084531846199002,0.199542092286016,0.458233890214797,0.577464788732394,0.411764705882353,0.250379618694112,0.153547133138970,0.060012634238787,0.490557667934094,0.509442332065906,0.368610054921842,0.419307139839459,0.096556822982678,0.461829319814111,0.138466413181242,0.032741867342628,0.384083682779595,0.290862172250127,0.325054144970278,0.000648500211238,0.000019011406844,0.061258977608787,0.024419095901986,0.000325306294888,0.000097169412759,0.034093789607098,0.364174059991551,0.344528939585974,0.145965356991973,0.053865652724968,0.000906844106464
GM1896,2017,195.000000000000000,9.000000000000000,0.024798354587353,0.588235294117647,0.010662328232985,0.032154224537037,0.012006976120204,0.012424785387639,0.021533994676990,0.013642208470626,0.009600357222594,0.250000000000000,0.021132961549751,0.043501232828461,0.152744630071599,0.387323943661972,0.235294117647059,0.097013666272988,0.195335276967930,0.016424510423247,0.509211528979336,0.490788471020664,0.526917387601416,0.528710385942893,0.038459814424672,0.434667622932449,0.153525482988928,0.039894213097853,0.255649886404400,0.314958746861174,0.429391366734425,0.000215159800977,0.000044824958537,0.033170469317316,0.012102738804967,0.000569276973419,0.000152404859026,0.031601595768524,0.000000000000000,0.066340938634632,0.771437536420279,0.162266349903626,0.003938320857053


In [6]:
# Testing shortcut for the train step: overwrite `df_preprocessed` with a dataset that was
# saved earlier, so training runs against a fixed, known input file.
datapath = '../../data/'
# Other saved datasets that can be swapped in for `filename`:
#   'df_preprocessed_202104042151_All.parquet.gzip'
#   'df_preprocessed_202104042151_Minimum_Maikel.parquet.gzip'
#   'df_preprocessed_202104042151_No_Relative.parquet.gzip'
filename = 'df_preprocessed_202104042151_Boerenverstand_Maikel.parquet.gzip'
df_preprocessed = pd.read_parquet(f'{datapath}{filename}')
# Keep the name of the input file; it is stored with the best-model properties further down
input_filename = filename

In [19]:
# Preview the first rows of the loaded dataset to verify the columns and the index
df_preprocessed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,wmoclienten,wmoclientenper1000inwoners,aantalinwoners,gemiddeldehuishoudensgrootte,gescheiden,verweduwd,alleenstaande_mannen,alleenstaande_vrouwen,ouder_in_eenouderhuishouden_mannen,ouder_in_eenouderhuishouden_vrouwen,popaantalrestaurantsbinnen3km,popafstandtothuisartsenpraktijk,poparbeidsongeschiktheidtotaal,popbevolkingsdichtheid,popeenpersoonshuishoudensrelatief,popk65tot80jaarrelatieveleeftijdsgroep,popk80jaarofouderrelatieveleeftijdsgroep,popomgevingsadressendichtheid,poppersonenautosrelatief,popwerkloosheid,relative_mannen,relative_vrouwen,relative_alleenstaande_mannen,relative_alleenstaande_vrouwen,relative_gescheiden,relative_ongehuwd,relative_ouder_in_eenouderhuishouden_vrouwen,relative_ouder_in_eenouderhuishouden_mannen,relative_eenpersoonshuishoudens,relative_huishoudenszonderkinderen,relative_huishoudensmetkinderen,relative_popaantalrestaurantsbinnen3km,relative_popafstandtothuisartsenpraktijk,relative_poparbeidsongeschiktheidtotaal,relative_popbevolkingsdichtheid,relative_popk65tot80jaarrelatieveleeftijdsgroep,relative_popk80jaarofouderrelatieveleeftijdsgroep,relative_popomgevingsadressendichtheid,relative_popsterkstedelijk,relative_popmatigstedelijk,relative_popweinigstedelijk,relative_popnietstedelijk,relative_poptotaleoppervlakte
codering_regio,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
GM1680,2019,280.0,11.0,0.028367823505597,0.352941176470588,0.023919649339823,0.056821469907407,0.002698602712018,0.00347658652689,0.007500604887491,0.003295025315438,0.010270149586961,0.833333333333333,0.031405928969768,0.012152166255724,0.238663484486873,0.908450704225352,0.602941176470588,0.025307912940779,0.296404275996113,0.01895135818067,0.498660679114473,0.501339320885527,0.107736547703459,0.13511384227527,0.072638462144489,0.397699519420153,0.033483022138186,0.01221145513275,0.291786226685796,0.388809182209469,0.319404591104735,0.000200898132829,9.4540297802e-05,0.04293705191838,0.003624044749074,0.000791774994091,0.00023241156543,0.011029701410226,0.0,0.0,0.163082013708343,0.83707555345466,0.010985188686678
GM0197,2019,1080.0,40.0,0.030252902151078,0.411764705882353,0.01933471374405,0.05931712962963,0.011408433262471,0.01349450431899,0.026010162109848,0.012979185083983,0.014958696137531,0.375,0.032873495744056,0.045262416343783,0.248210023866348,0.669014084507042,0.529411764705882,0.110005061582588,0.245869776482021,0.013897662665824,0.501980674540002,0.498019325459998,0.413720336159343,0.47369590166969,0.055643996890156,0.444004294546666,0.12069157010107,0.039798600570138,0.295639968418282,0.345556627774366,0.358803403807352,0.000266558068935,4.8128540224e-05,0.042205027581356,0.010366147125245,0.000618266632113,0.000199918551701,0.028951168042649,0.033689978157047,0.316167487319981,0.358742734441524,0.292103217207804,0.003592980637518
GM0059,2019,900.0,32.0,0.031228502853139,0.470588235294118,0.023449054777207,0.054904513888889,0.00351901921529,0.003546118257428,0.009920154851198,0.004862171502049,0.006251395400759,0.5,0.033754035808629,0.043853469531525,0.260143198090692,0.570422535211268,0.367647058823529,0.050109667622743,0.254616132167153,0.027795325331649,0.50865287950596,0.49134712049404,0.126741347120494,0.125484704868591,0.064950452391211,0.45264253913543,0.044521039781703,0.014720666379434,0.301442348724076,0.314585644789622,0.383972006486302,0.000118483412322,5.7446502944e-05,0.042007755277897,0.009765905500503,0.000549332184403,0.000154387476662,0.015331035473216,0.0,0.0,0.438388625592417,0.561539566278903,0.003733304610082
GM0482,2019,590.0,29.0,0.022199846177583,0.470588235294118,0.016027106246807,0.038628472222222,0.009024581535985,0.012793838418955,0.021292039680619,0.015912561279434,0.021656619781201,0.166666666666667,0.020252421485178,0.398555829517436,0.255369928400955,0.514084507042254,0.514705882352941,0.233170237894382,0.177842565597668,0.011370814908402,0.490956201106184,0.509043798893816,0.441726045144252,0.604913049977577,0.062633913000149,0.435098908764762,0.198814091384723,0.043848721909413,0.299044819985305,0.306392358559882,0.394562821454813,0.000508246549405,3.9862474463e-05,0.035377946086003,0.113907020778315,0.000722507349644,0.000264088893318,0.075340076735263,0.56704369923763,0.244157656086502,0.178882854153172,0.010463899546564,0.000501270616374
GM0613,2019,480.0,19.0,0.028234417939916,0.470588235294118,0.023838975986232,0.038158275462963,0.008064849022724,0.011750862460888,0.022743769658843,0.017077875110504,0.019870506809556,0.208333333333333,0.021132961549751,0.201127157449806,0.164677804295943,0.422535211267606,0.308823529411765,0.150497722287835,0.21865889212828,0.018319646241314,0.495350401646156,0.504649598353844,0.313996280321317,0.441810771239761,0.072731589569071,0.45103082584781,0.169364093229393,0.037196786830755,0.26139489194499,0.304223968565815,0.434381139489194,0.000371967868308,3.5613944838e-05,0.029282576866764,0.046100273040244,0.000522337857623,0.000154327094298,0.040441612915991,0.010684183451387,0.540540540540541,0.415496023109493,0.033635392346959,0.000940208143722


In [7]:
# Work on a copy so the steps below leave `df_preprocessed` itself untouched
df = df_preprocessed.copy()

# Train

## Train model

#### Stappen hieronder mogelijk verplaatsen naar prepare stap, later beoordelen

In [8]:
# Check for rows in which the target column(s) in Y_VALUE are empty; those rows are removed before training.
# NOTE(review): the return value is discarded, so this relies on the helper dropping the rows
# from `df` in place -- confirm; if it returns a new frame instead, the result must be assigned back to `df`.
drop_nan_from_specific_columns(df,Y_VALUE)

In [9]:
# Target: selecting with the list Y_VALUE yields a (single-column) DataFrame
y = df[Y_VALUE]
# Features: everything except the columns listed in X_DROP_VALUES
X = df.drop(columns=X_DROP_VALUES)
# Split X and y into a train and a test part; RANDOM_STATE fixes the shuffle for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

## Gridsearch

In [10]:
# Pipeline used by the grid search. It has a single 'clf' step; the LinearRegression
# instance is only a placeholder that the search swaps for each candidate model.
pl_gs_total = Pipeline(steps=[('clf', LinearRegression())])

# Search space: one entry per candidate model, holding the model itself plus the
# hyperparameter values to try for it. Add further models and/or parameters here.
# NOTE(review): LinearRegression's `normalize` parameter was deprecated in scikit-learn 1.0
# and removed in 1.2 -- the first entry needs revisiting when upgrading scikit-learn.
param_grid_total = [
    dict(clf=[LinearRegression()], clf__normalize=NORMALIZE),
    dict(clf=[Ridge()], clf__alpha=ALPHA),
    dict(clf=[Lasso()], clf__alpha=ALPHA),
    dict(clf=[KNeighborsRegressor()], clf__n_neighbors=NEIGHBORS),
    # Disabled candidate:
    # dict(clf=[SVR()], clf__kernel=KERNEL, clf__C=C_REGULARIZATION),
    dict(clf=[XGBRegressor()], clf__gamma=GAMMA, clf__n_estimators=N_ESTIMATORS),
]

# Build the grid search itself; train scores are kept next to the validation scores
grid_search_total = GridSearchCV(
    estimator=pl_gs_total,
    param_grid=param_grid_total,
    cv=CROSS_VALIDATE,
    scoring=MODEL_SCORING,
    return_train_score=True,
)

In [11]:
%%time
# Run the grid search: fit every candidate model / hyperparameter combination on the training set
grid_search_total.fit(X_train, y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


CPU times: user 1min 53s, sys: 1.17 s, total: 1min 54s
Wall time: 14.8 s


GridSearchCV(cv=5, estimator=Pipeline(steps=[('clf', LinearRegression())]),
             param_grid=[{'clf': [LinearRegression()],
                          'clf__normalize': [True, False]},
                         {'clf': [Ridge()],
                          'clf__alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
                         {'clf': [Lasso()],
                          'clf__alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
                         {'clf': [KNeighborsRegressor()],
                          'clf__n_neighbors': [3, 5, 11, 19]},
                         {'clf': [XGBRegresso...
                                               missing=nan,
                                               monotone_constraints=None,
                                               n_estimators=50, n_jobs=None,
                                               num_parallel_tree=None,
                                               random_state=None,
                                             

### Evaluate

In [12]:
# Report the winning model of the grid search together with its scores.
# NOTE(review): GridSearchCV.best_score_ is the mean cross-validated score of the best
# candidate, so the "train score" printed below is in fact a cross-validation score.
best_clf = grid_search_total.best_estimator_['clf']
print(f"Het model met de beste train score is:\n{best_clf}")
# Convert the best search score into an RMSE
train_rmse = rmse_from_neg_mean_squared_error(grid_search_total.best_score_)
print(f"Dit model heeft een train score RMSE van {train_rmse}")
# RMSE for the best estimator as computed by the project helper (reported as the test score)
test_rmse = rmse_from_gridsearch_best_estimator(grid_search_total)
print(f"Dit model heeft een test score RMSE van  {test_rmse}")

Het model met de beste train score is:
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=2, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=50, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)
Dit model heeft een train score RMSE van 5.4440744371876235
Dit model heeft een test score RMSE van  5.2714100899898275


### Save best model and best model properties

In [13]:
# opslaan van beste estimator vanuit de gridsearch naar een Pickle file
suffix_datetime = datetime.strftime(datetime.now(), format='%Y%m%d%H%M')
output_filename = f'../../data/best_model_{suffix_datetime}.pickle'
pickle.dump(grid_search_total.best_estimator_, open(output_filename, 'wb'))

In [14]:
# Temporary fallback, kept disabled: a dummy value for input_filename
# input_filename = 'Hier komt uiteindelijk de input_filename_locatie'

# Collect every property of the best model that has to be logged.
# Each value is a one-element list (Y_VALUE already is one), giving a one-row DataFrame.
best_clf = grid_search_total.best_estimator_['clf']
clf_name_and_params = split_clf_and_params(best_clf)
best_model_properties_dict = dict(
    Model=[clf_name_and_params[0]],
    Gridsearch_Params=[clf_name_and_params[1]],
    Train_RMSE=[rmse_from_neg_mean_squared_error(grid_search_total.best_score_)],
    Test_RMSE=[rmse_from_gridsearch_best_estimator(grid_search_total)],
    Number_of_features=[len(X.columns)],
    Y_value=Y_VALUE,
    Input_filename=[input_filename],
    Output_filename=[output_filename],
)
best_model_properties = pd.DataFrame(data=best_model_properties_dict)

In [15]:
# Show the one-row summary of the best model before it is written to the log
best_model_properties

Unnamed: 0,Model,Gridsearch_Params,Train_RMSE,Test_RMSE,Number_of_features,Y_value,Input_filename,Output_filename
0,XGBRegressor,"base_score=0.5, booster='gbtree', colsample_by...",5.444074437187624,5.271410089989828,41,wmoclientenper1000inwoners,df_preprocessed_202104042151_Boerenverstand_Ma...,../../data/best_model_202104192018.pickle


In [16]:
# Write the best-model properties to a timestamped CSV in the training log folder
# (same timestamp suffix as the pickled model)
properties_csv = f'../../data/log_train/best_model_properties_{suffix_datetime}.csv'
best_model_properties.to_csv(properties_csv, header=True, index=False)

# Feature importance

In [17]:
# Rank the input features by how strongly the best model relies on them.
# The grid search can select any of the candidate models, and not all of them expose
# `feature_importances_`, so pick the measure the winning model actually offers.
best_clf = grid_search_total.best_estimator_.named_steps["clf"]
if hasattr(best_clf, "feature_importances_"):
    # Tree-based models such as XGBRegressor provide importances directly
    feature_importances = best_clf.feature_importances_
elif hasattr(best_clf, "coef_"):
    # Linear models (LinearRegression, Ridge, Lasso): fall back to the absolute coefficient size.
    # ravel() flattens a (1, n_features) coefficient array to one value per feature.
    # NOTE(review): coefficient magnitudes are only comparable if the features share a scale -- confirm.
    feature_importances = np.abs(np.ravel(best_clf.coef_))
else:
    # e.g. KNeighborsRegressor offers no per-feature measure
    feature_importances = None

if feature_importances is None:
    print(f"{type(best_clf).__name__} exposes neither feature_importances_ nor coef_")
    important_attributes = []
else:
    # (importance, column name) pairs, most important first
    important_attributes = sorted(zip(feature_importances, X_train.columns), reverse=True)
# Show the 40 highest-ranked features
important_attributes[:40]

[(0.1931905, 'relative_huishoudensmetkinderen'),
 (0.103897125, 'relative_poparbeidsongeschiktheidtotaal'),
 (0.061789, 'relative_eenpersoonshuishoudens'),
 (0.04981223, 'popk80jaarofouderrelatieveleeftijdsgroep'),
 (0.029199164, 'relative_alleenstaande_vrouwen'),
 (0.027832935, 'ouder_in_eenouderhuishouden_vrouwen'),
 (0.027375886, 'relative_popnietstedelijk'),
 (0.026788916, 'relative_huishoudenszonderkinderen'),
 (0.025175598, 'relative_mannen'),
 (0.02457151, 'relative_popmatigstedelijk'),
 (0.024438309, 'relative_poptotaleoppervlakte'),
 (0.024085274, 'popafstandtothuisartsenpraktijk'),
 (0.022022355, 'relative_ouder_in_eenouderhuishouden_mannen'),
 (0.021079514, 'ouder_in_eenouderhuishouden_mannen'),
 (0.021029627, 'relative_popbevolkingsdichtheid'),
 (0.020642133, 'popbevolkingsdichtheid'),
 (0.020444486, 'popk65tot80jaarrelatieveleeftijdsgroep'),
 (0.019716877, 'relative_popomgevingsadressendichtheid'),
 (0.019303676, 'verweduwd'),
 (0.01772483, 'relative_gescheiden'),
 (0.0174

## Code voor combineren output CSV's voor visualisatie en vergelijking

In [59]:
# ## combineren van de verschillende best model properties csv's naar één dataframe
# all_filenames = list_filenames(settings.train['LOG_PATH'], filename_str_contains='.csv')
# combined_logging = pd.concat([pd.read_csv(f"{settings.train['LOG_PATH']}{f}") for f in all_filenames ])
# combined_logging

## Code voor testen pickle file & predict

In [34]:
# # pickle file inladen voor predict
# loaded_model = get_latest_file(output_filename_str_contains='best_model_', datapath=datapath, filetype='pickle')
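# # How to interpret this score: for a scikit-learn regressor (or a pipeline ending in one), `score` returns R^2
# # (coefficient of determination) on the given data, not an RMSE; 1.0 is a perfect fit and it can be negative.
# # This assumes loaded_model is the pickled regression pipeline.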
# result = loaded_model.score(X_test, y_test)
# print(result)
# loaded_model.predict(X_test)

In [35]:
# regel om te testen of opgeslagen pickle file overeen komt met model
#grid_search_total.best_estimator_.predict(X_test)

In [None]:
# dit is het beste model uit de grid search
#grid_search_total.best_estimator_