In [1]:
import os
os.sys.path.append('../src/')

%load_ext autoreload
%autoreload 2

In [11]:
import gentrification as gn
import gentriviz as gv
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

import hvplot.pandas
import holoviews as hv
from holoviews.operation.datashader import datashade, spread
from holoviews.element import tiles
from holoviews import opts

import os
from dotenv import load_dotenv

load_dotenv()
PATH = os.environ.get('PATH_RAW')

BG_TILES = tiles.CartoLight()
hv.extension('bokeh')

In [20]:
# Define some helper functions

def filter_corporaties(gdf: gpd.GeoDataFrame, 
                       corporaties: gpd.GeoDataFrame):
    if gdf.crs != 28992:
        gdf = gdf.to_crs(28992)
    if corporaties.crs != 28992:
        corporaties = corporaties.to_crs(28992)
    if not 'naam' in corporaties.columns:
        corporaties = corporaties.rename(columns={'Corporatie': 'naam',
                                                  'categorie_pbl': 'naam'})
    gdf = gdf.sjoin(corporaties[['naam', 'geometry']], predicate='intersects', how='left')
    gdf = gdf[gdf.naam.isna()].copy()
    gdf.drop(['naam', 'index_right'], axis=1, inplace=True)
    
    return gdf

def write_scenarios(df, scenario, jaar):
    return df.to_csv(PATH + f'for_viz/{jaar}_{scenario}.csv', index=False)

## Import data

In [4]:
# Import WOZ-waarden with geometries
woz = gn.import_data('woz_bag')

# Import neighborhood data (CBS takes a while)

cbs = gn.import_data('cbs')
ses_woa = gn.import_data('ses_woa')
leefbaarometer = gn.import_data('leefbaarometer')
overwaarde = gn.import_data('overwaarde')
gemeenten = gn.import_data('gemeenten')
zh = gpd.read_parquet(PATH + 'filters/zuid_holland_corporaties.parquet')
nh = gpd.read_file(PATH + 'filters/amsterdam_metropool_corporaties.geojson')
corp = gpd.read_parquet(PATH + 'filters/corporaties_vves.parquet')

print(f'''Imported\n--------\nWOZ: {woz.shape[0]} rows and {woz.shape[1]} columns
CBS: {cbs.shape[0]} rows and {cbs.shape[1]} columns
SES_WOA: {ses_woa.shape[0]} rows and {ses_woa.shape[1]} columns
LEEFBAAROMETER: {leefbaarometer.shape[0]} rows and {leefbaarometer.shape[1]} columns
OVERWAARDE: {overwaarde.shape[0]} rows and {overwaarde.shape[1]} columns
WOONPLAATSEN: {gemeenten.shape[0]} rows and {gemeenten.shape[1]} columns''')

Imported
--------
WOZ: 8128486 rows and 14 columns
CBS: 17918 rows and 3 columns
SES_WOA: 28634 rows and 12 columns
LEEFBAAROMETER: 13808 rows and 10 columns
OVERWAARDE: 8 rows and 6 columns
WOONPLAATSEN: 342 rows and 2 columns


In [126]:
params = {'min_oppervlakte': 70,
          'max_oppervlakte': 250,
          'plaats': ['Groningen'],
          'income1': 29000,
          'income2': 25000,
          'region': False,
          'inflation': True,
          'overwaarde': False,
          'mortgage_range': 0.9,
          'divorced': False}

In [114]:
# Get local dataset

dict_2016, dict_2022 = gn.compare_jaren(gdf=woz, **params)
df16 = dict_2016.get('df')
df22 = dict_2022.get('df')
print(f'2016: {len(df16)} objects\n2022: {len(df22)} objects')

2016: 16157 objects
2022: 16662 objects


In [115]:
df16_filtered = filter_corporaties(df16, corp)
df22_filtered = filter_corporaties(df22, corp)
print(f'2016: {len(df16_filtered)} objects\n2022: {len(df22_filtered)} objects')

2016: 5877 objects
2022: 6796 objects


In [108]:
onv = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'onv')
woz_dif = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'woz')
lbm = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'lbm')
verm = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'vermogen_percentielgroep')
soc = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'soc')
afw = gv.plot_histograms(df16_filtered, df22_filtered, ses_woa, leefbaarometer, 'ses_woa_score')
maps = gv.plot_maps(df16_filtered, df22_filtered, **params)

(maps + onv + woz_dif + lbm + verm + soc + afw).cols(2)

In [59]:
# Write filtered data to file for visualisation


scenario = 'utrecht_noregion_29000_geenoverwaarde_sochuisgefilterd_60+m2'

dfs = [df16, df22]
jaren = ['2016', '2022']
for df, jaar in zip(dfs, jaren):
    df = df.to_crs(4326)
    df['latitude'] = df.geometry.y
    df['longitude'] = df.geometry.x
    df = df[['woz_2016_m2', 'woz_2022_m2', 'woz_difference_abs', 'woz_difference_postcode_5', 'latitude', 'longitude']].copy()
    write_scenarios(df, scenario, jaar)

### Get stats 2016-2022

In [10]:

gemeenten_list = gemeenten.gemeente.tolist()
len(gemeenten_list)

342

In [125]:
params

{'min_oppervlakte': 70,
 'max_oppervlakte': 120,
 'plaats': ['Stadskanaal'],
 'income1': 90000,
 'income2': 60300,
 'region': True,
 'inflation': True,
 'overwaarde': False,
 'mortgage_range': 0.9,
 'divorced': False}

In [159]:
buurten = gpd.read_file(PATH + 'cbs/wijkbuurt2023/buurt_2023_v0.shp')

In [163]:
buurten = buurten[buurten['WATER'] == 'NEE'][['geometry', 'BU_CODE', 'BU_NAAM']].copy()

In [179]:
ses_woa = ses_woa.to_crs(28992)

dfs = []
scenarios = ['overwaarde', 'geen_overwaarde']

for scenario in scenarios:
    if scenario == 'overwaarde':
        params.update({'overwaarde': True})
    else:
        params.update({'overwaarde': False})

    for wp in gemeenten_list:
        
        for i in range(20000, 80000, 10000):
            params.update({'plaats': [wp],
                            'income1': i,
                            'income2': round(i * 0.67)})
                        
            df16, df22 = gn.compare_jaren(woz, **params)
            df16_ = df16.get('df')
            df22_ = df22.get('df')

            # Get stats
            
            df16_ = df16_[['geometry', 'woz_2016', 'woz_2022', 'woz_difference_postcode_5']].sjoin(leefbaarometer[['geometry', 'lbm', 'onv', 'soc']], predicate='intersects', how='left')
            df16_.drop('index_right', axis=1, inplace=True)
            df16_ = df16_.sjoin(ses_woa[['geometry', 'ses_woa_score', 'ses_woa_financiele_welvaart']], predicate='intersects', how='left')

            df22_ = df22_[['geometry', 'woz_2016', 'woz_2022', 'woz_difference_postcode_5']].sjoin(leefbaarometer[['geometry', 'lbm', 'onv', 'soc']], predicate='intersects', how='left')
            df22_.drop('index_right', axis=1, inplace=True)
            df22_ = df22_.sjoin(ses_woa[['geometry', 'ses_woa_score', 'ses_woa_financiele_welvaart']], predicate='intersects', how='left')
            
            
            inkomen = round(i + (i * 0.67))
            df16_['jaar'] = 2016
            df22_['jaar'] = 2022
            
            df = pd.concat([df16_, df22_]).reset_index(drop=True)
            df.drop('index_right', axis=1, inplace=True)
            df['inkomen_laag'] = inkomen
            df['inkomen_hoog'] = inkomen + 10000
            if scenario == 'overwaarde': 
                df['overwaarde'] = 'overwaarde'
            else:
                df['overwaarde'] = 'geen overwaarde'
            df['gemeente'] = wp

            gdf = df.sjoin(buurten, predicate='intersects', how='left')
            
            df = gdf.groupby(['BU_NAAM', 'BU_CODE', 'overwaarde', 'inkomen_laag', 'inkomen_hoog', 'jaar']).agg(median_woz_diff_p5 = ('woz_difference_postcode_5', 'median'),
                                                                                                            woz_2016 = ('woz_2016', 'median'),
                                                                                                            woz_2022 = ('woz_2022', 'median'),
                                                                                                            ses_woa_score = ('ses_woa_score', 'median'),
                                                                                                            ses_woa_fin_welvaart = ('ses_woa_financiele_welvaart', 'median'),
                                                                                                            leefbaarometer = ('lbm', 'median'),
                                                                                                            onveiligheid = ('onv', 'median'),
                                                                                                            sociale_cohesie = ('soc', 'median'),
                                                                                                            number_of_houses = ('woz_2016', 'count')
                                                                                                            )
            df.reset_index(inplace=True)
            dfs.append(df)
        
df = pd.concat(dfs)
df.to_parquet(PATH + 'processed/gemeentestats.parquet', index=False)


In [180]:
len(df)

219219

In [181]:
df.to_parquet(PATH + 'processed/gemeentestats.parquet', index=False)

In [182]:
df.columns

Index(['BU_NAAM', 'BU_CODE', 'overwaarde', 'inkomen_laag', 'inkomen_hoog',
       'jaar', 'median_woz_diff_p5', 'woz_2016', 'woz_2022', 'ses_woa_score',
       'ses_woa_fin_welvaart', 'leefbaarometer', 'onveiligheid',
       'sociale_cohesie', 'number_of_houses'],
      dtype='object')

In [184]:
df.head()

Unnamed: 0,BU_NAAM,BU_CODE,overwaarde,inkomen_laag,inkomen_hoog,jaar,median_woz_diff_p5,woz_2016,woz_2022,ses_woa_score,ses_woa_fin_welvaart,leefbaarometer,onveiligheid,sociale_cohesie,number_of_houses
0,Badstratenbuurt,BU00140105,overwaarde,33400,43400,2022,1.94,130000.0,245000.0,-0.4445,-0.3365,4.229771,-0.007689,-0.04695,86
1,Bangeweer,BU00140809,overwaarde,33400,43400,2022,1.64,167000.0,250500.0,0.137,0.0205,4.241401,0.032001,0.052961,36
2,Bedrijventerrein Ten Boer,BU00141506,overwaarde,33400,43400,2022,1.49,164000.0,245000.0,,,,,,2
3,Beijum-Oost,BU00141101,overwaarde,33400,43400,2016,1.79,77000.0,138000.0,-0.4245,-0.2455,3.919765,-0.13547,-0.047114,50
4,Beijum-Oost,BU00141101,overwaarde,33400,43400,2022,1.8,136000.0,250000.0,-0.4245,-0.2455,3.919765,-0.13547,-0.047114,908


In [183]:
df.hvplot.scatter(x=df[df.overwaarde=='geen_overwaarde'].number_of_houses,
                  y=df[df.overwaarde=='overwaarde'].number_of_houses,
                  )


TypeError: argument of type 'int' is not iterable