In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import requests
from pymongo import MongoClient
from pymongoarrow.api import find_pandas_all

import creds

In [None]:
# Open the MongoDB connection using the URI kept in the local `creds` module.
cluster = MongoClient(creds.Creds.URI)

# NOTE(review): this filter is defined but never handed to the loader below,
# which receives None as its query argument -- confirm whether it is still needed.
query = {"title": "Romance"}

# Load the BE_houses collection of the test2 database via pymongoarrow.
houses_collection = cluster["test2"]["BE_houses"]
df = find_pandas_all(houses_collection, None)

In [None]:
# Count missing values per column, then show the 20 columns with the fewest.
null_counts = df.isna().sum()
null_counts.sort_values().head(20)

In [None]:
# data from https://www.spotzi.com/en/data-catalog/categories/postal-codes/belgium/

# Inclusive (low, high) bounds on the first two postal-code digits, per province.
_PROVINCE_RANGES = (
    (10, 12, "Brussels"),
    (13, 14, "Walloon Brabant"),
    (15, 19, "Flemish Brabant"),
    (20, 29, "Antwerp"),
    (30, 34, "Flemish Brabant"),
    (35, 39, "Limburg"),
    (40, 49, "Liege"),
    (50, 59, "Namur"),
    (60, 65, "Hainaut"),
    (66, 69, "Luxembourg"),
    (70, 79, "Hainaut"),
    (80, 89, "West Flanders"),
    (90, 99, "East Flanders"),
)


def assign_province(value):
    """Map a Belgian postal code to its province name.

    The province is decided from the first two digits of the code only.

    Parameters
    ----------
    value : str, int, float or None
        The postal code. Strings are the expected form; numeric values are
        converted with ``str()`` first, and surrounding whitespace is ignored.

    Returns
    -------
    str or None
        The province name, or ``None`` when ``value`` is ``None``, does not
        start with two ASCII digits (e.g. NaN, empty or non-numeric text), or
        its two-digit prefix falls outside every known range.
    """
    if value is None:
        return None

    # Going through str() lets numeric zip codes (and NaN / pd.NA placeholders)
    # pass through the same validation instead of failing on slicing.
    prefix = str(value).strip()[:2]

    # isascii() guards against unicode "digits" that isdigit() accepts but
    # int() cannot parse.
    if len(prefix) != 2 or not (prefix.isascii() and prefix.isdigit()):
        return None

    first_two_digits = int(prefix)
    for low, high, province in _PROVINCE_RANGES:
        if low <= first_two_digits <= high:
            return province
    return None

In [None]:
# Label every listing with its province. The Series is built on df's own index
# so that combining it with df's columns aligns row-for-row even when df does
# not carry a default 0..n-1 index.
provinces = pd.Series(
    [assign_province(item) for item in df.zip_code],
    index=df.index,
    name="province",
)

# Two-column frame used for the per-province price aggregation:
# the province label and the list price cast to float.
df_province = pd.DataFrame(
    {
        "province": provinces,
        "list_price": df.list_price.astype(float),
    }
)
df_province

In [None]:
# Download the province outlines used as the choropleth geometry.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a JSON decode
# failure or an empty map further down.
provinces_response = requests.get(
    "https://raw.githubusercontent.com/mathiasleroy/Belgium-Geographic-Data/master/dist/polygons/be-provinces-unk-WGS84.geo.json",
    timeout=30,
)
provinces_response.raise_for_status()
BE_provinces = provinces_response.json()

# Mean list price per province; rows without a province label are dropped by
# groupby.
avg_price_by_province = df_province.groupby("province").mean().reset_index()

# NOTE(review): assumes every GeoJSON feature exposes its province under
# properties.name, spelled exactly like the labels in the "province" column --
# confirm against the downloaded file.
fig = px.choropleth(
    avg_price_by_province,
    geojson=BE_provinces,
    locations="province",
    color="list_price",
    featureidkey="properties.name",
    projection="mercator",
    color_continuous_scale="Magenta",
    labels={"list_price": "List Price"},
    hover_data={"list_price": True, "province": True},
)

# Geo styling in one place. Coastlines end up hidden: the earlier version first
# enabled them and then disabled them again through the layout's geo settings.
fig.update_geos(
    showcountries=True,
    showcoastlines=False,
    showland=True,
    showframe=False,
    fitbounds="locations",
    projection_type="mercator",
)

# Add title and fix the figure size
fig.update_layout(
    title_text="Average House Prices by Province",
    autosize=False,
    width=800,
    height=600,
)


fig.show()