![Photo by Stephen Phillips - Hostreviews.co.uk on UnSplash](https://cf.bstatic.com/xdata/images/hotel/max1024x768/408003083.jpg?k=c49b5c4a2346b3ab002b9d1b22dbfb596cee523b53abef2550d0c92d0faf2d8b&o=&hp=1){fig-align="center" width=50%}


# Import data

In [1]:
import json
import re
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
import tqdm
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from lets_plot import *
from lets_plot.mapping import as_discrete
from requests_html import HTMLSession
from scipy import stats
# selenium 4
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

LetsPlot.setup_html()

::: {.callout-note}
After taking a look at all three datasets, I think I will continue working with the `sauces` and `episodes` ones.
:::

## Functions to extract the data (not needing selenium)

In [60]:
def extract(type_, page):
    """
    Extracts and returns the BeautifulSoup object from a specified Immoweb search page.

    Args:
        type_ (str): The type of property to search for (e.g., "rent", "sale").
        page (int): The page number of the search results.

    Returns:
        BeautifulSoup: A BeautifulSoup object containing the parsed HTML content of the Immoweb search page.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36"
    }
    url = f"https://www.immoweb.be/en/search/house/for-{type_}?countries=BE&page={page}&orderBy=relevance"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.content, "html.parser")
    return soup

In [None]:
def transform(soup):
    """
    Extracts property information from a BeautifulSoup object and appends it to the hitlist.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object containing the parsed HTML content.

    Returns:
        None
    """
    hitlist = []  # Initialize the hitlist within the function

    divs = soup.find_all("article", class_="card card--result card--xl")
    for item in divs:
        # get the type of the property
        title_value = item.find("h2").text.strip()

        # get the price
        price_tag = item.find("p", class_="card--result__price")
        price_attr_value = price_tag.find("iw-price")[":price"]
        price_json = json.loads(price_attr_value)
        price_value = price_json["mainValue"]

        try:
            # get the number of bedrooms
            bedroom_tag = item.find(
                "p",
                class_="card__information card--result__information card__information--property",
            )
            br_attr = bedroom_tag.find("iw-abbreviation")[":abbreviation"]
            br_value = int(re.search(r"\d+", br_attr).group())
        except:
            br_value = np.nan

        try:
            # get the square meter
            sqm_tag = item.find(
                "p",
                class_="card__information card--result__information card__information--property",
            ).text
            sqm_value = int(re.search(r"\d+", sqm_tag).group())
        except:
            sqm_value = np.nan

        # get the ZIP code and location
        ZIP_tag = item.find(
            "p",
            class_="card__information card--results__information--locality card__information--locality",
        ).text
        ZIP_value = int(re.search(r"\d+", ZIP_tag).group())

        location_value = re.sub(r"\b\d+\b", "", ZIP_tag).strip()
        website = item.find("a", href=True)["href"]

        data = {
            # 'time' : pd.Timestamp.now(),
            "title": title_value,
            "price": price_value,
            "ZIP": ZIP_value,
            "city": location_value,
            "bedroom": br_value,
            "surface": sqm_value,
            "website": website,
        }

        hitlist.append(data)

# New script

In [47]:
# https://github.com/psf/requests-html/issues/275#issuecomment-513992564
session = HTMLSession(
    browser_args=[
        "--no-sandbox",
        "--user-agent=Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    ]
)
URL = "https://www.immoweb.be/en/search/house/for-rent?countries=BE&page=1&orderBy=relevance"
r = session.get(URL)
r.html.arender(sleep=1)

print(r.status_code)


ads = r.html.xpath(
    '//*[@id="searchResults"]/div[4]/div/div[1]/div[1]/div[1]', first=True
)

print(ads)

all_tables_from_given_page = []
for item in ads.absolute_links:
    try:
        r = session.get(item)
        r.html.arender(sleep=1)
        tables_from_add = pd.concat(pd.read_html(r.text))
        all_tables_from_given_page.append(tables_from_add)

    except AttributeError:
        pass


URL = "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10757268"
session = HTMLSession(
    browser_args=[
        "--no-sandbox",
        "--user-agent=Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    ]
)
r = session.get(URL)
r.html.render(sleep=1)
print(pd.read_html(r.text)[6])

RuntimeError: Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead.

In [51]:
ads

{'https://www.immoweb.be/en/classified/10786920',
 'https://www.immoweb.be/en/classified/exceptional-property/for-rent/ixelles/1050/10815451',
 'https://www.immoweb.be/en/classified/exceptional-property/for-rent/keerbergen/3140/10788298',
 'https://www.immoweb.be/en/classified/exceptional-property/for-rent/waterloo/1410/10612427',
 'https://www.immoweb.be/en/classified/exceptional-property/for-rent/woluwe-saint-pierre/1150/10822313',
 'https://www.immoweb.be/en/classified/house/for-rent/ixelles/1050/10821217',
 'https://www.immoweb.be/en/classified/house/for-rent/kraainem/1950/10802699',
 'https://www.immoweb.be/en/classified/house/for-rent/kraainem/1950/10818148',
 'https://www.immoweb.be/en/classified/house/for-rent/oppuurs/2880/10816362',
 'https://www.immoweb.be/en/classified/house/for-rent/overijse/3090/10803446',
 'https://www.immoweb.be/en/classified/house/for-rent/rhode-saint-genese/1640/10813232',
 'https://www.immoweb.be/en/classified/house/for-rent/rixensart/1330/10722833',


In [50]:
ads = {
    "https://www.immoweb.be/en/classified/house/for-rent/tervuren/3080/10795962",
    "https://www.immoweb.be/en/classified/house/for-rent/waterloo/1410/10403855",
    "https://www.immoweb.be/en/classified/house/for-rent/tervuren/3080/10820557",
    "https://www.immoweb.be/en/classified/house/for-rent/rixensart/1330/10818504",
    "https://www.immoweb.be/en/classified/house/for-rent/zaventem%20sterrebeek/1933/10792058",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-pierre/1150/10785377",
    "https://www.immoweb.be/en/classified/exceptional-property/for-rent/ixelles/1050/10815451",
    "https://www.immoweb.be/en/classified/mansion/for-rent/tervuren/3080/10799605",
    "https://www.immoweb.be/en/search/house/for-rent?countries=BE&page=110&orderBy=relevance",
    "https://www.immoweb.be/en/classified/10786920",
    "https://www.immoweb.be/en/classified/house/for-rent/kraainem/1950/10818148",
    "https://www.immoweb.be/en/classified/house/for-rent/waterloo/1410/10633743",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10810362",
    "https://www.immoweb.be/en/classified/exceptional-property/for-rent/woluwe-saint-pierre/1150/10822313",
    "https://www.immoweb.be/en/classified/house/for-rent/ixelles/1050/10821217",
    "https://www.immoweb.be/en/classified/house/for-rent/waterloo/1410/10597388",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10816194",
    "https://www.immoweb.be/en/classified/exceptional-property/for-rent/keerbergen/3140/10788298",
    "https://www.immoweb.be/en/search/house/for-rent?countries=BE&page=2&orderBy=relevance",
    "https://www.immoweb.be/en/classified/house/for-rent/kraainem/1950/10802699",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-pierre/1150/10802614",
    "https://www.immoweb.be/en/classified/house/for-rent/oppuurs/2880/10816362",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10763997",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10738911",
    "https://www.immoweb.be/en/search/house/for-rent?countries=BE&orderBy=relevance",
    "https://www.immoweb.be/en/classified/house/for-rent/rixensart/1330/10722833",
    "https://www.immoweb.be/en/classified/villa/for-rent/waterloo/1410/10809750",
    "https://www.immoweb.be/en/classified/town-house/for-rent/kraainem/1950/10737139",
    "https://www.immoweb.be/en/classified/house/for-rent/rhode-saint-genese/1640/10813232",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-lambert/1200/10757268",
    "https://www.immoweb.be/en/classified/house/for-rent/overijse/3090/10803446",
    "https://www.immoweb.be/en/classified/exceptional-property/for-rent/waterloo/1410/10612427",
    "https://www.immoweb.be/en/classified/villa/for-rent/tervuren/3080/10818145",
    "https://www.immoweb.be/en/classified/house/for-rent/woluwe-saint-pierre/1150/10800984",
}

In [189]:
# https://github.com/psf/requests-html/issues/275#issuecomment-513992564

session = HTMLSession(
    browser_args=[
        "--no-sandbox",
        "--user-agent=Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    ]
)
URL = "https://www.immoweb.be/en/search/house/for-rent?countries=BE&page=1&orderBy=relevance"
r = session.get(URL)
r.html.arender(sleep=1)


# ads = r.html.xpath('//*[@id="searchResults"]/div[4]/div/div[1]/div[1]/div[1]', first=True)

all_tables_from_given_page = []
for item in list(ads):
    try:
        r = session.get(item)

        tables_from_ad = pd.concat(pd.read_html(r.text)).dropna().set_index(0)
        tables_from_ad.loc["day_of_retrieval", 1] = pd.Timestamp.now()
        tables_from_ad.loc["ad_url", 1] = item

        all_tables_from_given_page.append(tables_from_ad)
    except:
        pass
dfs = [
    df.rename(columns={1: f"source_{i}"})
    for i, df in enumerate(all_tables_from_given_page)
]
dfs[0].join(dfs[1:])

  r.html.arender(sleep=1)


Unnamed: 0_level_0,source_0,source_1,source_2,source_3,source_4,source_5,source_6,source_7,source_8,source_9,...,source_21,source_22,source_23,source_24,source_25,source_26,source_27,source_28,source_29,source_30
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Accessible for disabled people,No,No,,,No,,Yes,No,No,,...,No,,,No,,No,No,,,No
Address,"Avenue des Cerisiers, 95 1200 - Woluwe-St-Lam...",drève richelle 96 1410 - Waterloo,Avenue de L'Armée 41 1040 - Etterbeek,Bld. Brand Whitlock 108 1200 - Woluwe-Saint-L...,Avenue de Mérode 16B 1330 - Rixensart,Hoogstraat 5 2800 - Mechelen,"Avenue des Cerisiers, 212 1200 - Woluwe-St-La...",Avenue Fond Jean Rosy 17A 1330 - Rixensart,"Val des Seigneurs, 9a 1150 - Woluwe-St-Pierre","Avenue des Cerisiers, 212 1200 - Woluwe-St-La...",...,Avenue de l’Etoile Polaire 1410 - Waterloo,Avenue Louise 200 1050 - Ixelles,"Val des Seigneurs, 9a 1150 - Woluwe-St-Pierre",Avenue Fond Jean Rosy 17A 1330 - Rixensart,Brusselsesteenweg 415 3090 - Overijse,Avenue de l’Etoile Polaire 1410 - Waterloo,Avenue de l’Etoile Polaire 1410 - Waterloo,Boulevard de la Cambre 42 1000 - Bruxelles,"Avenue des Cerisiers, 95 1200 - Woluwe-St-Lam...",Avenue Louise 589 1050 - Ixelles
Available as of,Depending on the tenant,Immediately,Depending on the tenant,Immediately,Depending on the tenant,Depending on the tenant,Immediately,To be defined,Immediately,Depending on the tenant,...,Depending on the tenant,,Immediately,Immediately,Immediately,Depending on the tenant,Immediately,,At delivery,To be defined
Available date,November 1 2023 - 12:00 AM,July 15 2023 - 12:00 AM,August 31 2023 - 12:00 AM,September 5 2023 - 12:00 AM,October 1 2023 - 12:00 AM,October 1 2023 - 12:00 AM,September 1 2023 - 12:00 AM,October 1 2023 - 12:00 AM,October 1 2023 - 12:00 AM,October 1 2023 - 12:00 AM,...,August 1 2023 - 12:00 AM,,October 1 2023 - 12:00 AM,October 1 2023 - 12:00 AM,April 1 2019 - 12:00 AM,August 1 2023 - 12:00 AM,June 1 2023 - 12:00 AM,October 30 2023 - 12:00 AM,October 5 2023 - 12:00 AM,October 1 2023 - 12:00 AM
Basement,Yes,,,,Yes,,Yes,,Yes,Yes,...,Yes,,,,,Yes,,,,Yes
Bathrooms,2,3,4,2,1,1,2,2,2,3,...,4,,1,8,2,6,3,2,2,2
Bedroom 1 surface,17 m² square meters,33 m² square meters,21 m² square meters,20 m² square meters,13 m² square meters,13 m² square meters,30 m² square meters,15 m² square meters,20 m² square meters,20 m² square meters,...,15 m² square meters,,16 m² square meters,8 m² square meters,8 m² square meters,15 m² square meters,35 m² square meters,20 m² square meters,15 m² square meters,20 m² square meters
Bedroom 2 surface,18 m² square meters,22 m² square meters,14 m² square meters,20 m² square meters,12 m² square meters,12 m² square meters,18 m² square meters,24 m² square meters,16 m² square meters,18 m² square meters,...,12 m² square meters,,12 m² square meters,8 m² square meters,20 m² square meters,20 m² square meters,9 m² square meters,20 m² square meters,15 m² square meters,20 m² square meters
Bedroom 3 surface,18 m² square meters,21 m² square meters,13 m² square meters,18 m² square meters,20 m² square meters,9 m² square meters,15 m² square meters,16 m² square meters,13 m² square meters,16 m² square meters,...,15 m² square meters,,8 m² square meters,16 m² square meters,30 m² square meters,15 m² square meters,19 m² square meters,20 m² square meters,14 m² square meters,23 m² square meters
Bedrooms,3,4,4,5,3,3,4,5,5,5,...,4,,3,8,4,7,4,5,4,3


In [243]:
dfs_on_disk = []
for i in Path.cwd().glob("*.csv"):
    temp = pd.read_csv(i)
    dfs_on_disk.append(temp)

In [275]:
columns_to_keep = (
    pd.concat(dfs_on_disk, axis=0)
    .loc[:, lambda df: ~df.columns.str.contains("Armored")]
    .isna()
    .sum()
    .div(1458)
    .mul(100)
    .sort_values()
    .head(30)
    .index.to_list()
)

In [280]:
(
    pd.concat(dfs_on_disk, axis=0)
    .loc[:, lambda df: df.columns.isin(columns_to_keep)]
    .ad_url.unique()
    .shape
)

(1426,)

In [46]:
URL = "https://www.immoweb.be/en/classified/house/for-rent/zaventem%20sterrebeek/1933/10792058"
session = HTMLSession(
    browser_args=[
        "--no-sandbox",
        "--user-agent=Mozilla/5.0 (Windows NT 5.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    ]
)
r = session.get(URL)
r.html.arender(sleep=1)
pd.read_html(r.text)[6]

  r.html.arender(sleep=1)


Unnamed: 0,0,1
0,Address,Bld. Brand Whitlock 108 1200 - Woluwe-Saint-L...
1,External reference,5512777


In [40]:
dfs = [
    add1,
    add2,
]
dfs = [df.set_index(0) for df in dfs]
dfs[0].join(dfs[1:])

Unnamed: 0_level_0,1_x,1_y
0,Unnamed: 1_level_1,Unnamed: 2_level_1
Accessible for disabled people,No,
Address,"Avenue des Cerisiers, 212 1200 - Woluwe-St-La...",Bld. Brand Whitlock 108 1200 - Woluwe-Saint-L...
Attic,Yes,
Available as of,Immediately,Immediately
Available date,September 1 2023 - 12:00 AM,September 5 2023 - 12:00 AM
Basement,Yes,
Bathrooms,2,2
Bedroom 1 surface,15 m² square meters,20 m² square meters
Bedroom 2 surface,15 m² square meters,20 m² square meters
Bedroom 3 surface,30 m² square meters,18 m² square meters


In [36]:
add1

Unnamed: 0,0,1
0,Available as of,Immediately
1,Available date,September 1 2023 - 12:00 AM
2,Neighbourhood or locality,Bruxelles (19 communes)
3,Construction year,1950
4,Building condition,As new
5,Street frontage width,6 m
6,Number of frontages,2
7,Covered parking spaces,1
8,Outdoor parking spaces,1
9,Surroundings type,Isolated


## Functions to extract the last page number, selenium is needed

In [5]:
# this one needs selenium


def find_last_page(soup):
    divs = int(soup.find_all("span", class_="button__label")[-1].text)
    return divs

In [6]:
def extract_selenium(type_, page):
    """
    Extracts and returns the BeautifulSoup object from a specified Immoweb search page.

    Args:
        type_ (str): The type of property to search for (e.g., "rent", "sale").
        page (int): The page number of the search results.

    Returns:
        BeautifulSoup: A BeautifulSoup object containing the parsed HTML content of the Immoweb search page.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36"
    }
    url = f"https://www.immoweb.be/en/search/house/for-{type_}?countries=BE&page={page}&orderBy=relevance"
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

    driver.get(url)
    soup = BeautifulSoup(driver.page_source, "lxml")
    return soup

First we need to find the total number of pages to scrape, for this we will use selenium. Once we have that we do not really need it to scrape the website contents.

In [7]:
%%script echo skipping

rent_last_page=find_last_page(extract_selenium('rent',1)) + 1

Couldn't find program: 'echo'


In [8]:
%%script echo skipping

buy_last_page=find_last_page(extract_selenium('buy',1)) + 1

Couldn't find program: 'echo'


In [13]:
%%script echo skipping

hitlist = []

for i in tqdm.tqdm(range(1, buy_last_page)):
    results = extract(type_ = 'sale',page = i)
    transform(results)
    time.sleep(2)
     
(pd.DataFrame(hitlist)
 .to_parquet('for_sale.parquet.gzip')
)

Couldn't find program: 'echo'


In [47]:
%%script echo skipping
hitlist = []

for i in tqdm.tqdm(range(1, 5)):
    results = extract(type_ = 'rent',page = i)
    transform(results)
    time.sleep(2)
     
(pd.DataFrame(hitlist)
 .to_parquet('for_rent.parquet.gzip')
)

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:12<00:00,  3.16s/it]


In [53]:
df_sale = pd.read_parquet("for_sale.parquet.gzip")
df_rent = pd.read_parquet("for_rent.parquet.gzip")

## Getting the coordinates using GeoPy

Since we managed to obtain the ZIP codes and locations of the rental properties, we could try to geolocate these apartments and later on plot them based on their addresses. 

**NOTE: the addresses we will obtain in the following steps represent approximate locations, based on the city and ZIP code information displayed within the ads.** They are in no way accurate, however, they do allow us to observe some spatial trends when in comes to apartment prices.

The library we will use next is the [GeoPy library](https://geopy.readthedocs.io/en/stable/). GeoPy makes it easy for Python developers to locate the coordinates of addresses, cities, countries, and landmarks across the globe using third-party geocoders and other data sources.

In [23]:
%%script echo skipping

locations = []

unique_apartment_locations = (df_rent['ZIP'].astype(str) + ',' + df_rent['city']).unique()

geolocator = Nominatim(user_agent="myApp")

for idx, element in tqdm.tqdm(enumerate(unique_apartment_locations)):
    try:
        location = geolocator.geocode(unique_apartment_locations[idx])
        case = {
            'latitude' : location.latitude,
            'longitude' : location.longitude,
            'address' : location.address
        }

        locations.append(case)
    
    except AttributeError:
        location = geolocator.geocode(unique_apartment_locations[idx])
        case = {
            'latitude' : np.nan,
            'longitude' : np.nan,
            'address' : np.nan
        }

        locations.append(case)
pd.DataFrame(locations).to_parquet('locations_for_rent.parquet.gzip', compression='gzip') #saving coordinates to disk

458it [03:59,  1.91it/s]


In [24]:
locations = pd.read_parquet(
    "locations_for_rent.parquet.gzip"
)  # reading back the saved locations data
locations

Unnamed: 0,latitude,longitude,address
0,50.843045,4.425673,"Woluwe-Saint-Lambert - Sint-Lambrechts-Woluwe,..."
1,50.842570,4.469203,"Kraainem, Halle-Vilvoorde, Vlaams-Brabant, Vla..."
2,50.859132,4.517437,"Sterrebeek, Zaventem, Halle-Vilvoorde, Vlaams-..."
3,50.715373,4.396367,"Waterloo, Nivelles, Brabant wallon, Wallonie, ..."
4,50.837025,4.427464,"Woluwe-Saint-Pierre - Sint-Pieters-Woluwe, Bru..."
...,...,...,...
453,50.748109,4.276727,"Huizingen, Beersel, Halle-Vilvoorde, Vlaams-Br..."
454,50.719210,4.391897,"Waterloo, Nivelles, Brabant wallon, Wallonie, ..."
455,50.759848,4.157699,"Pepingen, Halle-Vilvoorde, Vlaams-Brabant, Vla..."
456,50.781184,4.245219,"Sint-Pieters-Leeuw, Halle-Vilvoorde, Vlaams-Br..."


In [43]:
locations2 = pd.concat(
    [locations, pd.Series(unique_apartment_locations, name="unique_address")], axis=1
)

In [44]:
(
    df_rent.assign(
        ZIP_city=lambda df: df["ZIP"].astype(str) + "," + df["city"],
        full_address=lambda df: df.ZIP_city.map(
            locations2.set_index("unique_address").address.to_dict()
        ),
        latitude=lambda df: df.ZIP_city.map(
            locations2.set_index("unique_address").latitude.to_dict()
        ),
        longitude=lambda df: df.ZIP_city.map(
            locations2.set_index("unique_address").longitude.to_dict()
        ),
    )
)

Unnamed: 0,title,price,ZIP,city,bedroom,surface,ZIP_city,full_address,latitude,longitude
0,House,2500,1200,Woluwe-Saint-Lambert,4.0,216.0,"1200,Woluwe-Saint-Lambert","Woluwe-Saint-Lambert - Sint-Lambrechts-Woluwe,...",50.843045,4.425673
1,House,2450,1950,Kraainem,4.0,125.0,"1950,Kraainem","Kraainem, Halle-Vilvoorde, Vlaams-Brabant, Vla...",50.842570,4.469203
2,House,2500,1933,Zaventem Sterrebeek,5.0,270.0,"1933,Zaventem Sterrebeek","Sterrebeek, Zaventem, Halle-Vilvoorde, Vlaams-...",50.859132,4.517437
3,House,2350,1410,Waterloo,5.0,250.0,"1410,Waterloo","Waterloo, Nivelles, Brabant wallon, Wallonie, ...",50.715373,4.396367
4,House,2700,1150,Woluwe-Saint-Pierre,3.0,160.0,"1150,Woluwe-Saint-Pierre","Woluwe-Saint-Pierre - Sint-Pieters-Woluwe, Bru...",50.837025,4.427464
...,...,...,...,...,...,...,...,...,...,...
1218,Mixed-use building,2500,1600,SINT-PIETERS-LEEUW,,405.0,"1600,SINT-PIETERS-LEEUW","Sint-Pieters-Leeuw, Halle-Vilvoorde, Vlaams-Br...",50.781184,4.245219
1219,Mixed-use building,2500,1620,Drogenbos,,405.0,"1620,Drogenbos","Drogenbos, Halle-Vilvoorde, Vlaams-Brabant, Vl...",50.786532,4.317354
1220,House,1100,7300,Boussu,1.0,,"7300,Boussu","Boussu, Mons, Hainaut, Wallonie, 7300, België ...",50.427474,3.794269
1221,House,950,8460,Oudenburg,2.0,,"8460,Oudenburg","Oudenburg, Oostende, West-Vlaanderen, Vlaander...",51.184188,3.004941


In [12]:
# | fig-cap: "Most Commonly Used Terms in Spam Messages"
# | label: fig-fig1