In [44]:
import pandas as pd
import numpy as np
import folium
from sklearn.cluster import KMeans
import os

In [45]:
# Cap the thread pools of the numerical back-ends (OpenBLAS, MKL, OpenMP).
# NOTE(review): these variables are normally read when the native libraries
# are first loaded, and numpy / scikit-learn are imported in the cell above —
# confirm that setting them here still takes effect.
default_n_threads = 8
os.environ.update({
    thread_variable: str(default_n_threads)
    for thread_variable in ('OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS', 'OMP_NUM_THREADS')
})

In [46]:
# Display settings: never hide columns, and ask for up to 500 rows when a
# long frame is shown in truncated form.
pd.set_option("display.max_columns", None)
pd.set_option("display.min_rows", 500)

In [47]:
# Read the intervention records; the path is relative to the notebook's
# working directory.
data = pd.read_csv(filepath_or_buffer="../data/interventions_bxl.parquet.csv")

In [48]:
# Keep the rows that have no abandon reason recorded and do have a waiting
# time, and retain only the two coordinate columns plus the waiting time.
bxldf = data.loc[
    data['abandon_reason'].isna() & data['waiting_time'].notna(),
    ['latitude_intervention', 'longitude_intervention', 'waiting_time'],
]
bxldf

Unnamed: 0,latitude_intervention,longitude_intervention,waiting_time
1,5085139.0,436918.00,18.0
5,508561.0,443169.00,10.0
6,508561.0,443169.00,29.0
7,5084039.0,446537.00,42.0
12,5080364.0,439922.00,9.0
14,5086305.0,432287.00,13.0
15,5084889.0,435262.00,21.0
18,5088931.0,433315.00,13.0
20,5082904.0,437555.00,19.0
21,5083794.0,447403.00,63.0


In [49]:
# Take an explicit copy: the coordinate columns of this frame are overwritten
# in place further down, and assigning into a selection derived from `bxldf`
# triggers pandas' SettingWithCopyWarning and leaves it unclear which object
# is being modified.
coordinates = bxldf[['longitude_intervention', 'latitude_intervention', 'waiting_time']].copy()

In [50]:
def _insert_decimal_point(df, column_name, integer_digits):
    """Rewrite ``df[column_name]`` in place, re-inserting a lost decimal point.

    Each value is cast to an integer, and its decimal digits are split so that
    the first ``integer_digits`` digits form the integer part and the rest the
    fractional part (e.g. 5085139 with ``integer_digits=2`` -> 50.85139).
    A leading minus sign is kept as a sign and is not counted as a digit.
    """
    def to_degrees(value):
        sign = '-' if value < 0 else ''
        digits = str(abs(value))
        return float(f"{sign}{digits[:integer_digits]}.{digits[integer_digits:]}")

    # The integer cast drops any fractional part and fails on missing values.
    df[column_name] = df[column_name].astype(int).apply(to_degrees)


def fix_latitude(df, column_name):
    """Convert a latitude column stored without its decimal point, in place.

    The first two digits are taken as whole degrees (5085139 -> 50.85139).
    ``df`` is modified directly and nothing is returned.

    Not idempotent: running it on values that are already decimal degrees
    truncates them to their integer part first (50.85139 -> 50.0).
    """
    _insert_decimal_point(df, column_name, integer_digits=2)


def fix_longitude(df, column_name):
    """Convert a longitude column stored without its decimal point, in place.

    The first digit is taken as whole degrees (436918 -> 4.36918).
    ``df`` is modified directly and nothing is returned.

    Not idempotent: running it on values that are already decimal degrees
    truncates them to their integer part first (4.36918 -> 4.0).
    """
    _insert_decimal_point(df, column_name, integer_digits=1)

In [51]:
# Restore the decimal point in both coordinate columns (modifies
# `coordinates` in place).
# NOTE(review): run this cell only once per fresh `coordinates` frame — the
# helpers cast to int first, so a second run would truncate the decimals.
fix_latitude(df=coordinates, column_name='latitude_intervention')
fix_longitude(df=coordinates, column_name='longitude_intervention')

coordinates

Unnamed: 0,longitude_intervention,latitude_intervention,waiting_time
1,4.36918,50.85139,18.0
5,4.43169,50.85610,10.0
6,4.43169,50.85610,29.0
7,4.46537,50.84039,42.0
12,4.39922,50.80364,9.0
14,4.32287,50.86305,13.0
15,4.35262,50.84889,21.0
18,4.33315,50.88931,13.0
20,4.37555,50.82904,19.0
21,4.47403,50.83794,63.0


In [52]:
# Sanity checks before fitting the model.
# First line: how many rows contain at least one missing value.
print("Number of rows with missing coordinates:", coordinates.isna().any(axis="columns").sum())
# Second line: distinct-value count per column (waiting_time included),
# not the number of distinct (longitude, latitude) pairs.
print("Number of unique coordinates:", coordinates.nunique(axis="index"))

Number of rows with missing coordinates: 0
Number of unique coordinates: longitude_intervention    14158
latitude_intervention     10715
waiting_time                210
dtype: int64


In [10]:
# NOTE(review): this cell's execution count (In [10]) is out of sequence and
# its saved output shows a `cluster` column, which is only added inside
# create_cluster_map further down — the output appears to come from an
# earlier session. Restart the kernel and run all cells to refresh it.
coordinates

Unnamed: 0,longitude_intervention,latitude_intervention,cluster
1,4.36918,50.85139,3
5,4.43169,50.85610,2
6,4.43169,50.85610,2
7,4.46537,50.84039,2
12,4.39922,50.80364,2
...,...,...,...
115634,4.36009,50.85465,3
115639,4.35040,50.84644,3
115641,4.35254,50.90470,3
115645,4.30780,50.83525,0


In [32]:
# Rows ordered by ascending longitude, for inspecting the smallest values.
sorted_long = coordinates.sort_values('longitude_intervention', ascending=True)
sorted_long

Unnamed: 0,longitude_intervention,latitude_intervention,cluster
76097,3.39538,50.59788,23
75028,4.00000,50.84358,8
54954,4.00000,50.84255,8
107822,4.00000,50.83181,8
22715,4.00000,50.82983,8
28363,4.00000,50.83181,8
98666,4.00000,50.84868,8
49264,4.00000,50.85437,8
95538,4.00000,50.84054,8
50249,4.00000,50.83930,8


In [26]:
# Rows ordered by ascending latitude, for inspecting the smallest values.
sorted_lat = coordinates.sort_values('latitude_intervention', ascending=True)
sorted_lat

Unnamed: 0,longitude_intervention,latitude_intervention,cluster
102855,4.39393,50.00000,154
18272,4.38579,50.00000,154
81076,4.33414,50.00000,1
80770,4.28542,50.00000,139
80769,4.28542,50.00000,139
22732,4.45272,50.00000,63
18271,4.38579,50.00000,154
88322,4.40484,50.00000,154
68502,4.29002,50.00000,139
68501,4.29002,50.00000,139


In [53]:
# Keep only rows with latitude strictly above 50 and longitude strictly
# between 4 and 5; the strict comparisons also drop values that are exactly
# 50.0 or 4.0.
# `.copy()` makes the result an independent frame: create_cluster_map later
# assigns a `cluster` column to it, and doing that on a filtered slice raises
# pandas' SettingWithCopyWarning.
coordinates = coordinates.loc[
    (coordinates['latitude_intervention'] > 50)
    & (coordinates['longitude_intervention'] > 4)
    & (coordinates['longitude_intervention'] < 5)
].copy()
coordinates

Unnamed: 0,longitude_intervention,latitude_intervention,waiting_time
1,4.36918,50.85139,18.0
5,4.43169,50.85610,10.0
6,4.43169,50.85610,29.0
7,4.46537,50.84039,42.0
12,4.39922,50.80364,9.0
14,4.32287,50.86305,13.0
15,4.35262,50.84889,21.0
18,4.33315,50.88931,13.0
20,4.37555,50.82904,19.0
21,4.47403,50.83794,63.0


In [54]:
# Same latitude ordering as before, now on the filtered frame, to check the
# lowest remaining latitudes.
sorted_latclean = coordinates.sort_values('latitude_intervention', ascending=True)
sorted_latclean

Unnamed: 0,longitude_intervention,latitude_intervention,waiting_time
46429,4.64492,50.62683,28.0
52988,4.64665,50.62741,20.0
74247,4.30167,50.68824,18.0
29348,4.61981,50.69938,12.0
38593,4.41628,50.71448,22.0
47752,4.38590,50.71498,73.0
40038,4.39823,50.71660,20.0
77527,4.40572,50.71690,19.0
32094,4.39762,50.71700,13.0
56536,4.39304,50.71896,45.0


In [41]:
# Read the AED location table; the path is relative to the notebook's
# working directory.
file_path = "../data/aed_locations.parquet.csv"
aeddf = pd.read_csv(filepath_or_buffer=file_path)
aeddf

Unnamed: 0,id,type,address,number,postal_code,municipality,province,location,public,available,hours
0,13.0,,Blvd. Fr. Roosevelt,24.0,7060.0,SOIGNIES,Hainaut,,Y,,
1,70.0,,Ch. De Wégimont,76.0,4630.0,Ayeneux,Liège,,,,
2,71.0,,Place Saint - Lambert,,4020.0,Liège,Liège,,,,
3,72.0,,Rue du Doyard,,4990.0,Lierneux,Liège,,,,
4,73.0,,Fond Saint Servais,,4000.0,Liège,Liège,,,,
5,74.0,,Rue des Prémontrès,12.0,4020.0,Liège,Liège,,,,
6,75.0,,Route de Bastogne,1.0,4920.0,Harzé,Liège,,,,
7,76.0,,Rue du Parc,1.0,4540.0,Jehay,Liège,,,,
8,77.0,,Blvd. De la Constitution,19.0,4020.0,Liège,Liège,,,,
9,78.0,,Place de la République française,,4000.0,Liège,Liège,,,,


In [None]:
# Drop AEDs flagged "Non-Nee" in either the `available` or the `public`
# column (rows with a missing flag are kept), then narrow down to the
# Bruxelles-Brussel province.
aeddf = aeddf.loc[(aeddf['available'] != "Non-Nee") & (aeddf['public'] != "Non-Nee")]
aedbxl = aeddf.loc[aeddf['province'] == "Bruxelles-Brussel"]
aedbxl

# Author's note: 1608 AEDs remain for Brussels. Next step: join them with the
# AED coordinates on `id`.

In [43]:
# Read the Brussels AED table that carries latitude/longitude columns.
file_path = "../data/aed_bxl.parquet.csv"
aedcoord = pd.read_csv(filepath_or_buffer=file_path)
aedcoord
# Author's observation: many rows have no latitude/longitude filled in.

Unnamed: 0,id,type,address,number,postal_code,municipality,province,location,public,available,hours,latitude,longitude
0,86.0,,Schumanplein,11.0,1040.0,Brussel,Bruxelles-Brussel,,,,,,
1,96.0,,Graafschap - Jettelaan,2.0,1090.0,Brussel,Bruxelles-Brussel,,,,,50.883014,4.333189
2,97.0,,Sint-Pieterskerstraat,1.0,1090.0,Brussel,Bruxelles-Brussel,,,,,,
3,98.0,,Wemmelsestweg,100.0,1090.0,Brussel,Bruxelles-Brussel,,,,,,
4,101.0,,Tweestationsstraat,80.0,1070.0,Brussel,Bruxelles-Brussel,,,,,,
5,214.0,,Rue de l église,1.0,1150.0,Bruxelles,Bruxelles-Brussel,,,,,50.875586,4.353907
6,315.0,,Wetstraat,23.0,1040.0,Brussel,Bruxelles-Brussel,,,,,,
7,335.0,,Chaussée de Haecht,1405.0,1130.0,Bruxelles,Bruxelles-Brussel,,,,,,
8,354.0,,Bld Leopold II,44.0,1080.0,Bruxelles,Bruxelles-Brussel,,,,,,
9,355.0,,Adolphe Lavallée,1.0,1080.0,Bruxelles,Bruxelles-Brussel,,,,,50.858666,4.343933


In [38]:
def create_cluster_map(coordinates, num_clusters, icon_path, map_save_path):
    """Cluster intervention locations with KMeans and save a map of the centres.

    Parameters
    ----------
    coordinates : pandas.DataFrame
        Must contain 'latitude_intervention' and 'longitude_intervention'.
        A 'cluster' column with each row's KMeans label is added to (or
        overwritten in) this frame, in place.
    num_clusters : int
        Number of clusters requested from KMeans.
    icon_path : str
        Image file used as the marker icon.
    map_save_path : str
        Path of the HTML file the map is written to.

    Returns
    -------
    None
        The result is the saved HTML file and the added 'cluster' column.

    Notes
    -----
    Waiting time plays no role here: clustering uses the two coordinate
    columns only, as plain Euclidean distance on the raw degree values.
    """
    position_columns = ['latitude_intervention', 'longitude_intervention']

    # Fit KMeans on the coordinates and label every row (fixed seed).
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    coordinates['cluster'] = kmeans.fit_predict(coordinates[position_columns])

    # Mean position of the rows in each cluster. groupby only yields labels
    # that actually own rows, so a label KMeans left unused can no longer
    # produce a NaN centroid (which folium rejects as a marker location).
    # The groups come back ordered by label.
    centroids = coordinates.groupby('cluster')[position_columns].mean()

    # Center coordinates for Brussels
    brussels_center = (50.8503, 4.3517)

    # Initialize the map
    m = folium.Map(location=brussels_center, zoom_start=12)

    # One marker per cluster centre, numbered from 1 by cluster label.
    for label, centre in centroids.iterrows():
        folium.Marker(
            location=(float(centre['latitude_intervention']),
                      float(centre['longitude_intervention'])),
            popup=f"Cluster Center {int(label) + 1}",
            icon=folium.CustomIcon(icon_path, icon_size=(32, 32)),
        ).add_to(m)

    # Save the map to an HTML file
    m.save(map_save_path)


In [57]:
# Build the coordinates-only cluster map with 100 clusters and write it to an
# HTML file next to the notebook.
icon_path = '../assets/Aed_logo.jpg'
map_save_path = 'cluster_centers_map.html'
create_cluster_map(
    coordinates=coordinates,
    num_clusters=100,
    icon_path=icon_path,
    map_save_path=map_save_path,
)

In [58]:
#Now try with waiting time as a covariate
def create_cluster_map(coordinates, num_clusters, icon_path, map_save_path, standardize=True):
    """Cluster interventions on position and waiting time, then map the centres.

    This definition replaces any earlier ``create_cluster_map`` in the
    notebook namespace.

    Parameters
    ----------
    coordinates : pandas.DataFrame
        Must contain 'latitude_intervention', 'longitude_intervention' and
        'waiting_time'. A 'cluster' column with each row's KMeans label is
        added to (or overwritten in) this frame, in place.
    num_clusters : int
        Number of clusters requested from KMeans.
    icon_path : str
        Image file used as the marker icon.
    map_save_path : str
        Path of the HTML file the map is written to.
    standardize : bool, default True
        Z-score the three features before clustering. KMeans uses Euclidean
        distance, so without scaling the waiting time (values in the tens)
        outweighs the coordinates (which differ by fractions of a degree).
        Pass False to cluster on the raw values.

    Returns
    -------
    None
        The result is the saved HTML file and the added 'cluster' column.
    """
    position_columns = ['latitude_intervention', 'longitude_intervention']
    feature_columns = position_columns + ['waiting_time']

    features = coordinates[feature_columns]
    if standardize:
        # A constant column has zero spread; divide by 1 instead so it
        # becomes all zeros rather than NaN.
        spread = features.std(ddof=0).replace(0, 1)
        features = (features - features.mean()) / spread

    # Fit KMeans on the (optionally scaled) features and label every row.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    coordinates['cluster'] = kmeans.fit_predict(features)

    # Marker positions are the mean of the original, unscaled coordinates of
    # each cluster. groupby only yields labels that actually own rows, so an
    # unused label cannot produce a NaN location (which folium rejects).
    centroids = coordinates.groupby('cluster')[position_columns].mean()

    # Center coordinates for Brussels
    brussels_center = (50.8503, 4.3517)

    # Initialize the map
    m = folium.Map(location=brussels_center, zoom_start=12)

    # One marker per cluster centre, numbered from 1 by cluster label.
    for label, centre in centroids.iterrows():
        folium.Marker(
            location=(float(centre['latitude_intervention']),
                      float(centre['longitude_intervention'])),
            popup=f"Cluster Center {int(label) + 1}",
            icon=folium.CustomIcon(icon_path, icon_size=(32, 32)),
        ).add_to(m)

    # Save the map to an HTML file
    m.save(map_save_path)

In [59]:
# Build the waiting-time-aware cluster map with 100 clusters and write it to
# its own HTML file.
icon_path = '../assets/Aed_logo.jpg'
map_save_path = 'cluster_centersadjusted_map.html'
create_cluster_map(
    coordinates=coordinates,
    num_clusters=100,
    icon_path=icon_path,
    map_save_path=map_save_path,
)
# Author's note: this did not visibly change the results.