In [14]:
import pandas as pd
import numpy as np
import folium
from sklearn.cluster import DBSCAN

In [24]:
# Load the dataset
data = pd.read_csv("data/interventions_bxl.parquet.csv").drop_duplicates()

# Sampling configuration: a fixed seed keeps the sample (and therefore every
# downstream cluster / map marker) identical across "Restart & Run All".
SAMPLE_SIZE = 2000
RANDOM_SEED = 42


def _insert_decimal_point(value, int_digits):
    """Turn a digit-only coordinate into a float by inserting a decimal point.

    The coordinate columns are decoded by placing a '.' after the first
    ``int_digits`` digits (e.g. ``5085030`` with ``int_digits=2`` -> ``50.85030``).
    NOTE(review): this assumes every value in a column has the same number of
    integer digits -- confirm against the data source.

    Parameters
    ----------
    value : int, float or str
        Raw coordinate as read from the CSV. Missing values are allowed.
    int_digits : int
        Number of digits that belong before the decimal point.

    Returns
    -------
    float
        The decoded coordinate, or NaN when ``value`` is missing (so the
        ``dropna()`` below can discard the row instead of the cell crashing).
    """
    if pd.isna(value):
        return np.nan
    number = int(value)  # normalises 5085030.0 / '5085030' to plain digits
    sign = -1.0 if number < 0 else 1.0  # keep '-' out of the digit slicing
    digits = str(abs(number))
    return sign * float(digits[:int_digits] + '.' + digits[int_digits:])


# Number of integer digits per coordinate column (two for the latitude
# columns, one for the longitude columns).
coordinate_int_digits = {
    'latitude_permanence': 2,
    'longitude_permanence': 1,
    'latitude_intervention': 2,
    'longitude_intervention': 1,
}
for column, int_digits in coordinate_int_digits.items():
    data[column] = data[column].apply(_insert_decimal_point, int_digits=int_digits)

# Filter the dataset to only include relevant columns
relevant_data = data[['latitude_permanence', 'longitude_permanence',
                      'latitude_intervention', 'longitude_intervention',
                      'vector_type', 'waiting_time']].dropna()

# Never ask for more rows than exist, and sample reproducibly.
relevant_data = relevant_data.sample(n=min(SAMPLE_SIZE, len(relevant_data)),
                                     random_state=RANDOM_SEED)


  data = pd.read_csv("data/interventions_bxl.parquet.csv").drop_duplicates()


In [25]:
# Place the permanence coordinates and the intervention coordinates side by
# side, so each row of relevant_data becomes one 4-value feature vector.
coordinate_columns = ['latitude_permanence', 'longitude_permanence',
                      'latitude_intervention', 'longitude_intervention']
locations = relevant_data[coordinate_columns]

# Plain numpy matrix for scikit-learn
X = locations.to_numpy()


In [37]:
# DBSCAN hyper-parameters.
# These are the values the model is actually fitted with. The previous
# `np.sqrt(2) * np.std(X)` heuristic was computed but never passed to DBSCAN,
# and `np.std(X)` without an axis is taken over the flattened matrix, i.e. it
# mixes the latitude and longitude columns into a single spread figure.
epsilon = 0.01  # neighbourhood radius, in the units of X's columns (presumably decimal degrees)
min_pts = 10    # samples required in a neighbourhood for a point to be a core point

# Apply DBSCAN clustering algorithm
dbscan = DBSCAN(eps=epsilon, min_samples=min_pts, metric='euclidean').fit(X)

# Get the cluster labels (one per row of X; -1 marks noise)
labels = dbscan.labels_

# Summarise every non-noise cluster as a (centroid, mean waiting time) pair;
# these pairs are the candidate AED locations.
unique_labels = np.unique(labels)
aed_locations = []
for cluster_id in unique_labels[unique_labels != -1]:  # -1 marks noise points
    in_cluster = labels == cluster_id
    waits = relevant_data.iloc[in_cluster]['waiting_time']
    mean_wait = np.sum(waits) / len(waits)
    aed_locations.append((np.mean(X[in_cluster], axis=0), mean_wait))

# Order the candidates by ascending mean waiting time
aed_locations = sorted(aed_locations, key=lambda entry: entry[1])

# Map is centred on Brussels
brussels_center = (50.8503, 4.3517)
m = folium.Map(location=brussels_center, zoom_start=12)

# Drop one custom-icon marker per candidate, for the first five entries of the
# ranked list. Only the first two centroid components are used as the marker
# position.
for rank, (spot, _) in enumerate(aed_locations[:5], start=1):
    aed_icon = folium.CustomIcon('images/Aed_logo.jpg', icon_size=(32, 32))
    marker = folium.Marker(location=(spot[0], spot[1]),
                           popup=f"AED Location {rank}",
                           icon=aed_icon)
    marker.add_to(m)

# Write the interactive map out as a standalone HTML page
m.save("aed_locations_map.html")

In [31]:
# Show which cluster labels DBSCAN produced and the ranked AED candidates
print(f"Unique Cluster Labels: {unique_labels}")
print(f"AED Locations: {aed_locations}")

Unique Cluster Labels: [0]
AED Locations: [(array([50.84569665,  4.3615751 , 50.84582672,  4.35867038]), 17.835)]
