In [2]:
import pandas as pd
import numpy as np
import folium
from sklearn.cluster import DBSCAN

In [23]:
# Directory holding the input CSV exports, relative to this notebook.
# Both inputs are read from here so the notebook does not depend on one
# user's absolute filesystem layout.
DATA_DIR = "../data"

# Number of leading digits that form the integer part of each coordinate
# column (two for the latitude columns, one for the longitude columns).
COORD_INT_DIGITS = {
    'latitude_permanence': 2,
    'longitude_permanence': 1,
    'latitude_intervention': 2,
    'longitude_intervention': 1,
}


def _insert_decimal_point(raw, int_digits):
    """Rebuild a decimal coordinate from a value stored without its decimal point.

    The value is truncated to an integer, rendered as a digit string, and a
    '.' is inserted after the first ``int_digits`` characters.

    Parameters
    ----------
    raw : int, float or str
        Raw coordinate value; anything accepted by ``int()``.
    int_digits : int
        How many leading characters belong to the integer part.

    Returns
    -------
    float
        The coordinate with the decimal point restored.

    Raises
    ------
    ValueError, OverflowError
        Propagated from ``int(raw)`` when ``raw`` is NaN, infinite, or not
        numeric.

    Notes
    -----
    Slicing is positional, so a leading '-' sign would be counted as one of
    the ``int_digits`` characters; negative inputs are not handled.
    """
    digits = str(int(raw))
    return float(digits[:int_digits] + '.' + digits[int_digits:])


# Load the dataset
data = pd.read_csv(f"{DATA_DIR}/interventions_bxl.parquet.csv").drop_duplicates()

# Restore the decimal point in the permanence and intervention coordinates.
# Every column goes through the same integer cast before slicing, so integer
# and float-typed columns are treated alike.
for column, int_digits in COORD_INT_DIGITS.items():
    data[column] = data[column].apply(_insert_decimal_point, args=(int_digits,))


# Filter the dataset to only include relevant columns, dropping incomplete rows
relevant_data = data[['latitude_permanence', 'longitude_permanence',
                      'latitude_intervention', 'longitude_intervention',
                      'vector_type', 'waiting_time']].dropna()


# AED table; its 'latitude' / 'longitude' columns are compared against the
# proposed locations further down.
# NOTE(review): assumes aed_bxl.parquet.csv sits next to the interventions
# file in DATA_DIR — confirm.
aed_bxl = pd.read_csv(f"{DATA_DIR}/aed_bxl.parquet.csv")


  data = pd.read_csv("../data/interventions_bxl.parquet.csv").drop_duplicates()


In [24]:
# Put each record's permanence coordinates and intervention coordinates
# side by side, giving one four-column row per record.
permanence_coords = relevant_data[['latitude_permanence', 'longitude_permanence']]
intervention_coords = relevant_data[['latitude_intervention', 'longitude_intervention']]
locations = pd.concat([permanence_coords, intervention_coords], axis=1)

# Plain NumPy matrix used as the clustering input
X = locations.to_numpy()

In [53]:
# Legacy epsilon heuristic. These two names are kept defined only for
# backward compatibility; they are NOT used by the DBSCAN fit below.
# np.std(X) without an axis is taken over the flattened array, so it mixes
# every column of X into a single spread value.
min_pts = 5
epsilon = np.sqrt(2) * np.std(X)

# Parameters actually used for clustering and for the map.
DBSCAN_EPS = 0.0001        # neighbourhood radius, in the units of X
DBSCAN_MIN_SAMPLES = 10    # neighbours required for a core point
MAX_NEW_AEDS = 5           # number of markers to place on the map


def _iter_centroid_points(ranked_clusters):
    """Yield (lat, lon) pairs from each cluster centroid, in ranking order.

    Each centroid is read as consecutive (latitude, longitude) pairs, i.e.
    elements (0, 1), (2, 3), ... — matching the column order used when X is
    built from the permanence and intervention coordinates.

    Parameters
    ----------
    ranked_clusters : iterable of (centroid, score)
        ``centroid`` is an indexable sequence of even length; ``score`` is
        ignored here.

    Yields
    ------
    tuple
        ``(lat, lon)`` for every coordinate pair of every centroid.
    """
    for centroid, _ in ranked_clusters:
        for i in range(0, len(centroid), 2):
            yield centroid[i], centroid[i + 1]


# Apply DBSCAN clustering algorithm
dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES, metric='euclidean').fit(X)

# Get the cluster labels (one per row of X; -1 marks noise)
labels = dbscan.labels_

# For every real cluster, record its centroid and its mean waiting time
unique_labels = np.unique(labels)
aed_locations = []
for label in unique_labels:
    if label == -1:  # Ignore noise points
        continue
    in_cluster = labels == label
    cluster_points = X[in_cluster]
    # Rows of relevant_data line up positionally with rows of X
    cluster_waiting_times = relevant_data.iloc[in_cluster]['waiting_time']
    weighted_waiting_time = np.sum(cluster_waiting_times) / len(cluster_waiting_times)
    centroid = np.mean(cluster_points, axis=0)
    aed_locations.append((centroid, weighted_waiting_time))

# Sort AED locations by mean waiting time, ascending.
# NOTE(review): this ranks clusters with the SHORTEST waiting time first —
# confirm that is the intended priority for new AED placement.
aed_locations.sort(key=lambda entry: entry[1])

# Center coordinates for Brussels
brussels_center = (50.8503, 4.3517)

# Initialize the map
m = folium.Map(location=brussels_center)

# Coordinates already present in the AED table, built once so each candidate
# is checked with a set lookup instead of a full re-scan of the dataframe.
existing_aeds = set(zip(aed_bxl['latitude'], aed_bxl['longitude']))

# Add markers for AED locations with custom icon
unique_locations = set()  # Keep track of unique locations added
locations_added = 0  # Track the number of unique locations added
for lat, lon in _iter_centroid_points(aed_locations):
    if (lat, lon) in existing_aeds:
        print(f"Skipping location {lat}, {lon} as it's already in the dataframe")
        continue  # Skip this location if it's already present in the dataframe
    if (lat, lon) in unique_locations:
        continue  # Skip this location if it's already added
    # A fresh icon object is created for every marker
    folium.Marker(location=(lat, lon), popup=f"AED Location {locations_added + 1}",
                  icon=folium.CustomIcon('../images/Aed_logo.jpg', icon_size=(32, 32))).add_to(m)
    unique_locations.add((lat, lon))
    locations_added += 1
    if locations_added == MAX_NEW_AEDS:  # Stop once enough unique locations are placed
        break


# Save the map to an HTML file
m.save("dbscan_aed_locations_map.html")