In [2]:
#Libraries
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import matplotlib.pyplot as plt
from deep_translator import GoogleTranslator
from datetime import datetime

In [47]:
#Load data
aed_locations=pd.read_parquet('aed_locations.parquet.gzip', engine='pyarrow')
ambulance_locations=pd.read_parquet('ambulance_locations.parquet.gzip', engine='pyarrow')
cad9=pd.read_parquet('cad9.parquet.gzip', engine='pyarrow')

interventions1=pd.read_parquet('interventions1.parquet', engine='pyarrow')
interventions2=pd.read_parquet('interventions2.parquet', engine='pyarrow')
interventions3=pd.read_parquet('interventions3.parquet', engine='pyarrow')

interventions_bxl=pd.read_parquet('interventions_bxl.parquet.gzip', engine='pyarrow')
interventions_bxl2=pd.read_parquet('interventions_bxl2.parquet.gzip', engine='pyarrow')

mug_locations=pd.read_parquet('mug_locations.parquet.gzip', engine='pyarrow')
pit_locations=pd.read_parquet('pit_locations.parquet.gzip', engine='pyarrow')

In [157]:
aed_locations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15227 entries, 0 to 15226
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            15225 non-null  float64
 1   type          5167 non-null   object 
 2   address       15225 non-null  object 
 3   number        13085 non-null  float64
 4   postal_code   15193 non-null  float64
 5   municipality  15142 non-null  object 
 6   province      15190 non-null  object 
 7   location      8423 non-null   object 
 8   public        8079 non-null   object 
 9   available     4290 non-null   object 
 10  hours         1077 non-null   object 
dtypes: float64(3), object(8)
memory usage: 1.3+ MB


# Convert to same datetime variable T0

In [144]:
# Convert the 'T0' column to datetime format
datetime_format = "%Y-%m-%d %H:%M:%S.%f"  # Specify the format of your datetime string
cad9['T0'] = pd.to_datetime(cad9['T0'], format=datetime_format)
cad9['T3'] = pd.to_datetime(cad9['T3'], format=datetime_format)

# Define the format string to match the datetime string
datetime_format2 = "%d%b%y:%H:%M:%S"
interventions1['T0'] = pd.to_datetime(interventions1['T0'], format=datetime_format2)
interventions2['T0'] = pd.to_datetime(interventions2['T0'], format=datetime_format2)
interventions3['T0'] = pd.to_datetime(interventions3['T0'], format=datetime_format2)
interventions_bxl2['T0'] = pd.to_datetime(interventions_bxl2['T0'], format=datetime_format2)

# Function to remove the timezone offset
def remove_timezone_offset(datetime_str):
    return datetime_str.split()[0] + " " + datetime_str.split()[1].split(".")[0]
interventions_bxl['t0'] = pd.to_datetime(interventions_bxl['t0'].apply(remove_timezone_offset))

In [146]:
cad9['Waiting time'] = cad9['T3'] - cad9['T0']
cad9['Waiting time']

0           0 days 00:11:56
1           0 days 00:11:32
2        -44 days +08:23:08
3        -44 days +08:23:08
4           0 days 00:16:14
                ...        
289396                  NaT
289397                  NaT
289398                  NaT
289399                  NaT
289400                  NaT
Name: Waiting time, Length: 289401, dtype: timedelta64[ns]

In [150]:
# Convert "Time_To_Arrive" column to timedelta dtype
cad9['Waiting time'] = pd.to_timedelta(cad9['Waiting time'])

# Define the filter condition to remove unrealistic values
filter_condition = (cad9['Waiting time'] >= pd.Timedelta(days=0)) & (cad9['Waiting time'] <= pd.Timedelta(hours=2))

# Apply the filter to remove rows where the value is more than 1 day or less than 0 days
filtered_df = cad9[filter_condition]

filtered_df['Waiting time'].describe()

count                       197849
mean     0 days 00:15:18.218762793
std      0 days 00:10:04.629051323
min                0 days 00:00:26
25%                0 days 00:09:45
50%                0 days 00:12:49
75%                0 days 00:17:06
max                0 days 02:00:00
Name: Waiting time, dtype: object

# check if Cad9 contain same info as in interventions datasets based on time and event type, Latitude intervention and Longitude intervention

In [132]:
# Merge datasets on common variables

#Interventions1 and Cad9 ==> No match 
#Interventions 2 and Cad9 ==> No match
#Interventions 3 and Cad9 ==> No match
#interventions_bxl and Cad9 ==> No match
#interventions_bxl2 and Cad9 ==> No match

merged_data = pd.merge(interventions1, interventions3, left_on=['Latitude intervention','Longitude intervention','EventType Trip', 'T0'], 
                      right_on=['Latitude intervention','Longitude intervention','EventType Trip','T0'],how='inner')

if merged_data.empty:
    print("NO matching information found")
else:
    print("Matching information found")
    print("Number of matched rows:", len(merged_data))
    
#Interesting that some match happened: some events (between interventions datasets) same location and eventype yet different T0

NO matching information found


In [154]:
interventions_bxl["latitude_intervention"]

0         5085139.0
1         5085139.0
2         5083336.0
3         5085076.0
4          508561.0
            ...    
115642    5086697.0
115643    5086697.0
115644    5083525.0
115645    5083525.0
115646    5085315.0
Name: latitude_intervention, Length: 115647, dtype: float64

In [155]:
interventions_bxl["longitude_intervention"]

0         436918.0
1         436918.0
2         434504.0
3         436359.0
4         443169.0
            ...   
115642    436657.0
115643    436657.0
115644     43078.0
115645     43078.0
115646     43526.0
Name: longitude_intervention, Length: 115647, dtype: float64

In [156]:
cad9["Latitude intervention"]

0         50.896027
1         50.745594
2         50.931427
3         50.931427
4         50.884240
            ...    
289396    50.563888
289397    50.689942
289398    50.698750
289399    50.474562
289400    50.240768
Name: Latitude intervention, Length: 289401, dtype: float64

In [135]:
cad9.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289401 entries, 0 to 289400
Data columns (total 35 columns):
 #   Column                           Non-Null Count   Dtype         
---  ------                           --------------   -----         
 0   province                         289401 non-null  object        
 1   Mission ID                       289401 non-null  int64         
 2   Service Name                     196821 non-null  object        
 3   Latitude permanence              233726 non-null  float64       
 4   Longitude permanence             233726 non-null  float64       
 5   Permanence short name            289204 non-null  object        
 6   Permanence long name             196821 non-null  object        
 7   Vector Type                      289401 non-null  object        
 8   EventType Trip                   289343 non-null  object        
 9   EventSubType Trip                289401 non-null  object        
 10  EventLevel Trip                  289331 non-

In [136]:
# Check for duplicate rows
duplicate_rows = cad9[cad9.duplicated()]

# Print duplicate rows if any
if not duplicate_rows.empty:
    print("Duplicate Rows:")
    print(duplicate_rows)
else:
    print("No duplicate rows found.")


No duplicate rows found.


In [138]:
cad9['EventLevel Trip'].unique()

array(['N5', 'T', 'N1', 'N3', 'N2', 'N4', 'TS', 'N6', 'S', 'G', 'B', 'VO',
       None, 'TP', 'N7', 'EK', '5Z', 'L', 'GE', 'N8', 'IF', 'PO', 'KN',
       'SO', 'AG', 'TC', 'PR', 'OR', 'OF', 'VL', 'IZ', 'OD', 'BI', 'EU'],
      dtype=object)

In [139]:
interventions1['EventLevel Trip'].unique()

array(['N5', 'N1', 'N4', 'N2', None, 'N3', 'N0', 'N6', 'N8', 'N7A', 'N7B'],
      dtype=object)

In [60]:
cad9_f=cad9.dropna(subset=['EventType Trip'])

In [61]:
# Delete rows where the 'EventType Trip' column starts with 'Z' or 'Y'
cad9_f = cad9_f[~cad9_f['EventType Trip'].str.startswith(('Z', 'Y'))]
cad9_f["EventType Trip"].unique()

array(['P034 - SCHEDELTRAUMA', 'P010 - ADEMHALINGSMOEILIJKHEDEN',
       'P020 - INTOXICATIE ALCOHOL',
       'P075 - ONCOLOGISCHE PATIËNT IN BEHANDELING',
       'P039 - CARDIAAL PROBLEEM (NIET PIJN OP DE BORST)',
       'P011 - PIJN OP DE BORST', 'P016 - ZWANGERSCHAP-BEVALLING',
       'P068 - UROGENITAAL PROBLEEM',
       'P026 - ONWEL/ZIEK ZONDER DUIDELIJKE FOCUS', 'P033 - TRAUMA',
       'P031 - PSYCHIATRISCH PROBLEEM', 'P009 - SUIKERZIEKTE - DIABETES',
       'P013 - NIET-TRAUMATISCHE RUGPIJN', 'P001 - VERKEERSONGEVAL',
       'P080 - COVID-19', 'P015 - EPILEPSIE - STUIPEN',
       'P059 - DUIZELIGHEID - ONPASSELIJK',
       'P019 - BEWUSTELOOS - COMA - SYNCOPE',
       'P012 - NIET-TRAUMATISCHE BUIKLAST',
       'P099 - INTERHOSPITAALTRANSPORT', 'P004 - CVA-TIA',
       'P007 - VAL VAN GROTE HOOGTE (>3 METER)',
       'P032 - ALLERGISCHE REACTIE',
       'P061 - LIDMAAT/BEWEGINGSSTELSEL NIET TRAUMA',
       'P005 - WONDE DOOR WAPEN', 'P066 - POSTOP PROBLEEM',
       'P022 - INTO

In [131]:
values_to_filter = ["P010 - ADEMHALINGSMOEILIJKHEDEN", "P039 - CARDIAAL PROBLEEM (NIET PIJN OP DE BORST)",
                   "P011 - PIJN OP DE BORST", "P013 - NIET-TRAUMATISCHE RUGPIJN", 
                    "P059 - DUIZELIGHEID - ONPASSELIJK","P003 - HARTSTILSTAND - DOOD - OVERLEDEN", 
                   "P008 - PATIËNT MET DEFIBRILLATOR OF PACEMAKER"]  # Or use a list comprehension/generator for dynamic selection
subset_df = cad9_f[cad9_f["EventType Trip"].isin(values_to_filter)]  # Exclude rows with these values
subset_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62221 entries, 1 to 289393
Data columns (total 35 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   province                         62221 non-null  object 
 1   Mission ID                       62221 non-null  int64  
 2   Service Name                     43584 non-null  object 
 3   Latitude permanence              51877 non-null  float64
 4   Longitude permanence             51877 non-null  float64
 5   Permanence short name            62201 non-null  object 
 6   Permanence long name             43584 non-null  object 
 7   Vector Type                      62221 non-null  object 
 8   EventType Trip                   62221 non-null  object 
 9   EventSubType Trip                62221 non-null  object 
 10  EventLevel Trip                  62221 non-null  object 
 11  CityName intervention            62216 non-null  object 
 12  CitysectionName i

In [132]:
##Possible calls for heart attack
#P010 - ADEMHALINGSMOEILIJKHEDEN --> P010 - Respiratory problems
#P039 - CARDIAAL PROBLEEM (NIET PIJN OP DE BORST) -->P039 - Cardiac problem (other than thoracic pain)
#P011 - PIJN OP DE BORST -->P011 - Chest pain
#P013 - NIET-TRAUMATISCHE RUGPIJN -->P013 - Non-traumatic back pain
#P059 - DUIZELIGHEID - ONPASSELIJK -->'P059 - Dizziness - Nausea'
#P003 - HARTSTILSTAND - DOOD - OVERLEDENg -->P003 - Cardiac arrest
#P008 - PATIËNT MET DEFIBRILLATOR OF PACEMAKER -->P008 - Patient with defibrillator - pacemaker

In [133]:
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P010 - ADEMHALINGSMOEILIJKHEDEN', 'P010 - Respiratory problems')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P039 - CARDIAAL PROBLEEM (NIET PIJN OP DE BORST)', 'P039 - Cardiac problem (other than thoracic pain)')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P011 - PIJN OP DE BORST', 'P011 - Chest pain')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P013 - NIET-TRAUMATISCHE RUGPIJN', 'P013 - Non-traumatic back pain')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P059 - DUIZELIGHEID - ONPASSELIJK', 'P059 - Dizziness - Nausea')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P003 - HARTSTILSTAND - DOOD - OVERLEDEN', 'P003 - Cardiac arrest')
subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P008 - PATIËNT MET DEFIBRILLATOR OF PACEMAKER', 'P008 - Patient with defibrillator - pacemaker')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P010 - ADEMHALINGSMOEILIJKHEDEN', 'P010 - Respiratory problems')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['EventType Trip'] = subset_df['EventType Trip'].replace('P039 - CARDIAAL PROBLEEM (NIET PIJN OP DE BORST)', 'P039 - Cardiac problem (other than thoracic pain)')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https

In [134]:
# Create a table of counts
counts_table = pd.DataFrame(subset_df['EventType Trip'].value_counts())
counts_table.columns = ['Count']
counts_table

Unnamed: 0,Count
P010 - Respiratory problems,24225
P011 - Chest pain,14210
P039 - Cardiac problem (other than thoracic pain),10017
P013 - Non-traumatic back pain,5189
P003 - Cardiac arrest,4806
P059 - Dizziness - Nausea,3429
P008 - Patient with defibrillator - pacemaker,345


In [127]:
#What is this?
subset_df["EventLevel Trip"].unique()

array(['N5', 'N1', 'N3', 'N2', 'N4', 'N6', 'N7', 'N8'], dtype=object)

In [135]:
def convert_to_datetime(string):
    # Extract components from the string
    day = string[:2]
    month = string[2:5]
    year = '20' + string[5:7]
    hour, minute, second = string[9:].split(':')[:3]  # Extract hour, minute, and second parts
    # Map month abbreviations to month numbers
    months = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06',
              'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}
    # Format the components into the desired format
    formatted_datetime = f"{year}-{months[month]}-{day} {hour.zfill(2)}:{minute}:{second}"
    return formatted_datetime

subset_df['T1'] = subset_df['T1'].apply(convert_to_datetime)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['T1'] = subset_df['T1'].apply(convert_to_datetime)


In [138]:
subset_df['T0'] = pd.to_datetime(subset_df['T0'])
subset_df['T1'] = pd.to_datetime(subset_df['T1'])
subset_df['T1confirmed'] = pd.to_datetime(subset_df['T1confirmed'])
subset_df['T2'] = pd.to_datetime(subset_df['T2'])
subset_df['T3'] = pd.to_datetime(subset_df['T3'])
subset_df['T4'] = pd.to_datetime(subset_df['T4'])
subset_df['T5'] = pd.to_datetime(subset_df['T5'])
subset_df['T6'] = pd.to_datetime(subset_df['T6'])
subset_df['T7'] = pd.to_datetime(subset_df['T7'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['T0'] = pd.to_datetime(subset_df['T0'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['T1'] = pd.to_datetime(subset_df['T1'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['T1confirmed'] = pd.to_datetime(subset_df['T1confirmed'])
A value is trying to be set on a copy

In [140]:
subset_df['Time_To_Arrive'] = subset_df['T3'] - subset_df['T0']
subset_df['Time_To_Arrive']

#Difference: T5-T4 (go to hospital time)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['Time_To_Arrive'] = subset_df['T3'] - subset_df['T0']


1        0 days 00:11:32
6        0 days 00:16:31
7        0 days 00:20:04
8        0 days 00:20:04
13       0 days 00:14:30
               ...      
289368   0 days 00:16:13
289390   0 days 00:14:25
289391   0 days 00:14:25
289392   0 days 00:20:03
289393   0 days 00:20:03
Name: Time_To_Arrive, Length: 62221, dtype: timedelta64[ns]

In [143]:
subset_df["Time_To_Arrive"]. describe()

count                         57017
mean      4 days 10:57:28.282178999
std      10 days 22:43:45.991793410
min              -44 days +22:49:37
25%                 0 days 00:11:12
50%                 0 days 00:15:38
75%                 0 days 00:38:22
max                55 days 19:01:14
Name: Time_To_Arrive, dtype: object

In [158]:
# Convert "Time_To_Arrive" column to timedelta dtype
subset_df['Time_To_Arrive'] = pd.to_timedelta(subset_df['Time_To_Arrive'])

# Define the filter condition to remove unrealistic values
filter_condition = (subset_df['Time_To_Arrive'] >= pd.Timedelta(days=0)) & (subset_df['Time_To_Arrive'] <= pd.Timedelta(days=1))

# Apply the filter to remove rows where the value is more than 1 day or less than 0 days
filtered_df = subset_df[filter_condition]

filtered_df['Time_To_Arrive'].describe()

#Max 2 hours

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_df['Time_To_Arrive'] = pd.to_timedelta(subset_df['Time_To_Arrive'])


count                        44305
mean     0 days 00:23:10.911070985
std      0 days 01:27:11.253218773
min                0 days 00:00:37
25%                0 days 00:10:20
50%                0 days 00:13:30
75%                0 days 00:17:50
max                0 days 23:57:53
Name: Time_To_Arrive, dtype: object

In [159]:
## Troubles of kickout unrealistic differences!
stats_table = filtered_df.groupby('EventType Trip')['Time_To_Arrive'].describe()
stats_table

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
EventType Trip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
P003 - Cardiac arrest,3237,0 days 00:22:38.782823602,0 days 01:14:48.757017196,0 days 00:00:37,0 days 00:10:50,0 days 00:13:59,0 days 00:18:31,0 days 23:03:58
P008 - Patient with defibrillator - pacemaker,231,0 days 00:35:47.450216450,0 days 02:26:46.361105863,0 days 00:05:49,0 days 00:11:48,0 days 00:14:38,0 days 00:18:28.500000,0 days 19:37:45
P010 - Respiratory problems,17510,0 days 00:24:02.726442033,0 days 01:34:40.762736837,0 days 00:01:27,0 days 00:10:07,0 days 00:13:16,0 days 00:17:40,0 days 23:48:10
P011 - Chest pain,10355,0 days 00:22:32.165910188,0 days 01:19:07.982461061,0 days 00:01:12,0 days 00:11:18,0 days 00:14:36,0 days 00:18:31,0 days 23:57:53
P013 - Non-traumatic back pain,3777,0 days 00:20:50.069896743,0 days 01:22:50.784940123,0 days 00:01:48,0 days 00:09:28,0 days 00:12:15,0 days 00:16:13,0 days 23:49:37
P039 - Cardiac problem (other than thoracic pain),6778,0 days 00:23:33.347890233,0 days 01:23:02.011282091,0 days 00:01:27,0 days 00:10:20,0 days 00:13:34,0 days 00:17:56,0 days 23:44:55
P059 - Dizziness - Nausea,2417,0 days 00:21:49.420769549,0 days 01:29:09.395650440,0 days 00:02:00,0 days 00:09:19,0 days 00:11:56,0 days 00:15:43,0 days 21:06:39


# Making map

In [41]:
import pickle

def load_variable(filename):
  f = open(filename, "rb")
  variable = pickle.load(f)
  f.close()
  return variable
aed_bxl=load_variable('lat_and_lon.txt')

[(50.8427922, 4.3843513),
 (50.8829618, 4.3352484),
 nan,
 nan,
 (50.831942299999994, 4.328980253636651),
 nan,
 (50.8451496, 4.3698932),
 (50.8841141, 4.4198414),
 nan,
 (50.85829095, 4.3438999186073755),
 nan,
 nan,
 (50.8507699, 4.458388980519481),
 (50.8156416, 4.2988092),
 (50.844450800000004, 4.404375173662215),
 (50.8571949, 4.4301756),
 nan,
 (50.8551889, 4.320073),
 nan,
 nan,
 (50.839455650000005, 4.365872359677628),
 (50.8577304, 4.4328716),
 (50.8421512, 4.3655577640453265),
 (50.8327735, 4.3757608720581604),
 (50.88607845, 4.305620692118104),
 (50.844113, 4.3705805790868455),
 (50.8482839, 4.3630102),
 nan,
 (50.841875200000004, 4.374467493180811),
 (50.8307814, 4.455376773637178),
 (50.841018, 4.4363198),
 (50.8631621, 4.3522683),
 nan,
 (50.8404768, 4.3584968),
 (50.8388287, 4.3569562087874765),
 nan,
 (50.8370534, 4.4270298),
 (50.8171126, 4.3720834),
 nan,
 (50.8332318, 4.3571802),
 (50.8553685, 4.3582398),
 (50.8530072, 4.3674968),
 (50.8532291, 4.3624502),
 (50.82048

In [42]:
import numpy as np

# Remove NaN values from the list
aed_bxl = [value for value in aed_bxl if not isinstance(value, float) or not np.isnan(value)]

Original data: [(50.8427922, 4.3843513), (50.8829618, 4.3352484), nan, nan, (50.831942299999994, 4.328980253636651), nan, (50.8451496, 4.3698932), (50.8841141, 4.4198414), nan, (50.85829095, 4.3438999186073755), nan, nan, (50.8507699, 4.458388980519481), (50.8156416, 4.2988092), (50.844450800000004, 4.404375173662215), (50.8571949, 4.4301756), nan, (50.8551889, 4.320073), nan, nan, (50.839455650000005, 4.365872359677628), (50.8577304, 4.4328716), (50.8421512, 4.3655577640453265), (50.8327735, 4.3757608720581604), (50.88607845, 4.305620692118104), (50.844113, 4.3705805790868455), (50.8482839, 4.3630102), nan, (50.841875200000004, 4.374467493180811), (50.8307814, 4.455376773637178), (50.841018, 4.4363198), (50.8631621, 4.3522683), nan, (50.8404768, 4.3584968), (50.8388287, 4.3569562087874765), nan, (50.8370534, 4.4270298), (50.8171126, 4.3720834), nan, (50.8332318, 4.3571802), (50.8553685, 4.3582398), (50.8530072, 4.3674968), (50.8532291, 4.3624502), (50.8204849, 4.3052856), (50.8549544,

In [81]:
filename = "bxl_itv_coor.txt"
# Read the file without considering the first row as header
df = pd.read_csv(filename, header=None)

# Remove brackets and split into latitude and longitude columns
df[0] = df[0].str.strip('()')
df[1] = df[1].str.strip('()')
df['latitude_intervention']=df[0]
df['longitude_intervention']=df[1]
df.drop(columns=[0, 1], inplace=True)

df['latitude_intervention'] = df['latitude_intervention'].astype(float)
df['longitude_intervention'] = df['longitude_intervention'].astype(float)

df

Unnamed: 0,latitude_intervention,longitude_intervention
0,50.850760,4.363590
1,50.840550,4.342190
2,50.840550,4.342190
3,50.848240,4.379800
4,50.816730,4.365400
...,...,...
20601,50.837257,4.335535
20602,50.823885,4.377838
20603,50.814307,4.334841
20604,50.851438,4.369210


In [None]:
import folium
from geopy.distance import geodesic
import pandas as pd
import numpy as np

interventions_bxl=df
# Extract latitude and longitude values
latitudes = interventions_bxl['latitude_intervention']
longitudes = interventions_bxl['longitude_intervention']

# Calculate mean latitude and longitude
mean_lat = latitudes.mean()
mean_lon = longitudes.mean()

# Create Folium Map centered at mean of incidences
mymap = folium.Map(location=[mean_lat, mean_lon], zoom_start=15)

# Plot incidences from interventions_bxl DataFrame
for index, row in interventions_bxl.iterrows():
    incidence = (row['latitude_intervention'], row['longitude_intervention'])
    folium.Marker(incidence, popup="Incidence", icon=folium.Icon(color='blue')).add_to(mymap)

# Plot AED locations from aed_bxl list of tuples
for aed_location in aed_bxl:
    folium.Marker(aed_location, popup="AED Location", icon=folium.Icon(color='red')).add_to(mymap)
    folium.Circle(aed_location, radius=200, color='red', fill=True, fill_opacity=0.1).add_to(mymap)

# Check incidences within AED circles
for index, row in interventions_bxl.iterrows():
    incidence = (row['latitude_intervention'], row['longitude_intervention'])
    for aed_location in aed_bxl:
        if geodesic(incidence, aed_location).meters <= 200:
            folium.Marker(incidence, popup="Incidence within AED circle", icon=folium.Icon(color='green')).add_to(mymap)

# Save the map to an HTML file
mymap.save("map_with_incidences.html")
