In [1]:
import folium
from folium.plugins import MarkerCluster, HeatMap
import pandas as pd
import numpy as np

### __Import accident dataset with records of mainland France for 2022__

In [2]:
# Columns kept for the 2022 map. 'catv' and 'hrmn' are intentionally left out:
# for 2022 every 'catv' entry has the value 0 and every 'hrmn' entry is missing.
columns_of_interest = ['year', 'month', 'day', 'atm',
                       'severity', 'gender', 'age', 'catu',
                       'lat', 'long']

# Lookup tables turning the numeric codes into readable labels
gender_mapping = {1: 'Male', 2: 'Female'}
catu_mapping = {
    1: 'Driver',
    2: 'Passenger',
    3: 'Pedestrian',
    4: 'Skater/Scooter rider',  # this category was introduced in 2018
}

accident_data = (
    pd.read_csv('../data/accidents_2022.csv', index_col=0)
    .loc[:, columns_of_interest]
    # records without geographical coordinates cannot be placed on a map
    .dropna(subset=['lat', 'long'])
    .assign(
        # plain 'year-month-day' text (no zero padding, no time component)
        date_str=lambda frame: (frame['year'].astype(str)
                                + '-' + frame['month'].astype(str)
                                + '-' + frame['day'].astype(str)),
        # codes missing from the lookup tables become NaN
        gender=lambda frame: frame['gender'].map(gender_mapping),
        catu=lambda frame: frame['catu'].map(catu_mapping),
    )
)

display(accident_data.head())
accident_data.info()

Unnamed: 0,year,month,day,atm,severity,gender,age,catu,lat,long,date_str
2538867,2022,10,19,1,0.0,Male,74.0,Driver,44.55942,4.72572,2022-10-19
2538868,2022,10,20,1,1.0,Male,34.0,Driver,46.92581,6.3462,2022-10-20
2538869,2022,10,20,1,0.0,Male,52.0,Driver,46.92581,6.3462,2022-10-20
2538870,2022,10,20,1,0.0,Male,20.0,Driver,48.493162,-2.760439,2022-10-20
2538871,2022,10,20,1,1.0,Female,35.0,Driver,48.493162,-2.760439,2022-10-20


<class 'pandas.core.frame.DataFrame'>
Index: 111859 entries, 2538867 to 2665527
Data columns (total 11 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   year      111859 non-null  int64  
 1   month     111859 non-null  int64  
 2   day       111859 non-null  int64  
 3   atm       111859 non-null  int64  
 4   severity  111859 non-null  float64
 5   gender    111857 non-null  object 
 6   age       111859 non-null  float64
 7   catu      111859 non-null  object 
 8   lat       111859 non-null  float64
 9   long      111859 non-null  float64
 10  date_str  111859 non-null  object 
dtypes: float64(4), int64(4), object(3)
memory usage: 10.2+ MB


### 2022 road accident fatalities mapped to mainland France

In [3]:
# Keep only the records whose severity code is 3 (deaths)
severity_3_data = accident_data.loc[accident_data['severity'] == 3]

# Base map, centred approximately on mainland France
map_center = [46.6031, 1.7191]
mymap = folium.Map(location=map_center, zoom_start=3, tiles='OpenStreetMap')

# Heat layer: one [lat, long] pair per fatal record, drawn with a custom palette
heatmap_colors = ['#00a0a3', '#ff8c00', '#c8102e']
heat_data = [[lat, lon] for lat, lon in zip(severity_3_data['lat'], severity_3_data['long'])]
HeatMap(
    heat_data,
    radius=15,
    blur=8,
    gradient=dict(zip((0.4, 0.8, 1), heatmap_colors)),
).add_to(mymap)

# Cluster layer that groups the individual markers
marker_cluster = MarkerCluster(control=False).add_to(mymap)

# Font Awesome icon name per 'catu' category
catu_icons = {'Driver': 'car', 'Passenger': 'car-side', 'Pedestrian': 'person'}

# Marker colour per 'catu' category
catu_colors = {'Driver': 'black', 'Passenger': 'darkblue', 'Pedestrian': 'red'}

# One marker per fatal record, each with an informational pop-up
for record in severity_3_data.itertuples(index=False):
    # Categories without an entry fall back to a gray question-mark marker
    marker_icon = catu_icons.get(record.catu, 'question')
    marker_color = catu_colors.get(record.catu, 'gray')

    popup_content = (
        f"<b>Date:</b> {record.date_str}<br>"
        f"<b>User Type:</b> {record.catu}<br>"
        f"<b>Gender:</b> {record.gender}<br>"
        f"<b>Age:</b> {record.age}<br>"
    )

    folium.Marker(
        location=[record.lat, record.long],
        popup=folium.Popup(popup_content, max_width=300),
        icon=folium.Icon(icon=marker_icon, prefix='fa', color=marker_color),
    ).add_to(marker_cluster)

# Write the finished map to a standalone HTML file
mymap.save('../plots/2022_map_with_colored_markers_final.html')

# The 2022-only frames are no longer needed after this point
del severity_3_data, accident_data;

### Import and merge all accident datasets (without pre-processing)

In [5]:
# Location and naming scheme of the partial CSV files that are combined below
path = "../data/merge_data/"   # folder containing the individual data files
prefix = "merge_parts_"        # common prefix of the data files
file_ids = range(1, 21)        # numeric suffixes of the files to read
extension = '.csv'             # file extension of the data files

# Read every part first and concatenate once at the end. Concatenating inside
# the loop copies the growing frame on every iteration, and bootstrapping on
# `ID == 1` breaks (or silently reuses an old `df` left in the kernel) as soon
# as `file_ids` does not start at 1.
# NOTE(review): the recorded output shows a warning pointing at the read_csv
# call for several parts - presumably a mixed-dtype DtypeWarning; consider
# passing explicit `dtype=` for the affected columns. TODO confirm.
parts = []
for ID in file_ids:
    # File name built from prefix, file ID and extension
    filename = prefix + str(ID) + extension

    # Progress message for the current file
    print('Processing', filename)

    parts.append(pd.read_csv(path + filename, sep=','))

# The index is not reset, so each part keeps its own row labels and the
# merged frame can contain repeated index values.
df = pd.concat(parts)

del parts;

# Display the head of the merged DataFrame.
df.head()

Processing merge_parts_1.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_2.csv
Processing merge_parts_3.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_4.csv
Processing merge_parts_5.csv
Processing merge_parts_6.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_7.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_8.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_9.csv
Processing merge_parts_10.csv
Processing merge_parts_11.csv
Processing merge_parts_12.csv
Processing merge_parts_13.csv
Processing merge_parts_14.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_15.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_16.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_17.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_18.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Processing merge_parts_19.csv
Processing merge_parts_20.csv


  df_temp = pd.read_csv(path + filename, sep=',')


Unnamed: 0,Num_Acc,place,catu,severity,gender,trajet,secu,locp,actp,comp,...,agg,int,atm,col,com,adr,gps,lat,long,dep
0,200500000001,1.0,1.0,4.0,1.0,1.0,11.0,0.0,0.0,0.0,...,2,1,1.0,3.0,11.0,CD41B,M,5051500.0,294400.0,590
1,200500000001,1.0,1.0,3.0,2.0,3.0,11.0,0.0,0.0,0.0,...,2,1,1.0,3.0,11.0,CD41B,M,5051500.0,294400.0,590
2,200500000001,2.0,2.0,1.0,1.0,0.0,11.0,0.0,0.0,0.0,...,2,1,1.0,3.0,11.0,CD41B,M,5051500.0,294400.0,590
3,200500000001,4.0,2.0,1.0,1.0,0.0,31.0,0.0,0.0,0.0,...,2,1,1.0,3.0,11.0,CD41B,M,5051500.0,294400.0,590
4,200500000001,5.0,2.0,1.0,1.0,0.0,11.0,0.0,0.0,0.0,...,2,1,1.0,3.0,11.0,CD41B,M,5051500.0,294400.0,590


### __Pre-processing__

In [6]:
# Restrict the merged frame to the columns used further down
df = df.loc[:, ['an', 'mois', 'jour', 'hrmn',
                'severity', 'gender', 'birth_year', 'catu',
                'lat', 'long']]

# Re-code the severity values so that they can be sorted
severity_mapping = {1: 0, 2: 3, 3: 2, 4: 1}
df['severity'] = df['severity'].replace(severity_mapping)

# Discard the rows whose severity is -1
df = df.loc[df['severity'] != -1]

# French -> English column names
df = df.rename(columns={'an': 'year', 'mois': 'month', 'jour': 'day'})

# Bring the years into a consistent four-digit format (5 -> 2005, ..., 18 -> 2018)
years = {short_year: 2000 + short_year for short_year in range(5, 19)}
df['year'] = df['year'].replace(to_replace=years)

# Age of the person involved: accident year minus birth year
df['age'] = df['year'] - df['birth_year']

# 'lat' as float: go through text so that decimal commas can be turned into points
df['lat'] = df['lat'].astype('str').str.replace(',', '.').astype('float64')

# 'long' as float: same treatment; additionally a bare '-' entry becomes NaN
df['long'] = (df['long'].astype('str')
                        .str.replace(',', '.')
                        .replace('-', np.nan)
                        .astype('float64'))

# Time as text: drop the ':' separator and left-pad with zeros to four characters
df['hrmn'] = df['hrmn'].astype(str).str.replace(':','').str.zfill(4)

# Zero-padded month and day, needed only to assemble the timestamp text below
month_padded = df['month'].astype(str).str.zfill(2)
day_padded = df['day'].astype(str).str.zfill(2)

# Build date_time out of year, padded month, padded day and hrmn
timestamp_text = df['year'].astype(str) + month_padded + day_padded + df['hrmn'].astype(str)
df['date_time'] = pd.to_datetime(timestamp_text, format="%Y%m%d%H%M")


# Select features to be mapped to France

In [7]:
# Variables carried over to the map ('hrmn', 'catv' and 'atm' are left out)
map_columns = ['date_time',
               'severity', 'gender', 'age', 'catu',
               'lat', 'long']
accident_data = df[map_columns]

# The full merged frame is not needed beyond this point
del df;

# Keep the records dated 2019-01-01 or later that have geographical coordinates
from_2019_on = accident_data['date_time'] >= '2019-01-01'
accident_data = accident_data.loc[from_2019_on].dropna(subset=['lat', 'long'])

# Lookup tables turning the numeric codes into readable labels
gender_mapping = {1: 'Male', 2: 'Female'}
catu_mapping = {
    1: 'Driver',
    2: 'Passenger',
    3: 'Pedestrian',
    4: 'Skater/Scooter rider',  # this category was introduced in 2018
}

# Replace the codes in 'gender' and 'catu'; codes without a label become NaN
accident_data = accident_data.assign(
    gender=accident_data['gender'].map(gender_mapping),
    catu=accident_data['catu'].map(catu_mapping),
)

display(accident_data.tail())
accident_data.info()

Unnamed: 0,date_time,severity,gender,age,catu,lat,long
133272,2022-01-01 08:40:00,0.0,Female,20.0,Driver,43.927265,1.915637
133273,2022-01-01 08:40:00,2.0,Female,18.0,Passenger,43.927265,1.915637
133274,2022-01-01 08:40:00,1.0,Female,69.0,Driver,43.927265,1.915637
133275,2022-03-01 16:55:00,2.0,Male,30.0,Driver,47.594404,1.353329
133276,2022-03-01 16:55:00,0.0,Male,22.0,Driver,47.594404,1.353329


<class 'pandas.core.frame.DataFrame'>
Index: 497953 entries, 34852 to 133276
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   date_time  497953 non-null  datetime64[ns]
 1   severity   494009 non-null  float64       
 2   gender     488503 non-null  object        
 3   age        488368 non-null  float64       
 4   catu       494009 non-null  object        
 5   lat        497953 non-null  float64       
 6   long       497953 non-null  float64       
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 30.4+ MB


# Map containing all fatalities between 2019 and 2022

In [8]:
# Uses the 'accident_data' frame prepared in the previous cell
# (columns read here: 'lat', 'long', 'severity', 'catu', 'gender', 'age', 'date_time').

# Keep only the records whose severity code is 3 (deaths)
severity_3_data = accident_data.loc[accident_data['severity'] == 3]

# Base map, centred approximately on mainland France
map_center = [46.6031, 1.7191]
mymap = folium.Map(location=map_center, zoom_start=3, tiles='OpenStreetMap')

# Heat layer: one [lat, long] pair per fatal record, drawn with a custom palette
heatmap_colors = ['#00a0a3', '#ff8c00', '#c8102e']
heat_data = [[lat, lon] for lat, lon in zip(severity_3_data['lat'], severity_3_data['long'])]
HeatMap(
    heat_data,
    radius=15,
    blur=8,
    gradient=dict(zip((0.4, 0.8, 1), heatmap_colors)),
).add_to(mymap)

# Font Awesome icon name per 'catu' category
catu_icons = {'Driver': 'car', 'Passenger': 'car-side', 'Pedestrian': 'person'}

# Marker colour per 'catu' category
catu_colors = {'Driver': 'black', 'Passenger': 'darkblue', 'Pedestrian': 'red'}

# One marker per fatal record, each with an informational pop-up.
# No MarkerCluster is used in this cell: every marker is attached to the map itself.
for record in severity_3_data.itertuples(index=False):
    # Categories without an entry fall back to a gray question-mark marker
    marker_icon = catu_icons.get(record.catu, 'question')
    marker_color = catu_colors.get(record.catu, 'gray')

    popup_content = (
        f"<b>Date:</b> {record.date_time}<br>"
        f"<b>User Type:</b> {record.catu}<br>"
        f"<b>Gender:</b> {record.gender}<br>"
        f"<b>Age:</b> {record.age}<br>"
    )

    folium.Marker(
        location=[record.lat, record.long],
        popup=folium.Popup(popup_content, max_width=300),
        icon=folium.Icon(icon=marker_icon, prefix='fa', color=marker_color),
    ).add_to(mymap)

# Write the finished map to a standalone HTML file
mymap.save('../plots/2019-2022_map_with_colored_markers_final.html')