In [1]:
import pandas as pd
import folium as fm
from folium.plugins import MarkerCluster
from random import sample
import json
from shapely.geometry import Point, Polygon

The columns in the data are:
- `ano` is the accident number
- `tway` stands for "Trafficway"
- `day` is the day of the week
- `lat` is the latitude
- `lon` is the longitude

Make an interactive map using Folium

### Data Acquisition & Preprocessing

In [2]:
# Load the data
df = pd.read_csv('../../usc_data/sc_loc2021.csv', low_memory=False)
df_len = df.shape[0]
print(f"Initial length of the data : {df_len:,}")

Initial length of the data : 147,724


In [3]:
# Check the type of lat and lon columns
print(f"Type of lat column : {df['lat'].dtype}")
print(f"Type of lon column : {df['lon'].dtype}")

Type of lat column : object
Type of lon column : int64


In [4]:
# Since the lat column is of type object, we need to check if it contains any non-numeric values
print(f"Number of rows with non-numeric lat : {pd.to_numeric(df['lat'], errors='coerce').isnull().sum():,}")

# Get the rows with non-numeric lat
df[pd.to_numeric(df['lat'], errors='coerce').isnull()]['lat'].to_dict()

Number of rows with non-numeric lat : 1


{49714: '3409-001'}

In [5]:
# Convert the lat column to numeric
# Remove hyphens from the column
df['lat'] = df['lat'].str.replace('-', '')

# Step 3: Convert the column to integer type
df['lat'] = df['lat'].astype(int)

In [6]:
print(f"Number of rows with lat = 0               : {(df['lat'] == 0).sum():,}")
print(f"Number of rows with lon = 0               : {(df['lon'] == 0).sum():,}")
print(f"Number of rows with lat and lon = 0       : {((df['lat'] == 0) & (df['lon'] == 0)).sum():,}")
print(f"Number of rows with either lat or lon = 0 : {((df['lat'] == 0) | (df['lon'] == 0)).sum():,}")

Number of rows with lat = 0               : 2,122
Number of rows with lon = 0               : 2,136
Number of rows with lat and lon = 0       : 2,122
Number of rows with either lat or lon = 0 : 2,136


In [7]:
# Remove rows with lat = 0 or lon = 0
df = df[(df['lat'] != 0) & (df['lon'] != 0)]
print(f"Length of the data after removing rows with lat = 0 or lon = 0 : {df.shape[0]:,}")
print(f"Percentage of rows removed                                     : {((df_len - df.shape[0]) / df_len):.2%}")

Length of the data after removing rows with lat = 0 or lon = 0 : 145,588
Percentage of rows removed                                     : 1.45%


In [8]:
# Convert lat and lon to correct decimal degrees
df['lat'] = df['lat'] / 1_000_000
df['lon'] = - (df['lon'] / 1000000)  # Note the negative sign for longitude

### Data Filtering

In [9]:
# Load the GeoJSON file
with open("south carolina.geojson", 'r') as f:
    sc_geojson = json.load(f)

# Extract coordinates and create a Shapely polygon
sc_coords = sc_geojson['geometry']['coordinates'][0]
sc_polygon = Polygon(sc_coords)

In [10]:
# Filter points and calculate exclusion percentage
df_len = df.shape[0]

df['in_sc'] = df.apply(lambda row: sc_polygon.contains(Point(row['lon'], row['lat'])), axis=1)
df = df[df['in_sc']]  # Keep only points within South Carolina

print(f"Total points         : {df_len:,}")
print(f"Points within SC     : {df.shape[0]:,}")
print(f"Excluded points      : {(df_len - df.shape[0]):,}")
print(f"Exclusion percentage : {(df_len - df.shape[0])/ df_len:.2%}")

Total points         : 145,588
Points within SC     : 108,100
Excluded points      : 37,488
Exclusion percentage : 25.75%


### Map Creation

In [11]:
# Create a map centered on South Carolina
sc_center_lat, sc_center_lon = 33.8361, -81.1637  # Approximate center of SC
m = fm.Map(location=[sc_center_lat, sc_center_lon], zoom_start=7)

# Create a MarkerCluster
marker_cluster = MarkerCluster().add_to(m)

In [12]:
# The `tway` column has this meaning:
# 1. Two-way, not divided
# 2. Two-way, divided, unprotected median
# 3. Two-way, divided, barrier
# 4. One way
# 8. Other
# Create mapping for the `tway` column:
tway_map = {1: 'Two-way, not divided',
            2: 'Two-way, divided, unprotected median',
            3: 'Two-way, divided, barrier',
            4: 'One way',
            8: 'Other'}

# The `day` column has this meaning:
# 1. Sunday
# 2. Monday and so on...
# Create mapping for the `day` column:
day_map = {1: 'Sunday',
           2: 'Monday',
           3: 'Tuesday',
           4: 'Wednesday',
           5: 'Thursday',
           6: 'Friday',
           7: 'Saturday'}

In [13]:
possible_colors = ['blue', 'darkgreen', 'cadetblue', 'lightred', 
                   'beige', 'pink', 'green', 'darkred', 'lightgreen', 
                   'lightblue', 'darkblue', 'darkpurple', 'gray', 
                   'purple', 'orange', 'lightgray', 'red', 'black']  # Not including white

# Define color mapping for 'tway'
color_map = {num: col for num, col in zip(df['tway'].unique(), sample(possible_colors, len(df['tway'].unique())))}
color_map

{4: 'blue', 1: 'green', 2: 'red', 8: 'lightblue', 3: 'darkpurple'}

In [14]:
# Add markers to the cluster
for idx, row in df.iterrows():    
    fm.Marker(
        popup = fm.Popup(f"""
        <b>Accident Number:</b> {row['ano']}<br>
        <b>Latitude:</b> {row['lat']}<br>
        <b>Longitude:</b> {row['lon']}<br>
        <b>Trafficway:</b> {tway_map.get(row['tway'], 'Other')}<br>
        <b>Day:</b> {day_map.get(row['day'], 'Unknown')}<br>
        """, max_width="100%"),
        location=[row['lat'], row['lon']],
        icon=fm.Icon(color=color_map.get(row['tway'], 'gray')),
        lazy=True
    ).add_to(marker_cluster)

In [15]:
# Create a list to hold each line of the legend
legend_lines = []

# Iterate over the color_map dictionary
for tway, color in color_map.items():
    # Create a line for the legend
    line = f'&nbsp; <i class="fa fa-map-marker fa-2x" style="color:{color}"></i>&nbsp; {tway_map.get(tway, "Other")} <br>'
    # Add the line to the list
    legend_lines.append(line)

# Join all the lines together to form the complete legend
legend_content = '\n'.join(legend_lines)

# Create the legend HTML
legend_html = f'''
<div style="position: fixed; bottom: 20px; left: 50px; width: auto; height: auto;
    border:2px solid grey; z-index:9999; font-size:14px; background-color:white;
    padding: 10px;">&nbsp; <b> Type of Way </b><br>
    {legend_content}
</div>
'''

# Add the legend to the map
m.get_root().html.add_child(fm.Element(legend_html));
print(legend_html)


<div style="position: fixed; bottom: 20px; left: 50px; width: auto; height: auto;
    border:2px solid grey; z-index:9999; font-size:14px; background-color:white;
    padding: 10px;">&nbsp; <b> Type of Way </b><br>
    &nbsp; <i class="fa fa-map-marker fa-2x" style="color:blue"></i>&nbsp; One way <br>
&nbsp; <i class="fa fa-map-marker fa-2x" style="color:green"></i>&nbsp; Two-way, not divided <br>
&nbsp; <i class="fa fa-map-marker fa-2x" style="color:red"></i>&nbsp; Two-way, divided, unprotected median <br>
&nbsp; <i class="fa fa-map-marker fa-2x" style="color:lightblue"></i>&nbsp; Other <br>
&nbsp; <i class="fa fa-map-marker fa-2x" style="color:darkpurple"></i>&nbsp; Two-way, divided, barrier <br>
</div>



In [16]:
# Add borders to the map for South Carolina
# Source: https://nagasudhir.blogspot.com/2021/07/draw-borders-from-geojson-paths-in.html
# style options - https://leafletjs.com/reference-1.7.1.html#path
bordersStyle = {
    'color': 'green',
    'weight': 2,
    'fillColor': 'blue',
    'fillOpacity': 0.1
}

# File (`south carolina.geojson`) downloaded from https://github.com/glynnbird/usstatesgeojson/blob/master/south%20carolina.geojson
# File (`South Carolina County Boundaries.geojson`) downloaded from https://cartographyvectors.com/map/1123-south-carolina-with-county-boundaries
fm.GeoJson(
    data=(open("South Carolina County Boundaries.geojson", 'r').read()),
    name="South Carolina",
    style_function=lambda x: bordersStyle).add_to(m);

In [18]:
# Save the map
f_name: str = "sc_incidents_map_2021.html"
m.save(f_name)
print(f"Map has been saved as {f_name}")

Map has been saved as sc_incidents_map_2021.html
