In [1]:
# Import packages

import rasterio
import rasterio.mask
from rasterio.plot import reshape_as_image 
from rasterio.plot import reshape_as_raster
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import branca
import folium
import shapely
import json

In [3]:
%load_ext pycodestyle_magic

The pycodestyle_magic extension is already loaded. To reload it, use:
  %reload_ext pycodestyle_magic


In [4]:
%pycodestyle_on

### Visualisation: Bubble Map of incidents

This part of code is made to make a bubble map of the number of incidents per zones. For the zones, we decided to apply COROP zoning system, which is higher granuality than provinces, but less detailed than municipalities. The zoning of COROP can be found in the website of CBS.  
https://www.cbs.nl/nl-nl/achtergrond/2023/10/kaarten-regionale-indelingen-2023

In [5]:
# Reading incidents data
path_incidents = "../Advanced Data Science Data/Incidents_clean.csv"
df = pd.read_csv(path_incidents)

display(df)

Unnamed: 0,type,starttime_new,endtime_new,vild_primair_wegnummer,primaire_locatie_lengtegraad,primaire_locatie_breedtegraad,duration
0,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A1,4.974663,52.346931,151160.933333
1,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A9,4.716725,52.514820,151160.933333
2,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A9,4.738364,52.609730,151160.933333
3,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A35,6.824692,52.204929,151160.933333
4,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A4,4.346407,52.041920,151160.933333
...,...,...,...,...,...,...,...
88846,vehicle_obstruction,2019-12-31 23:43:49,2019-12-31 23:43:49,,4.475721,51.893230,0.000000
88847,accident,2019-12-31 23:46:10,2019-12-31 23:46:10,N246,4.701406,52.443363,0.000000
88848,vehicle_obstruction,2019-12-31 23:47:01,2019-12-31 23:47:01,N31,5.922628,53.165226,0.000000
88849,vehicle_obstruction,2019-12-31 23:55:23,2019-12-31 23:55:23,A4,4.313686,51.442677,0.000000


In [6]:
# Importing CORUP shape file
path_gpd = "../Advanced Data Science Data/COROP/B14_COROP_gebieden/B14_COROP_gebiedenPolygon.shp"
COROP = gpd.read_file(path_gpd)

2:80: E501 line too long (109 > 79 characters)


In [7]:
# Translate the geometry into epsg 4326 coordinates standard
COROP = COROP.to_crs(epsg=4326)
COROP.head(5)

Unnamed: 0,SHAPE_LENG,RUBRIEK_XS,RUBRIEK,JRSTATCODE,STATNAAM_X,STATNAAM,STATCODE_X,STATCODE,OBJECTID,geometry
0,102265.901559,,coropgebied,2018CR19,,Alkmaar en omgeving,,CR19,35.0,"POLYGON ((4.72522 52.69385, 4.72885 52.69140, ..."
1,190305.88823,,coropgebied,2018CR37,,Noord-Limburg,,CR37,36.0,"POLYGON ((5.93277 51.74194, 5.93589 51.74103, ..."
2,168527.142803,,coropgebied,2018CR38,,Midden-Limburg,,CR38,37.0,"POLYGON ((5.89041 51.31367, 5.91065 51.30771, ..."
3,158991.993399,,coropgebied,2018CR39,,Zuid-Limburg,,CR39,38.0,"POLYGON ((5.82368 51.06673, 5.82677 51.05587, ..."
4,326651.059478,,coropgebied,2018CR40,,Flevoland,,CR40,39.0,"MULTIPOLYGON (((5.86218 52.52197, 5.85975 52.5..."


In [None]:
# Get zone names
zone_names = [i for i in COROP["STATNAAM"]]

# Get the geometry of the COROP
g = [i for i in COROP.geometry]

# Get the coordinates of the geometry
list_poly_coords = [shapely.geometry.mapping(g[i])['coordinates'][0] for i in range(len(g))]

# Turn the coordinates into numpy arrays
list_arr_coords = [np.array(list_poly_coords[i]) for i in range(len(list_poly_coords))]

# Organising dimension of arrays
for i in range(len(list_arr_coords)):
    # some polygon data has 3D array but the first dimention is always 1 so get rid of it
    if list_arr_coords[i].ndim == 3:
        list_arr_coords[i] = list_arr_coords[i][0]

In [51]:
# Identify in which zone each incident is located
for k in range(len(df)):
    for j in range(len(g)):
        if shapely.geometry.Point(df["primaire_locatie_lengtegraad"][k], df["primaire_locatie_breedtegraad"][k]).within(g[j]):
            df.loc[k, "zone"] = zone_names[j]
            break        
        else:
            df.loc[k, "zone"] = "Unknown"

Incident 0 is in zone 14
Incident 1 is in zone 7
Incident 2 is in zone 0
Incident 3 is in zone 34
Incident 4 is in zone 16
Incident 5 is in zone 12
Incident 6 is in zone 35
Incident 7 is in zone 38
Incident 8 is in zone 22
Incident 9 is in zone 7
Incident 10 is in zone 37
Incident 11 is in zone 37
Incident 12 is in zone 26
Incident 13 is in zone 37
Incident 14 is in zone 1
Incident 15 is in zone 24
Incident 16 is in zone 32
Incident 17 is in zone 14
Incident 18 is in zone 24
Incident 19 is in zone 24
Incident 20 is in zone 14
Incident 21 is in zone 15
Incident 22 is in zone 39
Incident 23 is in zone 22
Incident 24 is in zone 3
Incident 25 is in zone 24
Incident 26 is in zone 24
Incident 27 is in zone 33
Incident 28 is in zone 26
Incident 29 is in zone 31
Incident 30 is in zone 16
Incident 31 is in zone 31
Incident 32 is in zone 26
Incident 33 is in zone 16
Incident 34 is in zone 26
Incident 35 is in zone 24
Incident 36 is in zone 37
Incident 37 is in zone 20
Incident 38 is in zone 16
I

8:80: E501 line too long (92 > 79 characters)
11:80: E501 line too long (87 > 79 characters)
15:80: E501 line too long (89 > 79 characters)
21:5: E115 expected an indented block (comment)
22:80: E501 line too long (126 > 79 characters)
25:18: W291 trailing whitespace


Incident 88834 is in zone 24
Incident 88835 is in zone 24
Incident 88836 is in zone 26
Incident 88837 is in zone 24
Incident 88838 is in zone 34
Incident 88839 is in zone 24
Incident 88840 is in zone 31
Incident 88841 is in zone 26
Incident 88842 is in zone 31
Incident 88843 is in zone 38
Incident 88844 is in zone 18
Incident 88845 is in zone 14
Incident 88846 is in zone 24
Incident 88847 is in zone 18
Incident 88848 is in zone 10
Incident 88849 is in zone 31
Incident 88850 is in zone 14


In [52]:
df

Unnamed: 0,type,starttime_new,endtime_new,vild_primair_wegnummer,primaire_locatie_lengtegraad,primaire_locatie_breedtegraad,duration,zone
0,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A1,4.974663,52.346931,151160.933333,Groot-Amsterdam
1,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A9,4.716725,52.514820,151160.933333,IJmond
2,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A9,4.738364,52.609730,151160.933333,Alkmaar en omgeving
3,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A35,6.824692,52.204929,151160.933333,Twente
4,vehicle_obstruction,2019-08-28 12:11:32,2019-12-11 11:32:28,A4,4.346407,52.041920,151160.933333,Agglomeratie 's-Gravenhage
...,...,...,...,...,...,...,...,...
88846,vehicle_obstruction,2019-12-31 23:43:49,2019-12-31 23:43:49,,4.475721,51.893230,0.000000,Groot-Rijnmond
88847,accident,2019-12-31 23:46:10,2019-12-31 23:46:10,N246,4.701406,52.443363,0.000000,Zaanstreek
88848,vehicle_obstruction,2019-12-31 23:47:01,2019-12-31 23:47:01,N31,5.922628,53.165226,0.000000,Noord-Friesland
88849,vehicle_obstruction,2019-12-31 23:55:23,2019-12-31 23:55:23,A4,4.313686,51.442677,0.000000,West-Noord-Brabant


In [53]:
# Check if loaded correctly
# fig, ax = plt.subplots(figsize=(8, 8))
# COROP.plot(ax=ax)
# ax.scatter(df["primaire_locatie_lengtegraad"], df["primaire_locatie_breedtegraad"], s=5, c="red")
# plt.show()

4:80: E501 line too long (99 > 79 characters)


In [54]:
# Set up a dataframe for bubble map
columns_bubble = ["Zone", "Lat", "Lon", "Count"]
df_bubble = pd.DataFrame(columns=columns_bubble)

# input the zones and the coordinates of the centroid (centre of gravity)
df_bubble["Zone"] = zone_names
df_bubble["Lat"] = [list(shapely.geometry.Polygon(list_arr_coords[i]).centroid.coords)[0][1] for i in range(len(zone_names))]
df_bubble["Lon"] = [list(shapely.geometry.Polygon(list_arr_coords[i]).centroid.coords)[0][0] for i in range(len(zone_names))]

# Count the number of incidents in each zone
for i in range(len(zone_names)):
    df_bubble.loc[i, "Count"] = len(df[df["zone"] == zone_names[i]])


df_bubble["Count"] = df_bubble["Count"].astype(int)

7:80: E501 line too long (125 > 79 characters)
8:80: E501 line too long (125 > 79 characters)


In [55]:
df_bubble

Unnamed: 0,Zone,Lat,Lon,Count
0,Alkmaar en omgeving,52.639448,4.758368,496
1,Noord-Limburg,51.464509,6.035926,988
2,Midden-Limburg,51.200997,5.875367,1196
3,Zuid-Limburg,50.8838,5.862185,2175
4,Flevoland,52.523277,5.860541,1654
5,Agglomeratie Haarlem,52.377009,4.611586,468
6,Oost-Groningen,53.080286,7.048793,307
7,IJmond,52.464935,4.576024,1017
8,Delfzijl en omgeving,53.328281,6.840129,47
9,Overig Groningen,53.369105,6.196435,1501


In [56]:
print(df_bubble["Count"].sum())


2:1: W391 blank line at end of file


88754


In [57]:
import plotly.express as px

fig = px.scatter_mapbox(df_bubble, lat='Lat', lon='Lon', size='Count', size_max=30, zoom=5.7, color='Count', 
                        color_continuous_scale=px.colors.sequential.Turbo, mapbox_style='open-street-map')

fig.update_layout(title='Number of incidents per COROP zones', title_x=0.5, font=dict(size=12))
fig.show()

3:80: E501 line too long (108 > 79 characters)
3:109: W291 trailing whitespace
4:80: E501 line too long (106 > 79 characters)
6:80: E501 line too long (95 > 79 characters)
