# Assign Risks and Get No-Fly zones

This notebook is step 2 in the process of creating a graph for a specific area.
To run this notebook, you must first execute get_data.ipynb for the desired area.

In this step, risk scores are calculated for each area type based on predefined likelihood and severity values. These risk scores are then assigned to all data points obtained from the OSM data in get_data.ipynb. No-fly zones are also added to the dataset.

The result is exported as a GeoJSON file, which can be used as input for get_graph.ipynb.

In [34]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

### 0. Specify the area

Set `city` to the lowercase name of the folder used for the desired area; `file_name` is derived from it automatically (`<city>.geojson`).
Make sure `boundaries` matches the boundaries used in get_data.ipynb, so that the no-fly zones are identified correctly for the selected area.

In [35]:
# Area to process. The value is used to build the input and output paths
# further down; alternatives are kept commented out for quick switching.
# city = 'breda'
# city = 'borsele'
city = 'alphen-waddinxveen' # lowercase name of the folder

In [36]:
# File name derived from `city`, so the two settings cannot drift apart.
# NOTE(review): `file_name` is not referenced in the visible cells of this
# notebook - confirm it is still needed.
# file_name = 'breda.geojson' # name of the file to be created
# file_name = 'alphen-waddinxveen.geojson' # name of the file to be created
file_name = f'{city}.geojson' # name of the file to be created

In [37]:
# Place names that delimit the study area. According to the notebook text
# these must match the boundaries used in get_data.ipynb.
# boundaries = ['Breda, Noord-Brabant, Netherlands']

boundaries = [
    'Alphen aan den Rijn, Zuid-Holland, Netherlands',
    'Waddinxveen, Zuid-Holland, Netherlands',
    'Boskoop, Zuid-Holland, Netherlands'
] # name of the boundaries to be used in the file

# boundaries = ['Borsele, Zeeland, Netherlands']


In [38]:
# Switch intended to turn area types with a severity score of 4 into no-fly zones.
# NOTE(review): the only cell that reads this flag is currently commented out,
# so the flag has no effect in the visible cells.
no_catastrophic = True

In [39]:
# Per-area output folder, relative to the working directory.
# exist_ok=True makes the cell safe to re-run.
output_path = f"output/{city}"
os.makedirs(output_path, exist_ok=True)

## 1. Calculate risks

In [40]:
# Load the likelihood/severity table from the notebook's own input folder.
# A path relative to the working directory replaces the former user-specific
# absolute path, so the notebook runs on any machine when started from the
# 2.risk_analysis folder (the same assumption the relative "output/" folder
# already makes).
risk_scores_path = os.path.join("input", "risk_scores.csv")
df = pd.read_csv(risk_scores_path)


In [41]:
# Inspect the table as loaded: one row per area type with its score columns
# (Sf, Sp, Ss, L1-L5) and Height.
df

Unnamed: 0,area_type,Sf,Sp,Ss,L1,L2,L3,L4,L5,Height
0,Motorways and major roads,4.0,3.0,1.0,3.0,2.0,1.0,1.0,3.0,30.0
1,Regional roads,3.0,3.0,2.0,4.0,3.0,1.0,3.0,2.0,30.0
2,Tracks and rural access roads,2.0,2.0,1.0,2.0,1.0,3.0,2.0,2.0,30.0
3,Living and residential streets,3.0,4.0,4.0,5.0,4.0,1.0,5.0,3.0,30.0
4,Pedestrian and cycling paths,3.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,30.0
5,Railways,4.0,3.0,2.0,5.0,2.0,2.0,1.0,4.0,30.0
6,Power lines,1.0,4.0,1.0,2.0,2.0,3.0,1.0,5.0,60.0
7,Power plants,1.0,4.0,2.0,4.0,3.0,2.0,3.0,5.0,60.0
8,Communication towers,1.0,3.0,1.0,4.0,5.0,1.0,2.0,4.0,60.0
9,High infrastructures,1.0,3.0,2.0,4.0,2.0,1.0,3.0,3.0,60.0


### 1.1 Define alpha

In [42]:
# Weights of the three normalised risk domains in the final weighted sum.
alpha_f = 0.4  # fatality
alpha_p = 0.3  # property
alpha_s = 0.3  # societal

### 1.2 Calculate risk

In [43]:
# Number of external risk factors (columns L1 .. L5)
n_factors = 5

# Running totals of the per-factor contributions for each domain (no averaging)
R_f_total = 0
R_p_total = 0

# Walk over the external risk factors
for i in range(1, n_factors + 1):
    factor = df[f"L{i}"]

    # Separate fatality and property contributions for this factor
    df[f"R_if_{i}"] = df["Sf"] * factor
    df[f"R_ip_{i}"] = df["Sp"] * factor

    # Accumulate
    R_f_total = R_f_total + df[f"R_if_{i}"]
    R_p_total = R_p_total + df[f"R_ip_{i}"]

# Cumulative crash-related risks
df["R_f"] = R_f_total
df["R_p"] = R_p_total

# Societal risk (independent of the risk factors)
df["R_s"] = df["Ss"]

# Min-max normalise the fatality, property and societal domains separately
for domain in ("f", "p", "s"):
    raw = df[f"R_{domain}"]
    df[f"R_{domain}_norm"] = (raw - raw.min()) / (raw.max() - raw.min())

# Weighted sum of the normalised risks, rounded to three decimals
weighted = (
    alpha_f * df["R_f_norm"] +
    alpha_p * df["R_p_norm"] +
    alpha_s * df["R_s_norm"]
)
df["risk"] = weighted.round(3)

# Rank area types from highest to lowest total risk
df_sorted = df.sort_values(by="risk", ascending=False)


In [44]:
# Remove the "Height" column from the ranking view.
# Re-assigning (instead of inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
df_sorted = df_sorted.drop(columns=["Height"], errors="ignore")

In [45]:
# Area types ranked from highest to lowest combined risk.
df_sorted

Unnamed: 0,area_type,Sf,Sp,Ss,L1,L2,L3,L4,L5,R_if_1,...,R_ip_4,R_if_5,R_ip_5,R_f,R_p,R_s,R_f_norm,R_p_norm,R_s_norm,risk
22,Schools and universities,4.0,3.0,4.0,5.0,4.0,1.0,4.0,3.0,20.0,...,12.0,12.0,9.0,68.0,51.0,4.0,1.0,0.576923,1.0,0.873
3,Living and residential streets,3.0,4.0,4.0,5.0,4.0,1.0,5.0,3.0,15.0,...,20.0,9.0,12.0,54.0,72.0,4.0,0.774194,0.846154,1.0,0.864
12,Retail zones,4.0,4.0,3.0,5.0,4.0,1.0,4.0,3.0,20.0,...,16.0,12.0,12.0,68.0,68.0,3.0,1.0,0.794872,0.666667,0.838
13,Residential areas,2.0,4.0,4.0,5.0,4.0,1.0,5.0,3.0,10.0,...,20.0,6.0,12.0,36.0,72.0,4.0,0.483871,0.846154,1.0,0.747
23,Hospitals,2.0,3.0,4.0,5.0,4.0,1.0,4.0,4.0,10.0,...,12.0,8.0,12.0,36.0,54.0,4.0,0.483871,0.615385,1.0,0.678
11,Commercial zones,3.0,3.0,3.0,5.0,4.0,1.0,4.0,3.0,15.0,...,12.0,9.0,9.0,51.0,51.0,3.0,0.725806,0.576923,0.666667,0.663
10,Industrial zones,2.0,4.0,2.0,5.0,5.0,1.0,5.0,5.0,10.0,...,20.0,10.0,20.0,42.0,84.0,2.0,0.580645,1.0,0.333333,0.632
28,Parks,4.0,2.0,3.0,5.0,2.0,2.0,3.0,2.0,20.0,...,6.0,8.0,4.0,56.0,28.0,3.0,0.806452,0.282051,0.666667,0.607
26,Cultural sites,4.0,3.0,3.0,4.0,2.0,1.0,3.0,2.0,16.0,...,9.0,8.0,6.0,48.0,36.0,3.0,0.677419,0.384615,0.666667,0.586
14,Recreational zones,4.0,2.0,3.0,4.0,2.0,2.0,3.0,2.0,16.0,...,6.0,8.0,4.0,52.0,26.0,3.0,0.741935,0.25641,0.666667,0.574


In [46]:
# Independent copy of the scored table (still including "Height"); this is the
# lookup that is merged onto the OSM features further down.
df_risks = df.copy()


In [47]:
# NOTE: this cell is disabled. When active, it would replace the numeric risk
# of every area type that has a severity score of 4 (Sf, Sp or Ss) with the
# string 'no_fly_zone'.
# NOTE(review): casting "risk" to str would likely break the later numeric
# rounding of gdf['risk'] - revisit that step before re-enabling this block.
# if no_catastrophic:
#     print("No catastrophic risks")
#     df_risks["risk"] = df_risks["risk"].astype(str)

#     df_risks.loc[
#         (df_risks["Sf"] == 4) | (df_risks["Sp"] == 4) | (df_risks["Ss"] == 4),
#         "risk"
#     ] = 'no_fly_zone'

In [48]:
# Inspect the risk lookup table in its original row order.
df_risks

Unnamed: 0,area_type,Sf,Sp,Ss,L1,L2,L3,L4,L5,Height,...,R_ip_4,R_if_5,R_ip_5,R_f,R_p,R_s,R_f_norm,R_p_norm,R_s_norm,risk
0,Motorways and major roads,4.0,3.0,1.0,3.0,2.0,1.0,1.0,3.0,30.0,...,3.0,12.0,9.0,40.0,30.0,1.0,0.548387,0.307692,0.0,0.312
1,Regional roads,3.0,3.0,2.0,4.0,3.0,1.0,3.0,2.0,30.0,...,9.0,6.0,6.0,39.0,39.0,2.0,0.532258,0.423077,0.333333,0.44
2,Tracks and rural access roads,2.0,2.0,1.0,2.0,1.0,3.0,2.0,2.0,30.0,...,4.0,4.0,4.0,20.0,20.0,1.0,0.225806,0.179487,0.0,0.144
3,Living and residential streets,3.0,4.0,4.0,5.0,4.0,1.0,5.0,3.0,30.0,...,20.0,9.0,12.0,54.0,72.0,4.0,0.774194,0.846154,1.0,0.864
4,Pedestrian and cycling paths,3.0,2.0,3.0,4.0,2.0,2.0,2.0,2.0,30.0,...,4.0,6.0,4.0,36.0,24.0,3.0,0.483871,0.230769,0.666667,0.463
5,Railways,4.0,3.0,2.0,5.0,2.0,2.0,1.0,4.0,30.0,...,3.0,16.0,12.0,56.0,42.0,2.0,0.806452,0.461538,0.333333,0.561
6,Power lines,1.0,4.0,1.0,2.0,2.0,3.0,1.0,5.0,60.0,...,4.0,5.0,20.0,13.0,52.0,1.0,0.112903,0.589744,0.0,0.222
7,Power plants,1.0,4.0,2.0,4.0,3.0,2.0,3.0,5.0,60.0,...,12.0,5.0,20.0,17.0,68.0,2.0,0.177419,0.794872,0.333333,0.409
8,Communication towers,1.0,3.0,1.0,4.0,5.0,1.0,2.0,4.0,60.0,...,6.0,4.0,12.0,16.0,48.0,1.0,0.16129,0.538462,0.0,0.226
9,High infrastructures,1.0,3.0,2.0,4.0,2.0,1.0,3.0,3.0,60.0,...,9.0,3.0,9.0,13.0,39.0,2.0,0.112903,0.423077,0.333333,0.272


## 2. Risk scores for gdf

In [49]:
# Load the OSM features exported by get_data.ipynb (step 1).
# The path is built relative to the working directory instead of a
# user-specific absolute path, so the notebook runs on any machine when
# started from the 2.risk_analysis folder (the same assumption the relative
# "output/" folder already makes).
osm_data_path = os.path.join(
    "..", "1.get_osm_data", "output", city, f"osm_data_{city}.geojson"
)
gdf = gpd.read_file(osm_data_path)

In [50]:
# List the area types present in the OSM data; each one needs a matching row
# in df_risks (or explicit handling) to receive a risk score.
gdf['area_type'].unique()

array(['Pedestrian and cycling paths', 'Tracks and rural access roads',
       'Regional roads', 'Living and residential streets',
       'Motorways and major roads', 'Railways', 'High infrastructures',
       'Communication towers', 'Power lines', 'Power plants',
       'Agricultural lands', 'Residential areas', 'Industrial zones',
       'Recreational zones', 'Retail zones', 'Commercial zones',
       'Meadows and open grass', 'Forests and woodlands', 'Wetlands',
       'Rivers, canals and streams', 'Lakes and ponds', 'Religious sites',
       'Schools and universities', 'Cultural sites', 'Prisons',
       'Hospitals', 'postnl point'], dtype=object)

In [51]:
# Attach risk and Height to every OSM feature through its area type.
# - Any "risk"/"Height" columns left from a previous run are dropped first, so
#   re-running this cell does not create risk_x / risk_y duplicates.
# - validate="many_to_one" raises if an area type occurs more than once in
#   df_risks, which would otherwise silently duplicate features.
# - how="left" keeps features whose area type has no score; their risk is NaN.
risk_lookup = df_risks[['area_type', 'risk', 'Height']]
gdf = (
    gdf
    .drop(columns=['risk', 'Height'], errors='ignore')
    .merge(risk_lookup, on='area_type', how='left', validate='many_to_one')
)

##### All PostNL points get risk = 0 and Height = 0

In [52]:
# if area_type is postnl then set risk to 0
gdf.loc[gdf['area_type'] == 'postnl point', 'risk'] = 0
gdf.loc[gdf['area_type'] == 'postnl point', 'Height'] = 0

In [53]:
# print rows with risk == nan
gdf[gdf['risk'].isna()]

Unnamed: 0,name,id,description,area_type,category,geometry,risk,Height


In [54]:
# Distinct risk values now present on the features.
gdf['risk'].unique()

array([0.463, 0.144, 0.44 , 0.864, 0.312, 0.561, 0.272, 0.226, 0.222,
       0.409, 0.066, 0.747, 0.632, 0.574, 0.838, 0.663, 0.01 , 0.062,
       0.   , 0.039, 0.556, 0.873, 0.586, 0.529, 0.678])

In [55]:
# Keep the exported risk at three decimals.
gdf['risk'] = gdf['risk'].round(decimals=3)

In [56]:
# Export the scored features as GeoJSON, the input for get_graph.ipynb.
# The file goes into output_path, the folder this notebook created earlier
# with os.makedirs, instead of a user-specific absolute path. This guarantees
# the target folder exists and keeps the notebook portable.
risk_geojson_path = os.path.join(output_path, f"osm_data_with_risk_{city}.geojson")
gdf.to_file(risk_geojson_path, driver="GeoJSON")