# Assign Risks and Get No-Fly zones

This notebook is step 2 in the process of creating a graph for a specific area.
To run this notebook, you must first execute get_data.ipynb for the desired area.

In this step, risk scores are calculated for each area type based on predefined likelihood and severity values. These risk scores are then assigned to all data points obtained from the OSM data in get_data.ipynb. In addition, no-fly zones are also added to the dataset.

The result is exported as a GeoJSON file, which can be used as input for get_graph.ipynb.

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os

### 0. Specify the area

Set `city` to the lowercase name of the folder used for the desired area; `file_name` is derived from it automatically (`<city>.geojson`).
Make sure to use the same boundaries as in get_data.ipynb to correctly identify the no-fly zones for the selected area.

In [None]:
# Name of the area folder (lowercase). It is interpolated into the input and
# output file paths used further down in this notebook.
# city = 'breda'
# city = 'borsele'
city = 'alphen-waddinxveen' # lowercase name of the folder

In [None]:
# file_name = 'breda.geojson' # name of the file to be created
# file_name = 'alphen-waddinxveen.geojson' # name of the file to be created
# NOTE(review): file_name is not referenced by any later cell visible in this
# notebook; the final export builds its own file name. Confirm whether it is still needed.
file_name = f'{city}.geojson' # name of the file to be created

In [None]:
# Alternative areas (uncomment one and comment out the active definition below):
# boundaries = ['Breda, Noord-Brabant, Netherlands']
# boundaries = ['Borsele, Zeeland, Netherlands']

# Names of the boundaries to be used. Every entry of the active area shares the
# same province/country suffix, so only the place names are listed here.
_municipalities = ('Alphen aan den Rijn', 'Waddinxveen', 'Boskoop')
boundaries = [f'{name}, Zuid-Holland, Netherlands' for name in _municipalities]


In [None]:
# Toggle meant to label area types with a severity of 4 (Sf, Sp or Ss) as 'no_fly_zone'.
# NOTE(review): the only cell that reads this flag is currently commented out,
# so changing it has no effect in the visible notebook.
no_catastrophic = True

In [None]:
# Output folder for this area, relative to the current working directory
output_path = "output/" + city

# Create the folder (including missing parents); no error if it already exists
os.makedirs(output_path, exist_ok=True)

## 1. Calculate risks

In [None]:
# Risk input table with one row per area type. The path is relative to the
# notebook folder (just like output_path) instead of a user-specific absolute
# path, so the notebook also runs on other machines.
# Assumes the notebook is run from its own folder, which contains input/.
df = pd.read_csv(os.path.join("input", "risk_scores.csv"))


In [None]:
# Show the loaded risk table for a visual check
df

### 1.1 Define alpha

In [None]:
# Weights of the three risk domains in the combined risk score,
# in the order: fatality, property, societal.
alpha_f, alpha_p, alpha_s = 0.4, 0.3, 0.3

### 1.2 Calculate risk

In [None]:
# Number of external risk factors; the risk table must provide one likelihood
# column per factor, named L1 .. L{n_factors}.
n_factors = 5


def _min_max_normalize(values):
    """Rescale a numeric Series linearly so its minimum maps to 0 and its maximum to 1.

    A Series whose values are all equal has a zero range. Dividing by that
    range would turn every entry (and therefore every final risk score) into
    NaN, so such a Series is mapped to 0.0 instead.
    """
    lowest = values.min()
    value_range = values.max() - lowest
    if value_range == 0:
        return values * 0.0
    return (values - lowest) / value_range


# Running totals of the per-factor contributions per domain (plain sums, no division)
R_f_total = 0
R_p_total = 0

# Loop over the external risk factors
for i in range(1, n_factors + 1):
    # Separate fatality and property risk contribution of factor i:
    # severity of the domain times the likelihood of the factor
    df[f"R_if_{i}"] = df["Sf"] * df[f"L{i}"]
    df[f"R_ip_{i}"] = df["Sp"] * df[f"L{i}"]

    # Accumulate
    R_f_total += df[f"R_if_{i}"]
    R_p_total += df[f"R_ip_{i}"]

# Cumulative crash-related risks
df["R_f"] = R_f_total
df["R_p"] = R_p_total

# Societal risk (independent of the risk factors)
df["R_s"] = df["Ss"]

# Normalize the fatality, property and societal domains separately
df["R_f_norm"] = _min_max_normalize(df["R_f"])
df["R_p_norm"] = _min_max_normalize(df["R_p"])
df["R_s_norm"] = _min_max_normalize(df["R_s"])

# Combine the normalized domain risks into one weighted sum
df["risk"] = (
    alpha_f * df["R_f_norm"] +
    alpha_p * df["R_p_norm"] +
    alpha_s * df["R_s_norm"]
)

df["risk"] = df["risk"].round(3)

# Sort by highest total risk
df_sorted = df.sort_values("risk", ascending=False)


In [None]:
# Leave Height out of the sorted overview table. Reassigning (instead of
# inplace=True) and ignoring an already-missing column keeps this cell safe to
# re-run; df itself keeps its Height column for the merge further down.
df_sorted = df_sorted.drop(columns=["Height"], errors="ignore")

In [None]:
# Show all area types ordered from highest to lowest risk
df_sorted

In [None]:
# Compact overview per area type: the Sf/Sp/Ss columns, the likelihood
# columns L1-L5 and the resulting risk score.
overview_columns = ['area_type', 'Sf', 'Sp', 'Ss'] + [f'L{k}' for k in range(1, 6)] + ['risk']
df_sorted[overview_columns]

In [None]:
# Work on an independent copy so later edits to df_risks leave df untouched
df_risks = df.copy()


In [None]:
# if no_catastrophic:
#     print("No catastrophic risks")
#     df_risks["risk"] = df_risks["risk"].astype(str)

#     df_risks.loc[
#         (df_risks["Sf"] == 4) | (df_risks["Sp"] == 4) | (df_risks["Ss"] == 4),
#         "risk"
#     ] = 'no_fly_zone'

In [None]:
# Show the risk table that is joined onto the OSM data in the next section
df_risks

## 2. Risk scores for gdf

In [None]:
# OSM features for this area, as exported by get_data.ipynb. The path is
# relative instead of a user-specific absolute path; it assumes the notebook
# is run from its own folder, which sits next to the 1.get_osm_data folder.
osm_data_path = os.path.join("..", "1.get_osm_data", "output", city, f"osm_data_{city}.geojson")
gdf = gpd.read_file(osm_data_path)

In [None]:
# List the distinct area types present in the OSM data
gdf['area_type'].unique()

In [None]:
# Attach risk and Height to every feature via its area_type.
# how='left' keeps features whose area_type is absent from the risk table
# (their risk stays NaN). validate raises a MergeError if an area_type occurs
# more than once in the risk table, which would otherwise silently duplicate
# features in the output.
gdf = gdf.merge(
    df_risks[['area_type', 'risk', 'Height']],
    on='area_type',
    how='left',
    validate='many_to_one',
)

##### All PostNL points get risk = 0 and Height = 0

In [None]:
# if area_type is postnl then set risk to 0
# Features of area type 'postnl point' get a risk of 0 and a Height of 0
is_postnl_point = gdf['area_type'] == 'postnl point'
gdf.loc[is_postnl_point, 'risk'] = 0
gdf.loc[is_postnl_point, 'Height'] = 0

In [None]:
# print rows with risk == nan
# Show the rows whose risk is still NaN after the merge
risk_is_missing = gdf['risk'].isna()
gdf.loc[risk_is_missing]

In [None]:
# Distinct risk values now present in the data
gdf['risk'].unique()

In [None]:
# Round the risk scores to three decimals
gdf['risk'] = gdf['risk'].round(3)

In [None]:
# Write the result into the output folder created at the top of the notebook
# (output_path), so the export always lands in a directory that exists and
# does not depend on a user-specific absolute path.
export_path = os.path.join(output_path, f"osm_data_with_risk_{city}.geojson")
gdf.to_file(export_path, driver="GeoJSON")