In [24]:
import pandas as pd
import json
import plotly.express as px
from urllib.request import urlopen
import re
import numpy as np

# Download the Amsterdam "stadsdeel" (city district) layer as GeoJSON and
# parse it into a plain Python structure.
GEOJSON_URL = "https://maps.amsterdam.nl/open_geodata/geojson_lnglat.php?KAARTLAAG=INDELING_STADSDEEL&THEMA=gebiedsindeling"
with urlopen(GEOJSON_URL) as response:
    # Read the whole response body first, then decode the JSON document.
    stadsdelen = json.loads(response.read())

In [None]:
# test = pd.read_csv("predicted_sensor_values_3min.csv")
# # Head van de twee kolommen
# same = (test["GASA-06_95"] == test["GASA-06-B_95"]).all()
# print(same)

# diff_count = (test["GASA-06_95"] != test["GASA-06-B_95"]).sum()
# print(f"Aantal verschillen: {diff_count}")

# # Boolean mask: True waar beide kolommen ≠ 0
# mask = (test["GASA-06_95"] != 0) & (test["GASA-06-B_95"] != 0)

# # Check of dat ooit voorkomt
# any_both_nonzero = mask.any()

# print(any_both_nonzero)


# --- Sensor locations ---
# Load the sensor metadata. The "Lat/Long" column stores both coordinates as
# one "lat, long" text value, so it is split into two numeric columns.
df = pd.read_csv("sensor-location.xlsx - Sheet1.csv")
# n=1 splits on the first comma only, so the result never has more than the
# two columns being assigned, even if a cell contains a stray extra comma.
df[["Lat", "Long"]] = df["Lat/Long"].str.split(",", n=1, expand=True)
# Strip surrounding whitespace; anything that is not a number becomes NaN.
df["Lat"] = pd.to_numeric(df["Lat"].astype(str).str.strip(), errors="coerce")
df["Long"] = pd.to_numeric(df["Long"].astype(str).str.strip(), errors="coerce")

# Some sensors have no "Breedte"; for those rows fall back to the value in
# "Effectieve breedte". Earlier code wrote the literal text
# "Effectieve breedte" into the empty cells of both columns, which put a
# column name where a width was expected. Missing "Effectieve breedte" values
# are now left as NaN instead of being replaced by that label.
if "Breedte" in df.columns and "Effectieve breedte" in df.columns:
    df["Breedte"] = df["Breedte"].fillna(df["Effectieve breedte"])
# ---- Predicted data ----
# Read the predictions with "timestamp" parsed as real datetimes, then put
# the rows in chronological order.
cs = (
    pd.read_csv("predicted_sensor_values_3min.csv", parse_dates=["timestamp"])
    .sort_values("timestamp")
)


# GASA-06 also has a "-B" variant. A separate test checked whether there are
# rows in which both columns hold a value; that never happens, so the two
# columns are simply added together.
base_sensor = "GASA-06"
angles = [95, 275]

for angle in angles:
    col_main = f"{base_sensor}_{angle}"
    col_b = f"{base_sensor}-B_{angle}"

    # Nothing to merge unless both columns are present.
    if not {col_main, col_b}.issubset(cs.columns):
        continue

    # Missing readings count as 0, so the sum equals whichever column has a
    # value. The B column is dropped afterwards because it is now merged in.
    cs = cs.assign(
        **{col_main: cs[col_main].fillna(0) + cs[col_b].fillna(0)}
    ).drop(columns=col_b)


#test = pd.read_csv("predicted_sensor_values_3min.csv")
# # Head van de twee kolommen
# same = (test["GASA-06_95"] == test["GASA-06-B_95"]).all()
# print(same)

# diff_count = (test["GASA-06_95"] != test["GASA-06-B_95"]).sum()
# print(f"Aantal verschillen: {diff_count}")

# # Boolean mask: True waar beide kolommen ≠ 0
# mask = (test["GASA-06_95"] != 0) & (test["GASA-06-B_95"] != 0)

# # Check of dat ooit voorkomt
# any_both_nonzero = mask.any()

# print(any_both_nonzero)

In [27]:
  
def parse_col(col):
    """
    Split a column name of the form 'OBJECT_angle', e.g. 'CMSA-GAWW-11_120'.

    Returns a tuple (object number, angle). The angle is an int, or None when
    the name does not end in '_<digits>'. The 'timestamp' column yields
    (None, None).
    """
    if col == "timestamp":
        return (None, None)

    match = re.match(r"^(.*)_(\d+)$", col)
    if match is None:
        # No angle in the column name (happens occasionally): object only.
        return (col, None)

    object_number, angle_text = match.groups()
    return (object_number, int(angle_text))

# Describe every non-timestamp column by the object number and angle that
# parse_col extracts from its name.
sensor_cols = [name for name in cs.columns if name != "timestamp"]
parsed = [parse_col(name) for name in sensor_cols]
obj_nums = [object_number for object_number, _ in parsed]
angles = [angle for _, angle in parsed]

# Lookup table: original column name -> (Objectnummer, hoek).
col_map = pd.DataFrame(
    {
        "column": sensor_cols,
        "Objectnummer": obj_nums,
        "hoek": angles,
    }
)
#only actual values 

In [28]:
# --- 4) Long/tidy form: (timestamp, Objectnummer, hoek, value) ---
# Melt the forecasts to one row per (timestamp, column), then attach the
# object number and angle of each column from col_map.
cs_long = cs.melt(id_vars="timestamp", var_name="column", value_name="value")
cs_long = cs_long.merge(col_map, on="column", how="left")

# Keep only rows that have both an Objectnummer and an angle (we want
# direction-specific values); the angle can then be stored as an integer.
cs_long = cs_long.dropna(subset=["Objectnummer", "hoek"]).copy()
cs_long["hoek"] = cs_long["hoek"].astype(int)

# --- 5) Add location info (df has no angle; the angle stays in cs_long) ---
cs_loc = pd.merge(cs_long, df, on="Objectnummer", how="left")


# NOTE: GASA-06 also has GASA-06-B columns; they are merged into GASA-06 right after the predictions are loaded.

In [30]:
# --- SANITY CHECKS ---
# Compare the sensor ids found in the prediction columns with the ids listed
# in the sensor-location table.

# 1) Unique sensors in predicted_sensor_values
pred_sensor_set = set(col_map["Objectnummer"].dropna())
unique_pred_sensors = sorted(pred_sensor_set)
n_pred_sensors = len(unique_pred_sensors)
print(f"Aantal unieke sensoren in predicted_sensor_values: {n_pred_sensors}")

# 2) Unique sensors in sensor-location
loc_sensor_set = set(df["Objectnummer"].dropna())
unique_loc_sensors = sorted(loc_sensor_set)
n_loc_sensors = len(unique_loc_sensors)
print(f"Aantal unieke sensoren in sensor-location: {n_loc_sensors}")

# 3) Overlap between the two, and what is left over on either side
matched = pred_sensor_set & loc_sensor_set
unmatched_pred = pred_sensor_set - loc_sensor_set
unmatched_loc = loc_sensor_set - pred_sensor_set

print(f"→ Aantal gematchte sensoren: {len(matched)}")
print(f"→ Aantal niet-gematchte (alleen in predicted_sensor): {len(unmatched_pred)}")
print(f"→ Aantal niet-gematchte (alleen in sensor-location): {len(unmatched_loc)}")

# (optional) List the names that could not be matched
if unmatched_pred:
    print("\nNiet-gematchte sensoren in predicted_sensor_values:")
    print(sorted(unmatched_pred))

if unmatched_loc:
    print("\nNiet-gematchte sensoren in sensor-location:")
    print(sorted(unmatched_loc))


# Spot check: show the first merged rows of a single sensor.
sensor_id = "CMSA-GAKH-01"   # <-- replace with your own sensor name
cs_loc_sensor = cs_loc.loc[cs_loc["Objectnummer"].eq(sensor_id)]
cs_loc_sensor.head()



Aantal unieke sensoren in predicted_sensor_values: 36
Aantal unieke sensoren in sensor-location: 36
→ Aantal gematchte sensoren: 36
→ Aantal niet-gematchte (alleen in predicted_sensor): 0
→ Aantal niet-gematchte (alleen in sensor-location): 0


Unnamed: 0,timestamp,column,value,Objectnummer,hoek,Locatienaam,Lat/Long,Breedte,Effectieve breedte,Lat,Long
0,2025-08-23 22:00:00,CMSA-GAKH-01_0,0.0,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
1,2025-08-23 22:03:00,CMSA-GAKH-01_0,0.0,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
2,2025-08-23 22:06:00,CMSA-GAKH-01_0,0.0,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
3,2025-08-23 22:09:00,CMSA-GAKH-01_0,0.0,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
4,2025-08-23 22:12:00,CMSA-GAKH-01_0,0.0,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071


In [None]:
# Display the merged predictions + location frame; the saved output below is
# truncated by the notebook to the first and last rows.
cs_loc

Unnamed: 0,timestamp,column,value,Objectnummer,hoek,Locatienaam,Lat/Long,Breedte,Effectieve breedte,Lat,Long
0,2025-08-23 22:00:00,CMSA-GAKH-01_0,0.00,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
1,2025-08-23 22:03:00,CMSA-GAKH-01_0,0.00,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
2,2025-08-23 22:06:00,CMSA-GAKH-01_0,0.00,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
3,2025-08-23 22:09:00,CMSA-GAKH-01_0,0.00,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
4,2025-08-23 22:12:00,CMSA-GAKH-01_0,0.00,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
...,...,...,...,...,...,...,...,...,...,...,...
16275,2025-08-24 08:45:00,GVCV-14_270,2.15,GVCV-14,270,Buiksloterweg,"52.382169, 4.903385",35,31,52.382169,4.903385
16276,2025-08-24 08:48:00,GVCV-14_270,2.16,GVCV-14,270,Buiksloterweg,"52.382169, 4.903385",35,31,52.382169,4.903385
16277,2025-08-24 08:51:00,GVCV-14_270,2.18,GVCV-14,270,Buiksloterweg,"52.382169, 4.903385",35,31,52.382169,4.903385
16278,2025-08-24 08:54:00,GVCV-14_270,2.61,GVCV-14,270,Buiksloterweg,"52.382169, 4.903385",35,31,52.382169,4.903385


: 

: 

In [None]:
# Check in sensordata_SAIL2025.csv how the GASA-06 and GASA-06-B columns for
# angle 95 relate to each other.
# NOTE(review): only the 95 pair is checked here, while the notebook merges
# both the 95 and 275 pairs - consider checking 275 as well.
test = pd.read_csv("sensordata_SAIL2025.csv")

gasa_main_95 = test["GASA-06_95"]
gasa_b_95 = test["GASA-06-B_95"]

# Are the two columns identical in every row?
same = gasa_main_95.eq(gasa_b_95).all()
print(same)

# Number of rows in which the two columns differ.
diff_count = gasa_main_95.ne(gasa_b_95).sum()
print(f"Aantal verschillen: {diff_count}")

# Boolean mask: True where both columns are != 0
mask = gasa_main_95.ne(0) & gasa_b_95.ne(0)

# Does that ever happen?
any_both_nonzero = mask.any()

print(any_both_nonzero)




False
Aantal verschillen: 767
False


: 

: 