In [7]:
import pandas as pd
import json
import plotly.express as px
from urllib.request import urlopen
import re
import numpy as np

# Download the Amsterdam map layer (the INDELING_STADSDEEL GeoJSON) and
# decode the response body into a Python object.
GEOJSON_URL = "https://maps.amsterdam.nl/open_geodata/geojson_lnglat.php?KAARTLAAG=INDELING_STADSDEEL&THEMA=gebiedsindeling"
with urlopen(GEOJSON_URL) as response:
    stadsdelen = json.loads(response.read())

In [8]:
# --- Sensor locations ---
df = pd.read_csv("sensor-location.xlsx - Sheet1.csv")

# "Lat/Long" holds both coordinates in one comma-separated string; split it
# into two columns, then trim whitespace and convert each to a number
# (anything unparsable becomes NaN).
lat_long_parts = df["Lat/Long"].str.split(",", expand=True)
df[["Lat", "Long"]] = lat_long_parts
for coord in ("Lat", "Long"):
    df[coord] = pd.to_numeric(df[coord].astype(str).str.strip(), errors="coerce")

# Some sensors have no "Breedte" value; fall back to that row's
# "Effectieve breedte" value instead.
# The fill value has to be the column (a Series, aligned row by row). Passing
# the string "Effectieve breedte" would write that literal text into every
# empty cell rather than the value stored in that column.
if {"Breedte", "Effectieve breedte"} <= set(df.columns):
    df["Breedte"] = df["Breedte"].fillna(df["Effectieve breedte"])

# if "Breedte" in df.columns or "Effectieve breedte" in df.columns: 
#     b = pd.to_numeric(df.get("Breedte"), errors="coerce") # 
#     be = pd.to_numeric(df.get("Effectieve breedte"), errors="coerce")
#     df["Breedte_eff"] = np.where(~be.isna(), be, b)
# ---- Predicted data ----
# Read the predictions with "timestamp" parsed as datetimes, then order the
# rows chronologically.
cs = (
    pd.read_csv("predicted_sensor_values_3min.csv", parse_dates=["timestamp"])
      .sort_values("timestamp")
)

In [9]:
#only predicted, remove actual sensors  
def parse_col(col):
    """
    Split a column name of the form 'OBJECT_angle', e.g. 'CMSA-GAWW-11_120'.

    Returns a tuple (object number, angle). The angle is an int when the name
    ends in '_<digits>' and None otherwise. The 'timestamp' column yields
    (None, None).
    """
    if col == "timestamp":
        return (None, None)

    found = re.match(r"^(.*)_(\d+)$", col)
    if found is None:
        # No angle suffix in the column name (happens occasionally): object only.
        return (col, None)

    obj, angle = found.groups()
    return (obj, int(angle))

# Every column except "timestamp" is a sensor series; split each name into
# its object number and angle.
sensor_cols = [name for name in cs.columns if name != "timestamp"]
parsed = list(map(parse_col, sensor_cols))
obj_nums = [obj for obj, _ in parsed]
angles = [angle for _, angle in parsed]

# Lookup table: original column name -> (Objectnummer, hoek).
col_map = pd.DataFrame(
    {
        "column": sensor_cols,
        "Objectnummer": obj_nums,
        "hoek": angles,
    }
)
#only actual values 

In [10]:
# --- 4) Reshape to long/tidy form: (timestamp, Objectnummer, hoek, value) ---
# Melt the wide forecast table to one row per (timestamp, column), attach the
# object number and angle from col_map, keep only rows that have both (we want
# direction-specific values), and store the angle as an integer.
cs_long = (
    cs.melt(id_vars="timestamp", var_name="column", value_name="value")
      .merge(col_map, on="column", how="left")
      .dropna(subset=["Objectnummer", "hoek"])
      .astype({"hoek": int})
)

# --- 5) Add location info (df has no angle; the angle stays in cs_long) ---
cs_loc = pd.merge(cs_long, df, on="Objectnummer", how="left")

In [16]:
# --- SANITY CHECKS ---

# 1) Unique sensors found in the predicted-values column names
pred_ids = set(col_map["Objectnummer"].dropna())
unique_pred_sensors = sorted(pred_ids)
n_pred_sensors = len(unique_pred_sensors)
print(f"Aantal unieke sensoren in predicted_sensor_values: {n_pred_sensors}")

# 2) Unique sensors found in the sensor-location table
loc_ids = set(df["Objectnummer"].dropna())
unique_loc_sensors = sorted(loc_ids)
n_loc_sensors = len(unique_loc_sensors)
print(f"Aantal unieke sensoren in sensor-location: {n_loc_sensors}")

# 3) Overlap between the two sources, and what is exclusive to each side
matched = pred_ids & loc_ids
unmatched_pred = pred_ids - loc_ids
unmatched_loc = loc_ids - pred_ids

print(f"→ Aantal gematchte sensoren: {len(matched)}")
print(f"→ Aantal niet-gematchte (alleen in predicted_sensor): {len(unmatched_pred)}")
print(f"→ Aantal niet-gematchte (alleen in sensor-location): {len(unmatched_loc)}")

# (optional) List the names that did not match, per source
if unmatched_pred:
    print("\nNiet-gematchte sensoren in predicted_sensor_values:")
    print(sorted(unmatched_pred))

if unmatched_loc:
    print("\nNiet-gematchte sensoren in sensor-location:")
    print(sorted(unmatched_loc))



# Preview the merged rows of a single sensor.
sensor_id = "CMSA-GAKH-01"   # <-- replace with your sensor name
is_selected = cs_loc["Objectnummer"] == sensor_id
cs_loc_sensor = cs_loc.loc[is_selected]
cs_loc_sensor.head()



Aantal unieke sensoren in predicted_sensor_values: 12
Aantal unieke sensoren in sensor-location: 36
→ Aantal gematchte sensoren: 12
→ Aantal niet-gematchte (alleen in predicted_sensor): 0
→ Aantal niet-gematchte (alleen in sensor-location): 24

Niet-gematchte sensoren in sensor-location:
['GACM-04', 'GASA-01-A1', 'GASA-01-A2', 'GASA-01-B', 'GASA-01-C', 'GASA-02-01', 'GASA-02-02', 'GASA-03', 'GASA-04', 'GASA-05-O', 'GASA-05-W', 'GASA-06', 'GVCV-01', 'GVCV-03', 'GVCV-04', 'GVCV-05-A', 'GVCV-05-B', 'GVCV-06', 'GVCV-07', 'GVCV-08', 'GVCV-09', 'GVCV-11', 'GVCV-13', 'GVCV-14']


Unnamed: 0,timestamp,column,value,Objectnummer,hoek,Locatienaam,Lat/Long,Breedte,Effectieve breedte,Lat,Long
0,2025-08-20 13:00:00,CMSA-GAKH-01_0,3.457648,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
1,2025-08-20 13:00:00,CMSA-GAKH-01_0,3.463887,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
2,2025-08-20 13:00:00,CMSA-GAKH-01_0,3.46714,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
3,2025-08-20 13:00:00,CMSA-GAKH-01_0,3.485175,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071
4,2025-08-20 13:00:00,CMSA-GAKH-01_0,3.495088,CMSA-GAKH-01,0,Kalverstraat t.h.v. 1,"52.372634, 4.892071",8,67,52.372634,4.892071


In [None]:
# Show the merged frame (long-format predictions joined with the sensor-location columns).
cs_loc