Diese Query zieht eine zufällige Zeile aus der Tabelle `stops` und gibt deren Attribute aus.

In [1]:
import psycopg2
# Open a dedicated connection for this cell. The try/finally guarantees the
# connection is released even if the query raises.
# NOTE(review): credentials are hardcoded here; consider reading them from the environment.
conn = psycopg2.connect(host="localhost", dbname="postgres", user="postgres", password="1234")
try:
    with conn.cursor() as cur:
        # ORDER BY random() LIMIT 1 returns one arbitrary row of the stops table.
        cur.execute("""
            SELECT stop_id, eva, ar_ts, dp_ts
            FROM stops
            ORDER BY random()
            LIMIT 1;
        """)
        row = cur.fetchone()
finally:
    conn.close()

if row is None:
    # fetchone() yields None when the query produced no row (empty table);
    # indexing into it would raise a TypeError.
    print("Tabelle stops ist leer - keine Zeile vorhanden.")
else:
    # Column order follows the SELECT list: stop_id, eva, ar_ts, dp_ts.
    print("PK =", row[0])
    print("rest =", {"eva": row[1], "ar_ts": row[2], "dp_ts": row[3]})

PK = -9196991393569848217-2510020541-20
rest = {'eva': 8089109, 'ar_ts': datetime.datetime(2025, 10, 2, 6, 34, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'dp_ts': datetime.datetime(2025, 10, 2, 6, 34, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))}


Diese Query berechnet die durchschnittliche Abfahrtsverspätung global über ganz Berlin (berücksichtigt werden nur Stopps, bei denen sowohl `dp_ts` als auch `actual_departure` vorliegen).

In [2]:
import psycopg2
# NOTE(review): credentials are hardcoded here; consider reading them from the environment.
conn = psycopg2.connect(host="localhost", dbname="postgres", user="postgres", password="1234")
try:
    # Average departure delay, restricted to rows where both dp_ts and
    # actual_departure are non-NULL.
    with conn.cursor() as cur:
        cur.execute("""
            SELECT
                AVG(EXTRACT(EPOCH FROM (actual_departure - dp_ts))) AS avg_delay_seconds,
                COUNT(*) AS n
            FROM public.stops
            WHERE dp_ts IS NOT NULL
              AND actual_departure IS NOT NULL;
        """)
        avg_delay_seconds, n = cur.fetchone()
finally:
    # Release the connection even when the query fails.
    conn.close()

print("N =", n)
if avg_delay_seconds is None:
    # AVG() is NULL when no row passes the filter; round(None, 2) would raise
    # a TypeError, so both figures are reported as None instead.
    print("Ø Abfahrtsverspätung (Sek.) =", None)
    print("Ø Abfahrtsverspätung (Min.) =", None)
else:
    print("Ø Abfahrtsverspätung (Sek.) =", round(avg_delay_seconds, 2))
    print("Ø Abfahrtsverspätung (Min.) =", round(float(avg_delay_seconds) / 60.0, 2))

N = 870984
Ø Abfahrtsverspätung (Sek.) = 123.66
Ø Abfahrtsverspätung (Min.) = 2.06


Diese Query berechnet die durchschnittliche Abfahrtsverspätung je Station (berücksichtigt werden nur Stopps, bei denen sowohl `dp_ts` als auch `actual_departure` vorliegen) und sortiert die Stationen absteigend nach Verspätung.

In [3]:
# Ø Abfahrtsverspätung nach Station (größte zuerst)
import psycopg2
conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)

# Per-station row count and mean of (actual_departure - dp_ts) in seconds,
# ordered so that the station with the largest mean comes first.
station_delay_sql = """
        SELECT
            st.eva,
            st.name AS station_name,
            COUNT(*) AS n,
            AVG(EXTRACT(EPOCH FROM (s.actual_departure - s.dp_ts))) AS avg_delay_seconds
        FROM public.stops s
        JOIN public.stationen st
          ON st.eva = s.eva
        WHERE s.dp_ts IS NOT NULL
          AND s.actual_departure IS NOT NULL
        GROUP BY st.eva, st.name
        HAVING COUNT(*) > 0
        ORDER BY avg_delay_seconds DESC;
    """

with conn.cursor() as cur:
    cur.execute(station_delay_sql)
    rows = cur.fetchall()

# Example output: only the first 20 result rows are printed.
top_n = 20
for eva, name, n, avg_s in rows[:top_n]:
    columns = [
        f"{eva}",
        f"{name:40s}",
        f"n={n:7d}",
        f"Ø={avg_s/60:7.2f} min",  # seconds -> minutes
    ]
    print(" | ".join(columns))
conn.close()

8010404 | Berlin-Spandau                           | n=  10801 | Ø=   9.92 min
8011160 | Berlin Hauptbahnhof                      | n=  10697 | Ø=   9.57 min
8011113 | Berlin Südkreuz                          | n=   4780 | Ø=   6.47 min
8010406 | Berlin Zoologischer Garten               | n=   7209 | Ø=   5.86 min
8010255 | Berlin Ostbahnhof                        | n=   7984 | Ø=   5.03 min
8010405 | Berlin-Wannsee                           | n=   4536 | Ø=   4.87 min
8010403 | Berlin-Charlottenburg                    | n=   3533 | Ø=   4.80 min
8011102 | Berlin Gesundbrunnen                     | n=   6582 | Ø=   4.13 min
8011155 | Alexanderplatz                           | n=   6299 | Ø=   3.99 min
8011306 | Berlin-Friedrichstraße                   | n=   6586 | Ø=   3.91 min
8011162 | Berlin Ostkreuz                          | n=  10124 | Ø=   3.62 min
8011118 | Berlin Potsdamer Platz                   | n=   4349 | Ø=   3.55 min
8011041 | Berlin-Lichterfelde Ost                  |

Diese Query nimmt eine Station entgegen und berechnet deren durchschnittliches Delay. Annahme ist dabei, dass ein Delay-Wert vorhanden ist; falls ein Zug früher dran war, wird das (als negativer Wert) mit in den Durchschnitt eingerechnet.

Lösung zu 2.4

In [11]:
import psycopg2

# Per-station aggregate: row count and mean of (actual_departure - dp_ts) in
# seconds, for stations whose name matches the ILIKE pattern bound to
# %(station_name)s. Rows with a NULL dp_ts or actual_departure are excluded.
SQL_AVG_DELAY_FOR_STATION = """
SELECT
    st.eva,
    st.name AS station_name,
    COUNT(*) AS n,
    AVG(EXTRACT(EPOCH FROM (s.actual_departure - s.dp_ts))) AS avg_delay_seconds
FROM public.stops s
JOIN public.stationen st
  ON st.eva = s.eva
WHERE s.dp_ts IS NOT NULL
  AND s.actual_departure IS NOT NULL
  AND st.name ILIKE %(station_name)s
GROUP BY st.eva, st.name;
"""


def avg_delay_for_station(conn, station_name: str):
    """Run the per-station delay aggregate and return its first result row.

    Args:
        conn: Open DB connection whose ``cursor()`` can be used as a context manager.
        station_name: Value bound to the ``ILIKE`` pattern of the query.

    Returns:
        The first row as ``(eva, station_name, n, avg_delay_seconds)``, or
        ``None`` when the query yields no row.
    """
    bind_values = {"station_name": station_name}
    with conn.cursor() as cursor:
        cursor.execute(SQL_AVG_DELAY_FOR_STATION, bind_values)
        # fetchone() is None if no station matched or no valid stop rows exist.
        return cursor.fetchone()

# Example run: look up the average delay for one station name.
conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)

station = "Berlin Hauptbahnhof"
row = avg_delay_for_station(conn, station)

if row is not None:
    eva, name, n, avg_s = row
    # avg_s is in seconds; divide by 60 to report minutes.
    print(f"{eva} | {name} | n={n} | Ø={avg_s/60:.2f} min")
else:
    print("Keine passende Station oder keine gültigen Stop-Daten gefunden.")

conn.close()


8011160 | Berlin Hauptbahnhof | n=10697 | Ø=9.57 min


Diese Query gibt die Koordinaten aller Stationen zurück, und das folgende Skript visualisiert die Stationen, um zu checken, ob die Koordinaten richtig extrahiert wurden.

In [4]:
# Falls nötig (einmalig): !pip install folium
import psycopg2
import folium

# --- connect to the database ---
conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)

# 1) Fetch stations from the DB, limited to a latitude/longitude bounding box
BERLIN_BBOX = dict(lat_min=52.3, lat_max=52.7, lon_min=13.0, lon_max=13.85)

bbox_sql = """
        SELECT eva, name, latitude, longitude
        FROM public.stationen
        WHERE latitude  IS NOT NULL
          AND longitude IS NOT NULL
          AND latitude  BETWEEN %s AND %s
          AND longitude BETWEEN %s AND %s
        """
# Positional parameters, in the order the placeholders appear in the query.
bbox_params = tuple(
    BERLIN_BBOX[key] for key in ("lat_min", "lat_max", "lon_min", "lon_max")
)

with conn.cursor() as cur:
    cur.execute(bbox_sql, bbox_params)
    rows = cur.fetchall()

print("Stationen (Berlin-BBox):", len(rows))

# 2) Choose the map centre: mean station position, or a fixed fallback
#    coordinate when the query returned no stations.
if not rows:
    center = [52.52, 13.405]
else:
    lats = [station[2] for station in rows]
    lons = [station[3] for station in rows]
    center = [sum(lats) / len(rows), sum(lons) / len(rows)]

# 3) Build the OSM map (no clustering, fully opaque markers)
POINT_RADIUS = 7
FONT_SIZE_PX = 16

m = folium.Map(
    location=center,
    zoom_start=11,
    tiles="OpenStreetMap",
    prefer_canvas=True,  # often faster/cleaner with many points
)

fg = folium.FeatureGroup(name="Stationen", show=True)
fg.add_to(m)

# Marker styling shared by every station marker.
marker_style = dict(
    radius=POINT_RADIUS,
    weight=3,          # clearly visible border
    opacity=1.0,       # border fully opaque
    fill=True,
    fill_opacity=1.0,  # dot fully opaque
)

for eva, name, lat, lon in rows:
    tooltip_html = f'<div style="font-size:{FONT_SIZE_PX}px; font-weight:600;">{name}</div>'
    popup_html = f"""
    <div style="font-size:{FONT_SIZE_PX}px;">
      <b>{name}</b><br>
      EVA: {eva}<br>
      ({lat:.6f}, {lon:.6f})
    </div>
    """

    station_marker = folium.CircleMarker(
        location=[lat, lon],
        tooltip=folium.Tooltip(tooltip_html, sticky=True),
        popup=folium.Popup(popup_html, max_width=350),
        **marker_style,
    )
    station_marker.add_to(fg)

folium.LayerControl().add_to(m)

conn.close()
# Last expression of the cell: the notebook renders the map.
m

Stationen (Berlin-BBox): 133


Diese Query nimmt Lat/Long entgegen und gibt die nächstgelegene Station zurück. Darüber hinaus wird das Ergebnis auch grafisch gecheckt.

Lösung zu 2.2

In [None]:
import folium
from folium.plugins import MarkerCluster

# Haversine great-circle distance (sphere radius 6371000) between the bound
# point (%(lat)s, %(lon)s) and every station that has coordinates; only the
# closest station is returned.
SQL_NEAREST = """
SELECT
  eva,
  name,
  latitude,
  longitude,
  6371000 * 2 * asin(
    sqrt(
      power(sin(radians(%(lat)s - latitude) / 2), 2) +
      cos(radians(latitude)) * cos(radians(%(lat)s)) *
      power(sin(radians(%(lon)s - longitude) / 2), 2)
    )
  ) AS distance_m
FROM public.stationen
WHERE latitude IS NOT NULL AND longitude IS NOT NULL
ORDER BY distance_m
LIMIT 1;
"""

# Every station that has both coordinates set.
SQL_ALL = """
SELECT eva, name, latitude, longitude
FROM public.stationen
WHERE latitude IS NOT NULL AND longitude IS NOT NULL;
"""


def get_nearest_station(conn, lat, lon):
    """Return the station closest to the given coordinate.

    Args:
        conn: Open DB connection whose ``cursor()`` can be used as a context manager.
        lat: Latitude bound to ``%(lat)s`` in ``SQL_NEAREST``.
        lon: Longitude bound to ``%(lon)s`` in ``SQL_NEAREST``.

    Returns:
        Dict with keys ``eva``, ``name``, ``lat``, ``lon`` and ``dist_m``
        (the distance converted to ``float``).

    Raises:
        RuntimeError: If the query returns no row.
    """
    point = {"lat": lat, "lon": lon}
    with conn.cursor() as cursor:
        cursor.execute(SQL_NEAREST, point)
        hit = cursor.fetchone()

    if hit:
        eva, name, station_lat, station_lon, distance = hit
        return dict(
            eva=eva,
            name=name,
            lat=station_lat,
            lon=station_lon,
            dist_m=float(distance),
        )
    raise RuntimeError("Keine Station mit gültigen latitude/longitude gefunden.")


def get_all_stations(conn):
    """Return all stations with coordinates as a list of dicts.

    Args:
        conn: Open DB connection whose ``cursor()`` can be used as a context manager.

    Returns:
        One dict per row of ``SQL_ALL`` with keys ``eva``, ``name``, ``lat``, ``lon``.
    """
    with conn.cursor() as cursor:
        cursor.execute(SQL_ALL)
        records = cursor.fetchall()

    stations = []
    for eva, name, lat, lon in records:
        stations.append(dict(eva=eva, name=name, lat=lat, lon=lon))
    return stations

def make_map(lat, lon, stations, nearest, out_html="station_map.html"):
    """Build a folium map of the input point, all stations and the nearest one.

    Args:
        lat: Latitude of the input coordinate.
        lon: Longitude of the input coordinate.
        stations: Iterable of dicts with keys ``eva``, ``name``, ``lat``, ``lon``.
        nearest: Dict with keys ``eva``, ``name``, ``lat``, ``lon``, ``dist_m``.
        out_html: Path the rendered map is written to. Pass ``None`` (or an
            empty string) to skip writing a file.

    Returns:
        The constructed ``folium.Map``.
    """
    m = folium.Map(location=[lat, lon], zoom_start=12, tiles="OpenStreetMap")

    # Marker for the input coordinate
    folium.Marker(
        [lat, lon],
        popup=f"Input: ({lat:.6f}, {lon:.6f})",
        tooltip="Eingabe-Koordinate",
        icon=folium.Icon(color="blue", icon="info-sign"),
    ).add_to(m)

    # All stations, grouped in a marker cluster
    cluster = MarkerCluster(name="Alle Stationen").add_to(m)
    for s in stations:
        folium.CircleMarker(
            [s["lat"], s["lon"]],
            radius=3,
            popup=f'{s["name"]} (EVA {s["eva"]})',
            tooltip=s["name"],
            fill=True,
        ).add_to(cluster)

    # Highlight the nearest station
    folium.Marker(
        [nearest["lat"], nearest["lon"]],
        popup=f'Nächste Station: {nearest["name"]} (EVA {nearest["eva"]})<br>'
              f'Distanz: {nearest["dist_m"]:.1f} m',
        tooltip="Nächste Station",
        icon=folium.Icon(color="red", icon="flag"),
    ).add_to(m)

    # Line between the input point and the nearest station
    folium.PolyLine(
        [[lat, lon], [nearest["lat"], nearest["lon"]]],
        tooltip=f'{nearest["dist_m"]:.1f} m',
    ).add_to(m)

    folium.LayerControl().add_to(m)

    # Fit the viewport to the input point plus the nearest station
    m.fit_bounds([[lat, lon], [nearest["lat"], nearest["lon"]]])

    # Previously out_html was accepted but ignored, so no file was ever
    # produced; write the map now that it is complete.
    if out_html:
        m.save(out_html)
    return m

In [16]:
# Example input coordinate
input_lat, input_lon = 52.520008, 13.404954

conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)

stations = get_all_stations(conn)
nearest = get_nearest_station(conn, input_lat, input_lon)

# out_file receives whatever make_map returns.
out_file = make_map(
    lat=input_lat,
    lon=input_lon,
    stations=stations,
    nearest=nearest,
    out_html="station_map.html",
)
print("Nächste Station:", nearest)

conn.close()

Nächste Station: {'eva': 8089017, 'name': 'Hackescher Markt', 'lat': 52.522622, 'lon': 13.402364, 'dist_m': 339.4004282516994}


Diese Query nimmt den Namen einer Station entgegen und gibt deren Identifier (EVA) sowie Lat/Long zurück.

Lösung zu 2.1

In [10]:
# Connection for the station-name lookup in this cell.
conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)

# Look up a single station whose name matches the ILIKE pattern bound to
# %(name)s; at most one row is returned.
SQL_BY_NAME = """
SELECT eva, latitude, longitude
FROM public.stationen
WHERE name ILIKE %(name)s
LIMIT 1;
"""


def get_station_by_name(conn, station_name: str):
    """Return identifier and coordinates of the station matching a name.

    Args:
        conn: Open DB connection whose ``cursor()`` can be used as a context manager.
        station_name: Value bound to the ``ILIKE`` pattern of ``SQL_BY_NAME``.

    Returns:
        Dict with keys ``eva``, ``latitude``, ``longitude``, or ``None`` when
        the query returns no row.
    """
    with conn.cursor() as cursor:
        cursor.execute(SQL_BY_NAME, {"name": station_name})
        match = cursor.fetchone()  # None when nothing matched

    if match is not None:
        eva, latitude, longitude = match
        return dict(eva=eva, latitude=latitude, longitude=longitude)
    return None

# Example lookup; the result dict (or None) is printed as-is.
result = get_station_by_name(conn, station_name="Hackescher Markt")
print(result)

# Done with the database for this cell.
conn.close()

{'eva': 8089017, 'latitude': 52.522622, 'longitude': 13.402364}


Diese Query nimmt einen Zeit-Snapshot (`YYMMDDHH`) entgegen und gibt die Anzahl aller gecancelten Stopps innerhalb dieser Stunde zurück.

Lösung zu 2.3

In [20]:
# Counts distinct stop_ids whose arrival or departure carries status 'c' and
# whose cancellation timestamp lies in [t0, t1), where t0 is the snapshot
# parsed as 'YYMMDDHH24' and t1 is one hour later.
SQL_CANCELLED_IN_SNAPSHOT = """
WITH bounds AS (
  SELECT
    to_timestamp(%(snap)s, 'YYMMDDHH24') AS t0,
    to_timestamp(%(snap)s, 'YYMMDDHH24') + INTERVAL '1 hour' AS t1
)
SELECT
  COUNT(DISTINCT s.stop_id) AS cancelled_stops
FROM public.stops s
CROSS JOIN bounds b
WHERE
  (s.arrival_cs = 'c'
   AND s.cancelled_arrival IS NOT NULL
   AND s.cancelled_arrival >= b.t0
   AND s.cancelled_arrival <  b.t1)
  OR
  (s.departure_cs = 'c'
   AND s.cancelled_departure IS NOT NULL
   AND s.cancelled_departure >= b.t0
   AND s.cancelled_departure <  b.t1);
"""


def count_cancelled_stops(conn, snapshot_yymmddhh: str) -> int:
    """Count the cancelled stops within the hour denoted by a snapshot string.

    Args:
        conn: Open DB connection whose ``cursor()`` can be used as a context manager.
        snapshot_yymmddhh: Snapshot bound to ``%(snap)s`` and parsed by the
            query with the format ``'YYMMDDHH24'``.

    Returns:
        The single count value of the result row, converted to ``int``.
    """
    bind_values = {"snap": snapshot_yymmddhh}
    with conn.cursor() as cursor:
        cursor.execute(SQL_CANCELLED_IN_SNAPSHOT, bind_values)
        result_row = cursor.fetchone()
        # The query selects exactly one column.
        (cancelled,) = result_row
    return int(cancelled)

# Example: count the cancelled stops for one snapshot hour.
snapshot = "25091515"  # 15 Sep 2025, 3 pm

conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    password="1234",
)
print(count_cancelled_stops(conn, snapshot))
conn.close()

68
