# Finne høyfrekvente stasjoner

Vi ønsker å finne stasjoner/holdeplasser som tilfredsstiller ulike definisjoner av høyfrekvent.

In [5]:
from pathlib import Path

import geopandas as gpd
import polars as pl
import polars.selectors as cs
from lonboard import viz

### Data fra Entur

Tre tabeller er lastet ned fra Enturs datakatalog. SQL-koden er gjengitt under. 


Vi ser kun på en dato, nemlig 2026-01-15.

Koden under gir en rutetabell for Norge torsdag 15. januar 2026. 

```sql
SELECT 
    agency_name,
    route_id, route_short_name, route_long_name, route_type, 
    trip_id, trip_headsign, direction_id, shape_dist_traveled,
    stop_id, stop_sequence, 
    DATETIME(departure_time, "Europe/Oslo") AS departure_time_oslo,
    DATETIME(arrival_time, "Europe/Oslo") AS arrival_time_oslo
FROM `ent-data-sharing-ext-prd.timetable_gtfs.gtfs_last_recorded_ent_v1`
WHERE operating_date = '2026-01-15'
```

In [6]:
# The result of the SQL query above is stored in bq-results-20260206-080556-1770365220476.csv.
alle_avganger = (
    pl.scan_csv(
        "bq-results-20260206-080556-1770365220476.csv",
        # Force route_short_name to string instead of relying on CSV type inference.
        schema_overrides={"route_short_name": pl.Utf8},
    )
    .with_columns(
        # The query exports Oslo wall-clock times without an offset, so the
        # time zone is attached while parsing.
        pl.col("departure_time_oslo", "arrival_time_oslo").str.to_datetime(
            format="%Y-%m-%d %H:%M:%S", time_zone="Europe/Oslo"
        )
    )
    .rename({"departure_time_oslo": "departure_time", "arrival_time_oslo": "arrival_time"})
)

# Compute the row count before formatting: reusing double quotes inside a
# double-quoted f-string is only valid syntax on Python 3.12 and newer.
antall_rader = alle_avganger.select(pl.len()).collect().item()
print(f"Det er {antall_rader:,} antall rader i rutetabellen.\nSer sånn ut:")
alle_avganger.head(3).collect()

Det er 1,838,090 antall rader i rutetabellen.
Ser sånn ut:


agency_name,route_id,route_short_name,route_long_name,route_type,trip_id,trip_headsign,direction_id,shape_dist_traveled,stop_id,stop_sequence,departure_time,arrival_time
str,str,str,str,str,str,str,i64,i64,str,i64,"datetime[μs, Europe/Oslo]","datetime[μs, Europe/Oslo]"
"""Østfold kollektivtrafikk""","""OST:Line:1_477""","""477""","""Mysen vgs.""","""Bus""","""OST:ServiceJourney:477_2511110…","""Trøgstad-Krokedal snuplass""",1,11140,"""NSR:Quay:100103""",10,2026-01-15 14:14:00 CET,2026-01-15 14:14:00 CET
"""Østfold kollektivtrafikk""","""OST:Line:1_206""","""206""","""Krossern-Orkerød""","""Bus""","""OST:ServiceJourney:206_2511110…","""Moss sentrum""",1,1308,"""NSR:Quay:100110""",3,2026-01-15 18:17:00 CET,2026-01-15 18:17:00 CET
"""Østfold kollektivtrafikk""","""OST:Line:1_206""","""206""","""Krossern-Orkerød""","""Bus""","""OST:ServiceJourney:206_2511110…","""Moss sentrum""",1,1308,"""NSR:Quay:100110""",3,2026-01-15 17:47:00 CET,2026-01-15 17:47:00 CET


```sql
SELECT 
    id, version, publicCode, transportMode, name, shortName, 
    description, location_longitude, location_latitude, 
    parentRef.ref AS parentRef_ref, parentRef.version AS parentRef_version 
FROM 
    `ent-data-sharing-ext-prd.national_stop_registry.stop_places_all_versions`
```

In [7]:
# Stop-place registry export. Every id can occur in several versions; keep
# only the row carrying the highest version number per id.
hoyeste_versjon_per_stoppested = pl.col("version").max().over("id")
stoppesteder = pl.scan_csv("bq-results-20260206-082744-1770366470007.csv").filter(
    pl.col("version") == hoyeste_versjon_per_stoppested
)

# Sanity check: the row count should equal the number of distinct ids.
stoppested_id_kontroll = stoppesteder.select(
    nrow=pl.col("id").len(),
    n_unique_id=pl.col("id").unique().len(),
).collect()
print(stoppested_id_kontroll)
stoppesteder.head(3).collect()

shape: (1, 2)
┌───────┬─────────────┐
│ nrow  ┆ n_unique_id │
│ ---   ┆ ---         │
│ u32   ┆ u32         │
╞═══════╪═════════════╡
│ 64438 ┆ 64438       │
└───────┴─────────────┘


id,version,publicCode,transportMode,name,shortName,description,location_longitude,location_latitude,parentRef_ref,parentRef_version
str,i64,str,str,str,str,str,f64,f64,str,i64
"""NSR:StopPlace:6459""",27,,"""BUS""","""Heimdalsgata""",,"""i Trondheimsveien""",10.760974,59.918394,"""NSR:StopPlace:58253""",20.0
"""NSR:StopPlace:5899""",7,,"""BUS""","""Vestbyveien""",,"""i Bekkenstenveien""",10.887819,59.953365,"""NSR:StopPlace:59645""",6.0
"""NSR:StopPlace:7215""",4,,"""BUS""","""Espa E6""",,"""mot Oslo""",11.25553,60.563193,,


``` sql
SELECT 
    id, version, publicCode, name, description, 
    location_longitude, location_latitude, stopPlaceRef, 
    stopPlaceVersion 
FROM 
    `ent-data-sharing-ext-prd.national_stop_registry.quays_all_versions`
```

In [8]:
# Quay registry export, reduced to the newest version of each quay id.
quays_alle_versjoner = pl.scan_csv("bq-results-20260206-082949-1770366598757.csv")
quays = quays_alle_versjoner.filter(
    pl.col("version").max().over("id") == pl.col("version")
)
quays.head(3).collect()

id,version,publicCode,name,description,location_longitude,location_latitude,stopPlaceRef,stopPlaceVersion
str,i64,str,str,str,f64,f64,str,i64
"""NSR:Quay:1""",36,"""""",,,10.75525,59.909548,"""NSR:StopPlace:2""",36
"""NSR:Quay:1000""",17,"""4""",,,7.987168,58.14577,"""NSR:StopPlace:609""",17
"""NSR:Quay:100005""",15,"""1""",,,15.211521,59.278877,"""NSR:StopPlace:570""",16


In [9]:
# Lookup tables used to attach a stop place to every departure.
quay_til_stoppested = quays.select("id", "stopPlaceRef")
stoppested_info = (
    stoppesteder
    .rename({"parentRef_ref": "parent_station"})
    .select("id", "name", "parent_station")
)

alle_avganger = (
    alle_avganger
    # stop_id is matched against the quay id; the left join keeps departures
    # whose quay is missing from the quay table (stopPlaceRef becomes null).
    .join(quay_til_stoppested, how="left", left_on="stop_id", right_on="id")
    # Inner join (the polars default, spelled out here): departures without a
    # matching stop place are dropped at this step.
    # NOTE(review): confirm that dropping those rows is intended.
    .join(stoppested_info, how="inner", left_on="stopPlaceRef", right_on="id")
)

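Vi ønsker å behandle Bussveien på Nord-Jæren som ikke-buss. Det har vært vanskelig å oppdrive gode tabeller som oppgir hvilke holdeplasser som er en del av Bussveien. Jeg tror rutene 1, 2, 3 og 43 kjører på Bussveien, så vi tar utgangspunkt i holdeplassene disse rutene betjener og fjerner de som ikke ligger langs Bussveien. (NB: koden under filtrerer på rute 42, ikke 43 – avklar hvilken som er riktig.)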

In [10]:
# Stop places served by the selected routes that are explicitly excluded from
# the Bussveien set.
stopplaces_not_included = [
    "NSR:StopPlace:27927", "NSR:StopPlace:27035", "NSR:StopPlace:27047",
    "NSR:StopPlace:28523", "NSR:StopPlace:27039", "NSR:StopPlace:25931",
    "NSR:StopPlace:28127", "NSR:StopPlace:26716", "NSR:StopPlace:26719",
    "NSR:StopPlace:27195", "NSR:StopPlace:27813", "NSR:StopPlace:27181",
    "NSR:StopPlace:27383", "NSR:StopPlace:26657", "NSR:StopPlace:27973",
    "NSR:StopPlace:27977", "NSR:StopPlace:28144", "NSR:StopPlace:26773",
    "NSR:StopPlace:26133", "NSR:StopPlace:26137", "NSR:StopPlace:27440",
    "NSR:StopPlace:28813", "NSR:StopPlace:28312", "NSR:StopPlace:27788",
    "NSR:StopPlace:28803", "NSR:StopPlace:28804", "NSR:StopPlace:27480",
    "NSR:StopPlace:28062", "NSR:StopPlace:28065", "NSR:StopPlace:26525",
]

# Stop places lying both north and east of this point are left out.
grensepunkt_latitude = 58.969188398795595
grensepunkt_longitude = 5.702509477827003
nordost_for_grensepunkt = (
    (pl.col("location_latitude") > grensepunkt_latitude)
    & (pl.col("location_longitude") > grensepunkt_longitude)
)

stoppested_posisjon = stoppesteder.select("id", cs.starts_with("location"))

busslinjen_stopplaces = (
    alle_avganger
    # NOTE(review): the notebook text mentions route 43 while this filter
    # uses "42" — confirm which route belongs to Bussveien.
    .filter(
        (pl.col("agency_name") == "Kolumbus")
        & pl.col("route_short_name").is_in(["1", "2", "3", "42"])
    )
    .select("stop_id", "stopPlaceRef")
    .unique()
    # Coordinates are only needed for the geographic cut-off and are dropped again below.
    .join(stoppested_posisjon, how="left", left_on="stopPlaceRef", right_on="id")
    .filter(~nordost_for_grensepunkt)
    .filter(~pl.col("stopPlaceRef").is_in(stopplaces_not_included))
    .drop(cs.starts_with("location"))
    .collect()
)

busslinjen_stopplaces.head()

stop_id,stopPlaceRef
str,str
"""NSR:Quay:48781""","""NSR:StopPlace:28374"""
"""NSR:Quay:44809""","""NSR:StopPlace:25838"""
"""NSR:Quay:45600""","""NSR:StopPlace:26353"""
"""NSR:Quay:109015""","""NSR:StopPlace:63166"""
"""NSR:Quay:47270""","""NSR:StopPlace:27423"""


In [11]:
# Quay ids collected for Bussveien, imploded into a single list value for is_in.
bussveien_quay_ids = busslinjen_stopplaces.get_column("stop_id").implode()
er_bussveien_quay = pl.col("stop_id").is_in(bussveien_quay_ids)

# Every departure from one of these quays is relabelled "Bussveien" (regardless
# of which route it belongs to); all other rows keep their route_type.
alle_avganger = alle_avganger.with_columns(
    route_type=pl.when(er_bussveien_quay)
    .then(pl.lit("Bussveien"))
    .otherwise(pl.col("route_type"))
)

In [12]:
# Ids of stop places whose name contains "Helsfyr".
# Comparing id with a null parentRef_ref evaluates to null, so the second
# filter also removes stop places that have no parent.
# NOTE(review): confirm that keeping only child stop places is intended.
helsfyr_stoppested_ids = (
    stoppesteder
    .filter(pl.col("name").str.contains("Helsfyr"))
    .filter(pl.col("id") != pl.col("parentRef_ref"))
    .collect()
    .get_column("id")
    .implode()
)

# All quays that belong to one of those stop places.
helsfyr = quays.filter(pl.col("stopPlaceRef").is_in(helsfyr_stoppested_ids)).collect()

print(helsfyr.shape)

# Convert to pandas once and reuse the frame for both the attributes and the geometry.
helsfyr_pd = helsfyr.to_pandas()
viz(
    gpd.GeoDataFrame(
        helsfyr_pd,
        geometry=gpd.points_from_xy(
            helsfyr_pd["location_longitude"], helsfyr_pd["location_latitude"]
        ),
        crs=4326,
    )
)

(19, 9)


<lonboard._map.Map object at 0x0000028828D850D0>

### Se på ulike definisjoner

#### Høyfrekvente ruter

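En holdeplass regnes som høyfrekvent dersom den har bussavgang minst hvert 10. minutt, eller avgang med andre transportmidler (inkludert Bussveien) minst hvert 15. minutt, gjennom hele tidsvinduet.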

In [13]:
# Shape of the result file from an earlier run. The file is only written by a
# later cell in this notebook, so it may not exist yet on a fresh checkout;
# guard the read so that "Restart & Run All" does not stop here.
forrige_resultat = Path("stasjoner_med_frekvens_10_15_7_20.parquet")
(
    pl.read_parquet(forrige_resultat).shape
    if forrige_resultat.exists()
    else f"{forrige_resultat} finnes ikke ennå"
)

(824, 5)

##### 7-20

In [14]:
def finn_frekvente_stoppesteder(
    avganger: pl.LazyFrame,
    stoppested_register: pl.LazyFrame,
    start_time: pl.Expr,
    end_time: pl.Expr,
    timedelta_bus: pl.Expr,
    timedelta_not_bus: pl.Expr,
) -> pl.DataFrame:
    """Find stop places with at least one high-frequency quay in a time window.

    Ferry departures are excluded and the final stop of every trip is ignored.
    The gap to the next departure is measured per quay (``stop_id``) across all
    route types calling there; the quays are then evaluated per ``route_type``.
    The limit is ``timedelta_bus`` for route_type "Bus" and
    ``timedelta_not_bus`` for everything else. A quay qualifies when

    * the longest gap after any departure between ``start_time`` and
      ``end_time`` minus the limit is at most the limit,
    * the first departure at or after ``start_time`` comes no later than one
      limit after ``start_time``, and
    * the last departure at or before ``end_time`` comes no earlier than one
      limit before ``end_time``.

    Args:
        avganger: Departures with the columns route_type, trip_id,
            stop_sequence, departure_time, stop_id, parent_station,
            stopPlaceRef and name.
        stoppested_register: Stop places with an ``id`` column and the
            location columns that are joined onto the result.
        start_time: Start of the window (datetime expression).
        end_time: End of the window (datetime expression).
        timedelta_bus: Largest accepted gap for route_type "Bus".
        timedelta_not_bus: Largest accepted gap for all other route types.

    Returns:
        One row per (stopPlaceRef, route_type, name) holding a list of structs
        (stop_id, first_departure, last_departure, max_waiting_time) for the
        qualifying quays, plus the stop place's location columns.
    """
    avgangstid = pl.col("departure_time")
    er_buss = pl.col("route_type") == "Bus"
    er_ikke_buss = pl.col("route_type") != "Bus"

    def _oppfyller_krav(grense: pl.Expr) -> pl.Expr:
        # The three frequency requirements for one gap limit.
        return (
            (pl.col("max_waiting_time") <= grense)
            & (pl.col("first_departure") <= start_time + grense)
            & (pl.col("last_departure") >= end_time - grense)
        )

    def _max_ventetid(grense: pl.Expr) -> pl.Expr:
        # Longest gap following a departure in [start_time, end_time - grense].
        return (
            pl.col("waiting_time")
            .filter(avgangstid.is_between(start_time, end_time - grense))
            .max()
        )

    return (
        avganger
        .filter(pl.col("route_type") != "Ferry")
        # Drop the final stop of every trip.
        .filter(pl.col("stop_sequence") != pl.col("stop_sequence").max().over("trip_id"))
        .sort("departure_time")
        .with_columns(
            # Time until the next departure from the same quay.
            waiting_time=avgangstid.shift(-1).over("stop_id") - avgangstid
        )
        .group_by("stop_id", "parent_station", "stopPlaceRef", "route_type", "name")
        .agg(
            max_waiting_time_bus=_max_ventetid(timedelta_bus),
            max_waiting_time_not_bus=_max_ventetid(timedelta_not_bus),
            first_departure=avgangstid.filter(avgangstid >= start_time).min(),
            last_departure=avgangstid.filter(avgangstid <= end_time).max(),
        )
        .with_columns(
            max_waiting_time=pl.when(er_buss)
            .then(pl.col("max_waiting_time_bus"))
            .otherwise(pl.col("max_waiting_time_not_bus"))
        )
        # Removes the two helper columns (their names contain "bus").
        .drop(cs.contains("bus"))
        .filter(
            (er_buss & _oppfyller_krav(timedelta_bus))
            | (er_ikke_buss & _oppfyller_krav(timedelta_not_bus))
        )
        .group_by("stopPlaceRef", "route_type", "name")
        .agg(pl.struct("stop_id", "first_departure", "last_departure", "max_waiting_time"))
        .join(
            stoppested_register.select("id", cs.contains("location")),
            left_on="stopPlaceRef", right_on="id", how="left",
        )
        .collect()
    )


# Window 07:00-20:00 on 2026-01-15, Oslo time.
start_time = pl.datetime(2026, 1, 15, 7, time_zone="Europe/Oslo")
end_time = pl.datetime(2026, 1, 15, 20, time_zone="Europe/Oslo")

# Largest accepted gap between departures: 10 minutes for buses, 15 otherwise.
timedelta_bus = pl.duration(minutes=10)
timedelta_not_bus = pl.duration(minutes=15)

frekvente_stoppesteder_7_20 = finn_frekvente_stoppesteder(
    alle_avganger,
    stoppesteder,
    start_time=start_time,
    end_time=end_time,
    timedelta_bus=timedelta_bus,
    timedelta_not_bus=timedelta_not_bus,
)

frekvente_stoppesteder_7_20

stopPlaceRef,route_type,name,stop_id,location_longitude,location_latitude
str,str,str,list[struct[4]],f64,f64
"""NSR:StopPlace:6593""","""Bus""","""Simensbrekka""","[{""NSR:Quay:12180"",2026-01-15 07:04:00 CET,2026-01-15 19:57:00 CET,10m}]",10.782099,59.900787
"""NSR:StopPlace:4336""","""Tram""","""Lille Frogner allé""","[{""NSR:Quay:7828"",2026-01-15 07:05:00 CET,2026-01-15 19:56:00 CET,7m}, {""NSR:Quay:7827"",2026-01-15 07:01:00 CET,2026-01-15 19:55:00 CET,6m}]",10.712244,59.917896
"""NSR:StopPlace:41715""","""Bus""","""Leangen""","[{""NSR:Quay:71363"",2026-01-15 07:04:00 CET,2026-01-15 19:59:00 CET,7m}, {""NSR:Quay:71365"",2026-01-15 07:02:00 CET,2026-01-15 19:59:00 CET,9m}]",10.47662,63.43167
"""NSR:StopPlace:6240""","""Bus""","""Alnabruveien""","[{""NSR:Quay:11453"",2026-01-15 07:06:00 CET,2026-01-15 19:56:00 CET,10m}]",10.83525,59.924013
"""NSR:StopPlace:29772""","""Bus""","""Fageråsen""","[{""NSR:Quay:51186"",2026-01-15 07:06:00 CET,2026-01-15 19:56:00 CET,10m}]",5.359839,60.359595
…,…,…,…,…,…
"""NSR:StopPlace:32370""","""Bus""","""Sælen skole""","[{""NSR:Quay:55843"",2026-01-15 07:00:00 CET,2026-01-15 20:00:00 CET,10m}]",5.282229,60.346256
"""NSR:StopPlace:313""","""Train""","""Høybråten stasjon""","[{""NSR:Quay:515"",2026-01-15 07:08:00 CET,2026-01-15 19:53:00 CET,15m}, {""NSR:Quay:516"",2026-01-15 07:12:00 CET,2026-01-15 19:57:00 CET,15m}]",10.927655,59.948092
"""NSR:StopPlace:6218""","""Bus""","""Skibakken""","[{""NSR:Quay:11414"",2026-01-15 07:00:00 CET,2026-01-15 20:00:00 CET,10m}]",10.764164,59.959281
"""NSR:StopPlace:28564""","""Bussveien""","""Gausel sentrum""","[{""NSR:Quay:49078"",2026-01-15 07:02:00 CET,2026-01-15 19:58:00 CET,11m}, {""NSR:Quay:49076"",2026-01-15 07:02:00 CET,2026-01-15 19:58:00 CET,5m}]",5.728504,58.908708


##### 7-18

In [15]:
# Window 07:00-18:00 on 2026-01-15, Oslo time.
start_time = pl.datetime(2026, 1, 15, 7, time_zone="Europe/Oslo")
end_time = pl.datetime(2026, 1, 15, 18, time_zone="Europe/Oslo")
# Largest accepted gap between departures: 10 minutes for buses, 15 otherwise.
timedelta_bus = pl.duration(minutes=10)
timedelta_not_bus = pl.duration(minutes=15)

# Reusable column expressions.
avgangstid = pl.col("departure_time")
ventetid = pl.col("waiting_time")
er_buss = pl.col("route_type") == "Bus"
er_ikke_buss = pl.col("route_type") != "Bus"
ikke_siste_stopp = pl.col("stop_sequence") != pl.col("stop_sequence").max().over("trip_id")

# Step 1: drop ferries and the final stop of every trip, then measure the time
# until the next departure from the same quay.
avganger_med_ventetid = (
    alle_avganger
    .filter(pl.col("route_type") != "Ferry")
    .filter(ikke_siste_stopp)
    .sort("departure_time")
    .with_columns((avgangstid.shift(-1).over("stop_id") - avgangstid).alias("waiting_time"))
)

# Step 2: summarise per quay and route type. The longest gap is taken over the
# departures between start_time and end_time minus the limit that applies.
nokkeltall_per_quay = (
    avganger_med_ventetid
    .group_by("stop_id", "parent_station", "stopPlaceRef", "route_type", "name")
    .agg(
        ventetid
        .filter(avgangstid.is_between(start_time, end_time - timedelta_bus))
        .max()
        .alias("max_waiting_time_bus"),
        ventetid
        .filter(avgangstid.is_between(start_time, end_time - timedelta_not_bus))
        .max()
        .alias("max_waiting_time_not_bus"),
        avgangstid.filter(avgangstid >= start_time).min().alias("first_departure"),
        avgangstid.filter(avgangstid <= end_time).max().alias("last_departure"),
    )
    .with_columns(
        max_waiting_time=pl.when(er_buss)
        .then(pl.col("max_waiting_time_bus"))
        .otherwise(pl.col("max_waiting_time_not_bus"))
    )
    # Removes the two helper columns (their names contain "bus").
    .drop(cs.contains("bus"))
)

# Step 3: the frequency requirements, with separate limits for bus and non-bus.
buss_oppfyller_krav = (
    er_buss
    & (pl.col("max_waiting_time") <= timedelta_bus)
    & (pl.col("first_departure") <= start_time + timedelta_bus)
    & (pl.col("last_departure") >= end_time - timedelta_bus)
)
annet_oppfyller_krav = (
    er_ikke_buss
    & (pl.col("max_waiting_time") <= timedelta_not_bus)
    & (pl.col("first_departure") <= start_time + timedelta_not_bus)
    & (pl.col("last_departure") >= end_time - timedelta_not_bus)
)

# Step 4: collect the qualifying quays per stop place and attach its coordinates.
frekvente_stoppesteder_7_18 = (
    nokkeltall_per_quay
    .filter(buss_oppfyller_krav | annet_oppfyller_krav)
    .group_by("stopPlaceRef", "route_type", "name")
    .agg(pl.struct("stop_id", "first_departure", "last_departure", "max_waiting_time"))
    .join(
        stoppesteder.select("id", cs.contains("location")),
        how="left", left_on="stopPlaceRef", right_on="id",
    )
    .collect()
)

frekvente_stoppesteder_7_18

stopPlaceRef,route_type,name,stop_id,location_longitude,location_latitude
str,str,str,list[struct[4]],f64,f64
"""NSR:StopPlace:6080""","""Bus""","""Etterstad sør""","[{""NSR:Quay:11168"",2026-01-15 07:03:00 CET,2026-01-15 17:56:00 CET,9m}]",10.801883,59.907619
"""NSR:StopPlace:6244""","""Bus""","""Smalvollveien""","[{""NSR:Quay:11458"",2026-01-15 07:06:00 CET,2026-01-15 17:56:00 CET,10m}]",10.842685,59.924485
"""NSR:StopPlace:31287""","""Bus""","""Årstadveien""","[{""NSR:Quay:53883"",2026-01-15 07:08:00 CET,2026-01-15 17:56:00 CET,10m}, {""NSR:Quay:53884"",2026-01-15 07:00:00 CET,2026-01-15 18:00:00 CET,5m}]",5.357676,60.383602
"""NSR:StopPlace:4293""","""Tram""","""Niels Juels gate""","[{""NSR:Quay:7753"",2026-01-15 07:02:00 CET,2026-01-15 17:56:00 CET,6m}, {""NSR:Quay:7754"",2026-01-15 07:04:00 CET,2026-01-15 17:55:00 CET,7m}]",10.715157,59.91634
"""NSR:StopPlace:19687""","""Bus""","""Rådhusplassen""","[{""NSR:Quay:111275"",2026-01-15 07:00:00 CET,2026-01-15 18:00:00 CET,9m}]",9.611109,59.2072
…,…,…,…,…,…
"""NSR:StopPlace:43777""","""Bus""","""Festningsgata""","[{""NSR:Quay:75156"",2026-01-15 07:10:00 CET,2026-01-15 17:57:00 CET,10m}]",10.416342,63.427882
"""NSR:StopPlace:32385""","""Bus""","""Hesjaholtet""","[{""NSR:Quay:55874"",2026-01-15 07:03:00 CET,2026-01-15 17:53:00 CET,10m}]",5.260523,60.344715
"""NSR:StopPlace:6007""","""Bus""","""Eftasåsen""","[{""NSR:Quay:11022"",2026-01-15 07:01:00 CET,2026-01-15 17:58:00 CET,8m}, {""NSR:Quay:11021"",2026-01-15 07:09:00 CET,2026-01-15 17:57:00 CET,9m}]",10.854183,59.89622
"""NSR:StopPlace:42282""","""Bus""","""Voll studentby""","[{""NSR:Quay:72401"",2026-01-15 07:01:00 CET,2026-01-15 18:00:00 CET,7m}, {""NSR:Quay:72402"",2026-01-15 07:01:00 CET,2026-01-15 17:57:00 CET,8m}]",10.44429,63.409392


In [19]:
def _med_radius_og_rutetype(stasjoner: pl.DataFrame) -> pl.DataFrame:
    """Add a ``radius`` column and report Bussveien stops as route_type "Bus".

    Args:
        stasjoner: Frame with a ``route_type`` column.

    Returns:
        The frame with ``radius`` set to 400 for route_type "Bus" and 500 for
        everything else, and with route_type "Bussveien" replaced by "Bus".
        A frame that already has a ``radius`` column is returned unchanged.
    """
    # Re-running the cell must not recompute the radius: by then Bussveien has
    # already been renamed to "Bus" and would wrongly receive the bus radius.
    if "radius" in stasjoner.columns:
        return stasjoner

    # Both expressions are evaluated against the incoming frame, so the radius
    # is derived from the route_type as it was BEFORE the rename: Bussveien
    # stops get the non-bus radius.
    # NOTE(review): radius is presumably in metres — confirm.
    return stasjoner.with_columns(
        radius=pl.when(pl.col("route_type") == "Bus")
        .then(pl.lit(400))
        .otherwise(pl.lit(500)),
        # The label assigned earlier in the notebook is "Bussveien" (double s).
        route_type=pl.when(pl.col("route_type") == "Bussveien")
        .then(pl.lit("Bus"))
        .otherwise(pl.col("route_type")),
    )


frekvente_stoppesteder_7_18 = _med_radius_og_rutetype(frekvente_stoppesteder_7_18)
frekvente_stoppesteder_7_20 = _med_radius_og_rutetype(frekvente_stoppesteder_7_20)

# Columns written to the result files.
utkolonner = ["stopPlaceRef", "route_type", "name", "location_longitude", "location_latitude", "radius"]
frekvente_stoppesteder_7_18.select(utkolonner).write_parquet("stasjoner_med_frekvens_10_15_7_18.parquet")
frekvente_stoppesteder_7_20.select(utkolonner).write_parquet("stasjoner_med_frekvens_10_15_7_20.parquet")