In [1]:
import duckdb
import pandas as pd

# Connect to the database
connection = duckdb.connect("../data/gtfs_train.db", read_only=False)

In [2]:
# Delete stops table and create again and try to load stops data
qry = f"""
DROP TABLE IF EXISTS stops
"""
connection.execute(qry)
qry = f"""
CREATE TABLE stops (
    stop_id VARCHAR,
    stop_code VARCHAR,
    stop_name VARCHAR,
    stop_desc VARCHAR,
    stop_lat DOUBLE,
    stop_lon DOUBLE,
    zone_id VARCHAR,
    stop_url VARCHAR,
    location_type VARCHAR,
    parent_station VARCHAR,
    stop_timezone VARCHAR,
)
"""

connection.execute(qry)
# Load stops data
file = "stops.txt"
copy_query = f"COPY {file.split('.')[0]} FROM '../data/gtfs/gtfs_files/{file}' (delimiter ',', header, quote '\"', escape '\"', null_padding true, ignore_errors)"
connection.execute(copy_query)

# Query the stops table
qry = f"""
SELECT
    *
FROM stops
"""
stops = connection.execute(qry).fetchdf()
stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone
0,132,,Bahn-2000-Strecke,,47.196374,7.68936,0,,,0,
1,133,,Centovalli,,46.154371,8.603653,0,,,0,
2,134,,Furka-Basistunnel,,46.530761,8.435917,0,,,0,
3,135,,Lötschberg-Basistunnel,,46.356888,7.773846,0,,,0,
4,136,,Lötschberg-Bergstrecke,,46.433756,7.717215,0,,,0,


In [17]:
# Delete trips table and create again and try to load trips data
qry = f"""
DROP TABLE IF EXISTS trips
"""
connection.execute(qry)
qry = f"""
CREATE TABLE IF NOT EXISTS trips (
    route_id VARCHAR NOT NULL,
    service_id VARCHAR NOT NULL,
    trip_id VARCHAR NOT NULL,-- PRIMARY KEY,
    trip_headsign VARCHAR,
    trip_short_name VARCHAR,
    direction_id INTEGER,
    block_id VARCHAR,
);
"""

connection.execute(qry)
# Load stops data
file = "trips.txt"
copy_query = f"COPY {file.split('.')[0]} FROM '../data/gtfs/gtfs_files/{file}' (delimiter ',', header, quote '\"', escape '\"', null_padding true, ignore_errors)"
connection.execute(copy_query)

# Query the stops table
qry = f"""
SELECT
    *
FROM trips
"""
trips = connection.execute(qry).fetchdf()
trips.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id
0,000003.000011.101:3,66612,0:1,Zürich HB,3,,
1,000003.000011.101:3,121098,0:2,Zürich HB,3,,
2,000003.000011.101:3,42990,0:3,Zürich HB,3,,
3,000003.000011.101:3,2,0:4,Zürich HB,3,,
4,000003.000011.101:3,90659,0:5,Zürich HB,3,,


In [3]:
# Query the stops table and join with stop_times
qry = f"""
SELECT
    *
FROM stops

"""
stops = connection.execute(qry).fetchdf()
stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone
0,132,,Bahn-2000-Strecke,,47.196374,7.68936,0,,,0,
1,133,,Centovalli,,46.154371,8.603653,0,,,0,
2,134,,Furka-Basistunnel,,46.530761,8.435917,0,,,0,
3,135,,Lötschberg-Basistunnel,,46.356888,7.773846,0,,,0,
4,136,,Lötschberg-Bergstrecke,,46.433756,7.717215,0,,,0,


In [16]:
# Query the trips table
qry = f"""
SELECT
    *
FROM trips

"""
trips = connection.execute(qry).fetchdf()
trips

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id
0,000003.000011.101:3,066612,0:1,Zürich HB,3,,
1,000003.000011.101:3,121098,0:2,Zürich HB,3,,
2,000003.000011.101:3,042990,0:3,Zürich HB,3,,
3,000003.000011.101:3,000002,0:4,Zürich HB,3,,
4,000003.000011.101:3,090659,0:5,Zürich HB,3,,
...,...,...,...,...,...,...,...
197714,025081.327000.104:11,068367,113303:1,Seregno,11,,
197715,025085.327000.101:11,053104,113304:1,Milano Porta Garibaldi,11,,
197716,025085.327000.102:11,230102,113305:1,Milano Porta Garibaldi,11,,
197717,025085.327000.104:11,068367,113306:1,Seregno,11,,


In [5]:
# Get 10 routes where "Zürich HB" and "Bern" are stops
qry = f"""
SELECT
    *
FROM trips
-- JOIN routes
INNER JOIN routes
ON trips.route_id = routes.route_id
-- JOIN agency
INNER JOIN agency
ON routes.agency_id = agency.agency_id
-- JOIN calendar
INNER JOIN calendar
ON trips.service_id = calendar.service_id
-- JOIN stop_times
INNER JOIN stop_times
ON trips.trip_id = stop_times.trip_id
-- JOIN stops
INNER JOIN stops
ON stop_times.stop_id = stops.stop_id
ORDER BY stops.stop_name DESC
"""
trips = connection.execute(qry).fetchdf()
trips


Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,route_id_1,agency_id,route_short_name,...,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone
0,012796.000078.101:10,000000,66212:1,Zürich HB SZU,10,,,012796.000078.101:10,000078,S10,...,TRLI,Zürich Triemli,,47.364990,8.495380,480,,,0,8503054
1,012916.000078.101:10,000000,66358:1,Zürich HB SZU,10,,,012916.000078.101:10,000078,S10,...,TRLI,Zürich Triemli,,47.364990,8.495380,480,,,0,8503054
2,012776.000078.101:10,000000,66186:1,Zürich HB SZU,10,,,012776.000078.101:10,000078,S10,...,TRLI,Zürich Triemli,,47.364990,8.495380,480,,,0,8503054
3,012940.000078.101:10,000000,66382:1,Zürich HB SZU,10,,,012940.000078.101:10,000078,S10,...,TRLI,Zürich Triemli,,47.364990,8.495380,480,,,0,8503054
4,012808.000078.101:10,000000,66224:1,Zürich HB SZU,10,,,012808.000078.101:10,000078,S10,...,TRLI,Zürich Triemli,,47.364990,8.495380,480,,,0,8503054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6126,012603.000078.101:4,000000,66095:1,Langnau-Gattikon,4,,,012603.000078.101:4,000078,S4,...,ADW,Adliswil,,47.312274,8.524254,451,,,0,8503096
6127,012631.000078.101:4,000000,66113:1,Langnau-Gattikon,4,,,012631.000078.101:4,000078,S4,...,ADW,Adliswil,,47.312274,8.524254,451,,,0,8503096
6128,012503.000078.101:4,000000,66010:1,Langnau-Gattikon,4,,,012503.000078.101:4,000078,S4,...,ADW,Adliswil,,47.312274,8.524254,451,,,0,8503096
6129,012498.000078.101:4,000000,66006:1,Zürich HB SZU,4,,,012498.000078.101:4,000078,S4,...,ADW,Adliswil,,47.312274,8.524254,451,,,0,8503096


In [6]:
# Get "Zürich HB" and "Bern" stops
qry = f"""
SELECT
    *
FROM stops
WHERE stop_name = 'Zürich HB' OR stop_name = 'Bern'
"""
stops = connection.execute(qry).fetchdf()
stops


Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone
0,8503000,ZUE,Zürich HB,,47.378177,8.540212,407,,,1,
1,8507000,BN,Bern,,46.948832,7.439131,540,,,1,
2,8503000:10,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
3,8503000:13,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
4,8503000:7,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
5,8503000:11,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
6,8503000:9,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
7,8503000:6,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
8,8503000:16,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0
9,8503000:12,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000.0


In [7]:
# Join the trip to the stops
# Get "Zürich HB" and "Bern" stops
qry = f"""
SELECT
    *
FROM stops
INNER JOIN stop_times
ON stops.stop_id = stop_times.stop_id
INNER JOIN trips
ON stop_times.trip_id = trips.trip_id
WHERE stops.stop_name = 'Zürich HB'-- OR stops.stop_name = 'Bern'
"""
trips = connection.execute(qry).fetchdf()
trips

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,...,stop_sequence,pickup_type,drop_off_type,route_id,service_id,trip_id_1,trip_headsign,trip_short_name,direction_id,block_id
0,8503000:11,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,3,,0,000003.000011.101:3,066612,0:1,Zürich HB,3,,
1,8503000:6,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,3,,0,000003.000011.101:3,121098,0:2,Zürich HB,3,,
2,8503000:10,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,3,,0,000003.000011.101:3,042990,0:3,Zürich HB,3,,
3,8503000:10,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,3,,0,000003.000011.101:3,000002,0:4,Zürich HB,3,,
4,8503000:13,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,3,,0,000003.000011.101:3,090659,0:5,Zürich HB,3,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37930,8503000:18,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,1,,0,001009.000011.101:1009,009051,4249:7,Zürich HB,1009,,
37931,8503000:16,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,1,,0,001009.000011.101:1009,085692,4249:8,Zürich HB,1009,,
37932,8503000:17,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,1,,0,001009.000011.101:1009,004853,4249:9,Zürich HB,1009,,
37933,8503000:18,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,1,,0,001011.000011.101:1011,340986,4250:1,Zürich HB,1011,,


In [8]:
qry = f"""
SELECT
    *
FROM trips
INNER JOIN routes
ON trips.route_id = routes.route_id
WHERE trip_id IN (
    SELECT
        trip_id
    FROM stop_times
    WHERE stop_id IN (
        SELECT
            stop_id
        FROM stops
        WHERE stop_name = 'Zürich HB' OR stop_name = 'Bern'
    )
)
"""
trips = connection.execute(qry).fetchdf()
trips

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,route_id_1,agency_id,route_short_name,route_long_name,route_desc,route_type
0,000003.000011.101:3,141763,0:8,Zürich HB,3,,,000003.000011.101:3,000011,ICE,ICE 3,,2
1,000003.000011.102:3,008406,1:2,Zürich HB,3,,,000003.000011.102:3,000011,ICE,ICE 3,,2
2,000004.000011.101:4,000608,2:18,Basel Bad Bf,4,,,000004.000011.101:4,000011,ICE,ICE 4,,2
3,000004.000011.102:4,042936,3:5,Basel Bad Bf,4,,,000004.000011.102:4,000011,ICE,ICE 4,,2
4,000004.000011.104:4,200334,4:7,Basel Bad Bf,4,,,000004.000011.104:4,000011,ICE,ICE 4,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58802,009228.87_LEX.002:612B,506983,92786:1,Paris Gare de Lyon,612B,,,009228.87_LEX.002:612B,87_LEX,TGV,612B,,2
58803,009228.87_LEX.003:612B,013566,92787:1,Paris Gare de Lyon,612B,,,009228.87_LEX.003:612B,87_LEX,TGV,612B,,2
58804,009228.87_LEX.006:612B,050544,92789:1,Paris Gare de Lyon,612B,,,009228.87_LEX.006:612B,87_LEX,TGV,612B,,2
58805,009228.87_LEX.007:612B,039031,92790:1,Paris Gare de Lyon,612B,,,009228.87_LEX.007:612B,87_LEX,TGV,612B,,2


In [9]:
# get route and trip iwth route_long_name ICE 8
qry = f"""
SELECT
    *
FROM trips
INNER JOIN routes
ON trips.route_id = routes.route_id
INNER JOIN stop_times
ON trips.trip_id = stop_times.trip_id
INNER JOIN stops
ON stop_times.stop_id = stops.stop_id
WHERE route_long_name = 'IC 8' or route_long_name = 'IC 6'
ORDER BY stop_times.stop_sequence
"""
trips = connection.execute(qry).fetchdf()
trips




Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,route_id_1,agency_id,route_short_name,...,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone
0,030810.000011.104:8,197862,38393:1,Brig,8,,,030810.000011.104:8,000011,IC8,...,BN,Bern,,46.948832,7.439131,540,,,0,8507000
1,030815.000011.104:8,050839,38414:1,Zürich HB,8,,,030815.000011.104:8,000011,IC8,...,BR,Brig,,46.319423,7.988095,677,,,0,8501609
2,030828.000011.106:8,043001,38466:1,Brig,8,,,030828.000011.106:8,000011,IC8,...,BN,Bern,,46.948832,7.439131,540,,,0,8507000
3,030837.000011.106:8,043126,38511:1,Weinfelden,8,,,030837.000011.106:8,000011,IC8,...,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000
4,030837.000011.106:8,043127,38511:2,Weinfelden,8,,,030837.000011.106:8,000011,IC8,...,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,8503000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27188,000807.000011.101:8,003148,3398:15,Romanshorn,8,,,000807.000011.101:8,000011,IC8,...,RH,Romanshorn,,47.565523,9.379371,398,,,0,8506121
27189,000807.000011.103:8,367211,3400:1,Romanshorn,8,,,000807.000011.103:8,000011,IC8,...,RH,Romanshorn,,47.565523,9.379371,398,,,0,8506121
27190,000807.000011.103:8,044409,3400:2,Romanshorn,8,,,000807.000011.103:8,000011,IC8,...,RH,Romanshorn,,47.565523,9.379371,398,,,0,8506121
27191,000807.000011.101:8,003145,3398:13,Romanshorn,8,,,000807.000011.101:8,000011,IC8,...,RH,Romanshorn,,47.565523,9.379371,398,,,0,8506121


In [10]:
# Get all stops for the route ICE 8
qry = f"""
SELECT
    *
FROM stops
INNER JOIN stop_times
ON stops.stop_id = stop_times.stop_id
INNER JOIN trips
ON stop_times.trip_id = trips.trip_id
INNER JOIN routes
ON trips.route_id = routes.route_id
WHERE route_long_name = 'IC 8' AND trip_headsign = 'Brig' AND trips.trip_id='15643:1'
ORDER BY stop_sequence
"""
stops_IC8 = connection.execute(qry).fetchdf()
stops_IC8

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,...,trip_headsign,trip_short_name,direction_id,block_id,route_id_1,agency_id,route_short_name,route_long_name,route_desc,route_type
0,8503000:10,ZUE,Zürich HB,,47.378177,8.540212,407,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
1,8502113:4,AA,Aarau,,47.39136,8.051274,383,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
2,8500218:8,OL,Olten,,47.351935,7.9077,396,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
3,0000132,,Bahn-2000-Strecke,,47.196374,7.68936,0,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
4,8507000:7,BN,Bern,,46.948832,7.439131,540,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
5,8507100:2,TH,Thun,,46.754853,7.629606,559,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
6,8507483:3,SP,Spiez,,46.686396,7.680103,627,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
7,0000135,,Lötschberg-Basistunnel,,46.356888,7.773846,0,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
8,8501605:7,VI,Visp,,46.294029,7.881465,650,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2
9,8501609:3,BR,Brig,,46.319423,7.988095,677,,,0,...,Brig,8,,,010806.000011.105:8,11,IC8,IC 8,,2


In [11]:
# Get all stops for the route ICE 6
qry = f"""
SELECT
    *
FROM stops
INNER JOIN stop_times
ON stops.stop_id = stop_times.stop_id
INNER JOIN trips
ON stop_times.trip_id = trips.trip_id
INNER JOIN routes
ON trips.route_id = routes.route_id
WHERE route_long_name = 'IC 6' AND trip_headsign = 'Brig' AND trips.trip_id='4008:1'
ORDER BY stop_sequence
"""
stops_IC6 = connection.execute(qry).fetchdf()
stops_IC6

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,...,trip_headsign,trip_short_name,direction_id,block_id,route_id_1,agency_id,route_short_name,route_long_name,route_desc,route_type
0,8500010:9,BS,Basel SBB,,47.547412,7.589563,276,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
1,8500023:2,LST,Liestal,,47.484461,7.731367,327,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
2,8500218:11,OL,Olten,,47.351935,7.9077,396,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
3,0000132,,Bahn-2000-Strecke,,47.196374,7.68936,0,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
4,8507000:7,BN,Bern,,46.948832,7.439131,540,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
5,8507100:2,TH,Thun,,46.754853,7.629606,559,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
6,8507483:3,SP,Spiez,,46.686396,7.680103,627,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
7,0000135,,Lötschberg-Basistunnel,,46.356888,7.773846,0,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
8,8501605:7,VI,Visp,,46.294029,7.881465,650,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2
9,8501609:3,BR,Brig,,46.319423,7.988095,677,,,0,...,Brig,6,,,000961.000011.107:6,11,IC6,IC 6,,2


In [12]:
# Function that converts timestamp to minutes
def convert_string_timestamp_to_minutes(timestamp):
    if timestamp is None:
        return None
    print(timestamp)
    time = timestamp.split(":")
    return int(time[0])*60 + int(time[1])

In [13]:
# Convert the structure into the following:
# "stop_code": [(departure_time, "stop_code", arrival_time,"route_short_name")]

def create_graph_list_structure(s):
    stops = s.copy()
    # Remove rows that have a stop code of None
    stops = stops[stops["stop_code"].notnull()]
    stops = stops.reset_index(drop=True)
    stops["stop_sequence"] = stops.index # to know the order of the stops
    # loop through the stops and create the structure
    stops_dict = {}
    for i in range(len(stops)):
        stop = stops.iloc[i]
        # get the next stop -> meaning (stop_sequence of current stop + 1)
        next_stop = stops[stops["stop_sequence"] == stop["stop_sequence"] + 1]
        if len(next_stop) == 0:
            continue
        stop_name = stop["stop_name"]
        next_stop_name = next_stop["stop_name"].values[0]
        if next_stop_name not in stops_dict:
            stops_dict[next_stop_name] = []
        departure_time = convert_string_timestamp_to_minutes(stop["departure_time"])
        # use the arrival time of the next stop
        arrival_time = convert_string_timestamp_to_minutes(next_stop["arrival_time"].values[0])
        route_short_name = stop["route_short_name"]
        if stop_name not in stops_dict:
            stops_dict[stop_name] = [(departure_time, next_stop_name, arrival_time, route_short_name)]
        else:
            stops_dict[stop_name].append((departure_time, next_stop_name, arrival_time, route_short_name))

    # delete entry with None stop_code
    if None in stops_dict:
        del stops_dict[None]
    return stops_dict


In [14]:

# stops_IC8
stops_IC8_dict = create_graph_list_structure(stops_IC8)
stops_IC6_dict = create_graph_list_structure(stops_IC6)

# combine the two dictionaries
# loop through the stops_IC6_dict and add to stops_IC8_dict
stops_all_dict = {}
for key, value in stops_IC6_dict.items():
    if key in stops_all_dict:
        stops_all_dict[key] += value
    else:
        stops_all_dict[key] = value

for key, value in stops_IC8_dict.items():
    if key in stops_all_dict:
        stops_all_dict[key] += value
    else:
        stops_all_dict[key] = value


stops_all_dict


06:44:00
07:16:00
07:18:00
07:27:00
07:31:00
07:58:00
08:07:00
08:25:00
08:26:00
08:36:00
08:36:00
09:02:00
09:03:00
09:11:00
07:55:00
08:06:00
08:07:00
08:25:00
08:29:00
08:56:00
09:07:00
09:25:00
09:26:00
09:36:00
09:36:00
10:02:00
10:03:00
10:11:00


{'Liestal': [(487, 'Olten', 505, 'IC6')],
 'Basel SBB': [(475, 'Liestal', 486, 'IC6')],
 'Olten': [(509, 'Bern', 536, 'IC6'), (451, 'Bern', 478, 'IC8')],
 'Bern': [(547, 'Thun', 565, 'IC6'), (487, 'Thun', 505, 'IC8')],
 'Thun': [(566, 'Spiez', 576, 'IC6'), (506, 'Spiez', 516, 'IC8')],
 'Spiez': [(576, 'Visp', 602, 'IC6'), (516, 'Visp', 542, 'IC8')],
 'Visp': [(603, 'Brig', 611, 'IC6'), (543, 'Brig', 551, 'IC8')],
 'Brig': [],
 'Aarau': [(438, 'Olten', 447, 'IC8')],
 'Zürich HB': [(404, 'Aarau', 436, 'IC8')]}

In [15]:
# combine stops_IC8 and stops_IC6
stops_IC8_IC6 = pd.concat([stops_IC8, stops_IC6])
# remove rows with stop_code = None
stops_IC8_IC6 = stops_IC8_IC6[stops_IC8_IC6["stop_code"].notnull()]
# create list of stops with stop_name, stop_lat, stop_lon
stops_IC8_IC6 = stops_IC8_IC6[["stop_name", "stop_lat", "stop_lon"]].drop_duplicates()
stops_IC8_IC6 = stops_IC8_IC6.reset_index(drop=True)
stops_IC8_IC6_records = stops_IC8_IC6.to_dict(orient="records")
stops_IC8_IC6_records

[{'stop_name': 'Zürich HB', 'stop_lat': 47.378177, 'stop_lon': 8.540212},
 {'stop_name': 'Aarau', 'stop_lat': 47.39136, 'stop_lon': 8.051274},
 {'stop_name': 'Olten', 'stop_lat': 47.351935, 'stop_lon': 7.9077},
 {'stop_name': 'Bern', 'stop_lat': 46.948832, 'stop_lon': 7.439131},
 {'stop_name': 'Thun', 'stop_lat': 46.754853, 'stop_lon': 7.629606},
 {'stop_name': 'Spiez', 'stop_lat': 46.686396, 'stop_lon': 7.680103},
 {'stop_name': 'Visp', 'stop_lat': 46.294029, 'stop_lon': 7.881465},
 {'stop_name': 'Brig', 'stop_lat': 46.319423, 'stop_lon': 7.988095},
 {'stop_name': 'Basel SBB', 'stop_lat': 47.547412, 'stop_lon': 7.589563},
 {'stop_name': 'Liestal', 'stop_lat': 47.484461, 'stop_lon': 7.731367}]