In [1]:
import ast
import glob
import os

import pandas as pd
import numpy as np

import geopandas as gpd
import shapely.geometry as geom
from shapely.geometry import Point
import random

import folium
from folium import plugins
from tqdm import tqdm

from constants import *

In [2]:
def load_timetables_for_line(date: str, line_number: int, \
    timetables_folder: str) -> pd.DataFrame:
    """
    Make a dataframe of timetables for a specific tram line and a specific date

    Args:
        date (str): date in  a  'dd_mm_yyyy' format
        line_number (int): tram line number
        timetables_folder (str): folder with csv files containing tram 
        timetables

    Returns:
        pd.DataFrame: a dataframe of timetables for a selected tram line
        and a selected date
    """
    # change date to yyyy-dd-mm format
    DATE_F = '-'.join(date.split('_')[::-1])

    # construct one version of file name (with 2-digit month and day) 
    # e.g. 2023-01-13
    check_name = f'{timetables_folder}rozklady_{DATE_F}.csv'
    
    # if os.path.isfile(check_name):

    # read a csv file containing timetables
    df_t = pd.read_csv(check_name)


    # df_t = pd.read_pickle(f'{TIMETABLES_FOLDER}rozklady_{DATE_F}.pkl', compression='zip')
    # restrict dataframe to a given line number and discard empty rows
    df_t['linie'] = df_t['linie'].apply(ast.literal_eval)
    df_t['linie'] = df_t['linie'].apply(lambda x: x.get(str(line_number)))


    df_t['brygada'] = df_t['brygada'].apply(ast.literal_eval)
    df_t['brygada'] = df_t['brygada'].apply(lambda x: x.get(str(line_number)))
    
    df_t['trasa'] = df_t['trasa'].apply(ast.literal_eval)
    df_t['trasa'] = df_t['trasa'].apply(lambda x: x.get(str(line_number)))
    
    df_t_sel = df_t[(~df_t['linie'].isnull()) & (df_t['linie'] != ())]
    df_t_sel = df_t[(~df_t['brygada'].isnull()) & (df_t['brygada'] != ())]

    # delete unwanted (autogenerated) column (specific for csv files)
    if 'Unnamed: 0' in df_t_sel:
        del df_t_sel['Unnamed: 0']
    
    df_t_sel = df_t_sel.rename(columns = {'linie': 'czas'})

    return df_t_sel

In [3]:
df_timetables_33 = load_timetables_for_line(DATE, TRAM_NUMBER, TIMETABLES_FOLDER)

##### DATE[`str`] + TRAM_NUMBER[`str`] + [📂TIMETABLES_FOLDER] -> `load_timetables_for_line` ->⏱df_timetables_33 #####

In [4]:
df_timetables_33

Unnamed: 0,index,zespol,slupek,nazwa_zespolu,id_ulicy,szer_geo,dlug_geo,kierunek,obowiazuje_od,czas,typ,brygada,trasa
193,3285,3118,1,Kielecka,1903,52.206813,21.003722,Rakowiecka-Sanktuarium,2022-09-08 00:00:00.0,"(08:38, 17:38, 18:21, 19:47, 19:59, 20:11, 22:...",T,"(031, 2, 10, 6, 5, 7, 12, 16, 3, 020)","(TZ-PIA3, TZ-PIA3, TZ-PIA3, TZ-MML3, TZ-MML3, ..."
194,3286,3118,2,Kielecka,1903,52.206836,21.004010,Metro Pole Mokotowskie,2022-09-08 00:00:00.0,"(04:17, 04:27, 04:44, 05:03, 05:13, 05:23, 05:...",T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...","(TD-3MML, TD-3MML, TD-3MML, TD-3MML, TP-MML, T..."
195,3287,3118,71,Kielecka,1903,52.206732,21.002608,______________________________,2022-09-08 00:00:00.0,"(05:13, 05:33, 05:43, 05:59, 06:05, 06:17, 06:...",T,"(14, 17, 1, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15...","(TP-MML, TP-MML, TP-MML, TP-MML, TP-MML, TP-MM..."
196,3551,3228,3,Metro Pole Mokotowskie,116,52.208369,21.007819,Kielecka,2022-09-08 00:00:00.0,"(04:55, 05:15, 05:35, 05:45, 05:55, 06:05, 06:...",T,"(14, 17, 1, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15...","(TD-4KIE, TD-4KIE, TD-4KIE, TP-KIE, TP-KIE, TD..."
197,3552,3228,4,Metro Pole Mokotowskie,116,52.208837,21.007694,Biblioteka Narodowa,2022-09-08 00:00:00.0,"(04:19, 04:29, 04:46, 05:05, 05:15, 05:25, 05:...",T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...","(TD-3MML, TD-3MML, TD-3MML, TD-3MML, TP-MML, T..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,6895,7093,4,GUS,116,52.216699,21.005211,Nowowiejska,2022-09-08 00:00:00.0,"(04:21, 04:31, 04:48, 05:07, 05:17, 05:27, 05:...",T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...","(TD-3MML, TD-3MML, TD-3MML, TD-3MML, TP-MML, T..."
565,6901,7094,3,Biblioteka Narodowa,116,52.213141,21.006219,Metro Pole Mokotowskie,2022-09-08 00:00:00.0,"(04:54, 05:14, 05:34, 05:44, 05:54, 06:04, 06:...",T,"(14, 17, 1, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15...","(TD-4KIE, TD-4KIE, TD-4KIE, TP-KIE, TP-KIE, TD..."
566,6902,7094,4,Biblioteka Narodowa,116,52.212914,21.006343,GUS,2022-09-08 00:00:00.0,"(04:20, 04:30, 04:47, 05:06, 05:16, 05:26, 05:...",T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...","(TD-3MML, TD-3MML, TD-3MML, TD-3MML, TP-MML, T..."
575,6930,R-03,0,ZET MOKOTÓW,2513,52.188500,20.999906,Wołoska,2022-09-08 00:00:00.0,"(04:08, 04:18, 04:35, 04:54, 05:14, 05:39, 05:...",T,"(5, 6, 10, 12, 16, 2, 3, 7, 031, 020)","(TD-3MML, TD-3MML, TD-3MML, TD-3MML, TD-3MML, ..."


Load gps positions for selected line

In [5]:
def load_gps_positions_for_line(date: str, line_number: int, \
    gps_positions_folder: str) -> pd.DataFrame:    
    """
    Make a dataframe of gps positions for a specific tram line given a folder
    with txt files from a selected dat

    Args:
        date (str): date in  a  'dd_mm_yyyy' format
        line_number (int): tram line number
        gps_positions_folder (str): folder with txt files containing tram 
        gps positions

    Returns:
        pd.DataFrame: a dataframe of gps positions for a selected tram line
        in a selected day
    """
    def to_df(file_name: str) -> pd.DataFrame:
        """
        Read files generated by the API script and make a dataframe from them

        Args:
            file_name (str): name of txt file

        Returns:
            pd.DataFrame: dataframe contructed from one txt file containing
            gps tram positions
        """
        with open(file_name, 'r') as file:
            lines = file.readlines()

        # read every 3rd line starting from index 1
        # 1st line contains date (irrelevant), 2nd line contains actual data, 
        # 3rd line in blank 
        len_entry = len(lines[1::3])

        pd_full = []
        # iterate over lines in txt file and concat the to dataframe
        for i in tqdm(range(len_entry)):
            pd_full.append(pd.json_normalize(ast.literal_eval(lines[1::3][i])['result']))
        return pd.concat(pd_full, ignore_index=False)


    # get all txt files
    path = f"{gps_positions_folder}trams_{date}/*.txt"

    # iterate over all txt files, make dataframes from them and concat them
    # together
    df_p = pd.concat([to_df(str(file)) for file in glob.glob(path)], \
        ignore_index = True)

    # drop duplicated values 
    df_p.drop_duplicates(inplace=True)

    # restrict the dataframe to row containing only the seleced line and
    # sort them with respect to 'Time'
    df_p_sel = df_p[df_p['Lines'] == str(line_number)].sort_values(by='Time')

    # delete obviosly incorrect values; sometimes neighboring entries have
    # different dates or differ for example by one hour, when the real time
    # difference should be no more the a couple of seconds
    #
    # this will keep only entries that have a time difference with their 
    # neighbour less then 5 minutes
    df_p_sel = df_p_sel[pd.to_datetime(df_p_sel['Time']).diff() < pd.Timedelta(5,'m')]

    return df_p_sel

##### DATE[`str`] + TRAM_NUMBER[`str`] + [📂GPS_POSITIONS_FOLDER] -> `load_gps_positions_for_line` ->🌍df_gps33 #####

In [6]:
df_gps_33 = load_gps_positions_for_line(DATE, TRAM_NUMBER, GPS_POSITIONS_FOLDER)

100%|██████████| 117/117 [00:00<00:00, 407.66it/s]
100%|██████████| 120/120 [00:00<00:00, 701.76it/s]
100%|██████████| 117/117 [00:01<00:00, 68.14it/s]
100%|██████████| 112/112 [00:01<00:00, 64.48it/s]
100%|██████████| 115/115 [00:01<00:00, 59.71it/s]
100%|██████████| 102/102 [00:01<00:00, 66.02it/s]
100%|██████████| 114/114 [00:01<00:00, 66.05it/s]
100%|██████████| 113/113 [00:01<00:00, 57.25it/s]
100%|██████████| 116/116 [00:02<00:00, 56.78it/s]
100%|██████████| 115/115 [00:02<00:00, 57.12it/s]
100%|██████████| 116/116 [00:02<00:00, 44.24it/s]
100%|██████████| 117/117 [00:02<00:00, 48.59it/s]
100%|██████████| 119/119 [00:00<00:00, 447.37it/s]
100%|██████████| 113/113 [00:01<00:00, 57.24it/s]
100%|██████████| 117/117 [00:01<00:00, 79.53it/s]
100%|██████████| 116/116 [00:00<00:00, 145.90it/s]
100%|██████████| 117/117 [00:00<00:00, 213.78it/s]
100%|██████████| 120/120 [00:00<00:00, 839.16it/s]
100%|██████████| 119/119 [00:00<00:00, 314.81it/s]
100%|██████████| 120/120 [00:00<00:00, 127.

In [7]:
df_gps_33

Unnamed: 0,Lines,Lon,VehicleNumber,Time,Lat,Brigade
448987,33,21.001247,3248,2023-01-13 04:16:11,52.198906,5
449012,33,21.000622,3248,2023-01-13 04:16:32,52.201023,5
449038,33,21.000175,3248,2023-01-13 04:17:09,52.202686,5
449064,33,20.999332,3248,2023-01-13 04:17:40,52.205692,5
449090,33,20.999640,3248,2023-01-13 04:18:01,52.206080,5
...,...,...,...,...,...,...
446757,33,21.001007,3286,2023-01-13 23:54:25,52.188835,020
446807,33,21.000990,3286,2023-01-13 23:55:00,52.188840,020
446907,33,21.000930,3286,2023-01-13 23:56:00,52.188810,020
446956,33,21.000938,3286,2023-01-13 23:56:15,52.188812,020


Select a specific brigade

🌍df_gps_33 -> `select_brigade` -> 🌍df_gps_33

In [8]:
df_gps_33 = df_gps_33[df_gps_33['Brigade'] == '5']
df_gps_33 

Unnamed: 0,Lines,Lon,VehicleNumber,Time,Lat,Brigade
448987,33,21.001247,3248,2023-01-13 04:16:11,52.198906,5
449012,33,21.000622,3248,2023-01-13 04:16:32,52.201023,5
449038,33,21.000175,3248,2023-01-13 04:17:09,52.202686,5
449064,33,20.999332,3248,2023-01-13 04:17:40,52.205692,5
449090,33,20.999640,3248,2023-01-13 04:18:01,52.206080,5
...,...,...,...,...,...,...
375069,33,21.001902,3248,2023-01-13 20:08:46,52.189840,5
375345,33,21.001970,3248,2023-01-13 20:09:23,52.189285,5
375620,33,21.001970,3248,2023-01-13 20:09:40,52.189285,5
375895,33,21.000930,3248,2023-01-13 20:10:22,52.188797,5


Load route coordinates for selected line

In [9]:
def load_routes_for_line(line_number: int, lines_geometry_file_name: str) -> pd.DataFrame:
    """
    Make a dataframe of routes for a specific tram line given a txt file with
    route geometry

    Args:
        line_number (int): tram line number
        lines_geometry_file_name (str): name of txt file with route geometry

    Returns:
        pd.DataFrame: a dataframe of a specific tram line route in GeoPandas
        format
    """

    # read geometry file and restrict it to only the selected tram line
    df_l = pd.read_csv(lines_geometry_file_name, sep = ';')
    df_l = df_l[df_l['route_id'] == line_number]

    # make shapely points
    geometry = [Point(xy) for xy in zip(df_l.XCoord, df_l.YCoord)]

    # txt files contain coordinates written in CRS 2178 system
    # a common well-known longitude-latitude coordinate system is called WGS84
    # which coresponds to EPSG = 4326
    #
    # the final dataframe will have GeoPandas points writted in 
    # lonfitude-latitude format
    df_l_sel = gpd.GeoDataFrame(df_l, geometry=geometry, crs=2178)
    df_l_sel['geometry'] = df_l_sel['geometry'].to_crs(epsg = 4326)

    return df_l_sel

##### line_number[`str`] + [📄lines_geometry_file_name]  -> `load_routes_for_line` -> 📈df_route33 #####

In [10]:
df_route_33 = load_routes_for_line(TRAM_NUMBER, ROUTES_GEOMETRY_FILE)

In [11]:
df_route_33

Unnamed: 0,XCoord,YCoord,route_id,trakcja,geometry
17442,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)
17443,7.498792e+06,5.791273e+06,33,tram,POINT (20.98231 52.25501)
17444,7.498795e+06,5.791262e+06,33,tram,POINT (20.98235 52.25491)
17445,7.498796e+06,5.791258e+06,33,tram,POINT (20.98236 52.25488)
17446,7.498797e+06,5.791255e+06,33,tram,POINT (20.98238 52.25485)
...,...,...,...,...,...
18147,7.498786e+06,5.791296e+06,33,tram,POINT (20.98222 52.25522)
18148,7.498788e+06,5.791289e+06,33,tram,POINT (20.98225 52.25516)
18149,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)
18150,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)


Visualize route coordinates

In [12]:
def visualize_route(dataframe: pd.DataFrame, m: folium.Map = None)-> folium.Map:
    """
    Visualize points that represent a tram's route

    Args:
        dataframe (pd.DataFrame): dataframe created with
        load_routes_for_line function

    Returns:
        folium.Map: static map with points indicating tram's route
    """
    # geo coordinates take from GeoPandas geometry column
    coordinates = [[point.xy[1][0], point.xy[0][0]] for point in dataframe.geometry]

    # start with a map with the 250-th entry as the center point
    if m == None:
        m = folium.Map(location=coordinates[250], zoom_start=12)
    plugins.Geocoder().add_to(m)

    # add points as circle markers to map
    for i in coordinates:
        folium.CircleMarker([i[0], i[1]], radius = 5).add_to(m)
    folium.PolyLine(coordinates, color = 'blue').add_to(m)
    return m

##### 📈df_route_33 -> `visualize_route` #####

In [13]:
m = visualize_route(df_route_33)
m

Visualize gps positions

In [13]:
def animate_gps_positions(dataframe: pd.DataFrame, n: folium.Map = None) -> folium.Map:
    """
    Visualize points that represent a moving tram

    Args:
        dataframe (pd.DataFrame): dataframe created with
        load_gps_positions_for_line function restricted to a specific brigade

    Returns:
        folium.Map: dynamic map with lines indicating tram's movement
    """

    # folium needs dates written in a specific format (with 'T' as a date-time
    # separator)
    converted_dates = dataframe['Time'].apply(lambda x: 'T'.join(x.split())).tolist()

    # create folium map zooming in on a point located 1/4 from the beginning
    if n == None:
        n = folium.Map(location=dataframe[['Lat', 'Lon']].iloc[(dataframe.shape[0]//4)].tolist(), \
        zoom_start=13)

        # add geocoder (textbox to input geolocation names)
        plugins.Geocoder().add_to(n)

    # draw lines based on coordinates in dataframe
    # 'lon' is first, 'lat' is second
    lines = [
        {
            "coordinates": [[x,y] for x,y in zip(dataframe['Lon'], dataframe['Lat'])],
            "dates": converted_dates,
            "color": "red"}]

    # make 'features' to output them into TimestampedGeoJson
    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "LineString",
                "coordinates": line["coordinates"],
            },
            "properties": {
                "times": line["dates"],
                "style": {
                    "color": line["color"],
                    },
                "icon": "marker",

                # comment whole 'iconstyle' for a default marker
                "iconstyle": { 
                    "iconUrl": TRAM_ICON,
                    "iconSize": [40, 40],
                },
            },
        }
        for line in lines
    ]

    plugins.TimestampedGeoJson(
        {
            "type": "FeatureCollection",
            "features": features,
        },
        period="PT1M",
        add_last_point=True,
    ).add_to(n)

    return n

##### 🌍df_gps_33 -> `animate_gps_positions` #####

In [14]:
animate_gps_positions(df_gps_33)

---

Get all unique routes values

In [15]:
def get_unique_routes_for_line(df_timetables: pd.DataFrame) -> tuple:
    """Get unique routes from a timetable of a single tram line

    Args:
        df_timetables (pd.DataFrame): datagrame with all timetables for a single
        tram number

    Returns:
        set: list of all unique routes for a given tram line
    """
    all_route_symbols = []
    for entry in df_timetables['trasa']:
        all_route_symbols.extend(entry)

    return tuple(set(all_route_symbols))

##### ⏱df_timetables_33 -> `get_unique_routes_for_line` -> routes[`list`] #####

In [16]:
routes = get_unique_routes_for_line(df_timetables_33)
routes

('TP-KIE',
 'TD-3PIA',
 'TZ-MML3',
 'TD-4KIE',
 'TZ-PIA3',
 'TP-KIEP',
 'TP-MML',
 'TZ-KIE4',
 'TX-PIA',
 'TD-3MML')

In [17]:
def pick_one_route(df_timetables: pd.DataFrame, routes: list, pick_route: str = 'first') -> pd.DataFrame:
    """Select only one route from all timetables of a given tram number line

    Args:
        df_timetables (pd.DataFrame): dataframe with all timetables of a given 
        tram number line
        
        routes (list): list of all possible route names
        
        pick_route (str, optional): can take three values: 
        1) particular line name - selects that particulat route
        2) 'random' -  selects a random route
        3)'first' - select the first route from the list
        Defaults to 'first'.

    Returns: 
        pd.DataFrame: A dataframe with all titmetables for a single route 
        (usually an about 30-50 min) of a given 
    """

    # all_route_symbols = []
    # for entry in df_timetables['trasa']:
    #     all_route_symbols.extend(entry)

    # all_route_symbols =  list(set(all_route_symbols))


    # expand ⏱df_timetables on 'trasa' (TP-KIE, TP-MML...)
    work_df = df_timetables.explode('trasa')

    # select a particular route based on 'pick_route' value 
    # (first, random or given)
    if pick_route not in {'random', 'first'}:
        result = work_df[work_df['trasa'] == pick_route]
    elif pick_route == 'first':
        result = work_df[work_df['trasa'] == routes[0]]
    elif pick_route == 'random':
        result = work_df[work_df['trasa'] == random.sample(routes,1)[0]]
    
    # expand the final dataframe on 'czas'
    result = result.explode('czas')
    result = result.sort_values('czas').drop_duplicates()

    result = result.reset_index()
    
    return result


##### ⏱df_timetables_33  + routes[`str`] -> `pick_one_route` -> 1️⃣📈df_route #####

In [27]:
df_route = pick_one_route(df_timetables_33, routes, 'TP-KIE')
df_route

Unnamed: 0,level_0,index,zespol,slupek,nazwa_zespolu,id_ulicy,szer_geo,dlug_geo,kierunek,obowiazuje_od,czas,typ,brygada,trasa
0,194,3286,3118,2,Kielecka,1903,52.206836,21.004010,Metro Pole Mokotowskie,2022-09-08 00:00:00.0,04:17,T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...",TP-MML
1,197,3552,3228,4,Metro Pole Mokotowskie,116,52.208837,21.007694,Biblioteka Narodowa,2022-09-08 00:00:00.0,04:19,T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...",TP-MML
2,566,6902,7094,4,Biblioteka Narodowa,116,52.212914,21.006343,GUS,2022-09-08 00:00:00.0,04:20,T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...",TP-MML
3,564,6895,7093,4,GUS,116,52.216699,21.005211,Nowowiejska,2022-09-08 00:00:00.0,04:21,T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...",TP-MML
4,555,6879,7090,4,Nowowiejska,116,52.219832,21.004934,Koszykowa,2022-09-08 00:00:00.0,04:22,T,"(5, 6, 10, 12, 14, 16, 17, 1, 2, 3, 5, 6, 7, 8...",TP-MML
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5146,435,5940,6025,4,Cm.Wawrzyszewski,2504,52.280184,20.928438,Bogusławskiego,2022-09-08 00:00:00.0,23:40,T,"(5, 6, 10, 12, 14, 16, 17, 1, 3, 5, 6, 7, 8, 9...",TP-MML
5147,437,5944,6026,4,Bogusławskiego,2504,52.281182,20.924765,Popiela,2022-09-08 00:00:00.0,23:41,T,"(5, 6, 10, 12, 14, 16, 17, 1, 3, 5, 6, 7, 8, 9...",TP-MML
5148,439,5951,6027,4,Popiela,2504,52.282201,20.920542,Nocznickiego,2022-09-08 00:00:00.0,23:42,T,"(5, 6, 10, 12, 14, 16, 17, 1, 3, 5, 6, 7, 8, 9...",TP-MML
5149,463,6055,6058,2,Nocznickiego,1603,52.284735,20.922889,Metro Młociny,2022-09-08 00:00:00.0,23:43,T,"(5, 6, 10, 12, 14, 16, 17, 1, 3, 5, 6, 7, 8, 9...",TP-MML


In [19]:
def find_stop(full_stop_name: str, df_name: pd.DataFrame) -> pd.DataFrame:
    """Helper function for filering out a dataframe based on a full stop name.
    Enter full_stop_name = 'Kielecka 01' and it will select rows with
    'nazwa_zespolu' == 'Kielecka' and 'slupek' == '01

    Args:
        full_stop_name (str): full stop name, like: 'Kielecka 01'
        df_name (pd.DataFrame): ⏱df_timetables_33 dataframe

    Returns:
        pd.DataFrame: a timetabels dataframe with only one particular stop
    """
    # get elements from a given full_stop_name
    nazwa_zespolu, slupek = full_stop_name[:-3], full_stop_name[-2:]
    slupek = str(int(slupek))

    # select row of a dataframe based on elements ('nazwa_zespolu' and 'slupek')
    result = df_name[(df_name['nazwa_zespolu'] == nazwa_zespolu) & (df_name['slupek'] == int(slupek))]
    return result

In [20]:
def make_sequence(sequence_name: str, sequence_folder: str, df_timetables: pd.DataFrame) -> pd.DataFrame:
    """Add latitude and longitude to sequence of stops from sequence_name file

    Args:
        sequence_name (str): txt file contatining the squence of stops
        sequence_folder (str): folder in whith sequence file is located
        df_timetables (pd.DataFrame): timetables table containing information
        like geo coordinates os stops

    Returns:
        pd.DataFrame: dataframe with stop sequence, stop names and stop 
        coordinated
    """
    # full path to sequence file
    test_seq_path = sequence_folder + sequence_name
    
    #  open sequence file and split times and names of stops
    with open(test_seq_path, 'r', encoding='utf-8') as file:
        test_sequence = [(elem[:5], elem[6:].replace('\n', '')) for elem in file.readlines()]
    
    result = pd.DataFrame(test_sequence, columns = ['time', 'full_stop_name'])

    longitude = []
    latitude = []

    # search timetables table for coordinates for tram stops
    for elem in test_sequence:
        longitude.append(find_stop(elem[1], df_timetables)['dlug_geo'].values[0])
        latitude.append(find_stop(elem[1], df_timetables)['szer_geo'].values[0])
    
    # add two new columns to result dataframe
    result['longitude'] = longitude
    result['latitude'] = latitude

    return result

##### [📄test_seq_name] + [📂SEQUENCES_FOLDER] +⏱df_timetables_33   -> `make_sequence` -> 📌stop_seq #####

In [21]:
test_seq_name = 'line_T_033__13_01_2023__5.07_Metro_Młociny_15__Kielecka_01.seq'

In [22]:
stop_seq = make_sequence(test_seq_name, SEQUENCES_FOLDER, df_timetables_33)
stop_seq

Unnamed: 0,time,full_stop_name,longitude,latitude
0,05:07,Metro Młociny 15,20.930104,52.291902
1,05:08,Metro Młociny 08,20.929685,52.291213
2,05:09,Metro Młociny 06,20.929255,52.290001
3,05:10,Nocznickiego 01,20.922982,52.284872
4,05:11,Popiela 03,20.920274,52.282663
5,05:12,Bogusławskiego 03,20.923942,52.281342
6,05:13,Cm.Wawrzyszewski 03,20.928018,52.280263
7,05:15,Aspekt 03,20.9332,52.278444
8,05:16,al.Reymonta 03,20.937763,52.275709
9,05:18,Piaski 03,20.945765,52.271463


In [23]:
# visualize route
m = visualize_route(df_route_33)

# add layer containing bus stops and a tooltip with basic info
for n, row in stop_seq.iterrows():
    folium.CircleMarker([row['latitude'], row['longitude']],color = 'green', fill_color = 'green', popup = f"{n}<br><h2>{row['time']}</h2><br>{row['full_stop_name']}").add_to(m)

In [24]:
m

In [25]:
# select dataframe in a given time frame 
# ADD DESCRIPTION!
def select_time(dataframe, time_column_name, operator, time1, time2 = None):
    time_column = dataframe[time_column_name]
    start_time = pd.to_datetime(pd.DataFrame([time1])[0]).dt.time.values
    if time2 != None:
        end_time = pd.to_datetime(pd.DataFrame([time2])[0]).dt.time.values
    if operator == '>':
        result = dataframe[pd.to_datetime(time_column).dt.time.values > start_time]
    elif operator == '<':
        result = dataframe[pd.to_datetime(time_column).dt.time.values < start_time]
    elif operator == '><':
        result = dataframe[(pd.to_datetime(time_column).dt.time.values > start_time) & (pd.to_datetime(time_column).dt.time.values < end_time)]
    return result


In [30]:
animate_gps_positions(select_time(df_gps_33, 'Time', '><', '5:00', '5:50'), m)

In [26]:
# snap gps line to route line
# ADD DESCRIPTION!

def snap_points_to_line(gps_dataframe: pd.DataFrame, route_dataframe: pd.DataFrame) -> folium.Map:

	gps_line = [[y,x] for x,y in zip(gps_dataframe['Lon'], gps_dataframe['Lat'])]
	route_line = geom.LineString([[point.xy[1][0], point.xy[0][0]] for point in route_dataframe.geometry])

	inter_path = []
	for n, point in enumerate(gps_line):
		inter_path.append(list(route_line.interpolate(route_line.project(Point(gps_line[n][0], gps_line[n][1]))).coords))

	inter_path = [elem[0] for elem in inter_path]


	map = folium.Map(location=inter_path[0], zoom_start=12)
	folium.PolyLine(inter_path, color = 'black').add_to(map)

	return map

In [37]:
new_df_gps_33 = select_time(df_gps_33, 'Time', '><', '5:00', '5:50')

snap_points_to_line(new_df_gps_33, df_route_33)

Splitting

https://stackoverflow.com/questions/62990029/how-to-get-equally-spaced-points-on-a-line-in-shapely