This notebook relies on `constants.py` and on data files stored in the following folders:

`data/Lines`

`data/Positions`

`data/Timetables`

In [1]:
import ast
import glob

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

import folium
from folium import plugins

from constants import *

Load timetables for the selected line

In [2]:
# reverse the '_'-separated parts of DATE and glue them back with '-'
DATE_F = '-'.join(reversed(DATE.split('_')))
# no separator is inserted between the folder and the file name here, so
# TIMETABLES_FOLDER has to end with a path separator for this path to resolve
df_t = pd.read_csv(f'{TIMETABLES_FOLDER}rozklady_{DATE_F}.csv')

In [3]:
# display the raw timetables frame read from the csv file
df_t

Unnamed: 0.1,Unnamed: 0,index,zespol,slupek,nazwa_zespolu,id_ulicy,szer_geo,dlug_geo,kierunek,obowiazuje_od,linie,typ
0,2,2,1001,3,Kijowska,2201,52.248928,21.044169,al.Zieleniecka,2022-07-15 00:00:00.0,"{'3': ('04:25', '04:44', '04:54', '05:04', '05...",T
1,3,3,1001,4,Kijowska,2201,52.249969,21.041588,Ząbkowska,2022-07-15 00:00:00.0,"{'3': ('04:16', '04:26', '04:46', '05:06', '05...",T
2,4,5,1001,6,Kijowska,1203,52.250078,21.043848,Dw.Wschodni (Kijowska),2022-07-15 00:00:00.0,"{'3': ('09:01', '09:22', '09:30', '18:17', '18...",T
3,10,11,1002,3,Ząbkowska,2201,52.251583,21.038448,Kijowska,2022-07-15 00:00:00.0,"{'3': ('05:02', '05:22', '05:42', '06:02', '06...",T
4,11,12,1002,4,Ząbkowska,2201,52.251944,21.037888,Dw.Wileński,2022-07-15 00:00:00.0,"{'3': ('04:18', '04:28', '04:48', '05:08', '05...",T
...,...,...,...,...,...,...,...,...,...,...,...,...
565,6828,6907,R-02,99,ZET PRAGA,1202,52.257020,21.055770,ZET PRAGA,2022-07-15 00:00:00.0,"{'3': (), '6': (), '7': (), '9': (), '13': (),...",T
566,6829,6908,R-03,0,ZET MOKOTÓW,2513,52.188500,20.999906,Wołoska,2022-07-15 00:00:00.0,"{'1': ('03:56', '04:11', '04:21', '04:36', '04...",T
567,6830,6909,R-03,99,ZET MOKOTÓW,2513,52.188500,21.000290,ZET MOKOTÓW,2022-07-15 00:00:00.0,"{'1': (), '4': (), '7': (), '9': (), '11': (),...",T
568,6831,6910,R-04,0,ZET ŻOLIBORZ,1803,52.299136,20.934157,"Zgrupowania AK ""Kampinos""",2022-07-15 00:00:00.0,"{'1': ('03:46', '03:56', '04:06', '04:26', '04...",T


In [4]:
def load_timetables_for_line(
    date: str, line_number: int, timetables_folder: str
) -> pd.DataFrame:
    """
    Build a dataframe with the timetables of one tram line on one day

    Args:
        date (str): date in a 'dd_mm_yyyy' format
        line_number (int): tram line number
        timetables_folder (str): folder holding the 'rozklady_<date>.csv'
        timetable files

    Returns:
        pd.DataFrame: rows of the timetable file for which the selected line
        has at least one entry; the 'linie' column holds only the entries of
        that line and the autogenerated 'Unnamed: 0' column is removed
    """
    # 'dd_mm_yyyy' -> 'yyyy-mm-dd', the form used in the csv file names
    iso_date = '-'.join(reversed(date.split('_')))

    timetables = pd.read_csv(f'{timetables_folder}/rozklady_{iso_date}.csv')

    # every 'linie' cell is the text form of a dict keyed by line number;
    # parse it and keep only the value stored for the requested line
    # (None when the line is not present in the dict)
    line_key = str(line_number)

    def entries_for_line(raw_cell):
        return ast.literal_eval(raw_cell).get(line_key)

    timetables['linie'] = timetables['linie'].apply(entries_for_line)

    # keep rows where the line is present and its tuple of entries is not empty
    line_present = timetables['linie'].notnull()
    not_empty = timetables['linie'] != ()
    selected = timetables[line_present & not_empty]

    # the csv round trip leaves an autogenerated index column behind;
    # remove it when it is there
    return selected.drop(columns='Unnamed: 0', errors='ignore')

In [23]:
# timetables of the tram line and day configured in constants.py
df_timetables_33 = load_timetables_for_line(
    date=DATE,
    line_number=TRAM_NUMBER,
    timetables_folder=TIMETABLES_FOLDER,
)

In [6]:
# display the timetables restricted to the selected line
df_timetables_33

Unnamed: 0,index,zespol,slupek,nazwa_zespolu,id_ulicy,szer_geo,dlug_geo,kierunek,obowiazuje_od,linie,typ
194,3271,3118,1,Kielecka,1903,52.206813,21.003722,Rakowiecka-Sanktuarium,2022-07-15 00:00:00.0,"(08:38, 17:38, 18:21, 19:47, 19:59, 20:11, 22:...",T
195,3272,3118,2,Kielecka,1903,52.206836,21.004010,Metro Pole Mokotowskie,2022-07-15 00:00:00.0,"(04:17, 04:27, 04:44, 05:03, 05:13, 05:23, 05:...",T
196,3273,3118,71,Kielecka,1903,52.206732,21.002608,______________________________,2022-07-15 00:00:00.0,"(05:13, 05:33, 05:43, 05:59, 06:05, 06:17, 06:...",T
197,3533,3228,3,Metro Pole Mokotowskie,116,52.208369,21.007819,Kielecka,2022-07-15 00:00:00.0,"(04:55, 05:15, 05:35, 05:45, 05:55, 06:05, 06:...",T
198,3534,3228,4,Metro Pole Mokotowskie,116,52.208837,21.007694,Biblioteka Narodowa,2022-07-15 00:00:00.0,"(04:19, 04:29, 04:46, 05:05, 05:15, 05:25, 05:...",T
...,...,...,...,...,...,...,...,...,...,...,...
555,6871,7093,4,GUS,116,52.216699,21.005211,Nowowiejska,2022-07-15 00:00:00.0,"(04:21, 04:31, 04:48, 05:07, 05:17, 05:27, 05:...",T
556,6877,7094,3,Biblioteka Narodowa,116,52.213141,21.006219,Metro Pole Mokotowskie,2022-07-15 00:00:00.0,"(04:54, 05:14, 05:34, 05:44, 05:54, 06:04, 06:...",T
557,6878,7094,4,Biblioteka Narodowa,116,52.212914,21.006343,GUS,2022-07-15 00:00:00.0,"(04:20, 04:30, 04:47, 05:06, 05:16, 05:26, 05:...",T
566,6908,R-03,0,ZET MOKOTÓW,2513,52.188500,20.999906,Wołoska,2022-07-15 00:00:00.0,"(04:08, 04:18, 04:35, 04:54, 05:14, 05:39, 05:...",T


Load GPS positions for the selected line

In [7]:
def load_gps_positions_for_line(date: str, line_number: int, \
    gps_positions_folder: str) -> pd.DataFrame:
    """
    Make a dataframe of gps positions for a specific tram line given a folder
    with txt files from a selected day

    Args:
        date (str): date in a 'dd_mm_yyyy' format
        line_number (int): tram line number
        gps_positions_folder (str): folder with txt files containing tram
        gps positions; the files are looked up in its 'trams_<date>'
        subfolder

    Returns:
        pd.DataFrame: a dataframe of gps positions for a selected tram line
        in a selected day, sorted by 'Time'. Only rows recorded less than
        5 minutes after the previous row (in 'Time' order) are kept; the
        first row has no previous row, so it is always dropped

    Raises:
        ValueError: if no txt file matches or none of the files holds any
        position
    """
    def to_df(file_name: str) -> pd.DataFrame:
        """
        Read a file generated by the API script and make a dataframe from it

        Args:
            file_name (str): name of txt file

        Returns:
            pd.DataFrame: dataframe constructed from one txt file containing
            gps tram positions; empty when the file holds no entry
        """
        with open(file_name, 'r') as file:
            lines = file.readlines()

        # every entry takes 3 lines: the 1st contains a date (irrelevant),
        # the 2nd contains the actual data, the 3rd is blank
        # slice once instead of re-slicing the whole file for each entry
        entries = lines[1::3]
        if not entries:
            return pd.DataFrame()

        # each data line is the text form of a dict; its 'result' value
        # holds the records that become dataframe rows
        snapshots = [
            pd.json_normalize(ast.literal_eval(entry)['result'])
            for entry in entries
        ]
        return pd.concat(snapshots, ignore_index=False)

    # get all txt files; sorted so that row labels of the result do not depend
    # on the arbitrary order in which glob lists the files
    path = f"{gps_positions_folder}/trams_{date}/*.txt"
    file_names = sorted(glob.glob(path))

    # make a dataframe from every file, skipping the ones without any rows
    frames = [frame for frame in map(to_df, file_names) if not frame.empty]
    if not frames:
        raise ValueError(f"no gps positions found in files matching '{path}'")

    # concat all files together and drop duplicated rows
    df_p = pd.concat(frames, ignore_index=True).drop_duplicates()

    # restrict the dataframe to rows containing only the selected line and
    # sort them with respect to 'Time'
    df_p_sel = df_p[df_p['Lines'] == str(line_number)].sort_values(by='Time')

    # delete obviously incorrect values; sometimes neighbouring entries have
    # different dates or differ for example by one hour, when the real time
    # difference should be no more than a couple of seconds
    #
    # this keeps only entries recorded less than 5 minutes after the entry
    # that precedes them; the comparison is False for the first entry (its
    # difference is NaT), so that entry is removed as well
    time_step = pd.to_datetime(df_p_sel['Time']).diff()
    df_p_sel = df_p_sel[time_step < pd.Timedelta(minutes=5)]

    return df_p_sel

In [8]:
# gps positions of the tram line and day configured in constants.py
df_gps_33 = load_gps_positions_for_line(
    date=DATE,
    line_number=TRAM_NUMBER,
    gps_positions_folder=GPS_POSITIONS_FOLDER,
)

In [9]:
# display the gps positions of the selected line
df_gps_33

Unnamed: 0,Lines,Lon,VehicleNumber,Time,Lat,Brigade
459684,33,21.001247,3246,2022-11-24 04:14:39,52.198856,5
459724,33,21.000793,3246,2022-11-24 04:15:10,52.200493,5
459764,33,21.000519,3246,2022-11-24 04:15:47,52.201480,5
459804,33,21.000270,3246,2022-11-24 04:16:08,52.202370,5
459845,33,20.999979,3246,2022-11-24 04:16:40,52.203365,5
...,...,...,...,...,...,...
454324,33,21.001938,3260,2022-11-24 23:47:18,52.189560,020
454389,33,20.933510,3202,2022-11-24 23:47:29,52.297417,18
454398,33,21.001938,3260,2022-11-24 23:47:33,52.189560,020
455504,33,21.001976,3260,2022-11-24 23:49:35,52.189278,020


In [10]:
# keep only the rows whose 'Brigade' value is '2'
# (this overwrites df_gps_33, so the unfiltered frame is no longer available)
df_gps_33 = df_gps_33.loc[df_gps_33['Brigade'].eq('2')]
df_gps_33

Unnamed: 0,Lines,Lon,VehicleNumber,Time,Lat,Brigade
484767,33,21.006410,3235,2022-11-24 05:50:40,52.207390,2
484993,33,21.008000,3235,2022-11-24 05:51:05,52.208263,2
485220,33,21.007547,3235,2022-11-24 05:51:40,52.209316,2
485446,33,21.006746,3235,2022-11-24 05:52:10,52.211704,2
485599,33,21.006332,3235,2022-11-24 05:52:31,52.213024,2
...,...,...,...,...,...,...
287552,33,21.001810,3235,2022-11-24 17:42:48,52.192154,2
287911,33,21.001873,3235,2022-11-24 17:43:28,52.190075,2
288270,33,21.001982,3235,2022-11-24 17:43:53,52.189280,2
288630,33,21.000767,3235,2022-11-24 17:44:33,52.188780,2


Load route coordinates for the selected line

In [11]:
def load_routes_for_line(line_number: int, lines_geometry_file_name: str) -> pd.DataFrame:
    """
    Build a GeoPandas dataframe with the route points of one tram line

    Args:
        line_number (int): tram line number
        lines_geometry_file_name (str): name of the ';'-separated txt file
        with route geometry

    Returns:
        pd.DataFrame: rows of the geometry file whose 'route_id' equals the
        line number, as a GeoPandas dataframe with a 'geometry' column of
        points in longitude-latitude (EPSG 4326) coordinates
    """
    # load every route and keep the rows of the requested line only
    all_routes = pd.read_csv(lines_geometry_file_name, sep=';')
    line_route = all_routes[all_routes['route_id'] == line_number]

    # one shapely point per (XCoord, YCoord) pair
    points = [
        Point(x_coord, y_coord)
        for x_coord, y_coord in zip(line_route.XCoord, line_route.YCoord)
    ]

    # the file stores coordinates in the CRS 2178 system, so the points are
    # first declared in that system ...
    route_gdf = gpd.GeoDataFrame(line_route, geometry=points, crs=2178)

    # ... and then converted to the common longitude-latitude system (WGS84),
    # which corresponds to EPSG = 4326
    lon_lat_points = route_gdf['geometry'].to_crs(epsg=4326)
    route_gdf['geometry'] = lon_lat_points

    return route_gdf

In [12]:
# route points of the tram line configured in constants.py
df_route_33 = load_routes_for_line(
    line_number=TRAM_NUMBER,
    lines_geometry_file_name=ROUTES_GEOMETRY_FILE,
)

In [13]:
# display the route points of the selected line
df_route_33

Unnamed: 0,XCoord,YCoord,route_id,trakcja,geometry
17442,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)
17443,7.498792e+06,5.791273e+06,33,tram,POINT (20.98231 52.25501)
17444,7.498795e+06,5.791262e+06,33,tram,POINT (20.98235 52.25491)
17445,7.498796e+06,5.791258e+06,33,tram,POINT (20.98236 52.25488)
17446,7.498797e+06,5.791255e+06,33,tram,POINT (20.98238 52.25485)
...,...,...,...,...,...
18147,7.498786e+06,5.791296e+06,33,tram,POINT (20.98222 52.25522)
18148,7.498788e+06,5.791289e+06,33,tram,POINT (20.98225 52.25516)
18149,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)
18150,7.498789e+06,5.791285e+06,33,tram,POINT (20.98227 52.25512)


Visualize route coordinates

In [14]:
def visualize_route(dataframe: pd.DataFrame)-> folium.Map:
    """
    Visualize points that represent a tram's route

    Args:
        dataframe (pd.DataFrame): dataframe created with
        load_routes_for_line function

    Returns:
        folium.Map: static map with points indicating tram's route

    Raises:
        IndexError: if the dataframe contains no route points
    """
    # folium expects [latitude, longitude]; a point stores longitude as x and
    # latitude as y, hence the swapped order
    coordinates = [[point.y, point.x] for point in dataframe.geometry]

    if not coordinates:
        raise IndexError("route dataframe contains no points to visualize")

    # center the map on the 250-th entry; routes with fewer points are
    # centered on their last point instead of failing with an IndexError
    center_index = min(250, len(coordinates) - 1)
    m = folium.Map(location=coordinates[center_index], zoom_start=12)

    # add geocoder (textbox to input geolocation names)
    plugins.Geocoder().add_to(m)

    # add points as circle markers to map
    for latitude, longitude in coordinates:
        folium.CircleMarker([latitude, longitude], radius = 5).add_to(m)
    return m

In [15]:
# draw the route points of the selected line on a map
visualize_route(dataframe=df_route_33)

Visualize gps positions

In [16]:
def visualize_gps_positions(dataframe: pd.DataFrame) -> folium.Map:
    """
    Draw the recorded positions of a tram as a line animated over time

    Args:
        dataframe (pd.DataFrame): dataframe created with
        load_gps_positions_for_line function restricted to a specific brigade

    Returns:
        folium.Map: dynamic map with lines indicating tram's movement
    """
    # folium needs 'T' between the date and the time, so every whitespace
    # separated part of a 'Time' value is re-joined with 'T'
    iso_times = ['T'.join(stamp.split()) for stamp in dataframe['Time']]

    # the map starts zoomed in on the row located 1/4 from the beginning
    quarter_row = dataframe.shape[0] // 4
    start_location = dataframe[['Lat', 'Lon']].iloc[quarter_row].tolist()
    n = folium.Map(location=start_location, zoom_start=13)

    # add geocoder (textbox to input geolocation names)
    plugins.Geocoder().add_to(n)

    # line coordinates are [lon, lat] pairs: 'lon' first, 'lat' second
    track = [
        [lon, lat] for lon, lat in zip(dataframe['Lon'], dataframe['Lat'])
    ]

    # a single red line feature carrying one timestamp per coordinate pair
    feature = {
        "type": "Feature",
        "geometry": {
            "type": "LineString",
            "coordinates": track,
        },
        "properties": {
            "times": iso_times,
            "style": {
                "color": "red",
            },
            "icon": "marker",

            # comment whole 'iconstyle' for a default marker
            "iconstyle": {
                "iconUrl": TRAM_ICON,
                "iconSize": [40, 40],
            },
        },
    }

    animation = plugins.TimestampedGeoJson(
        {
            "type": "FeatureCollection",
            "features": [feature],
        },
        period="PT1M",
        add_last_point=True,
    )
    animation.add_to(n)

    return n

In [17]:
# animate the recorded positions of the selected brigade on a map
visualize_gps_positions(dataframe=df_gps_33)