In [1]:
import pytz
import pandas
from datetime import datetime

import gtfs_kit as gk

In [2]:
# Read the recorded vehicle positions and mark the index as UTC in one step.
vehicle_positions = (
    pandas.read_parquet("../data/vehicle_positions.parquet")
    .tz_localize('UTC')
)

In [3]:
# Load the GTFS schedule snapshot, with distances expressed in kilometres.
feed = gk.read_feed(
    "../downloads/schedule_updates/2024-03-20T06_34_14.zip",
    dist_units='km',
)

## Monitored Routes

In [4]:
def get_monitored_routes(vehicle_positions: pandas.DataFrame):
    """Return the distinct route ids found in the vehicle positions.

    Args:
        vehicle_positions: Frame with a ``vehicle.trip.route_id`` column.

    Returns:
        The distinct values of the ``vehicle.trip.route_id`` column.
    """
    route_ids = vehicle_positions["vehicle.trip.route_id"]
    return route_ids.unique()

In [5]:
# Routes listed in the schedule versus routes seen in the vehicle positions.
planned_routes = feed.get_routes()["route_id"].unique()
monitored_routes = get_monitored_routes(vehicle_positions)
# Planned routes for which no vehicle position was recorded.
unimonitored_routes = set(planned_routes).difference(monitored_routes)

### Percentage of monitored routes

In [6]:
# Monitored routes as a percentage of planned routes, rounded to one decimal.
# NOTE(review): the numerator counts every monitored route id, including any
# that are absent from the planned set — confirm that this is intended.
round(
    len(monitored_routes) / len(planned_routes) * 100,
    ndigits=1,
)

93.7

## Route Analysis

In [7]:
# Route id analysed in the rest of the notebook.
ROUTE = "360"
# Start of the analysed period: 2024-03-20 04:00, Europe/Rome wall-clock time.
# The zone is attached with localize() because passing a pytz timezone through
# the datetime constructor's `tzinfo=` argument picks the zone's local-mean-time
# offset instead of the CET/CEST offset in force on that date, which would
# shift every comparison against this timestamp by several minutes.
DATE = pytz.timezone('Europe/Rome').localize(datetime(2024, 3, 20, 4))

In [8]:
# Keep only the positions whose index timestamp falls on DATE's calendar day.
# NOTE(review): the index was localized to UTC on load while DATE is expressed
# in Europe/Rome, so a UTC calendar date is compared with a local one — confirm
# that this is intended.
vehicle_positions = vehicle_positions.loc[
    vehicle_positions.index.to_series().dt.date == DATE.date()
]

In [9]:
# Scheduled timetable of ROUTE for the analysed day (date passed as YYYYMMDD).
route_timetable = feed.build_route_timetable(
    ROUTE,
    [DATE.strftime("%Y%m%d")],
)

  f["dt"] = f["departure_time"].fillna(method="ffill")
  f["min_dt"] = f.groupby("trip_id")["dt"].transform(min)


In [11]:
def get_route_vehicle_positions(vehicle_positions: pandas.DataFrame, route: str, date: datetime) -> pandas.DataFrame:
    """Select the positions of one route recorded at or after a given instant.

    Args:
        vehicle_positions: Frame with ``vehicle.trip.route_id`` and, once the
            index has been turned into a column, ``vehicle.timestamp``.
        route: Route id to keep.
        date: Lower bound (inclusive) compared against ``vehicle.timestamp``.

    Returns:
        A new frame holding the matching rows, with the former index moved
        into a column and the rows sorted by ``vehicle.timestamp``. The input
        frame is left untouched.
    """
    on_route = vehicle_positions["vehicle.trip.route_id"] == route
    # reset_index() runs before the timestamp filter so that an index-borne
    # timestamp is available as a regular column.
    positions = vehicle_positions[on_route].reset_index()
    positions = positions[positions["vehicle.timestamp"] >= date]
    return positions.sort_values("vehicle.timestamp")

In [12]:
# Positions of the analysed route recorded from DATE onwards.
route_vehicle_positions = get_route_vehicle_positions(vehicle_positions, ROUTE, DATE)

## Failed Trips

In [14]:
# Trips present in the timetable but never seen in the vehicle positions.
route_planned_trips = route_timetable["trip_id"].unique()
route_actual_trips = route_vehicle_positions["vehicle.trip.trip_id"].unique()
route_failed_trips = set(route_planned_trips).difference(route_actual_trips)
# Displayed as (number of planned trips, number of failed trips).
(len(route_planned_trips), len(route_failed_trips))

(208, 35)

In [15]:
# One row per failed trip, taken at stop_sequence == 1 (presumably each trip's
# first stop — confirm against the feed), with the identifying columns only.
route_timetable.loc[
    route_timetable["trip_id"].isin(route_failed_trips)
    & (route_timetable["stop_sequence"] == 1),
    ["trip_id", "trip_headsign", "departure_time", "date"],
]

Unnamed: 0,trip_id,trip_headsign,departure_time,date
3865217,0#4657-3,MUSE,05:28:00,20240320
3865233,0#4657-4,ZAMA,05:50:00,20240320
3865264,0#4657-5,MUSE,06:31:00,20240320
3866462,0#4660-4,MUSE,07:02:00,20240320
3865293,0#4657-6,ZAMA,07:22:00,20240320
3862187,0#4650-3,MUSE,07:46:00,20240320
3866491,0#4660-5,ZAMA,07:52:00,20240320
3862203,0#4650-4,ZAMA,08:19:00,20240320
3865324,0#4657-7,MUSE,08:27:00,20240320
3866522,0#4660-6,MUSE,08:56:00,20240320
