diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a1a0a32..f8bb619 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @npaun @JMilot1 +* @npaun @JMilot1 @jsteelz diff --git a/gtfs_loader/__init__.py b/gtfs_loader/__init__.py index ac96273..309140a 100644 --- a/gtfs_loader/__init__.py +++ b/gtfs_loader/__init__.py @@ -15,11 +15,11 @@ def get_files(files): return schema.FileCollection(*(schema.GTFS_FILENAMES[f] for f in files)).values() -def load(gtfs_dir, sorted_read=False, files=None, verbose=True): +def load(gtfs_dir, sorted_read=False, files=None, verbose=True, itineraries=False): gtfs_dir = Path(gtfs_dir) gtfs = types.Entity() - files_to_load = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA.values() + files_to_load = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA_ITINERARIES.values() if itineraries else schema.GTFS_SUBSET_SCHEMA.values() for file_schema in files_to_load: if verbose: @@ -163,6 +163,10 @@ def convert(config, value): if not config.required and value == '': return config.default + # Lists are stringified as JSON in csv. + if typing.get_origin(config.type) is list: + return list(json.loads(value)) + config_type = get_inner_type(config.type) if issubclass(config_type, enum.IntEnum): return config_type(int(value)) @@ -217,7 +221,7 @@ def sorted_entities(file_schema, entities): return sorted(entities.items(), key=lambda kv: kv[0]) -def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verbose=True): +def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verbose=True, itineraries=False): gtfs_in_dir = Path(gtfs_in_dir) gtfs_out_dir = Path(gtfs_out_dir) gtfs_out_dir.mkdir(parents=True, exist_ok=True) @@ -229,7 +233,7 @@ def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verb except shutil.SameFileError: pass # No need to copy if we're working in-place - files_to_patch = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA.values() + files_to_patch = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA_ITINERARIES.values() if itineraries else schema.GTFS_SUBSET_SCHEMA.values() for file_schema in files_to_patch: if verbose: diff --git a/gtfs_loader/schema.py b/gtfs_loader/schema.py index 8ebcaf7..9c93357 100644 --- a/gtfs_loader/schema.py +++ b/gtfs_loader/schema.py @@ -241,6 +241,26 @@ class StopTime(Entity): def stop(self): return self._gtfs.stops[self.stop_id] +class ItineraryCell(Entity): + _schema = File(id='itinerary_index', + name='itinerary_cells', + fileType=FileType.CSV, + required=True, + group_id='stop_sequence') + + stop_id: str + stop_sequence: int + pickup_type: PickupType = PickupType.REGULARLY_SCHEDULED + drop_off_type: DropOffType = DropOffType.REGULARLY_SCHEDULED + mean_duration_factor: Optional[float] = None + mean_duration_offset: Optional[float] = None + safe_duration_factor: Optional[float] = None + safe_duration_offset: Optional[float] = None + + @property + def stop(self): + return self._gtfs.stops[self.stop_id] + class Transfer(Entity): _schema = File(id='from_trip_id', @@ -325,16 +345,85 @@ def last_stop(self): @cached_property def route(self): return self._gtfs.routes[self.route_id] + +class ItineraryTrip(Entity): + _schema = File(id='trip_id', + fileType=FileType.CSV, + name='trips', + required=True) + + trip_id: str + service_id: str + block_id: str = '' + route_id: str + itinerary_index: str + departure_times: List[int] + arrival_times: List[int] + start_pickup_drop_off_windows: List[int] + end_pickup_drop_off_windows: List[int] + + @property + def first_itinerary_cell(self): + return self._gtfs.itinerary_cells[self.itinerary_index][0] + + @property + def last_itinerary_cell(self): + return self._gtfs.itinerary_cells[self.itinerary_index][-1] + + @property + def stop_shape(self): + locations = tuple(self._gtfs.stops[st.stop_id].location for st in self._gtfs.itinerary_cells[self.itinerary_index]) + + if None in locations: + return None + return locations + + @cached_property + def shift_days(self): + return 1 if self.departure_times[0] >= DAY_SEC else 0 + + @cached_property + def first_departure(self): + return self.departure_times[0] - DAY_SEC * self.shift_days + + @cached_property + def last_arrival(self): + return self.arrival_times[-1] - DAY_SEC * self.shift_days + + @cached_property + def first_point(self): + return self.first_stop.location + + @cached_property + def last_point(self): + return self.last_stop.location + + @cached_property + def first_stop(self): + return self._gtfs.stops[self.first_itinerary_cell.stop_id] + + @cached_property + def last_stop(self): + return self._gtfs.stops[self.last_itinerary_cell.stop_id] + + @cached_property + def route(self): + return self._gtfs.routes[self.route_id] GTFS_SUBSET_SCHEMA = FileCollection(Agency, BookingRule, Calendar, CalendarDate, Locations, LocationGroups, Routes, Transfer, Trip, Stop, StopTime) +GTFS_SUBSET_SCHEMA_ITINERARIES = FileCollection(Agency, BookingRule, Calendar, CalendarDate, ItineraryCell, + ItineraryTrip, Locations, LocationGroups, Routes, Transfer, Stop) + GTFS_FILENAMES = { Agency._schema.name: Agency, BookingRule._schema.name: BookingRule, Calendar._schema.name: Calendar, CalendarDate._schema.name: CalendarDate, + ItineraryCell._schema.name: ItineraryCell, + ItineraryTrip._schema.name: ItineraryTrip, Locations._schema.name: Locations, LocationGroups._schema.name: LocationGroups, Routes._schema.name: Routes, diff --git a/gtfs_loader/types.py b/gtfs_loader/types.py index c68cb91..cc9b3a8 100644 --- a/gtfs_loader/types.py +++ b/gtfs_loader/types.py @@ -1,7 +1,8 @@ import functools import enum +import json from datetime import datetime -from typing import Any, List +from typing import Any from .schema_classes import Schema, SchemaCollection @@ -12,7 +13,6 @@ class GTFSTime(int): # forward an arbitrary number of days using this notation, but we block it # as it just creates confusion. MAX_HOUR_REPRESENTATION = 36 - def __new__(cls, time_str): if isinstance(time_str, int): return super().__new__(cls, time_str) @@ -43,7 +43,6 @@ def __add__(self, other): def __sub__(self, other): return GTFSTime(super().__sub__(other)) - class GTFSDate(datetime): def __new__(cls, *args, **kwargs): @@ -134,6 +133,9 @@ def clone(self, **overrides): @functools.singledispatch def serialize(value: Any): + if isinstance(value, list): + # Remove spaces after commas for compactness + return json.dumps(value,separators=(',', ':')) return str(value) diff --git a/setup.py b/setup.py index 3a88cdc..d9d0209 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ from setuptools import setup, find_packages setup(name='py-gtfs-loader', - version='0.1.15', + version='0.2.0', description='Load GTFS', url='https://github.com/TransitApp/py-gtfs-loader', - author='Nicholas Paun, Jonathan Milot', + author='Nicholas Paun, Jonathan Milot, Jeremy Steele', license='License :: OSI Approved :: MIT License', packages=find_packages(), zip_safe=False, diff --git a/tests/test_itineraries_unmodified/expected_default/itinerary_cells.txt b/tests/test_itineraries_unmodified/expected_default/itinerary_cells.txt new file mode 100644 index 0000000..d472870 --- /dev/null +++ b/tests/test_itineraries_unmodified/expected_default/itinerary_cells.txt @@ -0,0 +1,6 @@ +itinerary_index,stop_sequence,stop_id,pickup_type,drop_off_type,mean_duration_factor,mean_duration_offset,safe_duration_factor,safe_duration_offset +1,0,junction,0,0,,,, +1,1,slocan-park,0,0,,,, +1,2,slocan-city,0,0,,,, +1,3,nelson-tc,0,0,,,, +1,4,junction,0,0,,,, diff --git a/tests/test_itineraries_unmodified/expected_default/trips.txt b/tests/test_itineraries_unmodified/expected_default/trips.txt new file mode 100644 index 0000000..125273b --- /dev/null +++ b/tests/test_itineraries_unmodified/expected_default/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,service_id,block_id,itinerary_index,departure_times,arrival_times,start_pickup_drop_off_windows,end_pickup_drop_off_windows +red,trip_1,mon-tues-wed-thurs,1,1,"[79200,79260,79320,79380,79440]","[79200,79260,79320,79380,79440]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" +red,trip_2,mon-tues-wed-thurs,1,1,"[79440,79500,79560,79620,79680]","[79440,79500,79560,79620,79680]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" +red,trip_3,mon-tues-wed-thurs,1,1,"[79680,79740,79800,79860,79920]","[79680,79740,79800,79860,79920]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" diff --git a/tests/test_itineraries_unmodified/input/itinerary_cells.txt b/tests/test_itineraries_unmodified/input/itinerary_cells.txt new file mode 100644 index 0000000..1347adb --- /dev/null +++ b/tests/test_itineraries_unmodified/input/itinerary_cells.txt @@ -0,0 +1,6 @@ +itinerary_index,stop_sequence,stop_id +1,0,junction +1,1,slocan-park +1,2,slocan-city +1,3,nelson-tc +1,4,junction diff --git a/tests/test_itineraries_unmodified/input/trips.txt b/tests/test_itineraries_unmodified/input/trips.txt new file mode 100644 index 0000000..125273b --- /dev/null +++ b/tests/test_itineraries_unmodified/input/trips.txt @@ -0,0 +1,4 @@ +route_id,trip_id,service_id,block_id,itinerary_index,departure_times,arrival_times,start_pickup_drop_off_windows,end_pickup_drop_off_windows +red,trip_1,mon-tues-wed-thurs,1,1,"[79200,79260,79320,79380,79440]","[79200,79260,79320,79380,79440]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" +red,trip_2,mon-tues-wed-thurs,1,1,"[79440,79500,79560,79620,79680]","[79440,79500,79560,79620,79680]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" +red,trip_3,mon-tues-wed-thurs,1,1,"[79680,79740,79800,79860,79920]","[79680,79740,79800,79860,79920]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]" diff --git a/tests/test_runner.py b/tests/test_runner.py index f5f3f92..1efb6cd 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -14,8 +14,9 @@ def test_default(feed_dir): def do_test(feed_dir): + itineraries = 'itineraries' in feed_dir.name work_dir = test_support.create_test_data(feed_dir) - gtfs = gtfs_loader.load(work_dir, verbose=False) - gtfs_loader.patch(gtfs, work_dir, work_dir, verbose=False) + gtfs = gtfs_loader.load(work_dir, verbose=False, itineraries=itineraries) + gtfs_loader.patch(gtfs, work_dir, work_dir, verbose=False, itineraries=itineraries) test_support.check_expected_output(feed_dir, work_dir)