Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* @npaun @JMilot1
* @npaun @JMilot1 @jsteelz
12 changes: 8 additions & 4 deletions gtfs_loader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ def get_files(files):
return schema.FileCollection(*(schema.GTFS_FILENAMES[f] for f in files)).values()


def load(gtfs_dir, sorted_read=False, files=None, verbose=True):
def load(gtfs_dir, sorted_read=False, files=None, verbose=True, itineraries=False):
gtfs_dir = Path(gtfs_dir)
gtfs = types.Entity()

files_to_load = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA.values()
files_to_load = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA_ITINERARIES.values() if itineraries else schema.GTFS_SUBSET_SCHEMA.values()

for file_schema in files_to_load:
if verbose:
Expand Down Expand Up @@ -163,6 +163,10 @@ def convert(config, value):
if not config.required and value == '':
return config.default

# Lists are stringified as JSON in csv.
if typing.get_origin(config.type) is list:
return list(json.loads(value))

config_type = get_inner_type(config.type)
if issubclass(config_type, enum.IntEnum):
return config_type(int(value))
Expand Down Expand Up @@ -217,7 +221,7 @@ def sorted_entities(file_schema, entities):
return sorted(entities.items(), key=lambda kv: kv[0])


def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verbose=True):
def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verbose=True, itineraries=False):
gtfs_in_dir = Path(gtfs_in_dir)
gtfs_out_dir = Path(gtfs_out_dir)
gtfs_out_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -229,7 +233,7 @@ def patch(gtfs, gtfs_in_dir, gtfs_out_dir, files=None, sorted_output=False, verb
except shutil.SameFileError:
pass # No need to copy if we're working in-place

files_to_patch = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA.values()
files_to_patch = get_files(files) if files else schema.GTFS_SUBSET_SCHEMA_ITINERARIES.values() if itineraries else schema.GTFS_SUBSET_SCHEMA.values()

for file_schema in files_to_patch:
if verbose:
Expand Down
89 changes: 89 additions & 0 deletions gtfs_loader/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,26 @@ class StopTime(Entity):
def stop(self):
return self._gtfs.stops[self.stop_id]

class ItineraryCell(Entity):
    """One row of the ``itinerary_cells`` CSV file.

    The file schema is keyed on ``itinerary_index`` and declares
    ``stop_sequence`` as its ``group_id``. Only ``stop_id`` and
    ``stop_sequence`` lack a default; every other column falls back to the
    default declared below.
    """

    _schema = File(
        name='itinerary_cells',
        id='itinerary_index',
        group_id='stop_sequence',
        fileType=FileType.CSV,
        required=True,
    )

    # Columns without a default value.
    stop_id: str
    stop_sequence: int

    # Boarding / alighting behaviour; both default to regularly scheduled.
    pickup_type: PickupType = PickupType.REGULARLY_SCHEDULED
    drop_off_type: DropOffType = DropOffType.REGULARLY_SCHEDULED

    # Optional duration coefficients; None when the column is left empty.
    mean_duration_factor: Optional[float] = None
    mean_duration_offset: Optional[float] = None
    safe_duration_factor: Optional[float] = None
    safe_duration_offset: Optional[float] = None

    @property
    def stop(self):
        """Return the stop entity that this cell's ``stop_id`` refers to."""
        stops_by_id = self._gtfs.stops
        return stops_by_id[self.stop_id]


class Transfer(Entity):
_schema = File(id='from_trip_id',
Expand Down Expand Up @@ -325,16 +345,85 @@ def last_stop(self):
@cached_property
def route(self):
return self._gtfs.routes[self.route_id]

class ItineraryTrip(Entity):
    """One row of the ``trips`` CSV file in the itinerary-based layout.

    Instead of separate stop-time rows, each trip references an itinerary
    (``itinerary_index``) and carries its per-stop times as lists. The
    accessors below resolve the itinerary's cells, stops and route through
    the owning ``_gtfs`` collection.
    """

    _schema = File(
        id='trip_id',
        name='trips',
        fileType=FileType.CSV,
        required=True,
    )

    trip_id: str
    service_id: str
    block_id: str = ''
    route_id: str
    itinerary_index: str

    # List-valued columns, one entry per itinerary cell.
    departure_times: List[int]
    arrival_times: List[int]
    start_pickup_drop_off_windows: List[int]
    end_pickup_drop_off_windows: List[int]

    @property
    def first_itinerary_cell(self):
        """Return the first cell of the itinerary this trip follows."""
        cells = self._gtfs.itinerary_cells[self.itinerary_index]
        return cells[0]

    @property
    def last_itinerary_cell(self):
        """Return the last cell of the itinerary this trip follows."""
        cells = self._gtfs.itinerary_cells[self.itinerary_index]
        return cells[-1]

    @property
    def stop_shape(self):
        """Return the stop locations along the itinerary as a tuple.

        Returns None as soon as any stop on the itinerary has no location.
        """
        cells = self._gtfs.itinerary_cells[self.itinerary_index]
        points = tuple(
            self._gtfs.stops[cell.stop_id].location for cell in cells
        )
        return None if None in points else points

    @cached_property
    def shift_days(self):
        """Return 1 when the first departure is at or past DAY_SEC, else 0."""
        if self.departure_times[0] >= DAY_SEC:
            return 1
        return 0

    @cached_property
    def first_departure(self):
        """Return the first departure time with the day shift subtracted."""
        raw_departure = self.departure_times[0]
        return raw_departure - DAY_SEC * self.shift_days

    @cached_property
    def last_arrival(self):
        """Return the last arrival time with the day shift subtracted."""
        raw_arrival = self.arrival_times[-1]
        return raw_arrival - DAY_SEC * self.shift_days

    @cached_property
    def first_point(self):
        """Return the location of the first stop."""
        origin = self.first_stop
        return origin.location

    @cached_property
    def last_point(self):
        """Return the location of the last stop."""
        destination = self.last_stop
        return destination.location

    @cached_property
    def first_stop(self):
        """Return the stop entity referenced by the first itinerary cell."""
        stops_by_id = self._gtfs.stops
        return stops_by_id[self.first_itinerary_cell.stop_id]

    @cached_property
    def last_stop(self):
        """Return the stop entity referenced by the last itinerary cell."""
        stops_by_id = self._gtfs.stops
        return stops_by_id[self.last_itinerary_cell.stop_id]

    @cached_property
    def route(self):
        """Return the route entity referenced by ``route_id``."""
        routes_by_id = self._gtfs.routes
        return routes_by_id[self.route_id]


# Default file set: trips are described by the Trip and StopTime entities.
GTFS_SUBSET_SCHEMA = FileCollection(
    Agency,
    BookingRule,
    Calendar,
    CalendarDate,
    Locations,
    LocationGroups,
    Routes,
    Transfer,
    Trip,
    Stop,
    StopTime,
)

# Itinerary file set: ItineraryCell and ItineraryTrip take the place of
# StopTime and Trip; the remaining entities are the same as above.
GTFS_SUBSET_SCHEMA_ITINERARIES = FileCollection(
    Agency,
    BookingRule,
    Calendar,
    CalendarDate,
    ItineraryCell,
    ItineraryTrip,
    Locations,
    LocationGroups,
    Routes,
    Transfer,
    Stop,
)

GTFS_FILENAMES = {
Agency._schema.name: Agency,
BookingRule._schema.name: BookingRule,
Calendar._schema.name: Calendar,
CalendarDate._schema.name: CalendarDate,
ItineraryCell._schema.name: ItineraryCell,
ItineraryTrip._schema.name: ItineraryTrip,
Locations._schema.name: Locations,
LocationGroups._schema.name: LocationGroups,
Routes._schema.name: Routes,
Expand Down
8 changes: 5 additions & 3 deletions gtfs_loader/types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import functools
import enum
import json
from datetime import datetime
from typing import Any, List
from typing import Any

from .schema_classes import Schema, SchemaCollection

Expand All @@ -12,7 +13,6 @@ class GTFSTime(int):
# forward an arbitrary number of days using this notation, but we block it
# as it just creates confusion.
MAX_HOUR_REPRESENTATION = 36

def __new__(cls, time_str):
if isinstance(time_str, int):
return super().__new__(cls, time_str)
Expand Down Expand Up @@ -43,7 +43,6 @@ def __add__(self, other):
def __sub__(self, other):
return GTFSTime(super().__sub__(other))


class GTFSDate(datetime):

def __new__(cls, *args, **kwargs):
Expand Down Expand Up @@ -134,6 +133,9 @@ def clone(self, **overrides):

@functools.singledispatch
def serialize(value: Any):
    """Return the text form of *value* (fallback for unregistered types).

    Lists are written as compact JSON; any other value is passed through
    ``str``.
    """
    if not isinstance(value, list):
        return str(value)

    # Compact separators: no space after commas or colons.
    return json.dumps(value, separators=(',', ':'))


Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from setuptools import setup, find_packages

setup(name='py-gtfs-loader',
version='0.1.15',
version='0.2.0',
description='Load GTFS',
url='https://github.com/TransitApp/py-gtfs-loader',
author='Nicholas Paun, Jonathan Milot',
author='Nicholas Paun, Jonathan Milot, Jeremy Steele',
license='License :: OSI Approved :: MIT License',
packages=find_packages(),
zip_safe=False,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
itinerary_index,stop_sequence,stop_id,pickup_type,drop_off_type,mean_duration_factor,mean_duration_offset,safe_duration_factor,safe_duration_offset
1,0,junction,0,0,,,,
1,1,slocan-park,0,0,,,,
1,2,slocan-city,0,0,,,,
1,3,nelson-tc,0,0,,,,
1,4,junction,0,0,,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
route_id,trip_id,service_id,block_id,itinerary_index,departure_times,arrival_times,start_pickup_drop_off_windows,end_pickup_drop_off_windows
red,trip_1,mon-tues-wed-thurs,1,1,"[79200,79260,79320,79380,79440]","[79200,79260,79320,79380,79440]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
red,trip_2,mon-tues-wed-thurs,1,1,"[79440,79500,79560,79620,79680]","[79440,79500,79560,79620,79680]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
red,trip_3,mon-tues-wed-thurs,1,1,"[79680,79740,79800,79860,79920]","[79680,79740,79800,79860,79920]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
6 changes: 6 additions & 0 deletions tests/test_itineraries_unmodified/input/itinerary_cells.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
itinerary_index,stop_sequence,stop_id
1,0,junction
1,1,slocan-park
1,2,slocan-city
1,3,nelson-tc
1,4,junction
4 changes: 4 additions & 0 deletions tests/test_itineraries_unmodified/input/trips.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
route_id,trip_id,service_id,block_id,itinerary_index,departure_times,arrival_times,start_pickup_drop_off_windows,end_pickup_drop_off_windows
red,trip_1,mon-tues-wed-thurs,1,1,"[79200,79260,79320,79380,79440]","[79200,79260,79320,79380,79440]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
red,trip_2,mon-tues-wed-thurs,1,1,"[79440,79500,79560,79620,79680]","[79440,79500,79560,79620,79680]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
red,trip_3,mon-tues-wed-thurs,1,1,"[79680,79740,79800,79860,79920]","[79680,79740,79800,79860,79920]","[-1,-1,-1,-1,-1]","[-1,-1,-1,-1,-1]"
5 changes: 3 additions & 2 deletions tests/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ def test_default(feed_dir):


def do_test(feed_dir):
itineraries = 'itineraries' in feed_dir.name
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems very strange to me, but after a read-through of test_support.py I see it does follow the existing code in terms of meaningful directory names. The approach established there, of a single do_test function for all tests, doesn't scale well, but 🤷

work_dir = test_support.create_test_data(feed_dir)

gtfs = gtfs_loader.load(work_dir, verbose=False)
gtfs_loader.patch(gtfs, work_dir, work_dir, verbose=False)
gtfs = gtfs_loader.load(work_dir, verbose=False, itineraries=itineraries)
gtfs_loader.patch(gtfs, work_dir, work_dir, verbose=False, itineraries=itineraries)
test_support.check_expected_output(feed_dir, work_dir)