In [1]:
import ilgtfs

In [2]:
g = ilgtfs.GTFS(r'data/gtfs_2016_05_25/israel-public-transportation.zip')

In [3]:
g.load_agencies()
g.load_routes()
g.load_shapes()
g.load_services()
g.load_trips()
g.load_stops()

Loading agencies
26 agencies loaded
Loading routes
7963 routes loaded
Loading shapes
7175 shapes loaded
Loading services
35110 services loaded
Loading trips
435123 trips loaded
Loading stops
27258 stops loaded


In [10]:
import csv

In [14]:
new_route_ids = list(g.routes.keys())[:10]
new_route_ids_set = set(new_route_ids)

In [16]:
with open(r'sample/routes.txt', 'w', encoding='utf8') as f:
    fields = ["route_id","agency_id","route_short_name","route_long_name","route_desc","route_type","route_color"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for route_id in new_route_ids:
        route = g.routes[route_id]
        row =  {
            "route_id": route_id,
            "agency_id": route.agency.agency_id,
            "route_short_name": route.line_number,
            "route_long_name": route.route_long_name,
            "route_desc": route.route_desc,
            "route_type": route.route_type,
            "route_color": ""
        }
        writer.writerow(row)

In [18]:
new_trips = {trip_id: trip for (trip_id, trip) in g.trips.items() if trip.route.route_id in new_route_ids_set}
print("%d trips left" % len(new_trips))
with open(r'sample/trips.txt', 'w', encoding='utf8') as f:
    fields = ["route_id","service_id","trip_id","direction_id","shape_id"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for trip_id, trip in new_trips.items():
        row =  {
            "route_id": trip.route.route_id,
            "service_id": trip.service.service_id,
            "trip_id": trip_id,
            "direction_id": trip.direction_id,
            "shape_id": trip.shape_id
        }
        writer.writerow(row)

909 trips left


In [24]:
new_shape_ids = set(trip.shape_id for trip in new_trips.values())
print("There are %d shapes left"% len(new_shape_ids))
with open(r'sample/shapes.txt', 'w', encoding='utf8') as f:
    fields = ["shape_id","shape_pt_lat","shape_pt_lon","shape_pt_sequence"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for shape_id in (shape_id for shape_id in g.shapes if shape_id in new_shape_ids):
        shape = g.shapes[shape_id]
        for pt_sequence, point in shape.coordinates.items():
            row =  {
                "shape_id": shape_id,
                "shape_pt_lat": point[0],
                "shape_pt_lon": point[1],
                "shape_pt_sequence": pt_sequence
            }
            writer.writerow(row)

There are 10 shapes left


In [27]:
new_services = set(trip.service for trip in new_trips.values())
print("There are %d services left"% len(new_services))
with open(r'sample/calendar.txt', 'w', encoding='utf8') as f:
    fields = ["service_id","sunday","monday","tuesday","wednesday","thursday","friday","saturday","start_date","end_date"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for service in new_services:
        row =  {
            "service_id": service.service_id,
            "sunday": int(6 in service.days),
            "monday": int(0 in service.days),
            "tuesday": int(1 in service.days),
            "wednesday": int(2 in service.days),
            "thursday": int(3 in service.days),
            "friday": int(4 in service.days),
            "saturday": int(5 in service.days) ,
            "start_date": service.start_date.strftime('%Y%m%d'),
            "end_date": service.end_date.strftime('%Y%m%d')
        }
        writer.writerow(row)

There are 43 services left


In [30]:
# agencies
new_agencies = set(g.routes[route_id].agency for route_id in new_route_ids)
print("There are %d agencies left" % len(new_agencies))
with open(r'sample/agency.txt', 'w', encoding='utf8') as f:
    fields = ["agency_id","agency_name","agency_url","agency_timezone","agency_lang","agency_phone","agency_fare_url"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for agency in new_agencies:
        row =  {
            "agency_id": agency.agency_id,
            "agency_name": agency.agency_name
        }
        writer.writerow(row)

There are 3 agencies left


In [39]:
import zipfile, io
# stop_times
# trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type
stop_ids_left = set()
stop_times_written = 0
with zipfile.ZipFile(g.filename) as z, open(r'sample/stop_times.txt', 'w', encoding='utf8') as outf:
    with io.TextIOWrapper(z.open('stop_times.txt'), 'utf8') as f:
        outf.write(next(f))   # header
        for line in f:
            fields = line.strip().split(',')
            trip_id = fields[0]
            if trip_id in new_trips:
                outf.write(line)
                stop_times_written += 1
            stop_ids_left.add(int(fields[3]))
                
print("%d lines written to stop_times.txt" % stop_times_written)
print("There are %d stops left" % len(stop_ids_left))


40344 lines written to stop_times.txt
There are 27217 stops left


In [40]:
# stops
# 
with open(r'sample/stops.txt', 'w', encoding='utf8') as f:
    fields = ["stop_id","stop_code","stop_name","stop_desc","stop_lat","stop_lon","location_type","parent_station","zone_id"]
    writer = csv.DictWriter(f, fieldnames=fields, lineterminator='\n')
    writer.writeheader()
    for stop_id in stop_ids_left:
        stop = g.stops[stop_id]
        row =  {
            "stop_id": stop_id,
            "stop_code": stop.stop_code,
            "stop_name": stop.stop_name,
            "stop_desc": stop.stop_desc,
            "stop_lat": stop.stop_lat,
            "stop_lon": stop.stop_lon,
            "location_type": stop.location_type,
            "parent_station": stop.parent_station,
            "zone_id": stop.zone_id
        }
        writer.writerow(row)