# GTFS

## Routes from Shapes

Import geopandas, pandas, matplotlib.pyplot, contextily and requests, along with ZipFile from zipfile, StringIO and BytesIO from io, and the geometry module from shapely (which provides Point and LineString).

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import contextily as ctx
from zipfile import ZipFile
from io import StringIO, BytesIO
from shapely import geometry
import requests

Define a function for reading GTFS zip files into pandas DataFrames. The DataFrames are stored in a Python dictionary keyed by file name (without the `.txt` extension).

In [None]:
def zipToDataframes(zip):
    """Read every ``.txt`` table of a GTFS archive into a pandas DataFrame.

    Parameters
    ----------
    zip : zipfile.ZipFile
        An open GTFS archive. The parameter name shadows the builtin ``zip``
        inside this function; it is kept so existing callers keep working.

    Returns
    -------
    dict
        Maps each table name (the member's file name up to ``.txt``, e.g.
        ``'routes'``) to the DataFrame parsed from that member. Directory
        entries and members that are not ``.txt`` files are skipped.
    """
    dataframes = {}
    for member in zip.infolist():
        file_name = member.filename
        # Archives may carry directory entries or extras (README, OS metadata);
        # parsing those as CSV either raises or yields a meaningless table.
        if member.is_dir() or not file_name.endswith('.txt'):
            continue
        with zip.open(file_name) as f:
            # utf-8-sig strips a leading byte-order mark so it can never end
            # up glued to the first column name.
            text = f.read().decode('utf-8-sig')
        name = file_name.split('.txt')[0]
        dataframes[name] = pd.read_csv(StringIO(text), low_memory=False)
    return dataframes

Run the function and list the resulting dataframes. The number of dataframes will vary between different gtfs sources.

In [None]:
# Download the MTA subway GTFS feed and load every table it contains.
# To work from a local copy instead, pass its path to ZipFile.
url = 'http://web.mta.info/developers/data/nyct/subway/google_transit.zip'
r = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of with a confusing BadZipFile below.
r.raise_for_status()
# The archive gets its own name so the builtin zip() stays usable in later
# cells, and the context manager closes it once the tables are loaded.
with ZipFile(BytesIO(r.content)) as gtfs_zip:
    gtfs_dataframes = zipToDataframes(gtfs_zip)
list(gtfs_dataframes)

Let's take a look at the agency dataframe

In [None]:
# Pull the agency table out of the loaded feed and preview its first rows.
agency = gtfs_dataframes['agency']
agency.head(n=5)

Here's the routes dataframe

In [None]:
# Pull the routes table out of the loaded feed and preview its first rows.
routes = gtfs_dataframes['routes']
routes.head(n=5)

We can join the agency and routes dataframes on the agency_id column

In [None]:
# Attach each agency's routes: left join from agency, matching its agency_id
# column against routes re-indexed by agency_id.
agency_routes = agency.join(routes.set_index('agency_id'), on='agency_id', how='left')
agency_routes.head()

In [None]:
# Pull the trips table out of the loaded feed and preview its first rows.
trips = gtfs_dataframes['trips']
trips.head(n=5)

In [None]:
# Attach every trip to its route: left join from the agency/route table,
# matching its route_id column against trips re-indexed by route_id.
routes_trips = agency_routes.join(trips.set_index('route_id'), on='route_id', how='left')
routes_trips.head()

In [None]:
# Pull the shapes table (one row per shape point) out of the loaded feed.
shapes = gtfs_dataframes['shapes']
shapes.head(n=5)

In [None]:
# Attach route and agency attributes to every shape point.
# A shape is typically shared by many trips, so joining the full trip table
# would repeat every shape point once per trip and inflate the result
# enormously. Only one representative row per (route, shape) pair is joined;
# its trip-level columns (trip_id, service_id, ...) describe just the first
# trip found for that shape, while route and agency columns are unaffected.
shapes_trips = shapes.join(
    routes_trips
    .drop_duplicates(subset=['route_id', 'shape_id'], keep='first')
    .set_index('shape_id'),
    on='shape_id'
)
shapes_trips.head()

In [None]:
# Distinct route identifiers present in the routes table.
list(routes['route_id'].unique())

In [None]:
# Build one LineString per shape, route by route, and collect them into a
# GeoDataFrame projected to Web Mercator so it lines up with basemap tiles.

# Columns that describe a single trip or a single shape point; they carry no
# meaning once each shape is collapsed to one row. Several are optional in
# GTFS, so any that are absent are ignored when dropping.
per_point_columns = [
    'service_id',
    'trip_id',
    'trip_headsign',
    'direction_id',
    'block_id',
    'shape_pt_lat',
    'shape_pt_lon',
    'shape_pt_sequence',
    'shape_dist_traveled'
]

route_list = []

for route_id in routes.route_id.unique():
    route_shapes = shapes_trips.loc[shapes_trips.route_id == route_id]
    if route_shapes.empty:
        continue

    # Keep one row per shape point (the join with trips can repeat each point)
    # and put the points in travel order. De-duplicating on the sequence
    # number rather than on coordinates preserves points a shape legitimately
    # visits twice, and sorting avoids relying on the file's row order.
    route_shapes = (
        route_shapes
        .drop_duplicates(subset=['shape_id', 'shape_pt_sequence'], keep='first')
        .sort_values(['shape_id', 'shape_pt_sequence'])
        .reset_index(drop=True)
    )

    # shape_id -> LineString built from (lon, lat) pairs, i.e. (x, y) order.
    lines = {}
    for shape_id, points in route_shapes.groupby('shape_id', sort=False):
        if len(points) < 2:
            continue  # a LineString needs at least two points
        lines[shape_id] = geometry.LineString(
            points[['shape_pt_lon', 'shape_pt_lat']].to_numpy().tolist()
        )
    if not lines:
        continue

    # One row per shape, carrying only shape/route/agency level attributes.
    unique_route_shapes = (
        route_shapes
        .drop_duplicates(subset=['shape_id'], keep='first')
        .drop(columns=per_point_columns, errors='ignore')
    )
    unique_route_shapes = (
        unique_route_shapes
        .loc[unique_route_shapes.shape_id.isin(list(lines))]
        .assign(geometry=lambda frame: frame.shape_id.map(lines))
        .reset_index(drop=True)
    )

    route_list.append(unique_route_shapes)

if not route_list:
    raise ValueError('No route in the feed has any shape with at least two points')

# GTFS coordinates are WGS 84, which is EPSG:4326 (4236 is a different,
# unrelated datum). Reproject to EPSG:3857 for the web-tile basemap.
network = gpd.GeoDataFrame(
    pd.concat(route_list, ignore_index=True),
    geometry='geometry',
    crs='EPSG:4326'
).to_crs(epsg=3857)
network.head()

In [None]:
# Fall back to black for routes without a colour.
# The column is assigned explicitly: fillna(inplace=True) on an
# attribute-accessed Series may act on a copy and leave `network` unchanged,
# and `network.route_color = ...` on a missing column sets a plain attribute
# instead of creating the column.
if 'route_color' in network.columns:
    network['route_color'] = network['route_color'].fillna('000000')
else:
    network['route_color'] = '000000'

ax = network.plot(color='#' + network['route_color'], figsize=(10, 10), alpha=0.5)
ctx.add_basemap(ax)
plt.show()

The base map can be changed using the tile sets provided in `contextily.providers`.

In [None]:
# Names of the tile providers bundled with contextily.
list(ctx.providers)

In [None]:
# Names of the tile styles offered by the CartoDB provider.
list(ctx.providers.CartoDB)

In [None]:
# Same map as before, drawn over the light CartoDB Positron tiles.
positron_tiles = ctx.providers.CartoDB.Positron
ax = network.plot(
    color='#' + network.route_color,
    figsize=(10, 10),
    alpha=0.5,
)
ctx.add_basemap(ax, source=positron_tiles)
plt.show()