### Create pandas DataFrames from the GTFS data.

In [None]:
import zipfile

import pandas as pd

# Identifier columns are read as strings so pandas does not infer a numeric
# dtype for them.
dtype = {c: str for c in {'agency_id', 'service_id', 'fare_id', 'route_id',
                          'trip_id', 'shape_id', 'stop_id', 'parent_station',
                          'from_stop_id', 'to_stop_id'}}

# Map each archive member (its name with '.txt' removed) to a DataFrame.
# Both the archive and each member handle are closed once they are read.
gtfs = {}
with zipfile.ZipFile('google_transit.zip') as zip_obj:
    for filename in zip_obj.namelist():
        filelabel = filename.replace('.txt', '')
        with zip_obj.open(filename) as member:
            # 'utf-8-sig' strips a leading byte-order mark if one is present.
            gtfs[filelabel] = pd.read_csv(member, encoding='utf-8-sig',
                                          dtype=dtype)

for n, df in gtfs.items():
    print(n)
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print().
    df.info()
    print('----')

### Transform the stops' longitude and latitude into Python objects we can use.

In [None]:
import geopandas as gpd
from shapely.geometry import Point

def _strip_city_prefix(name):
    """Remove a leading 'karlsruhe' (any casing) from a stop name."""
    if name.lower().startswith('karlsruhe'):
        # 9 == len('karlsruhe'); strip the whitespace left behind.
        return name[9:].strip()
    return name


def _row_to_point(row):
    """Build a shapely Point (x=longitude, y=latitude) from a stops row."""
    return Point(row['stop_lon'], row['stop_lat'])


raw_stops = gtfs['stops']
stops_with_geometry = raw_stops.assign(
    stop_name=raw_stops['stop_name'].map(_strip_city_prefix),
    geometry=raw_stops.apply(_row_to_point, axis=1),
)
# The raw coordinate columns are redundant once the geometry column exists.
gtfs['stops'] = gpd.GeoDataFrame(
    stops_with_geometry.drop(['stop_lat', 'stop_lon'], axis=1)
)
gtfs['stops'].head()

### Transform the departure and arrival times into values we can use

GTFS allows times past 24:00:00 for trips that run beyond midnight of the service day. To keep things simple, we represent each time as an integer of the form HHMM.

In [None]:
def time_to_int(time):
    """Convert a GTFS time string to an integer of the form HHMM.

    The hour and minute fields are parsed separately, so both 'HH:MM:SS'
    and the single-digit-hour form 'H:MM:SS' give the right value.
    Seconds are ignored. Hours of 24 and above are kept as they are.

    Args:
        time: A colon-separated time string such as '18:05:00'.

    Returns:
        ``hours * 100 + minutes`` as an int, e.g. 1805 for '18:05:00'.

    Raises:
        ValueError: If the string has no hour and minute fields or they
            are not integers.
    """
    hours, minutes = time.strip().split(':')[:2]
    return int(hours) * 100 + int(minutes)

# Add integer versions of both time columns next to the original strings.
stop_times_raw = gtfs['stop_times']
arrivals_as_int = stop_times_raw['arrival_time'].map(time_to_int)
departures_as_int = stop_times_raw['departure_time'].map(time_to_int)
gtfs['stop_times'] = stop_times_raw.assign(
    arrival_time_int=arrivals_as_int,
    departure_time_int=departures_as_int,
)

gtfs['stop_times'].head()

### Create the projections

In [None]:
import pyproj

# EPSG codes of the two coordinate systems used in this notebook.
WGS84_CODE = 'EPSG:4326'        # WGS84 longitude/latitude
ETRS89_LAEA_CODE = 'EPSG:3035'  # ETRS89 (LAEA Europe)

base_proj = pyproj.Proj(init=WGS84_CODE)
calc_proj = pyproj.Proj(init=ETRS89_LAEA_CODE)

### Reproject stops to ETRS89

In [None]:
# Declare the CRS the stops are currently in, then reproject them into the
# CRS used for calculations.
stops_in_base_crs = gtfs['stops']
stops_in_base_crs.crs = base_proj.srs
gtfs['stops'] = stops_in_base_crs.to_crs(calc_proj.srs)

In [None]:
from functools import partial

from shapely.ops import transform

def transform_srs(geometry, *, from_proj=base_proj, to_proj=calc_proj):
    """Reproject a shapely geometry from one projection to another.

    Args:
        geometry: The shapely geometry to reproject.
        from_proj: Projection the geometry is currently in.
        to_proj: Projection to convert the geometry to.

    Returns:
        The reprojected geometry, or the input geometry itself when both
        projections compare equal.
    """
    if from_proj == to_proj:
        return geometry
    # shapely hands the coordinates to the callable; the two projections
    # are pre-bound as its leading arguments.
    return transform(partial(pyproj.transform, from_proj, to_proj), geometry)

### Let's center the map

In [None]:
# Our position as (x=longitude, y=latitude).
here_lon, here_lat = 8.383590, 49.001763
here_wgs84 = Point(here_lon, here_lat)

# NOTE(review): M is not defined in this section; presumably it is the
# notebook's map object and 13 is a zoom level - confirm.
M.set_center(here_wgs84.x, here_wgs84.y, 13)

### From shapely geometries to points on the map.

In [None]:
def geometry_to_map(geometry, *, from_proj=calc_proj, **kwargs):
    """Add a shapely Point or Polygon to the map as an annotation.

    The geometry is first reprojected from ``from_proj`` to ``base_proj``.

    Args:
        geometry: Shapely geometry to display.
        from_proj: Projection the geometry is currently in.
        **kwargs: Extra annotation properties, passed to
            ``M.add_annotation`` as a single dict. For polygons with
            interior rings a ``'hole'`` entry is added unless the caller
            supplied one.

    Raises:
        ValueError: If the geometry is neither a Point nor a Polygon.
    """
    geometry = transform_srs(geometry, to_proj=base_proj,
                             from_proj=from_proj)
    geo_interface = geometry.__geo_interface__
    type_ = geo_interface['type'].lower()
    coords = geo_interface['coordinates']
    if type_ == 'polygon':
        # Polygon coordinates are (exterior, interior_1, interior_2, ...),
        # so every ring after the first is a hole.
        # NOTE(review): the exact shape M expects for 'hole' is not visible
        # here - confirm against the map API.
        if len(coords) > 1:
            kwargs.setdefault('hole', coords[1:])
        coords = coords[0]
    elif type_ != 'point':
        raise ValueError('Can only show Points and Polygons on map.')
    M.add_annotation(type_, coords, kwargs)

### Stops on the map

In [None]:
stops = gtfs['stops']

# Accepted values for the ``types`` argument of stops_to_map().
stop_types = {'all', 'parents', 'children'}


def stops_to_map(stops, *, types='parents'):
    """Draw stops on the map, optionally limited to one kind of stop.

    Args:
        stops: Stops table with 'geometry' and 'stop_name' columns (plus
            'location_type' / 'parent_station' when filtering).
        types: 'parents' keeps rows with a non-null 'location_type',
            'children' keeps rows with a non-null 'parent_station', and
            'all' keeps every row.

    Raises:
        ValueError: If ``types`` is not one of ``stop_types``.
    """
    if types not in stop_types:
        allowed = ', '.join(stop_types)
        raise ValueError('"{}" must be one of "{}"'.format(types, allowed))

    # Column that must be filled in for the requested kind; 'all' has none.
    required_column = {
        'parents': 'location_type',
        'children': 'parent_station',
    }.get(types)
    if required_column is not None:
        stops = stops.dropna(subset=[required_column])

    for _, stop in stops.iterrows():
        geometry_to_map(stop['geometry'], name=stop['stop_name'])


# Only the last 300 rows of the stops table are considered here.
stops_to_map(stops.tail(300))

### Us on the map

In [None]:
# Start from a clean map and zoom in on our position.
annotation_layer = M.layers.annotation
annotation_layer.clear_annotations()

M.set_center(here_wgs84.x, here_wgs84.y, 16)

# Our position in the calculation projection (transform_srs defaults).
here = transform_srs(here_wgs84)

geometry_to_map(here, name='Here')

### Which stations are walkable (max 5 minutes walking)

In [None]:
walking_speed = 1.39  # metres per second, about 5 km/h
walking_time = 5 * 60  # seconds (5 minutes)

# Buffer around our position: everything we can reach on foot in time.
walking_radius = walking_speed * walking_time
walking_distance = here.buffer(walking_radius)
geometry_to_map(walking_distance, from_proj=calc_proj, name='wlk_dist')

# Keep only the stops whose geometry intersects that buffer.
is_walkable = stops.intersects(walking_distance)
walkable_stops = stops[is_walkable]
stops_to_map(walkable_stops)

### Stop times

We assume it always takes us 5 minutes to get to the station and that we will wait at most 5 minutes.

In [None]:
# Stop times at the walkable stops only.
stop_times = gtfs['stop_times'].merge(walkable_stops, on='stop_id')

# Departures between 18:05 and 18:10, both ends included.
departs_in_window = stop_times['departure_time_int'].between(1805, 1810)
our_stop_times = stop_times[departs_in_window].rename(
    columns={'stop_sequence': 'current_sequence'}
)
print(len(our_stop_times.index))
our_stop_times.head()

### Only trips on Thursday

In [None]:
# Services flagged as running on Thursdays in calendar.txt.
calendar = gtfs['calendar']
runs_on_thursday = calendar['thursday'].eq(1)
thursday_service = calendar.loc[runs_on_thursday]

# Attach trip data to our departures, then keep only Thursday services.
all_trips = our_stop_times.merge(gtfs['trips'], on='trip_id')
trips = all_trips.merge(thursday_service, on='service_id')
print(len(trips.index))
trips.head()

### Which stops connect to the found stops

We want to travel max 30 minutes.

In [None]:
# Every stop time of the trips we can board, tagged with the sequence
# number of the stop where we board.
boarded_trips = trips[['current_sequence', 'trip_id']]
later_calls = gtfs['stop_times'].merge(boarded_trips, on='trip_id')

# Keep calls that come after our boarding stop and arrive by 18:30.
is_downstream = later_calls['stop_sequence'] > later_calls['current_sequence']
arrives_in_time = later_calls['arrival_time_int'] <= 1830
later_calls = later_calls[is_downstream & arrives_in_time]

# Attach stop data and keep one row per stop after sorting by arrival time.
reached = stops.merge(
    later_calls[['stop_id', 'arrival_time_int', 'departure_time_int']],
    on='stop_id'
)
reached = reached.sort_values(['arrival_time_int'])
reached = reached.drop_duplicates(subset=['stop_id'])

# Stops we can already walk to are not interesting as destinations.
already_walkable = reached['stop_id'].isin(walkable_stops['stop_id'])
other_stops = reached[~already_walkable]

def get_parents(stops):
    """Look up the parent stations referenced by a table of stops.

    Args:
        stops: Table with a 'parent_station' column; any other columns
            are carried over into the result.

    Returns:
        The rows of ``gtfs['stops']`` with a non-null 'location_type'
        whose 'stop_id' matches a 'parent_station' value in ``stops``,
        merged with the first input row for each parent station.
    """
    parent_rows = gtfs['stops'].dropna(subset=['location_type'])
    by_parent = stops.drop_duplicates(subset='parent_station')
    # Rename so the parent id lines up with the merge key of the stops table.
    by_parent = by_parent.rename(columns={'parent_station': 'stop_id'})
    return parent_rows.merge(by_parent, on='stop_id')

# Parent stations of the reachable stops, with the arrival time carried over.
reachable_children = other_stops[['parent_station', 'arrival_time_int']]
parent_stops = get_parents(reachable_children)

M.set_center(here_wgs84.x, here_wgs84.y, 13)
stops_to_map(parent_stops)

### Where can we walk to

Assuming we will walk at most 5 minutes, or until the original 30 minutes have passed.

In [None]:
def _hhmm_to_minutes(hhmm):
    """Convert an HHMM integer (e.g. 1830) to minutes since midnight."""
    hours, minutes = divmod(int(hhmm), 100)
    return hours * 60 + minutes


# Latest acceptable time, converted to minutes. HHMM integers cannot be
# subtracted directly: 1830 - 1755 is 75, but only 35 minutes apart.
deadline_minutes = _hhmm_to_minutes(1830)

for _, stop in parent_stops.iterrows():
    arrival_minutes = _hhmm_to_minutes(stop['arrival_time_int'])
    remaining_seconds = (deadline_minutes - arrival_minutes) * 60
    # Walk for the usual maximum, or for less if the deadline comes first.
    wk_time = min(walking_time, remaining_seconds)
    if wk_time <= 0:
        # No time left to walk from this stop; nothing to draw.
        continue
    walking_distance = stop['geometry'].buffer(walking_speed * wk_time)
    geometry_to_map(walking_distance, from_proj=calc_proj, name=stop['stop_name'])