# GTFS by routes and by agencies and OSM pt_any summary

In [1]:
import pandas as pd
import geopandas as gpd
import os
import time   
import networkx as nx
import osmnx as ox
import numpy as np
import matplotlib.pyplot as plt

import calendar
import datetime
from datetime import timedelta


import urbanaccess as ua

# module with functions to load GTFS data into dataframes
# revised from the original UrbanAccess load function, with the add-agency and route-type features taken out
import ua_load 

# module with functions to calculate the average headway during the timeframe over departure
import gtfs_headway_analysis as gha

from shapely.geometry import shape,Point, LineString, Polygon

%matplotlib inline


In [2]:
import warnings
import matplotlib.cbook
#warnings.filterwarnings("ignore",category=matplotlib.cbook.mplDeprecation)


In [3]:
# Study regions to summarise: one dict per city holding its name, region code
# and the EPSG code of its projection.
cities = [
    {'cityname': name, 'region': region_code, 'crs': crs}
    for name, region_code, crs in (
        ('adelaide', 'au', 'epsg:7845'),
        ('auckland', 'nz', 'epsg:2193'),
        ('baltimore', 'us', 'epsg:32618'),
        ('bangkok', 'th', 'epsg:32647'),
        ('barcelona', 'es', 'epsg:25831'),
        ('belfast', 'gb', 'epsg:29902'),
        ('bern', 'ch', 'epsg:32633'),
        ('chennai', 'in', 'epsg:32644'),
        ('cologne', 'de', 'epsg:32631'),
        ('ghent', 'be', 'epsg:32631'),
        ('graz', 'at', 'epsg:32633'),
        ('hanoi', 'vn', 'epsg:32648'),
        ('hong_kong', 'hk', 'epsg:32650'),
        ('lisbon', 'pt', 'epsg:3763'),
        ('melbourne', 'au', 'epsg:7845'),
        ('mexico_city', 'mx', 'epsg:32614'),
        ('odense', 'dk', 'epsg:32632'),
        ('olomouc', 'cz', 'epsg:32633'),
        ('phoenix', 'us', 'epsg:32612'),
        ('sao_paulo', 'br', 'epsg:32723'),
        ('seattle', 'us', 'epsg:32610'),
        ('sydney', 'au', 'epsg:7845'),
        ('valencia', 'es', 'epsg:25830'),
        ('vic', 'es', 'epsg:25831'),
    )
]

# Summarize OSM pt_any counts within the bounding box of the 500 m buffered study region

In [4]:
# Count the OSM 'pt_any' destinations found around every study region and
# collect the counts into one summary table (df_osm_pt).

# parameters shared by every study region's geopackage file name
project_year = 2019
distance = 1600

# One record per city; the summary frame is built once after the loop.
# (DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call.)
osm_pt_records = []
for city_params in cities:
    city = city_params['cityname']
    region = city_params['region']

    geopackagePath = '../data/input/{city}_{region}_{project_year}_{distance}m_buffer.gpkg'.format(
            city=city, region=region, project_year=project_year, distance=distance)

    # load the urban study region boundary layer
    # (not called `shape`, so shapely's `shape` import is not overwritten)
    study_region_gdf = gpd.GeoDataFrame.from_file(geopackagePath,
                  layer='urban_study_region')
    polygon = study_region_gdf['geometry'].iloc[0]
    # create buffer to consider edge effect
    polygon_buffered = polygon.buffer(500)
    # get bounding box of the buffered boundary, used as the spatial filter below
    bbox = polygon_buffered.bounds

    # load destinations inside the bounding box and keep the public transport ones
    destinations = gpd.read_file(geopackagePath, layer='destinations', bbox=bbox)
    gdf_pt = destinations[destinations['dest_name'] == 'pt_any']

    # NOTE(review): the count is stored as text, as before, so the CSV written
    # after the later outer merge is unchanged; switch to int if it is used numerically
    osm_pt_records.append({'study_region': '{}'.format(city),
                           'OSM_pt_any_counts': '{}'.format(len(gdf_pt))})

# explicit columns keep the expected schema even when there are no records
df_osm_pt = pd.DataFrame(osm_pt_records, columns=['study_region', 'OSM_pt_any_counts'])


In [5]:
# display the OSM pt_any count collected for each study region
df_osm_pt

Unnamed: 0,study_region,OSM_pt_any_counts
0,adelaide,6765
1,auckland,5363
2,baltimore,3947
3,bangkok,1746
4,barcelona,5216
5,belfast,394
6,bern,1000
7,chennai,912
8,cologne,5365
9,ghent,2043


# Summarize GTFS stops by agency and route types

In [None]:
import gtfs_config

# get study region GTFS frequent stop parameters config
GTFS = gtfs_config.GTFS

# Count GTFS stops per feed, agency and route type for every configured city.
# Per-city tables are collected in a list and concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call).
stop_count_frames = []
for city, city_config in GTFS.items():
    gtfsfeed_path = city_config['gtfs_filename']
    bbox = city_config['bbox']

    # load GTFS Feed using UrbanAccess load module
    loaded_feeds = ua.gtfs.load.gtfsfeed_to_df(gtfsfeed_path=gtfsfeed_path, validation=True, bbox=bbox, remove_stops_outsidebbox=True)

    # number of non-null stop_id values in each (feed, agency, route type) group
    stops_by_agency = loaded_feeds.stops.groupby(['unique_feed_id','unique_agency_id', 'route_type'])[['stop_id']].count()
    # tag the rows with their city so they can be merged with the OSM summary
    stops_by_agency['study_region'] = city
    stop_count_frames.append(stops_by_agency)

# pd.concat raises on an empty list, so fall back to an empty frame as before
df_stop_byagency = pd.concat(stop_count_frames) if stop_count_frames else pd.DataFrame()
    
    

In [7]:
# merge with OSM pt_any count
df_stop_byagency_osm = pd.merge(df_stop_byagency.reset_index(), df_osm_pt, left_on='study_region', right_on='study_region', how='outer')
df_stop_byagency_osm.to_csv('gtfs_stops_routetypes_agency.csv')