In [1]:
#Python Libs
import sys
import os
import glob
from datetime import datetime
import json
import urllib
import time
import os
import requests
import http.client



#Data Analysis Libs
import pandas as pd
import numpy as np

In [2]:
#Constants
MIN_NUM_ARGS = 4
first_cols = ['cardNum', 'boarding_datetime','gps_datetime','route','busCode','stopPointId']
boarding_key_cols = ['cardNum','boarding_datetime']
gps_key_cols = ['route','busCode','tripNum','stopPointId']
sort_cols = boarding_key_cols + gps_key_cols[:-1] + ['gps_datetime']
max_match_diff = 1800

In [24]:
#Functions
def printUsage():
    print ("Usage: " + sys.argv[0] + " <enhanced-buste-folder-path> <output-folder-path> <otp-server-url> <initial-date> <final-date>")
    
def get_otp_itineraries(otp_url,o_lat,o_lon,d_lat,d_lon,date,time,route,verbose=False):
    otp_http_request = 'routers/cg/plan?fromPlace={},{}&toPlace={},{}&mode=TRANSIT,WALK&date={}&time={}&numItineraries=500&maxWalkingDistance=1000'
    
    otp_request_url = otp_url + otp_http_request.format(o_lat,o_lon,d_lat,d_lon,date.strip(),time,route)
    print(otp_request_url)

    if verbose:
        print (otp_request_url)

    return json.loads(urllib.urlopen(otp_request_url).read())

In [37]:
def get_otp_suggested_trips(od_matrix,otp_url):
    
    req_duration = []
    trips_otp_response = {}
    counter = 0
    for index, row in od_matrix.iterrows():
        id=float(row['stopPointId'])
        date = row['gps_datetime'].strftime('%Y-%m-%d ')
        
        start_time = (row['gps_datetime']-pd.Timedelta('3 h')-pd.Timedelta('2 min')).strftime('%H:%M:%S')
        
        req_start_time = time.time()
        #UFCG -7.217167, -35.908995
        #print(row['gpsLat'])
        #print(row['gpsLon'])
        trip_plan = get_otp_itineraries(otp_url,row['shapeLat'], row['shapeLon'], row['gpsLat'], row['gpsLon'], date,start_time, row['route'])
        #print(trip_plan)
        req_end_time = time.time()
        req_time = req_end_time - req_start_time
        req_duration.append((id,req_time))
        print("OTP request took ", req_end_time - req_start_time,"seconds.")
        trips_otp_response[id] = trip_plan
        counter+=1

        req_dur_df = pd.DataFrame().from_records(req_duration,columns=['id','duration'])
    print (req_dur_df.duration.describe())	

    return trips_otp_response

In [26]:
def extract_otp_trips_legs(otp_trips):
    trips_legs = []

    for trip in otp_trips.keys():
        if 'plan' in otp_trips[trip]:
            itinerary_id = 1
            for itinerary in otp_trips[trip]['plan']['itineraries']:
                date = otp_trips[trip]['plan']['date']/1000
                leg_id = 1
                for leg in itinerary['legs']:
                    route = leg['route'] if leg['route'] != '' else None
                    fromStopId = leg['from']['stopId'].split(':')[1] if leg['mode'] == 'BUS' else None
                    toStopId = leg['to']['stopId'].split(':')[1] if leg['mode'] == 'BUS' else None
                    start_time = long(leg['startTime'])/1000
                    end_time = long(leg['endTime'])/1000
                    duration = (end_time - start_time)/60
                    trips_legs.append((date,trip,itinerary_id,leg_id,start_time,end_time,leg['mode'],route,fromStopId,toStopId, duration))
                    
                    leg_id += 1
                itinerary_id += 1
    return trips_legs

In [27]:
def prepare_otp_legs_df(otp_legs_list):
    labels=['date','user_trip_id','itinerary_id','leg_id','otp_start_time','otp_end_time','mode','route','from_stop_id','to_stop_id','otp_duration_mins']
    return pd.DataFrame.from_records(data=otp_legs_list, columns=labels) \
                    .assign(date = lambda x: pd.to_datetime(x['date'],unit='s').dt.strftime('%Y-%m-%d'),
                            otp_duration_mins = lambda x : (x['otp_end_time'] - x['otp_start_time'])/60,
                            route = lambda x : (x['route']),
                            from_stop_id = lambda x : pd.to_numeric(x['from_stop_id'],errors='coerce'),
                            to_stop_id = lambda x : pd.to_numeric(x['to_stop_id'],errors='coerce')) \
                    .assign(otp_start_time = lambda x : pd.to_datetime(x['otp_start_time'], unit='s'),
                            otp_end_time = lambda x : pd.to_datetime(x['otp_end_time'], unit='s')) \
                    .sort_values(by=['date','user_trip_id','itinerary_id','otp_start_time'])

In [39]:
#Teste para verificar a montagem de itinerários para todos ônibus da cidade
user_trips_file = os.getcwd() + "/data/input/2019_02_01_bus_trips.csv"
output_folder_path = os.getcwd() + "/data/output/" 
otp_server_url = "http://localhost:5601/otp/"

print ("Processing file", user_trips_file)
file_name = user_trips_file.split('/')[-1].replace('.csv','')
file_date = pd.to_datetime(file_name.split('_bus_trips')[0],format='%Y_%m_%d')
if (file_date.dayofweek == 6):
    print ("File date is sunday. File will not be processed.")
else:
    try:
        user_trips = pd.read_csv(user_trips_file, low_memory=False)
        # Filtering just trips starting from Hector's home (bus stop)
        user_trips = user_trips.loc[(user_trips['gps_datetime'] != '-')] 
        user_trips['gps_datetime'] = pd.to_datetime(user_trips['gps_datetime'], format='%d-%m-%Y %H:%M:%S')
        otp_suggestions = get_otp_suggested_trips(user_trips,otp_server_url)
        otp_legs_df = prepare_otp_legs_df(extract_otp_trips_legs(otp_suggestions))
        otp_legs_df.drop_duplicates(subset=['date','user_trip_id','leg_id','otp_end_time','mode', 'route','otp_duration_mins', 'from_stop_id', 'to_stop_id'], inplace=True)

        
        otp_legs_df.to_csv(output_folder_path + '/' + file_name + '_otp_itineraries.csv',index=False)
    except Exception as e:
        print (e)
        print ("Error in processing file " + file_name)

('Processing file', '/home/hector/TransferTimeAnalysisCG/workspace/python/people-paths/trips-destination-inference/data/input/2019_02_01_bus_trips.csv')
-7.242401
-35.92365
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.24269,-35.92352&toPlace=-7.242401,-35.92365&mode=TRANSIT,WALK&date=2019-02-01&time=02:16:58&numItineraries=500&maxWalkingDistance=1000
{u'elevationMetadata': {u'geoidElevation': False, u'ellipsoidToGeoidDifference': -5.0487112101801}, u'plan': {u'date': 1548998218000, u'to': {u'lat': -7.242401, u'vertexType': u'NORMAL', u'lon': -35.92365, u'name': u'Destination', u'orig': u''}, u'itineraries': [{u'walkTime': 28, u'legs': [{u'distance': 35.175000000000004, u'from': {u'vertexType': u'NORMAL', u'name': u'Origin', u'lon': -35.92352, u'departure': 1548998218000, u'lat': -7.24269, u'orig': u''}, u'interlineWithPreviousLeg': False, u'transitLeg': False, u'realTime': False, u'route': u'', u'departureDelay': 0, u'agencyTimeZoneOffset': -10800000, u'to': {u'arrival': 1548

{u'elevationMetadata': {u'geoidElevation': False, u'ellipsoidToGeoidDifference': -5.0487112101801}, u'plan': {u'date': 1548998463000, u'to': {u'lat': -7.248925, u'vertexType': u'NORMAL', u'lon': -35.92824, u'name': u'Destination', u'orig': u''}, u'itineraries': [{u'walkTime': 31, u'legs': [{u'distance': 40.199, u'from': {u'vertexType': u'NORMAL', u'name': u'Origin', u'lon': -35.92835, u'departure': 1548998463000, u'lat': -7.24858, u'orig': u''}, u'interlineWithPreviousLeg': False, u'transitLeg': False, u'realTime': False, u'route': u'', u'departureDelay': 0, u'agencyTimeZoneOffset': -10800000, u'to': {u'arrival': 1548998494000, u'vertexType': u'NORMAL', u'name': u'Destination', u'lon': -35.92824, u'lat': -7.248925, u'orig': u''}, u'rentedBike': False, u'arrivalDelay': 0, u'mode': u'WALK', u'startTime': 1548998463000, u'duration': 31.0, u'steps': [{u'distance': 40.199, u'relativeDirection': u'DEPART', u'elevation': [], u'area': False, u'lon': -35.92839609621853, u'stayOn': False, u'abso

{u'elevationMetadata': {u'geoidElevation': False, u'ellipsoidToGeoidDifference': -5.0487112101801}, u'plan': {u'date': 1548998708000, u'to': {u'lat': -7.246605, u'vertexType': u'NORMAL', u'lon': -35.91892, u'name': u'Destination', u'orig': u''}, u'itineraries': [{u'walkTime': 3, u'legs': [{u'distance': 1.9829999999999999, u'from': {u'vertexType': u'NORMAL', u'name': u'Origin', u'lon': -35.91894, u'departure': 1548998708000, u'lat': -7.24657, u'orig': u''}, u'interlineWithPreviousLeg': False, u'transitLeg': False, u'realTime': False, u'route': u'', u'departureDelay': 0, u'agencyTimeZoneOffset': -10800000, u'to': {u'arrival': 1548998711000, u'vertexType': u'NORMAL', u'name': u'Destination', u'lon': -35.91892, u'lat': -7.246605, u'orig': u''}, u'rentedBike': False, u'arrivalDelay': 0, u'mode': u'WALK', u'startTime': 1548998708000, u'duration': 3.0, u'steps': [{u'distance': 1.9829999999999999, u'relativeDirection': u'DEPART', u'elevation': [], u'area': False, u'lon': -35.91894235975336, u'

{u'elevationMetadata': {u'geoidElevation': False, u'ellipsoidToGeoidDifference': -5.0487112101801}, u'plan': {u'date': 1548998862000, u'to': {u'lat': -7.245583, u'vertexType': u'NORMAL', u'lon': -35.91127, u'name': u'Destination', u'orig': u''}, u'itineraries': [{u'walkTime': 31, u'legs': [{u'distance': 38.650999999999996, u'from': {u'vertexType': u'NORMAL', u'name': u'Origin', u'lon': -35.91132, u'departure': 1548998862000, u'lat': -7.24593, u'orig': u''}, u'interlineWithPreviousLeg': False, u'transitLeg': False, u'realTime': False, u'route': u'', u'departureDelay': 0, u'agencyTimeZoneOffset': -10800000, u'to': {u'arrival': 1548998893000, u'vertexType': u'NORMAL', u'name': u'Destination', u'lon': -35.91127, u'lat': -7.245583, u'orig': u''}, u'rentedBike': False, u'arrivalDelay': 0, u'mode': u'WALK', u'startTime': 1548998862000, u'duration': 31.0, u'steps': [{u'distance': 38.650999999999996, u'relativeDirection': u'DEPART', u'elevation': [], u'area': False, u'lon': -35.91135945893446, 

{u'elevationMetadata': {u'geoidElevation': False, u'ellipsoidToGeoidDifference': -5.0487112101801}, u'plan': {u'date': 1548999015000, u'to': {u'lat': -7.236235, u'vertexType': u'NORMAL', u'lon': -35.91035, u'name': u'Destination', u'orig': u''}, u'itineraries': [{u'walkTime': 80, u'legs': [{u'distance': 104.737, u'from': {u'vertexType': u'NORMAL', u'name': u'Origin', u'lon': -35.91047, u'departure': 1548999015000, u'lat': -7.23717, u'orig': u''}, u'interlineWithPreviousLeg': False, u'transitLeg': False, u'realTime': False, u'route': u'', u'departureDelay': 0, u'agencyTimeZoneOffset': -10800000, u'to': {u'arrival': 1548999095000, u'vertexType': u'NORMAL', u'name': u'Destination', u'lon': -35.91035, u'lat': -7.236235, u'orig': u''}, u'rentedBike': False, u'arrivalDelay': 0, u'mode': u'WALK', u'startTime': 1548999015000, u'duration': 80.0, u'steps': [{u'distance': 104.737, u'relativeDirection': u'DEPART', u'elevation': [], u'area': False, u'lon': -35.91046873899008, u'stayOn': False, u'ab

KeyboardInterrupt: 

In [17]:
#user_trips.dtypes
user_trips = gps_trips.loc[(user_trips['gps_datetime'] != '-')]
user_trips['gps_datetime'] = pd.to_datetime(user_trips['gps_datetime'], format='%d-%m-%Y %H:%M:%S')
user_trips.head()

Unnamed: 0,route,tripNum,shapeId,routeFrequency,shapeSequence,shapeLat,shapeLon,distanceTraveledShape,busCode,gpsPointId,gpsLat,gpsLon,distanceToShapePoint,gps_datetime,stopPointId,streetName,problem
16933,944,1,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 07:52:12,491551,-,BETWEEN
17061,944,2,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,54143,-7.286596,-35.89565,2.3046215,2019-02-01 10:23:59,491551,-,NO_PROBLEM
17110,944,3,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 11:06:17,491551,-,BETWEEN
17143,944,4,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,66914,-7.286622,-35.89559,9.514184,2019-02-01 11:31:50,491551,-,NO_PROBLEM
17220,944,5,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 13:24:14,491551,-,BETWEEN


In [12]:
user_trips_file = os.getcwd() + "/data/input/2019_02_01_bus_trips.csv"
output_folder_path = os.getcwd() + "/data/output/" 
otp_server_url = "http://localhost:5601/otp/"

print ("Processing file", user_trips_file)
file_name = user_trips_file.split('/')[-1].replace('.csv','')
file_date = pd.to_datetime(file_name.split('_bus_trips')[0],format='%Y_%m_%d')
if (file_date.dayofweek == 6):
    print ("File date is sunday. File will not be processed.")
else:
    try:
        user_trips = pd.read_csv(user_trips_file, low_memory=False)
        # Filtering just trips starting from Hector's home (bus stop)
        gps_trips = user_trips.loc[(user_trips['stopPointId'] == 491551)]
        gps_trips = gps_trips.loc[(gps_trips['gps_datetime'] != '-')] 
        gps_trips['gps_datetime'] = pd.to_datetime(gps_trips['gps_datetime'], format='%d-%m-%Y %H:%M:%S')
        #print(gps_trips.head())
        otp_suggestions = get_otp_suggested_trips(gps_trips,otp_server_url)
        otp_legs_df = prepare_otp_legs_df(extract_otp_trips_legs(otp_suggestions))
        otp_legs_df.drop_duplicates(subset=['date','user_trip_id','leg_id','otp_end_time','mode', 'route','otp_duration_mins', 'from_stop_id', 'to_stop_id'], inplace=True)

        
        otp_legs_df.to_csv(output_folder_path + '/' + file_name + '_otp_itineraries.csv',index=False)
    except Exception as e:
        print (e)
        print ("Error in processing file " + file_name)


('Processing file', '/home/hector/TransferTimeAnalysisCG/workspace/python/people-paths/trips-destination-inference/data/input/2019_02_01_bus_trips.csv')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSIT,WALK&date=2019-02-01&time=04:50:12&numItineraries=500&maxWalkingDistance=1000
('OTP request took ', 0.4292581081390381, 'seconds.')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSIT,WALK&date=2019-02-01&time=07:21:59&numItineraries=500&maxWalkingDistance=1000
('OTP request took ', 0.0747530460357666, 'seconds.')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSIT,WALK&date=2019-02-01&time=08:04:17&numItineraries=500&maxWalkingDistance=1000
('OTP request took ', 0.10241103172302246, 'seconds.')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSI

('OTP request took ', 0.08926892280578613, 'seconds.')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSIT,WALK&date=2019-02-01&time=16:38:28&numItineraries=500&maxWalkingDistance=1000
('OTP request took ', 0.0867011547088623, 'seconds.')
http://localhost:5601/otp/routers/cg/plan?fromPlace=-7.28659,-35.89567&toPlace=-7.217167,-35.908995&mode=TRANSIT,WALK&date=2019-02-01&time=17:42:34&numItineraries=500&maxWalkingDistance=1000
('OTP request took ', 0.09114694595336914, 'seconds.')
count    35.000000
mean      0.111887
std       0.060820
min       0.074753
25%       0.085949
50%       0.095134
75%       0.112807
max       0.429258
Name: duration, dtype: float64
