In [3]:
#Python Libs
import sys
import os
import glob
from datetime import datetime
import json
import urllib
import time
import os
import requests
import http.client



#Data Analysis Libs
import pandas as pd
import numpy as np

In [4]:
# Constants
# NOTE(review): none of these names are referenced in the cells visible here;
# the descriptions below are inferred from the names — confirm against callers.

# Presumably the minimum number of command-line arguments expected — TODO confirm.
MIN_NUM_ARGS = 4

# Column-name groups (presumably for ordering/keying boarding and GPS records).
first_cols = [
    'cardNum',
    'boarding_datetime',
    'gps_datetime',
    'route',
    'busCode',
    'stopPointId',
]
boarding_key_cols = ['cardNum', 'boarding_datetime']
gps_key_cols = ['route', 'busCode', 'tripNum', 'stopPointId']

# Boarding keys, then the GPS keys without their last entry ('stopPointId'),
# then 'gps_datetime'.
sort_cols = list(boarding_key_cols)
sort_cols.extend(gps_key_cols[:-1])
sort_cols.append('gps_datetime')

# 1800; presumably seconds (i.e. 30 minutes) — TODO confirm the unit.
max_match_diff = 30 * 60

In [5]:
#Functions
def printUsage():
    """Print the command-line usage line for this script to stdout.

    The program name is taken from ``sys.argv[0]``.
    """
    expected_args = " <enhanced-buste-folder-path> <output-folder-path> <otp-server-url> <initial-date> <final-date>"
    usage_line = "".join(["Usage: ", sys.argv[0], expected_args])
    print (usage_line)
    
def get_otp_itineraries(otp_url,o_lat,o_lon,d_lat,d_lon,date,time,route,verbose=False):
    """Request a trip plan from an OTP server and return the decoded JSON.

    Parameters
    ----------
    otp_url : str
        Base URL of the OTP server. The request path is appended to it
        verbatim, so it must already end with '/'.
    o_lat, o_lon
        Origin coordinates, inserted as ``fromPlace=<o_lat>,<o_lon>``.
    d_lat, d_lon
        Destination coordinates, inserted as ``toPlace=<d_lat>,<d_lon>``.
    date : str
        Value of the ``date`` query parameter; surrounding whitespace is
        stripped before it is inserted.
    time : str
        Value of the ``time`` query parameter, inserted as given.
    route
        Unused: the request template has no placeholder for it. Kept so
        existing callers keep working.
    verbose : bool, optional
        When True, print the request URL before sending it.

    Returns
    -------
    object
        Whatever ``json.loads`` yields for the response body.

    Raises
    ------
    Propagates any error raised by ``urlopen`` (network/HTTP failures) and
    by ``json.loads`` (body is not valid JSON).
    """
    # Python 3 exposes urlopen in urllib.request; fall back to urllib2 on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen
    from contextlib import closing

    otp_http_request = 'routers/cg/plan?fromPlace={},{}&toPlace={},{}&mode=TRANSIT,WALK&date={}&time={}&numItineraries=500&maxWalkingDistance=1000'

    otp_request_url = otp_url + otp_http_request.format(o_lat,o_lon,d_lat,d_lon,date.strip(),time)

    if verbose:
        print (otp_request_url)

    # closing() guarantees the HTTP response is released even if decoding fails.
    with closing(urlopen(otp_request_url)) as response:
        return json.loads(response.read().decode('utf-8'))

In [6]:
def get_otp_suggested_trips(od_matrix,otp_url,dest_lat='-7.217167',dest_lon='-35.908995'):
    """Query the OTP server once per row of ``od_matrix`` and collect the responses.

    For every row, the origin is ``(shapeLat, shapeLon)`` and the requested
    departure moment is the row's ``gps_datetime`` shifted back by 3 hours and
    2 minutes. Both the request date and the request time are taken from that
    same shifted timestamp.

    Parameters
    ----------
    od_matrix : pandas.DataFrame
        Must provide the columns ``stopPointId``, ``gps_datetime`` (datetime
        values), ``shapeLat``, ``shapeLon`` and ``route``.
    otp_url : str
        Base URL handed to ``get_otp_itineraries``.
    dest_lat, dest_lon : optional
        Destination coordinates for every request. The defaults are the
        coordinates previously hard-coded here (labelled "UFCG" in the
        original source).

    Returns
    -------
    dict
        Maps ``float(stopPointId)`` to the response returned by
        ``get_otp_itineraries``. Empty when ``od_matrix`` has no rows.
    """
    req_duration = []
    trips_otp_response = {}

    for _, row in od_matrix.iterrows():
        # NOTE(review): responses are keyed by stopPointId, so rows that share
        # a stop overwrite each other and only the last response survives.
        # Confirm whether a per-row key is wanted instead.
        trip_id = float(row['stopPointId'])

        # NOTE(review): the 3 h part of the shift looks like a timezone offset — confirm.
        request_datetime = row['gps_datetime'] - pd.Timedelta('3 h') - pd.Timedelta('2 min')
        # Date and time come from the same instant so the request stays
        # consistent when the shift crosses midnight.
        date = request_datetime.strftime('%Y-%m-%d ')
        start_time = request_datetime.strftime('%H:%M:%S')

        req_start_time = time.time()
        trip_plan = get_otp_itineraries(otp_url, row['shapeLat'], row['shapeLon'],
                                        dest_lat, dest_lon, date, start_time, row['route'])
        req_time = time.time() - req_start_time

        req_duration.append((trip_id, req_time))
        print("OTP request took ", req_time,"seconds.")
        trips_otp_response[trip_id] = trip_plan

    # Summarise request latencies once, after the loop, and only if any
    # request was made (there is nothing to describe for empty input).
    if req_duration:
        req_dur_df = pd.DataFrame.from_records(req_duration, columns=['id','duration'])
        print (req_dur_df.duration.describe())

    return trips_otp_response

In [7]:
def extract_otp_trips_legs(otp_trips):
    """Flatten OTP responses into one tuple per itinerary leg.

    Parameters
    ----------
    otp_trips : dict
        Maps a trip key to a decoded OTP response. Responses without a
        ``'plan'`` entry are skipped.

    Returns
    -------
    list of tuple
        One ``(date, trip, itinerary_id, leg_id, start_time, end_time, mode,
        route, from_stop_id, to_stop_id, duration)`` tuple per leg, where:

        * ``date``, ``start_time`` and ``end_time`` are the response values
          divided by 1000 (presumably epoch milliseconds to seconds);
        * ``itinerary_id`` and ``leg_id`` are 1-based positions;
        * ``route`` is None when the leg's route is '' or absent;
        * the stop ids are the part of ``stopId`` after the first ':' for
          'BUS' legs and None for every other mode;
        * ``duration`` is ``(end_time - start_time) / 60``.

    Notes
    -----
    The divisions use ``/``; on Python 2 without
    ``from __future__ import division`` they are therefore integer divisions.
    """
    trips_legs = []

    for trip, response in otp_trips.items():
        if 'plan' not in response:
            continue
        plan = response['plan']

        for itinerary_id, itinerary in enumerate(plan['itineraries'], start=1):
            date = plan['date']/1000

            for leg_id, leg in enumerate(itinerary['legs'], start=1):
                route = leg.get('route')
                if route == '':
                    route = None

                if leg['mode'] == 'BUS':
                    fromStopId = leg['from']['stopId'].split(':')[1]
                    toStopId = leg['to']['stopId'].split(':')[1]
                else:
                    fromStopId = None
                    toStopId = None

                # int() replaces the Python 2-only long(); it accepts the same
                # numeric or numeric-string inputs on both interpreters.
                start_time = int(leg['startTime'])/1000
                end_time = int(leg['endTime'])/1000
                duration = (end_time - start_time)/60

                trips_legs.append((date,trip,itinerary_id,leg_id,start_time,end_time,
                                   leg['mode'],route,fromStopId,toStopId,duration))

    return trips_legs

In [8]:
def prepare_otp_legs_df(otp_legs_list):
    """Build a sorted DataFrame from the leg tuples.

    ``otp_legs_list`` is a sequence of 11-field records in the column order
    listed below. In the result:

    * ``date`` is the epoch-seconds value formatted as 'YYYY-MM-DD';
    * ``otp_duration_mins`` is recomputed as (end - start) / 60 from the
      numeric start/end values;
    * ``from_stop_id`` / ``to_stop_id`` are converted to numbers, with
      unparseable values becoming NaN;
    * ``otp_start_time`` / ``otp_end_time`` are parsed as epoch seconds.

    Rows are ordered by date, user_trip_id, itinerary_id and otp_start_time.
    """
    column_names = ['date','user_trip_id','itinerary_id','leg_id','otp_start_time','otp_end_time','mode','route','from_stop_id','to_stop_id','otp_duration_mins']
    legs = pd.DataFrame.from_records(data=otp_legs_list, columns=column_names)

    # Keep the raw epoch values: the duration must be derived from them
    # before the columns are replaced by timestamps.
    epoch_start = legs['otp_start_time']
    epoch_end = legs['otp_end_time']

    legs = legs.assign(
        date=pd.to_datetime(legs['date'], unit='s').dt.strftime('%Y-%m-%d'),
        otp_duration_mins=(epoch_end - epoch_start)/60,
        from_stop_id=pd.to_numeric(legs['from_stop_id'], errors='coerce'),
        to_stop_id=pd.to_numeric(legs['to_stop_id'], errors='coerce'),
    )
    legs = legs.assign(
        otp_start_time=pd.to_datetime(epoch_start, unit='s'),
        otp_end_time=pd.to_datetime(epoch_end, unit='s'),
    )

    ordering = ['date','user_trip_id','itinerary_id','otp_start_time']
    return legs.sort_values(by=ordering)

In [1]:
# Test run: check that itineraries can be built for every bus in the city
# (no boarding-stop filter is applied).
user_trips_file = os.getcwd() + "/data/input/2019_02_01_bus_trips.csv"
output_folder_path = os.getcwd() + "/data/output/" 
otp_server_url = "http://localhost:5601/otp/"

print ("Processing file", user_trips_file)
file_name = user_trips_file.split('/')[-1].replace('.csv','')
# The file name starts with the date as YYYY_MM_DD.
file_date = pd.to_datetime(file_name.split('_bus_trips')[0],format='%Y_%m_%d')
# pandas dayofweek: Monday == 0 ... Sunday == 6.
if (file_date.dayofweek == 6):
    print ("File date is sunday. File will not be processed.")
else:
    try:
        user_trips = pd.read_csv(user_trips_file, low_memory=False)
        # Drop rows whose gps_datetime is the '-' placeholder; they cannot be
        # parsed as timestamps.
        user_trips = user_trips.loc[(user_trips['gps_datetime'] != '-')]
        # Derive gps_trips from the frame loaded above (it used to be read
        # before assignment) and parse the timestamps on a new frame rather
        # than assigning into a slice.
        # To restrict the run to one boarding stop, filter on stopPointId here
        # (e.g. user_trips['stopPointId'] == 491551).
        gps_trips = user_trips.assign(
            gps_datetime=lambda df: pd.to_datetime(df['gps_datetime'], format='%d-%m-%Y %H:%M:%S'))

        otp_suggestions = get_otp_suggested_trips(gps_trips,otp_server_url)
        otp_legs_df = prepare_otp_legs_df(extract_otp_trips_legs(otp_suggestions)) \
            .drop_duplicates(subset=['date','user_trip_id','leg_id','otp_end_time','mode', 'route','otp_duration_mins', 'from_stop_id', 'to_stop_id'])

        # os.path.join avoids the doubled '/' since output_folder_path already ends with one.
        otp_legs_df.to_csv(os.path.join(output_folder_path, file_name + '_otp_itineraries_pelo_note.csv'),index=False)
    except Exception as e:
        # Best-effort run: report the failure and continue with the notebook.
        print (e)
        print ("Error in processing file " + file_name)

NameError: name 'os' is not defined

In [17]:
# Preview only the first rows instead of rendering the whole frame.
user_trips.head(10)

Unnamed: 0,route,tripNum,shapeId,routeFrequency,shapeSequence,shapeLat,shapeLon,distanceTraveledShape,busCode,gpsPointId,gpsLat,gpsLon,distanceToShapePoint,gps_datetime,stopPointId,streetName,problem
0,220,1,73475,high_frequency,8,-7.24269,-35.92352,503.0,1076,512,-7.242401,-35.92365,35.18971,01-02-2019 05:18:58,386396,-,NO_PROBLEM
1,220,1,73475,high_frequency,10,-7.24447,-35.92281,716.0,1076,-,-,-,-,01-02-2019 05:19:45,491283,-,BETWEEN
2,220,1,73475,high_frequency,14,-7.24517,-35.92448,916.0,1076,-,-,-,-,01-02-2019 05:20:04,386402,-,BETWEEN
3,220,1,73475,high_frequency,20,-7.24556,-35.92619,1110.0,1076,-,-,-,-,01-02-2019 05:20:23,497799,-,BETWEEN
4,220,1,73475,high_frequency,22,-7.24540,-35.92808,1320.0,1076,-,-,-,-,01-02-2019 05:20:55,497800,-,BETWEEN
5,220,1,73475,high_frequency,26,-7.24540,-35.92955,1497.0,1076,588,-7.245263,-35.92942,20.921257,01-02-2019 05:21:31,497801,-,NO_PROBLEM
6,220,1,73475,high_frequency,27,-7.24575,-35.93060,1619.0,1076,-,-,-,-,01-02-2019 05:21:37,501705,-,BETWEEN
7,220,1,73475,high_frequency,28,-7.24634,-35.93201,1788.0,1076,-,-,-,-,01-02-2019 05:21:46,501706,-,BETWEEN
8,220,1,73475,high_frequency,38,-7.24679,-35.93057,2132.0,1076,-,-,-,-,01-02-2019 05:22:05,501707,-,BETWEEN
9,220,1,73475,high_frequency,40,-7.24572,-35.93042,2290.0,1076,-,-,-,-,01-02-2019 05:22:14,501708,-,BETWEEN


In [17]:
# Keep the rows of gps_trips whose gps_datetime (as held in user_trips) is not
# the '-' placeholder. The boolean mask is aligned to gps_trips by index.
# .copy() makes the result an independent frame, so the column assignment
# below does not write into a slice of gps_trips.
# NOTE(review): this cell overwrites user_trips, so it depends on the state
# left by earlier cells and is not safe to run twice.
user_trips = gps_trips.loc[(user_trips['gps_datetime'] != '-')].copy()
user_trips['gps_datetime'] = pd.to_datetime(user_trips['gps_datetime'], format='%d-%m-%Y %H:%M:%S')
user_trips.head()

Unnamed: 0,route,tripNum,shapeId,routeFrequency,shapeSequence,shapeLat,shapeLon,distanceTraveledShape,busCode,gpsPointId,gpsLat,gpsLon,distanceToShapePoint,gps_datetime,stopPointId,streetName,problem
16933,944,1,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 07:52:12,491551,-,BETWEEN
17061,944,2,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,54143,-7.286596,-35.89565,2.3046215,2019-02-01 10:23:59,491551,-,NO_PROBLEM
17110,944,3,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 11:06:17,491551,-,BETWEEN
17143,944,4,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,66914,-7.286622,-35.89559,9.514184,2019-02-01 11:31:50,491551,-,NO_PROBLEM
17220,944,5,72976,high_frequency,40,-7.28659,-35.89567,1398.0,1051,-,-,-,-,2019-02-01 13:24:14,491551,-,BETWEEN


In [11]:
# Build OTP itineraries for the bus trips recorded in the 2019-02-10 input file.
user_trips_file = os.getcwd() + "/data/input/2019_02_10_bus_trips.csv"
output_folder_path = os.getcwd() + "/data/output/" 
otp_server_url = "http://localhost:5601/otp/"

print ("Processing file", user_trips_file)
file_name = user_trips_file.split('/')[-1].replace('.csv','')
# The file name starts with the date as YYYY_MM_DD.
file_date = pd.to_datetime(file_name.split('_bus_trips')[0],format='%Y_%m_%d')
# pandas dayofweek: Monday == 0 ... Sunday == 6.
if (file_date.dayofweek == 6):
    print ("File date is sunday. File will not be processed.")
else:
    try:
        user_trips = pd.read_csv(user_trips_file, low_memory=False)
        # Derive gps_trips from the frame loaded above (it used to be read
        # before assignment): drop the '-' placeholder rows, then parse the
        # timestamps on a new frame rather than assigning into a slice.
        # To restrict the run to one boarding stop, filter on stopPointId here
        # (e.g. user_trips['stopPointId'] == 491551).
        gps_trips = user_trips.loc[(user_trips['gps_datetime'] != '-')].assign(
            gps_datetime=lambda df: pd.to_datetime(df['gps_datetime'], format='%d-%m-%Y %H:%M:%S'))

        otp_suggestions = get_otp_suggested_trips(gps_trips,otp_server_url)
        otp_legs_df = prepare_otp_legs_df(extract_otp_trips_legs(otp_suggestions)) \
            .drop_duplicates(subset=['date','user_trip_id','leg_id','otp_end_time','mode', 'route','otp_duration_mins', 'from_stop_id', 'to_stop_id'])

        # os.path.join avoids the doubled '/' since output_folder_path already ends with one.
        otp_legs_df.to_csv(os.path.join(output_folder_path, file_name + '_otp_itineraries.csv'),index=False)
    except Exception as e:
        # Best-effort run: report the failure and continue with the notebook.
        print (e)
        print ("Error in processing file " + file_name)


('Processing file', '/home/hector/TransferTimeAnalysisCG/workspace/python/people-paths/trips-destination-inference/data/input/2019_02_10_bus_trips.csv')
File date is sunday. File will not be processed.


('Processing file', '/home/hector/TransferTimeAnalysisCG/workspace/python/people-paths/trips-destination-inference/data/input/2019_02_10_bus_trips.csv')


NameError: name 'traceback' is not defined