## GTFS - Direct Edges

In [24]:
import pandas as pd
import datetime
import numpy as np
import pickle

# Folder of the synthetic 'Test1' input data.
# NOTE(review): no cell visible in this part of the notebook reads DATA_PATH -- confirm it is still needed.
DATA_PATH = '../../../input_data/synthetic_examples/input_data/Test1/'

In [25]:
# Folder holding this run's artifacts: the nodes pickle is read from here
# and the direct-edges pickle is written back to it.
OUTPUT_PATH = '../../../output_data/validation/two_lines/'

## Load Nodes

In [26]:
# Read the node table of the selected scenario from its pickle file.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
FILES_PREFIX = 'morning'
# FILES_PREFIX = 'single_trip'
file_name = ''.join([OUTPUT_PATH, FILES_PREFIX, '_nodes.pkl'])
print(f'Loading nodes from file {file_name}')
nodes_df = pd.read_pickle(file_name)
nodes_df.head(3)

Loading nodes from file ../../../output_data/validation/two_lines/morning_nodes.pkl


Unnamed: 0,index,trip_id,arrival_time,departure_time_stop,stop_id,stop_sequence,route_id,departure_time_trip_departure,stop_code,stop_lat,stop_lon,arrival,departure,node_id
0,1,19407596_081215,07:06:06,07:06:06,12962,6,2544,06:59:00,21257,32.076767,34.844303,2015-12-14 07:06:06,2015-12-14 07:06:06,0
1,2,19407597_081215,07:15:06,07:15:06,12962,6,2544,07:08:00,21257,32.076767,34.844303,2015-12-14 07:15:06,2015-12-14 07:15:06,1
2,3,19407598_081215,07:24:06,07:24:06,12962,6,2544,07:17:00,21257,32.076767,34.844303,2015-12-14 07:24:06,2015-12-14 07:24:06,2


## Compute Direct Edges

In [27]:
# First we verify that the stops in each trip are consecutive

# NOTE: plain assignment -- tmp_df is a second name for the nodes_df object, not a copy of it.
tmp_df = nodes_df

def verify_consective(l):
    """Check that the values in ``l`` form a gap-free run of integers.

    The values are sorted and compared with ``range(min(l), max(l) + 1)``;
    a missing or a repeated value makes the two differ.

    Parameters
    ----------
    l : iterable of int
        Stop sequence numbers of one trip (any order).

    Raises
    ------
    Exception
        If the sorted values are not exactly min..max without gaps or repeats.
    ValueError
        If ``l`` is empty (raised by ``min``).
    """
    observed = sorted(list(l))
    expected = list(range(min(l), max(l) + 1))
    if observed == expected:
        return
    print('Found non-consecutive stop sequence')
    raise Exception('NON CONSECUTIVE STOP SEQUENCE')

In [28]:
# Run the check once per trip: each group's stop_sequence column is passed to verify_consective,
# which raises as soon as a trip's sequence has a gap or a repeated value.
tmp_df[['trip_id', 'stop_sequence']].groupby('trip_id').apply(lambda x: verify_consective(x.stop_sequence))

In [29]:
# Remove the temporary alias; this only unbinds the name tmp_df, nodes_df is still used below.
del tmp_df

No exception was raised, so the stop sequence of every trip is consecutive and we can proceed.

In [30]:
def create_direct_edges_for_trip(x, edges=None):
    """Add one direct edge for every pair of consecutive stops of a single trip.

    For each node with ``stop_sequence == s`` for which the trip also contains
    a node with ``stop_sequence == s + 1``, the tuple
    ``(node_id, next_node_id, weight)`` is added to the target set, where
    ``weight`` is the number of seconds between the node's ``arrival`` and the
    next node's ``departure``. A node without a successor (the last stop of
    the trip, or a stop followed by a gap in the sequence) yields no edge.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one trip. Must contain the columns ``node_id``,
        ``stop_sequence``, ``arrival`` and ``departure``; the difference
        ``departure - arrival`` must support ``total_seconds()`` (e.g. pandas
        datetime columns).
    edges : set, optional
        Set that receives the edges. When omitted, the notebook-level
        ``direct_edges`` set is updated, exactly as before this parameter
        existed.

    Returns
    -------
    None
        The result is delivered through the target set only.

    Raises
    ------
    AssertionError
        If more than one row of the trip carries the successor's
        ``stop_sequence``. Edges of rows processed before the offending row
        have already been added at that point.
    """
    target = direct_edges if edges is None else edges

    # Index the trip once by stop_sequence, so that finding a node's successor
    # is a dictionary access instead of a scan over the whole trip per node.
    nodes_by_seq = {}
    for seq, node_id, departure in zip(x['stop_sequence'], x['node_id'], x['departure']):
        nodes_by_seq.setdefault(seq, []).append((node_id, departure))

    for seq, node_id, arrival in zip(x['stop_sequence'], x['node_id'], x['arrival']):
        # For the same trip we want to take the next node
        successors = nodes_by_seq.get(seq + 1)
        if successors is None:
            # This is the last node of the current trip, no outgoing edge
            continue
        assert len(successors) == 1

        next_node_id, next_departure = successors[0]
        # Edge weight: seconds from arriving at this stop to departing from the next one.
        w = (next_departure - arrival).total_seconds()
        target.add((node_id, next_node_id, w))

<span style="background-color: #F1D9F9">TODO: parallelize this step: group by trip_id, then split the groups (trips) into batches and apply 'create_direct_edges_for_trip' to each group using a pool. We need to make sure that updates to direct_edges are thread/process-safe.</span>

In [31]:
# For each trip (grouping by trip), create edges for this trip's consecutive nodes.
from tqdm.auto import tqdm
tqdm.pandas()

direct_edges = set()
nodes_df[['node_id', 'trip_id', 'stop_sequence', 'arrival', 'departure']].groupby('trip_id').progress_apply(create_direct_edges_for_trip)

HBox(children=(IntProgress(value=0, max=26), HTML(value='')))




In [32]:
# Number of distinct (from_node_id, to_node_id, weight) tuples collected in the set.
len(direct_edges)

582

In [33]:
# Persist the edge set in the output folder, using the same scenario prefix as the nodes file.
with open(OUTPUT_PATH + FILES_PREFIX + '_direct_edges.pkl', 'wb') as f:
    pickle.dump(direct_edges, f)