In [21]:
# import necessary modules
import pandas as pd
import numpy as np
import math
from datetime import datetime, timedelta

train_in_file = '../dataset/training/trajectories(table 5)_training.csv'
train_out_file = '../refer/training_20min_avg_travel_time.csv'

test_in_file = '../dataset/testing_phase1/trajectories(table 5)_test1.csv'
test_out_file = '../refer/test_20min_avg_travel_time.csv'

def avgTravelTime(in_file, out_file):

    in_file_name = in_file
    out_file_name = out_file

    # Step 1: Load trajectories
    fr = open(in_file_name, 'r')
    fr.readline()  # skip the header
    traj_data = fr.readlines()
    fr.close()
    print(traj_data[0])

    # Step 2: Create a dictionary to store travel time for each route per time
    # window
    travel_times = {}  # key: route_id. Value is also a dictionary of which key is the start time for the time window and value is a list of travel times
    for i in range(len(traj_data)):
        each_traj = traj_data[i].replace('"', '').split(',')
        intersection_id = each_traj[0]
        tollgate_id = each_traj[1]

        route_id = intersection_id + '-' + tollgate_id
        if route_id not in travel_times.keys():
            travel_times[route_id] = {}

        trace_start_time = each_traj[3]
        trace_start_time = datetime.strptime(
            trace_start_time, "%Y-%m-%d %H:%M:%S")
        time_window_minute = int(math.floor(trace_start_time.minute / 20) * 20)
        start_time_window = datetime(trace_start_time.year, trace_start_time.month, trace_start_time.day,
                                     trace_start_time.hour, time_window_minute, 0)
        tt = float(each_traj[-1])  # travel time

        if start_time_window not in travel_times[route_id].keys():
            travel_times[route_id][start_time_window] = [tt]
        else:
            travel_times[route_id][start_time_window].append(tt)

    # Step 3: Calculate average travel time for each route per time window
    fw = open(out_file_name, 'w')
    fw.writelines(','.join(['"intersection_id"', '"tollgate_id"',
                            '"time_window"', '"avg_travel_time"']) + '\n')
    for route in travel_times.keys():
        route_time_windows = list(travel_times[route].keys())
        route_time_windows.sort()
        for time_window_start in route_time_windows:
            time_window_end = time_window_start + timedelta(minutes=20)
            tt_set = travel_times[route][time_window_start]
            avg_tt = round(sum(tt_set) / float(len(tt_set)), 2)
            out_line = ','.join(['"' + route.split('-')[0] + '"', '"' + route.split('-')[1] + '"',
                                 '"[' + str(time_window_start) +
                                 ',' + str(time_window_end) + ')"',
                                 '"' + str(avg_tt) + '"']) + '\n'
            fw.writelines(out_line)
    fw.close()


def train_proc():
    
    avgTravelTime(train_in_file, train_out_file)

def test_proc():
    
    avgTravelTime(test_in_file, test_out_file)
    

In [16]:
train_proc()

"B","3","1065642","2016-07-19 00:14:24","105#2016-07-19 00:14:24#9.56;100#2016-07-19 00:14:34#6.75;111#2016-07-19 00:14:41#13.00;103#2016-07-19 00:14:54#7.47;122#2016-07-19 00:15:02#32.85","70.85"



In [20]:
train_travel_time = pd.read_csv(train_out_file)
train_travel_time = train_travel_time.sort_values(['intersection_id', 'tollgate_id', 'time_window'])
train_travel_time.reset_index()
train_travel_time.head(30)

Unnamed: 0,intersection_id,tollgate_id,time_window,avg_travel_time
13313,A,2,"[2016-07-19 00:20:00,2016-07-19 00:40:00)",58.05
13314,A,2,"[2016-07-19 01:20:00,2016-07-19 01:40:00)",56.87
13315,A,2,"[2016-07-19 01:40:00,2016-07-19 02:00:00)",77.74
13316,A,2,"[2016-07-19 02:20:00,2016-07-19 02:40:00)",42.64
13317,A,2,"[2016-07-19 02:40:00,2016-07-19 03:00:00)",40.17
13318,A,2,"[2016-07-19 03:20:00,2016-07-19 03:40:00)",41.92
13319,A,2,"[2016-07-19 03:40:00,2016-07-19 04:00:00)",39.43
13320,A,2,"[2016-07-19 04:00:00,2016-07-19 04:20:00)",48.13
13321,A,2,"[2016-07-19 04:20:00,2016-07-19 04:40:00)",62.11
13322,A,2,"[2016-07-19 04:40:00,2016-07-19 05:00:00)",46.12


In [22]:
test_proc()

"A","2","1026631","2016-10-18 06:00:14","110#2016-10-18 06:00:14#7.65;123#2016-10-18 06:00:22#4.14;107#2016-10-18 06:00:26#2.39;108#2016-10-18 06:00:29#2.81;120#2016-10-18 06:00:31#0.42;117#2016-10-18 06:00:32#9.54","27.54"



In [23]:
test_travel_time = pd.read_csv(test_out_file)
test_travel_time = test_travel_time.sort_values(['intersection_id', 'tollgate_id', 'time_window'])
test_travel_time.reset_index()
test_travel_time.head(30)

Unnamed: 0,intersection_id,tollgate_id,time_window,avg_travel_time
236,A,2,"[2016-10-18 06:00:00,2016-10-18 06:20:00)",41.1
237,A,2,"[2016-10-18 06:20:00,2016-10-18 06:40:00)",43.68
238,A,2,"[2016-10-18 06:40:00,2016-10-18 07:00:00)",68.02
239,A,2,"[2016-10-18 07:00:00,2016-10-18 07:20:00)",52.61
240,A,2,"[2016-10-18 07:20:00,2016-10-18 07:40:00)",56.17
241,A,2,"[2016-10-18 07:40:00,2016-10-18 08:00:00)",63.6
242,A,2,"[2016-10-18 15:00:00,2016-10-18 15:20:00)",99.96
243,A,2,"[2016-10-18 15:20:00,2016-10-18 15:40:00)",88.36
244,A,2,"[2016-10-18 15:40:00,2016-10-18 16:00:00)",74.89
245,A,2,"[2016-10-18 16:00:00,2016-10-18 16:20:00)",63.3
