In [1]:
import pandas as pd
import numpy as np
import csv
import folium
import matplotlib.pyplot as plt
import time
from datetime import timedelta


# Load Data
converting the csv file to pandas dataframe and set the coordinates of the lanes in dataframe

In [2]:
# read the trajectory data as pandas dataframe
data_path = 'data/competition_dataset.csv'
with open(data_path) as f:
    content = f.readlines()
colnames = content[0].rstrip().replace(';','').split()
df_list = []
for i, row in enumerate(content[1:]):
    content_i = row.strip().split(';')#.replace(';','').split()
    array = np.asanyarray(''.join(content_i[4:]).split()).astype('float').reshape((-1,6))
    df_i = pd.DataFrame(array,columns=colnames[4:])
    df_i[colnames[0]] = int(content_i[0].strip())
    df_i[colnames[1]] = content_i[1].strip()
    df_i[colnames[2]] = float(content_i[2].strip())
    df_i[colnames[3]] = float(content_i[3].strip())
    df_list.append(df_i)
df = pd.concat(df_list,ignore_index=True)
df = df[colnames]

## set the coordinates of the each lanes
set the coordinates of the lanes and calculate the vector and norm for each lanes based on the GPS coordinate.
those vectors and norms will be used for later calculation of the distance from vehicle to lanes.

In [3]:
# prepare input GPS information for lanes in three street

# prepare the lane GPS data for street 28is  oktovriou above
top_lane_list = [[37.991877, 23.731371, 37.992197, 23.731406],
             [37.991884, 23.731324, 37.992197, 23.731406],
             [37.991892, 23.731277, 37.992205, 23.731360],
             [37.991899, 23.731236, 37.992212, 23.731313],
             [37.992197, 23.731406, 37.993157, 23.731645],
             [37.992205, 23.731360, 37.993160, 23.731606],
             [37.992212, 23.731313, 37.993163, 23.731566]
            ]
df_lane_top = pd.DataFrame.from_records(top_lane_list, columns=['start lat', 'start lon', 'end lat', 'end lon'],
                                    index=['far left 0', 'left 0', 'mid 0','right 0','left 1','mid 1','right 1'])
top_lanes_conncetion=[['far left 0', 'left 1'],
                      ['left 0', 'left 1']]
# build the norm and vector for the lane for the later calculation
df_lane_top['norm']= df_lane_top.apply(lambda x : np.linalg.norm(
    np.array([x['start lat'], x['start lon']]) - np.array([x['end lat'], x['end lon']])), axis=1)
df_lane_top['vec lat']= df_lane_top.apply(lambda x : x['start lat'] - x['end lat'], axis=1)
df_lane_top['vec lon']= df_lane_top.apply(lambda x : x['start lon'] - x['end lon'], axis=1)

# prepare the lane GPS data for 28is oktovriou bottom
bot_lane_list = [[37.991277, 23.731315, 37.991366, 23.731423],
             [37.991366, 23.731423, 37.991543, 23.731881],
             [37.991392, 23.731406, 37.991571, 23.731857],
             [37.991418, 23.731388, 37.991599, 23.731833],
             [37.991543, 23.731881, 37.991526, 23.732141]]
df_lane_bot = pd.DataFrame.from_records(bot_lane_list, columns=['start lat', 'start lon', 'end lat', 'end lon'],
                                    index=['bot right 0', 'right 1', 'mid 1','left 1','top right 2'])
bot_lanes_conncetion=[['bot right 0', 'right 1','top right 2']]
# build the norm and vector for the lane for the later calculation
df_lane_bot['norm']= df_lane_bot.apply(lambda x : np.linalg.norm(
    np.array([x['start lat'], x['start lon']]) - np.array([x['end lat'], x['end lon']])), axis=1)
df_lane_bot['vec lat']= df_lane_bot.apply(lambda x : x['start lat'] - x['end lat'], axis=1)
df_lane_bot['vec lon']= df_lane_bot.apply(lambda x : x['start lon'] - x['end lon'], axis=1)
#---------------------------

# prepare the lane GPS data for lane
right_lane_list = [[37.991828, 23.731578, 37.991727, 23.732208],
             [37.991802, 23.731570, 37.991703, 23.732198],
             [37.991770, 23.731564, 37.991676, 23.732186],
             [37.991742, 23.731554, 37.991647, 23.732179],
             [37.991652, 23.732549, 37.990814, 23.737805],
             [37.991620, 23.732540, 37.990786, 23.737798],
             [37.991589, 23.732535, 37.990759, 23.737787]]
df_lane_right = pd.DataFrame.from_records(right_lane_list, columns=['start lat', 'start lon', 'end lat', 'end lon'],
                                    index=['far right 0', 'right 0', 'mid 0','left 0','right 1','mid 1','left 1'])
right_lanes_conncetion = []
# build the norm and vector for the lane for the later calculation
df_lane_right['norm']= df_lane_right.apply(lambda x : np.linalg.norm(
    np.array([x['start lat'], x['start lon']]) - np.array([x['end lat'], x['end lon']])), axis=1)
df_lane_right['vec lat']= df_lane_right.apply(lambda x : x['start lat'] - x['end lat'], axis=1)
df_lane_right['vec lon']= df_lane_right.apply(lambda x : x['start lon'] - x['end lon'], axis=1)

# Define the methods that will be used for the algorithm

In [4]:
# here define the function that we need
def assign_vehicle_to_lane(df_lane, df_at_time, threshold, metric):
    """
    :param df_lane: a pandas dataframe that contains GPS infromation for lanes
    :param df_at_time: a pandas dataframe which containes all the vehicle information at a certain timestamp
    :param threshold: a float value, which is maximal distance that allow a vehicle assigned to a lane
    :param metric: 'lat' or 'lon', which tell function which metric to use to check if vehicle is between head
    and tail coordinate of a lane
    :return: a pandas dataframe that containes the vehicle information including the lane that a vehicle belongs to.
    """
    lane_asign_list = []
    distacne_list = []
    type_list = []
    # asign each vehicle to a lane
    for row in df_at_time.iterrows():
        if metric == 'lon':
            in_lane = df_lane.apply(lambda x: (min(x['start lon'], x['end lon']) <= row[1]['lon'])
                                              and (max(x['start lon'], x['end lon']) >= row[1]['lon']), axis=1)
        elif metric == 'lat':
            in_lane = df_lane.apply(lambda x: (min(x['start lat'], x['end lat']) <= row[1]['lat'])
                                              and (max(x['start lat'], x['end lat']) >= row[1]['lat']), axis=1)
        else:
            raise ValueError('wrong metric')
        dist = df_lane.apply(lambda x: np.cross(np.array([x['vec lat'], x['vec lon']]),
                                                np.array([row[1]['lat'] - x['end lat'], row[1]['lon'] - x['end lon']]))
                                       / x['norm'], axis=1)
        if True in in_lane.values:
            lane_name = dist[in_lane == True].abs().idxmin()
            distacne_list.append(dist[lane_name])
            lane_asign_list.append(lane_name)
            if abs(dist[lane_name]) < threshold:
                type_list.append('lane point')
            else:
                type_list.append('outer point')
        else:
            lane_name = dist.abs().idxmin()
            lane_asign_list.append(lane_name)
            distacne_list.append(dist[lane_name])
            if abs(dist[lane_name]) < threshold:
                type_list.append('intersection point')
            else:
                type_list.append('outer point')
    df_at_time['lane'] = lane_asign_list
    df_at_time['dist to lane'] = distacne_list
    df_at_time['point type'] = type_list
    df_assigned = df_at_time[df_at_time['point type'] == 'lane point']

    return df_assigned


# detect all the queue and save to queue list
def detect_queue(vehicle_distance_threshold, df_lane, df_assigned, lanes_conncetion):
    """
    :param vehicle_distance_threshold: a float value, maximal distance that consider two vehicle the same queue
    :param df_lane: a pandas dataframe that contains information for the lanes
    :param df_assigned: a pandas dataframe which contains the vehicle informatin including which lane that vehicle belongs to.
    :param lanes_conncetion: a list of possible connect between to lanes for example:['right 0','right 1']
    :return: queue_list, which is a list of possible queues with head and tail vehicle information in that queue
    :return: merge_lanes, which is a list of possible queues that come from multiple segments of lanes
    """
    queue_list = []
    queue_dict = {}
    for lane in df_assigned.lane.unique():
        queue_dict[lane] = []
        lane_vec = (df_lane['start lat'][lane] - df_lane['end lat'][lane],
                    df_lane['start lon'][lane] - df_lane['end lon'][lane])
        df_single_lane = df_assigned[df_assigned.lane == lane]
        df_single_lane.sort_values(by=['lat'], inplace=True)
        dist_list = [0]
        start_idx = 0
        for i in range(len(df_single_lane) - 1):
            vec_i = (df_single_lane.iloc[i + 1]['lat'] - df_single_lane.iloc[i]['lat'],
                     df_single_lane.iloc[i + 1]['lon'] - df_single_lane.iloc[i]['lon'])
            dist = np.dot(lane_vec, vec_i) / np.linalg.norm(lane_vec)
            if dist > vehicle_distance_threshold:
                queue_dict[lane].append([df_single_lane.iloc[start_idx], df_single_lane.iloc[i],
                                         i - start_idx + 1])
                start_idx = i + 1
            # dist_list.append(dist)
        if len(df_single_lane) > 1:
            queue_dict[lane].append([df_single_lane.iloc[start_idx], df_single_lane.iloc[len(df_single_lane) - 1],
                                     len(df_single_lane) - start_idx])
        # df_single_lane['dist to previous point'] = dist_list
        # df_single_lane.append(df_single_lane)

    # check if we can connect two queue:
    merge_lanes = []
    for lane_pairs in lanes_conncetion:
        for i in range(len(lane_pairs) - 1):
            if not lane_pairs[i] in queue_dict or not lane_pairs[i + 1] in queue_dict:
                continue
            a_lanes = list(queue_dict[lane_pairs[i]])  # []
            a_lanes.extend(merge_lanes)
            b_lanes = list(queue_dict[lane_pairs[i + 1]])
            for a_lane in a_lanes:
                for b_lane in b_lanes:
                    dist_1 = np.linalg.norm(
                        (a_lane[0]['lat'] - b_lane[-2]['lat'], a_lane[0]['lon'] - b_lane[-2]['lon']))
                    dist_2 = np.linalg.norm(
                        (a_lane[-2]['lat'] - b_lane[0]['lat'], a_lane[-2]['lon'] - b_lane[0]['lon']))
                    if dist_1 <= vehicle_distance_threshold:
                        b_lane.extend(a_lane)
                        merge_lane = b_lane
                        merge_lanes.append(merge_lane)
                    elif dist_2 <= vehicle_distance_threshold:
                        a_lane.extend(b_lane)
                        merge_lane = a_lane
                        merge_lanes.append(merge_lane)
    for i in queue_dict:
        queue_list.extend(queue_dict[i])
    return queue_list, merge_lanes


# retrn the longest queue in terms of number of vehicle
def get_longest_queue(queue_list, merge_lanes):
    """
    calculate the longest queue in terms of number of vehicles
    :param queue_list: queue_list, which is a list of possible queues with head and tail vehicle information in that queue
    :param merge_lanes: which is a list of possible queues that come from multiple segments of lanes
    :return: longest queue among the queue list and merge_lanes
    """
    queue_len_list = []
    queue_list = queue_list+merge_lanes
    for queue_i in queue_list:
        i_len = 0
        for i in range(len(queue_i) // 3):
            i_len += queue_i[i * 3 + 2]
        queue_len_list.append(i_len)

    longest_idxes = np.argwhere(queue_len_list == np.max(queue_len_list)).flatten()
    longest_queue = [queue_list[i] for i in longest_idxes]
    return longest_queue


# return the longest queue in terms of length on map rather than number of vehicle
def get_longest_queue_by_length(queue_list, merge_lanes, df_lane):
    """
    calculate the longest queue in terms coordinate distance between head and tail vehicles of the queue
    :param queue_list: queue_list, which is a list of possible queues with head and tail vehicle information in that queue
    :param merge_lanes: which is a list of possible queues that come from multiple segments of lanes
    :param df_lane: a pandas dataframe that contains information for the lanes
    :return: longest queue among the queue list and merge_lanes, which represented by head and tail vehicle in the queue
    and the length of the queue, if its a queue that comes from mutiple segments of lane, it will be head and tail
    vehicle in every segment.
    """
    queue_len_list = []
    queue_list = queue_list + merge_lanes
    for queue_i in queue_list:
        i_len = 0
        for i in range(len(queue_i) // 2):
            queue_vec = (
            queue_i[i * 2 + 1]['lat'] - queue_i[i * 2]['lat'], queue_i[i * 2 + 1]['lon'] - queue_i[i * 2]['lon'])
            lane = queue_i[i * 2]['lane']
            lane_vec = (df_lane['start lat'][lane] - df_lane['end lat'][lane],
                        df_lane['start lon'][lane] - df_lane['end lon'][lane])
            i_len += np.abs(np.dot(lane_vec, queue_vec) / np.linalg.norm(lane_vec))
        queue_len_list.append(i_len)

    longest_idxes = np.argwhere(queue_len_list == np.max(queue_len_list)).flatten()
    longest_queue = [queue_list[i] for i in longest_idxes]
    return longest_queue


# search every 0.04 seconds from a period of -3 to +3 of the given time
def fine_search(df, timestamp, df_lane, assign_metric, assign_threshold, vehicle_distance_threshold, lanes_conncetion):
    """
    search every 0.04 seconds for the longest queue in the time span [timestamp-3,timestamp+3] for a given timestamp
    and return all the candiatate
    of the longest queue, return the first one occured if multiple maximal queue occurs
    :param df: a pandas dataframe which containes all the vehicle information for all time steps
    :param timestamp: a time where you want to fine search the best result. [timestamp-3,timestamp+3] will be searched
    :param df_lane: a pandas dataframe that contains information for the lanes
    :param assign_metric: 'lat' or 'lon', which tell function which metric to use to check if vehicle is between head
    and tail coordinate of a lane
    :param assign_threshold: a float value, which is maximal distance that allow a vehicle assigned to a lane
    :param vehicle_distance_threshold: a float value, maximal distance that consider two vehicle the same queue
    :param lanes_conncetion: a list of possible connect between to lanes for example:['right 0','right 1']
    :return: longest queue among the time span [timestamp-3,timestamp+3], which represented by head and tail vehicle
    in the queue, if a queue comes from multiple segments of lane, it will be head and tail vehicle in each segment.
    """
    df_finesearch = df[df['time'].between(timestamp - 3, timestamp + 3)]
    longest_queue_list = []
    for timestamp in df_finesearch['time'].unique():
        df_at_time = df_finesearch[df_finesearch['time'] == timestamp]
        # bot
        df_assigned = assign_vehicle_to_lane(df_lane, df_at_time, assign_threshold, metric=assign_metric)
        queue_list, merge_lanes = detect_queue(vehicle_distance_threshold, df_lane, df_assigned, lanes_conncetion)
        if len(queue_list) > 0:
            longest_queue = get_longest_queue(queue_list, merge_lanes)
            longest_queue_list.extend(longest_queue)
    longest_queue = get_longest_queue(longest_queue_list, [])

    # only return the first one, that means the ealierest time when the longest traffic queue occurs
    return longest_queue[0]


# detect if there is a spillback based on the stoped time for head and tail vehicle
def detect_spillback(df, traffic_queue, vehicle_speed_threshold, block_duration_threshold):
    """
    a function that returns if there is a spillback for a traffic queue by check the maximal
    stoped time for head and tail vehicle of that queue.
    :param df: a pandas dataframe which containes all the vehicle information for all time steps
    :param traffic_queue: a list of traffic queue which contains head, tail vehicle of the queue and the length of the
    queue
    :param vehicle_speed_threshold: a threshold of vehicle speed, below which the vehicle will be considered as stoped
    :param block_duration_threshold: a duration threshold that vehicle stoped in seconds. above which the vehicle will
    be considered as in a spillback.
    :return: True or False if a spillback formed, and the longest duration(in seconds) when the vehicle stoped
    """
    # detect for tail vehicle
    df_id = df[df['track_id'] == traffic_queue[-2]['track_id']]
    t_start = df_id['time'].iloc[0]
    t_end = df_id['time'].iloc[-1]
    for timestamp in df_id[df_id.time < traffic_queue[-2]['time']].time.iloc[::-1]:
        if df_id[df_id['time'] == timestamp]['speed'].values > vehicle_speed_threshold:
            t_start = timestamp
            break

    for timestamp in df_id[df_id.time > traffic_queue[-2]['time']].time:
        if df_id[df_id['time'] == timestamp]['speed'].values > vehicle_speed_threshold:
            t_end = timestamp
            break
    t_duration_tail = t_end - t_start

    # detect for head vehicle
    df_id = df[df['track_id'] == traffic_queue[0]['track_id']]
    t_start = df_id['time'].iloc[0]
    t_end = df_id['time'].iloc[-1]
    for timestamp in df_id[df_id.time < traffic_queue[0]['time']].time.iloc[::-1]:
        if df_id[df_id['time'] == timestamp]['speed'].values > vehicle_speed_threshold:
            t_start = timestamp
            break

    for timestamp in df_id[df_id.time > traffic_queue[0]['time']].time:
        if df_id[df_id['time'] == timestamp]['speed'].values > vehicle_speed_threshold:
            t_end = timestamp
            break
    t_duration_head = t_end - t_start

    t_duration = max(t_duration_head, t_duration_tail)
    if t_duration > block_duration_threshold:
        return True, t_duration
    else:
        return False, t_duration


# Detect the longest queue
the detection consists of two stages: 3s search and fine search:
at 3s search stage, we search every 3s to get a rough time when the longest queue occurs.
at fine search stage, for the timestamp we found in the 3s search t3s, we search every 0.04 seconds for the time span [t3s-3, t3s-3].

## 3S search: detect the longest queue for every 3 seconds

In [5]:
# assign the vehicle to the lane
threshold = 0.00002
vehicle_distance_threshold=0.00012

bot_longest_queue_list = []
top_longest_queue_list = []
right_longest_queue_list = []

start_time = time.time()

# search for each seconds
for timestamp in range(0, int(df['time'].max())+1, 3):
    if timestamp%((int(df['time'].max())+1)//20) == 0:
        print(str(timestamp)+' / '+str(int(df['time'].max())+1)+' processed!')
    df_at_time = df[df['time']==timestamp]
    df_at_time = df_at_time[df_at_time['speed']<5]
    
    # bot street
    df_assigned = assign_vehicle_to_lane(df_lane_bot, df_at_time, threshold, metric='lon')
    queue_list, merge_lanes = detect_queue(vehicle_distance_threshold, df_lane_bot, df_assigned, bot_lanes_conncetion)
    if len(queue_list)>0:
        longest_queue = get_longest_queue(queue_list, merge_lanes)
        bot_longest_queue_list.extend(longest_queue)
    
    # top street
    df_assigned = assign_vehicle_to_lane(df_lane_top, df_at_time, threshold, metric='lat')
    queue_list, merge_lanes = detect_queue(vehicle_distance_threshold, df_lane_top, df_assigned, top_lanes_conncetion)
    if len(queue_list)>0:
        longest_queue = get_longest_queue(queue_list, merge_lanes)
        top_longest_queue_list.extend(longest_queue)
    
    # right street
    df_assigned = assign_vehicle_to_lane(df_lane_right, df_at_time, threshold, metric='lon')
    queue_list, merge_lanes = detect_queue(vehicle_distance_threshold, df_lane_right, df_assigned, right_lanes_conncetion)
    if len(queue_list)>0:
        longest_queue = get_longest_queue(queue_list, merge_lanes)
        right_longest_queue_list.extend(longest_queue)

bot_longest_queue = get_longest_queue(bot_longest_queue_list, [])
top_longest_queue = get_longest_queue(top_longest_queue_list, [])
right_longest_queue = get_longest_queue(right_longest_queue_list, [])

elapsed = time.time()-start_time
print('passed time: '+str(timedelta(seconds=elapsed)))

0 / 814 processed!


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


120 / 814 processed!
240 / 814 processed!
360 / 814 processed!
480 / 814 processed!
600 / 814 processed!
720 / 814 processed!
passed time: 0:07:09.454311


## Fine search: for every 0.04 seconds around the top results from the 3s search

In [6]:
start_time = time.time()

bot_fine_search_time = set([queue[0]['time'] for queue in bot_longest_queue])
bot_queue_candidates = [fine_search(df, timestamp, df_lane_bot, 'lon', 
                                threshold, vehicle_distance_threshold, bot_lanes_conncetion)
                        for timestamp in bot_fine_search_time]

top_fine_search_time = set([queue[0]['time'] for queue in top_longest_queue])
top_queue_candidates = [fine_search(df, timestamp, df_lane_top, 'lat', 
                                    threshold, vehicle_distance_threshold, top_lanes_conncetion)
                        for timestamp in top_fine_search_time]

right_fine_search_time = set([queue[0]['time'] for queue in right_longest_queue])
right_queue_candidates = [fine_search(df, timestamp, df_lane_right, 'lat',
                                threshold, vehicle_distance_threshold, right_lanes_conncetion)
                          for timestamp in right_fine_search_time]

elapsed = time.time()-start_time
print('passed time: '+str(timedelta(seconds=elapsed)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


passed time: 0:11:08.200195


##  Calculate the longest queue and detect for each queue if a spillback occurs

In [7]:
top_langest_report = get_longest_queue(top_queue_candidates, []) 
bot_langest_report = get_longest_queue(bot_queue_candidates, []) 
right_langest_report = get_longest_queue(right_queue_candidates, []) 

vehicle_speed_threshold = 10
block_duration_threshold = 120

top_spillback, _ = detect_spillback(df, top_langest_report[0], vehicle_speed_threshold, block_duration_threshold)
bot_spillback, _ = detect_spillback(df, bot_langest_report[0], vehicle_speed_threshold, block_duration_threshold)
right_spillback, _ = detect_spillback(df, right_langest_report[0], vehicle_speed_threshold, block_duration_threshold)

# Export the results into a csv file

In [8]:
df_results = pd.DataFrame(columns=['Area',
                                   'Maximum length of queue',
                                   'Lane the maximum length occurred',
                                   'start lat',
                                   'start lon',
                                   'end lat',
                                   'end lon',
                                   'Timestamp of the maximum queue occurrence',
                                   'whether, when and where a spillback is formed'])

df_results.loc[0] = ['top 28is Oktovrious', 
    sum([top_langest_report[0][i] for i in range(len(top_langest_report[0])) if i%3==2]),
    top_langest_report[0][0]['lane'], 
    top_langest_report[0][0]['lat'], 
    top_langest_report[0][0]['lon'],
    top_langest_report[0][-2]['lat'], 
    top_langest_report[0][-2]['lon'], 
    top_langest_report[0][0]['time'],
    top_spillback ]

df_results.loc[1] = ['bot 28is Oktovrious', 
    sum([bot_langest_report[0][i] for i in range(len(bot_langest_report[0])) if i%3==2]),
    bot_langest_report[0][0]['lane'], 
    bot_langest_report[0][0]['lat'], 
    bot_langest_report[0][0]['lon'],
    bot_langest_report[0][-2]['lat'], 
    bot_langest_report[0][-2]['lon'], 
    bot_langest_report[0][0]['time'],
    bot_spillback ]

df_results.loc[2] = ['Leof. Alexandras', 
    sum([right_langest_report[0][i] for i in range(len(right_langest_report[0])) if i%3==2]),
    right_langest_report[0][0]['lane'], 
    right_langest_report[0][0]['lat'], 
    right_langest_report[0][0]['lon'],
    right_langest_report[0][-2]['lat'], 
    right_langest_report[0][-2]['lon'], 
    right_langest_report[0][0]['time'],
    top_spillback ]

df_results.to_csv('result.csv')