# 06-Trajectory Querying

This notebook exemplifies the querying of quadkey-indexed trajectories using the Extended Vehicle Energy Dataset.

**Requirements**: Run the `calculate-trajectories.py` script before running this notebook.

In [1]:
import folium
import numpy as np
import pandas as pd
import osmnx as ox
import geopandas as gpd
import networkx as nx
import math

from itertools import pairwise
from db.api import EVedDb
from folium.vector_layers import PolyLine, CircleMarker
from pyquadkey2 import quadkey
from numba import jit
from db.api import EVedDb
from tqdm.notebook import tqdm
from raster.drawing import smooth_line
from geo.qk import tile_to_str

from geo.trajectory import GraphRoute, GraphTrajectory, load_signal_range, load_trajectory_points, load_link_points

## 06.01-Preparation

We start by loading the road network from Ann Arbor, Michigan, using the `GraphRoute` class.

In [2]:
# Build the GraphRoute helper for the Ann Arbor, Michigan road network.
gr = GraphRoute('Ann Arbor, Michigan')

In [3]:
# Ask the GraphRoute to generate a route between a start and an end street address.
route = gr.generate_route(addr_ini="122 N Thayer St, Ann Arbor, MI 48104, USA",
                          addr_end="1431 Ardmoor Ave, Ann Arbor, MI 48103, USA")

The `fit_bounding_box` function uses a list of locations to fit a bounding box around the displayed data and set the appropriate map center and zoom.

In [4]:
def fit_bounding_box(html_map, bb_list):
    """Fit the map view to the bounding box of a set of locations.

    Args:
        html_map: Map object exposing ``fit_bounds`` (e.g. a ``folium.Map``).
        bb_list: Sequence (list, tuple) or NumPy array of
            ``(latitude, longitude)`` pairs.

    Returns:
        The same ``html_map`` object, so the call can be the last expression
        of a notebook cell. When ``bb_list`` holds no locations the map is
        returned without changing its bounds.
    """
    # asarray converts lists and tuples and passes existing arrays through.
    ll = np.asarray(bb_list)

    # An empty sequence becomes a 1-D array that cannot be column-indexed
    # below, so there is nothing to fit.
    if ll.size == 0:
        return html_map

    min_lat, max_lat = ll[:, 0].min(), ll[:, 0].max()
    min_lon, max_lon = ll[:, 1].min(), ll[:, 1].max()
    html_map.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
    return html_map

In [5]:
def map_graph_route(graph_route):
    """Draw the route held by ``graph_route`` on a new folium map.

    Args:
        graph_route: Object exposing ``get_route_nodes()``, which yields
            mappings with ``'y'`` (latitude) and ``'x'`` (longitude) keys.

    Returns:
        The folium map, fitted to the bounding box of the route nodes.
    """
    html_map = folium.Map(prefer_canvas=True, tiles="cartodbpositron", max_zoom=20, control_scale=True)

    # Convert the nodes to (lat, lon) points once; the same points feed both
    # the drawn segments and the bounding box.
    bb_list = [(loc['y'], loc['x']) for loc in graph_route.get_route_nodes()]

    # One PolyLine per consecutive node pair, as in the original rendering.
    for p0, p1 in pairwise(bb_list):
        PolyLine([p0, p1], weight=5, opacity=0.5).add_to(html_map)

    return fit_bounding_box(html_map, bb_list)

In [6]:
# Render the generated route on a folium map.
map_graph_route(gr)

## 06.02-Querying Using an Arbitrary Trajectory

In this section we will use the above trajectory to query the database for overlapping _trajectories_ and _trajectory segments_. We start by declaring some supporting functions and explain the process along the way.

Let's try it out with the above route:

In [7]:
# Tabulate the (quadkey, bearing) pairs that the GraphRoute reports for the route.
route_df = pd.DataFrame(data=gr.get_route_quadkeys(), columns=["quadkey", "bearing"])
route_df

Unnamed: 0,quadkey,bearing
0,217847244599,180.1
1,217847245594,267.9
2,217847245125,183.1
3,217847238463,178.4
4,217847245583,267.9
...,...,...
245,217847245159,179.1
246,217847245579,267.9
247,217847244191,179.8
248,217847244181,179.0


As you can see from the result above, we can now match the quadkeys to the existing _links_ while enforcing a similar bearing. This is, in essence, how we query.

In [8]:
# Show the points stored for the link with identifier 1.
load_link_points(1)

[(42.302595, -83.704197), (42.302585, -83.704604)]

The `get_contiguous_ranges` function determines all the consecutive ranges of signal identifiers. It accepts two NumPy arrays as inputs: the first contains the links' initial signal identifiers, while the second contains their final identifiers. The function merges all contiguous identifiers into an N×2 array where the first column contains the initial index and the second contains the ending index.

In [None]:
@jit(nopython=True)
def get_contiguous_ranges(signal_ini, signal_end):
    """Merge consecutive links into contiguous signal ranges.

    Link ``i`` continues the current range when ``signal_ini[i]`` equals
    ``signal_end[i - 1]``; otherwise the current range is closed at
    ``signal_end[i - 1]`` and a new one starts at ``signal_ini[i]``.

    Args:
        signal_ini: 1-D array with the first signal identifier of each link.
        signal_end: 1-D array with the last signal identifier of each link,
            aligned element-wise with ``signal_ini``.

    Returns:
        An ``(N, 2)`` float array (``np.zeros`` default dtype) whose rows are
        ``(range_start, range_end)``. Empty input yields a ``(0, 2)`` array.
    """
    n = signal_ini.shape[0]
    # There can never be more ranges than links.
    ranges = np.zeros((n, 2))

    j = 0
    # Guard explicitly: indexing element 0 of an empty array is not safe here.
    if n > 0:
        ini = signal_ini[0]
        for i in range(1, n):
            prev_end = signal_end[i - 1]
            if signal_ini[i] != prev_end:
                # Break in the sequence: close the running range.
                ranges[j, 0] = ini
                ranges[j, 1] = prev_end
                j += 1
                ini = signal_ini[i]

        # Always close the range still open after the loop, ending at the
        # LAST link. The previous code emitted it only when no break had been
        # seen, and then used the second-to-last link's end.
        ranges[j, 0] = ini
        ranges[j, 1] = signal_end[n - 1]
        j += 1

    return ranges[:j, :]

In [None]:
def get_route_matching_trajectories(g, route, level=20, angle_delta=2.5):
    """Return the trajectories whose links overlap the given route.

    Args:
        g, route, level, angle_delta: Forwarded unchanged to
            ``get_route_overlapping_links``.

    Returns:
        A ``(trajectories, links)`` tuple: the sorted unique values found in
        column 1 of the overlapping-links array, and that array itself.
    """
    overlapping = get_route_overlapping_links(g, route, level, angle_delta)
    traj_ids = np.unique(overlapping[:, 1])
    return traj_ids, overlapping

In [None]:
def get_overlapping_signal_ranges(g, route, level=20, angle_delta=2.5):
    """Collect the contiguous signal ranges of every trajectory matching the route.

    Args:
        g, route, level, angle_delta: Forwarded unchanged to
            ``get_route_matching_trajectories``.

    Returns:
        A flat list of ``[range_start, range_end]`` lists, grouped by
        trajectory in the order the trajectories are returned.
    """
    traj_ids, link_rows = get_route_matching_trajectories(g, route, level, angle_delta)

    signal_ranges = []
    for traj_id in traj_ids:
        # Column 1 carries the trajectory id; columns 2 and 3 are passed on as
        # each link's first and last signal identifier.
        mask = link_rows[:, 1] == traj_id
        merged = get_contiguous_ranges(link_rows[mask, 2], link_rows[mask, 3])
        signal_ranges += merged.tolist()
    return signal_ranges

In [None]:
def map_matching_links(graph_route):
    """Map the signal ranges that overlap the route, with the route on top.

    Args:
        graph_route: Object exposing ``get_overlapping_signal_ranges()`` and
            ``get_route_nodes()`` (nodes are mappings with ``'y'``/``'x'`` keys).

    Returns:
        The folium map, fitted to the bounding box of everything drawn.
    """
    html_map = folium.Map(prefer_canvas=True, tiles="cartodbpositron", max_zoom=20, control_scale=True)

    bb_list = []

    ranges = graph_route.get_overlapping_signal_ranges()
    for r in tqdm(ranges):
        line = load_signal_range(r)
        # Ranges that resolve to no points are skipped.
        if len(line):
            bb_list.extend(line)
            # Pass the popup as text, as the other map helpers in this notebook do.
            PolyLine(line, weight=3, color="red", opacity=0.5, popup=str(r)).add_to(html_map)

    line = [(loc['y'], loc['x']) for loc in graph_route.get_route_nodes()]
    bb_list.extend(line)

    # The route is drawn last so it sits on top of the matching ranges.
    PolyLine(line, weight=5, opacity=0.5).add_to(html_map)

    return fit_bounding_box(html_map, bb_list)

In [None]:
# Render the overlapping signal ranges (red) together with the route.
map_matching_links(gr)

In [None]:
# map_matching_links(g, route)

In [None]:
def jaccard_similarity(set0, set1):
    """Return the Jaccard similarity ``|A ∩ B| / |A ∪ B|`` of two sets.

    Args:
        set0: First set.
        set1: Second set.

    Returns:
        A float in ``[0, 1]``. When both sets are empty the ratio is undefined;
        ``0.0`` is returned so an empty query never ranks as a match.
    """
    union_size = len(set0 | set1)
    if union_size == 0:
        # Both sets are empty: avoid ZeroDivisionError.
        return 0.0
    return len(set0 & set1) / union_size

In [None]:
def calculate_trajectory_matches(g, route, level=20):
    """Score every trajectory matching the route by quadkey Jaccard similarity.

    NOTE(review): ``get_route_quadkeys`` and ``load_trajectory_quadkeys`` are
    not defined or imported in the visible part of this notebook - confirm
    they are in scope before running this cell.

    Args:
        g, route, level: Forwarded to ``get_route_matching_trajectories`` and
            ``get_route_quadkeys``.

    Returns:
        A list of ``(trajectory, similarity)`` tuples, one per matching
        trajectory.
    """
    trajectories, _ = get_route_matching_trajectories(g, route, level)

    # Only the first element of each route entry (the quadkey) is compared.
    route_qks = {entry[0] for entry in get_route_quadkeys(g, route, level)}

    return [
        (traj_id, jaccard_similarity(load_trajectory_quadkeys(int(traj_id)), route_qks))
        for traj_id in trajectories
    ]

In [10]:
# Collect the (traj_id, similarity) pairs computed by the GraphRoute into a DataFrame.
match_df = pd.DataFrame(data=gr.calculate_trajectory_matches(), columns=['traj_id', 'similarity'])

In [11]:
# Rank the similarities as percentiles (pct=True); this adds a column to match_df in place.
match_df["percent_rank"] = match_df["similarity"].rank(pct=True)

In [12]:
# Show the best-ranked trajectories first.
match_df.sort_values("percent_rank", ascending=False)

Unnamed: 0,traj_id,similarity,percent_rank
1032,15501,0.372807,1.000000
737,10596,0.252427,0.999619
1061,15806,0.244565,0.999239
1223,18550,0.223529,0.998858
747,10616,0.219512,0.998478
...,...,...,...
529,7013,0.002252,0.001903
647,9069,0.002217,0.001522
764,10673,0.002208,0.001142
415,5402,0.002008,0.000761


In [13]:
# Keep only the trajectories whose percentile rank exceeds 0.95.
match_df[match_df["percent_rank"] > 0.95].sort_values("percent_rank", ascending=False)

Unnamed: 0,traj_id,similarity,percent_rank
1032,15501,0.372807,1.000000
737,10596,0.252427,0.999619
1061,15806,0.244565,0.999239
1223,18550,0.223529,0.998858
747,10616,0.219512,0.998478
...,...,...,...
2218,28444,0.123675,0.951674
2258,28525,0.123636,0.951294
2308,28672,0.123457,0.950913
2317,28701,0.123239,0.950533


In [None]:
def get_top_match_trajectories_r(g, route, top=0.05):
    """Return the ids of the trajectories that best match the route.

    Args:
        g, route: Forwarded to ``calculate_trajectory_matches``.
        top: Fraction of the ranking to keep; trajectories whose percentile
            rank is strictly greater than ``1.0 - top`` are returned.

    Returns:
        NumPy array with the selected ``traj_id`` values.
    """
    scores = pd.DataFrame(data=calculate_trajectory_matches(g, route), columns=['traj_id', 'similarity'])
    scores["percent_rank"] = scores["similarity"].rank(pct=True)

    cutoff = 1.0 - top
    return scores.loc[scores["percent_rank"] > cutoff, "traj_id"].values

In [19]:
def map_top_matching_trajectories_r(graph_route, top=0.05):
    """Map the best-matching trajectories for a route, with the route on top.

    Args:
        graph_route: Object exposing ``get_top_match_trajectories(top=...)``
            and ``get_route_nodes()`` (nodes are mappings with ``'y'``/``'x'``
            keys).
        top: Fraction of best-ranked trajectories to draw. The previous code
            ignored this argument and always requested ``0.05``.

    Returns:
        The folium map, fitted to the bounding box of everything drawn.
    """
    html_map = folium.Map(prefer_canvas=True, tiles="cartodbpositron", max_zoom=20, control_scale=True)

    bb_list = []

    # Honour the caller's `top` instead of a hard-coded fraction.
    trajectories = graph_route.get_top_match_trajectories(top=top)
    for traj_id in trajectories:
        line = load_trajectory_points(int(traj_id))
        # Trajectories that resolve to no points are skipped.
        if len(line) > 0:
            bb_list.extend(line)
            PolyLine(line, weight=3, color="red", opacity=0.5).add_to(html_map)

    line = [(loc['y'], loc['x']) for loc in graph_route.get_route_nodes()]
    bb_list.extend(line)

    # The route is drawn last so it sits on top of the matches.
    PolyLine(line, weight=5, opacity=0.5).add_to(html_map)

    return fit_bounding_box(html_map, bb_list)

In [20]:
# Render the best-matching trajectories (red) together with the route, using the default `top`.
map_top_matching_trajectories_r(gr)

## 06.03-Querying Using an Existing Trajectory

In this section we will perform the same query, but using a known trajectory from the database instead of an arbitrary route.

In [None]:
def load_matching_links(traj_id, angle_delta=2.5):
    """Query the links that share a quadkey and a similar bearing with a trajectory.

    The inner query lists the (quadkey, bearing) pairs of the links belonging
    to ``traj_id``; the outer query joins every link on the same quadkey and
    keeps those where both bearings are positive and the cosine of the bearing
    difference is at least ``cos(angle_delta)``.

    Args:
        traj_id: Identifier of the query trajectory (bound to the first ``?``).
        angle_delta: Bearing tolerance in degrees (bound to the second ``?``).

    Returns:
        The result of ``EVedDb.query_df`` with columns ``link_id``,
        ``quadkey`` and ``traj_id``.
    """
    query = """
    select     q.link_id
    ,          q.quadkey
    ,          l.traj_id
    from       link_qk q
    inner join link l on l.link_id = q.link_id
    inner join (
        select     q.quadkey
        ,          l.bearing
        from       link_qk q
        inner join link l on l.link_id = q.link_id
        where      l.traj_id = ?
    ) x on x.quadkey = q.quadkey
    where l.bearing > 0 and x.bearing > 0 and cos(radians(x.bearing - l.bearing)) >= cos(radians(?));
    """
    params = [traj_id, angle_delta]
    return EVedDb().query_df(query, params)

In [None]:
def get_top_match_trajectories_t(traj_id, top=0.05):
    """Return the ids of the trajectories that best match an existing trajectory.

    Every trajectory returned by ``load_matching_links`` is scored with the
    Jaccard similarity between its matched quadkeys and those of ``traj_id``,
    then ranked by percentile.

    Args:
        traj_id: Identifier of the query trajectory.
        top: Fraction of the ranking to keep; trajectories whose percentile
            rank is strictly greater than ``1.0 - top`` are returned.

    Returns:
        NumPy array with the selected ``traj_id`` values.
    """
    df = load_matching_links(traj_id)

    # Build each trajectory's quadkey set in one grouped pass instead of
    # re-scanning the whole frame with a boolean mask per trajectory.
    # groupby sorts its keys, matching the order np.unique used to give.
    quadkey_sets = df.groupby("traj_id")["quadkey"].apply(set)

    # The query trajectory may have no rows; its set is then empty.
    query_set = quadkey_sets.get(traj_id, set())

    traj_df = pd.DataFrame(data=quadkey_sets.index.values, columns=["traj_id"])
    traj_df["similarity"] = [jaccard_similarity(query_set, qks) for qks in quadkey_sets]
    traj_df["percent_rank"] = traj_df["similarity"].rank(pct=True)

    filtered_df = traj_df[traj_df["percent_rank"] > (1.0 - top)]
    return filtered_df["traj_id"].values

In [None]:
def get_matching_links_t(traj_id):
    """Return the sorted unique link ids that match the given trajectory.

    Args:
        traj_id: Identifier of the query trajectory, forwarded to
            ``load_matching_links``.

    Returns:
        NumPy array of unique ``link_id`` values.
    """
    matches = load_matching_links(traj_id)
    return np.unique(matches["link_id"].values)

In [None]:
def map_top_matching_trajectories_t(traj_id, top=0.05):
    """Map the trajectories that best match an existing trajectory.

    Args:
        traj_id: Identifier of the query trajectory.
        top: Fraction of best-ranked trajectories to draw, forwarded to
            ``GraphTrajectory.get_top_matching_trajectories``.

    Returns:
        The folium map, fitted to the bounding box of the matches and the
        query trajectory.
    """
    html_map = folium.Map(prefer_canvas=True, tiles="cartodbpositron", max_zoom=20, control_scale=True)

    bb_list = []
    gt = GraphTrajectory(int(traj_id))

    trajectories = gt.get_top_matching_trajectories(top)
    for trajectory in trajectories:
        # The query trajectory is drawn separately below, in a different style.
        if trajectory != traj_id:
            line = load_trajectory_points(int(trajectory))
            if len(line) > 0:
                bb_list.extend(line)
                PolyLine(line, weight=3, color="red", opacity=0.5, popup=str(trajectory)).add_to(html_map)

    line = load_trajectory_points(int(traj_id))
    # Include the query trajectory in the bounds: otherwise it can fall
    # outside the view, and the fit fails when nothing else matched.
    bb_list.extend(line)
    PolyLine(line, weight=5, opacity=0.5).add_to(html_map)

    return fit_bounding_box(html_map, bb_list)

In [None]:
# Map the top 1% of trajectories matching trajectory 4.
map_top_matching_trajectories_t(traj_id=4, top=0.01)

In [None]:
def map_matching_links_t(traj_id):
    """Map the individual links that match an existing trajectory.

    The query trajectory is drawn first as a wide line; every matching link
    is then drawn over it in red with its id as popup. The number of matching
    links is printed.

    Args:
        traj_id: Identifier of the query trajectory.

    Returns:
        The folium map, fitted to the bounding box of everything drawn.
    """
    html_map = folium.Map(prefer_canvas=True, tiles="cartodbpositron", max_zoom=20, control_scale=True)
    graph_traj = GraphTrajectory(int(traj_id))

    query_line = load_trajectory_points(int(traj_id))
    PolyLine(query_line, weight=12, opacity=0.5).add_to(html_map)

    # Start the bounds with the query trajectory's own points.
    bounds_points = list(query_line)

    link_ids = graph_traj.get_matching_links()
    print(len(link_ids))
    for link_id in link_ids:
        link_line = load_link_points(int(link_id))
        # Links that resolve to no points are skipped.
        if len(link_line) == 0:
            continue
        bounds_points.extend(link_line)
        PolyLine(link_line, weight=3, color="red", opacity=0.5, popup=str(link_id)).add_to(html_map)

    return fit_bounding_box(html_map, bounds_points)

In [None]:
# Map the links matching trajectory 4.
map_matching_links_t(traj_id=4)