In [0]:
# Pull every column of every row for one hard-coded user id from the
# NYC trajectory scratchpad table.
query = """
select * from main_prod.datascience_scratchpad.nyc_traj_all_ts where userid = '16808692'
"""

# `spark` and `display` are not defined anywhere in this notebook; presumably
# they are injected by the hosting notebook runtime — TODO confirm.
df = spark.sql(query)
display(df)

In [0]:
# Convert the query result to a pandas DataFrame; later cells read its
# `merc_seq` and `timestamps` columns.
df_test = df.toPandas()

In [0]:
!pip install folium
import folium

from folium import plugins

def display_traj(lon_lat_list, ts_list = None, save_path = "map_with_arrows.html"):
    """Draw a trajectory on a folium map with per-point markers and direction arrows.

    Args:
        lon_lat_list: Iterable of points given as ``[lon, lat, ...]``; only the
            first two entries of each point are used.
        ts_list: Optional sequence parallel to ``lon_lat_list``. When given,
            ``ts_list[k]`` becomes the popup of point ``k`` (for ``k >= 1``);
            otherwise the popup is the point's index. The first point always
            gets the fixed "Start Location" popup.
        save_path: Where to write the rendered HTML. Defaults to the file name
            this function has always written to (so repeated calls overwrite
            the same file); pass ``None`` to skip saving.

    Returns:
        The ``folium.Map`` so a notebook cell can display it inline.

    Raises:
        ValueError: If there are no points, or ``ts_list`` has fewer entries
            than there are points.
    """
    # folium expects [lat, lon]; the input is [lon, lat].
    coordinates = [[point[1], point[0]] for point in lon_lat_list]

    # Fail fast with a clear message instead of a bare IndexError further down.
    if not coordinates:
        raise ValueError("display_traj needs at least one point to draw")
    if ts_list is not None and len(ts_list) < len(coordinates):
        raise ValueError(
            "ts_list has %d entries but the trajectory has %d points"
            % (len(ts_list), len(coordinates))
        )

    # Initialize the map centred on the first point.
    m = folium.Map(location=coordinates[0], zoom_start=15)

    # The start point gets a distinct green marker.
    folium.Marker(
        location=coordinates[0],
        popup="Start Location",
        icon=folium.Icon(color='green')  # Color can be 'red', 'blue', 'green', 'purple', etc.
    ).add_to(m)

    # Every remaining point gets a default marker labelled with its timestamp
    # (when available) or its position in the trajectory.
    for position, (lat, lon) in enumerate(coordinates[1:], start=1):
        label = ts_list[position] if ts_list is not None else position
        folium.Marker([lat, lon], popup=label).add_to(m)

    # One polyline per consecutive pair, so each segment carries its own arrows.
    for start, end in zip(coordinates, coordinates[1:]):
        line = folium.PolyLine([start, end], color="blue", weight=3, opacity=0.7).add_to(m)

        # Repeat an arrow glyph along the segment to show travel direction.
        plugins.PolyLineTextPath(
            line,
            '➤',  # arrow symbol
            repeat=True,
            offset=7,
            attributes={'fill': 'blue', 'font-weight': 'bold', 'font-size': '16'}
        ).add_to(m)

    # Persist the map (optional) and hand it back for inline display.
    if save_path is not None:
        m.save(save_path)
    return m


In [0]:
import math
def meters2lonlat(merc_points):
    """Convert spherical-Mercator metre coordinates to ``[lon, lat]`` in degrees.

    Args:
        merc_points: Iterable of points whose first two entries are the
            Mercator ``x`` (easting) and ``y`` (northing) in metres, on a
            sphere of radius 6378137 m.

    Returns:
        A list of ``[lon, lat]`` pairs in degrees, in input order.
    """
    earth_radius = 6378137.0  # sphere radius used by the projection, in metres

    lon_lats = []
    for point in merc_points:
        x, y = point[0], point[1]
        lon = math.degrees(x / earth_radius)
        # With t = exp(2y / R), (t - 1) / (t + 1) equals tanh(y / R). Using tanh
        # directly gives the same latitude but cannot overflow for very large
        # |y|, whereas math.exp raises OverflowError there.
        lat = math.degrees(math.asin(math.tanh(y / earth_radius)))
        lon_lats.append([lon, lat])
    return lon_lats

In [0]:
# !pip install torch
import sys
sys.path.append("..")
from utils.traj import simplify, mask, shift, subset
import numpy as np

In [0]:
# Pick one row of the collected frame and unpack its trajectory and timestamps.
idx = 0
traj, time_indices = df_test['merc_seq'].values[idx], df_test['timestamps'].values[idx]
# Stack the per-point entries into one array, then convert to plain nested lists.
traj = np.stack(traj).tolist()

In [0]:
# Convert each timestamp through its own `.to_numpy()` method (presumably
# pandas Timestamp -> numpy datetime64 — TODO confirm the element type).
# NOTE(review): this rebinds `time_indices`, so re-running the cell applies
# `.to_numpy()` to the already-converted values — verify that is safe.
time_indices = [x.to_numpy() for x in time_indices]
# time_indices

In [0]:
# import numpy as np
# def mask_2(src, time_indices=None):
#     l = len(src)
#     arr = np.array(src)
#     # print(len(src), len(time_indices))
#     if time_indices is not None:
#         time_arr = np.array(time_indices)
#         mask_idx_1 = np.random.choice(l, int(l * 0.7), replace=False)
#         mask_idx = []
#         for i in mask_idx_1:
#             if i<len(time_indices)-1 and int((time_indices[i+1] - time_indices[i]))/1e9/60 > 60:
#                 mask_idx.append(i)
#         mask_idx = np.array(mask_idx)
#         return np.delete(arr, mask_idx, 0).tolist(), np.delete(time_arr, mask_idx, 0).tolist()
#     else:
#         mask_idx = np.random.choice(l, int(l * 0.7), replace=False)
#         return np.delete(arr, mask_idx, 0).tolist()

In [0]:
import random
def truncated_rand(mu = 0, sigma = 1, factor = 1000, bound_lo = -500, bound_hi = 500):
    """Draw ``random.gauss(mu, sigma) * factor``, rejecting values outside the bounds.

    Samples are redrawn until one lands in ``[bound_lo, bound_hi]`` (inclusive).
    With the default parameters a draw is accepted when |Z| <= 0.5 for a
    standard normal Z, i.e. roughly 38% of the time, so about 2.6 draws are
    needed per returned value on average.
    gauss visualization: https://www.desmos.com/calculator/jxzs8fz9qr?lang=zh-CN

    Args:
        mu: Mean passed to ``random.gauss``.
        sigma: Standard deviation passed to ``random.gauss``.
        factor: Multiplier applied to the raw Gaussian sample.
        bound_lo: Smallest accepted value (inclusive).
        bound_hi: Largest accepted value (inclusive).

    Returns:
        The first scaled sample that falls inside the bounds.

    Raises:
        ValueError: If ``bound_lo > bound_hi``; no sample could ever be
            accepted and the rejection loop would never terminate.
    """
    if bound_lo > bound_hi:
        raise ValueError(
            "bound_lo (%r) must not exceed bound_hi (%r)" % (bound_lo, bound_hi)
        )

    # Rejection sampling: keep drawing until a sample lands inside the window.
    # Note that sigma == 0 with mu * factor outside the window still cannot
    # terminate; callers must supply a reachable window.
    while True:
        n = random.gauss(mu, sigma) * factor
        if bound_lo <= n <= bound_hi:
            return n

def shift_2(src, time_indices=None):
    """Add independent ``truncated_rand()`` noise to both coordinates of every point.

    Args:
        src: Iterable of points; the first two entries of each are used.
        time_indices: Optional timestamps. They are passed through untouched
            (the same object is returned), only switching the return shape.

    Returns:
        The list of jittered ``[x, y]`` points when ``time_indices`` is None,
        otherwise the tuple ``(jittered_points, time_indices)``.
    """
    jittered = []
    for point in src:
        # The x offset is drawn before the y offset, one pair per point.
        noisy_x = point[0] + truncated_rand()
        noisy_y = point[1] + truncated_rand()
        jittered.append([noisy_x, noisy_y])

    if time_indices is None:
        return jittered
    return jittered, time_indices

In [0]:
def mask_2(src, time_indices=None):
    """Randomly drop points from a trajectory, optionally guided by time gaps.

    Half of the positions (``int(len(src) * 0.5)``) are sampled without
    replacement as drop candidates.

    * Without ``time_indices`` every candidate is dropped.
    * With ``time_indices`` a candidate ``i`` is dropped only if it is not the
      final timestamp position and the gap to the next timestamp,
      ``int(t[i+1] - t[i]) / 1e9 / 60``, is below 60 (i.e. under an hour if
      the differences are in nanoseconds — TODO confirm the unit).

    Returns:
        The surviving points as nested lists, or ``(points, times)`` when
        ``time_indices`` is given. Both are produced via ``ndarray.tolist()``,
        which yields built-in Python scalars.
        NOTE(review): for datetime64 inputs that conversion may not return
        datetime64 values — verify what callers expect.
    """
    n_points = len(src)
    points = np.array(src)
    n_candidates = int(n_points * 0.5)

    if time_indices is None:
        drop = np.random.choice(n_points, n_candidates, replace=False)
        return np.delete(points, drop, 0).tolist()

    times = np.array(time_indices)
    candidates = np.random.choice(n_points, n_candidates, replace=False)
    last_pos = len(time_indices) - 1
    # Keep only the candidates that have a successor close enough in time.
    drop = [
        i for i in candidates
        if i < last_pos and int((time_indices[i + 1] - time_indices[i])) / 1e9 / 60 < 60
    ]
    kept_points = np.delete(points, drop, 0).tolist()
    kept_times = np.delete(times, drop, 0).tolist()
    return kept_points, kept_times

In [0]:
def simplify_by_time(src, time_indices):
    """Thin a trajectory so that kept points are more than 10 time units apart.

    The first point is always kept. Each later point is kept only when
    ``int(t - last_kept_t) / 1e9 / 60`` is strictly greater than 10 (i.e. more
    than ten minutes if the differences are in nanoseconds — TODO confirm the
    unit). The gap is measured from the last *kept* point, not the previous one.

    Args:
        src: Indexable sequence of points.
        time_indices: Indexable sequence of timestamps parallel to ``src``.

    Returns:
        ``(kept_points, kept_times)`` as two new lists.
    """
    kept_points, kept_times = [], []
    for pos in range(len(src)):
        if kept_times:
            gap = int((time_indices[pos] - kept_times[-1])) / 1e9 / 60
            if not (gap > 10):
                # Too close to the previously kept point: skip it.
                continue
        kept_points.append(src[pos])
        kept_times.append(time_indices[pos])
    return kept_points, kept_times


In [0]:
# Apply four augmentations to the same original trajectory, one result pair each.
# `simplify` comes from utils.traj; the other three are the notebook-local
# functions defined in the cells above.
traj1, time_indices1 = simplify(traj, time_indices)
traj2, time_indices2 = mask_2(traj, time_indices)
traj3, time_indices3 = shift_2(traj, time_indices)
traj4, time_indices4 = simplify_by_time(traj, time_indices)

In [0]:
# Baseline map: the original trajectory, with timestamps as marker popups.
display_traj(meters2lonlat(traj), time_indices)

In [0]:
# Map of the `simplify` (utils.traj) output.
display_traj(meters2lonlat(traj1), time_indices1)

In [0]:
# Map of the `mask_2` output (randomly dropped points).
display_traj(meters2lonlat(traj2), time_indices2)

In [0]:
# Map of the `shift_2` output (per-point random jitter).
display_traj(meters2lonlat(traj3), time_indices3)

In [0]:
# Map of the `simplify_by_time` output (time-based thinning).
display_traj(meters2lonlat(traj4), time_indices4)

In [0]:
from utils.traj import reverse, large_time_shift

# Two more augmentations imported from utils.traj; their implementations are
# not part of this notebook.
traj5, time_indices5 = reverse(traj, time_indices)
traj6, time_indices6 = large_time_shift(traj, time_indices)

In [0]:
# Original trajectory again, as a reference for the two maps that follow.
display_traj(meters2lonlat(traj), time_indices)

In [0]:
# Map of the `reverse` (utils.traj) output.
display_traj(meters2lonlat(traj5), time_indices5)

In [0]:
# Map of the `large_time_shift` (utils.traj) output.
display_traj(meters2lonlat(traj6), time_indices6)

In [0]:
import random
def translate(traj, time_indices, min_distance=2000, max_distance=4000):
    """Rigidly shift a whole trajectory by one random offset.

    A single heading and a single distance are drawn, and the resulting
    ``(dx, dy)`` is added to every point, so the trajectory's shape is
    preserved.

    Args:
        traj: Iterable of ``(x, y)`` pairs (each point must unpack into exactly
            two values), in the same units as the distance bounds.
        time_indices: Timestamps; returned unchanged (same object).
        min_distance: Smallest shift length, inclusive. Must be an int because
            it is passed to ``random.randint``.
        max_distance: Largest shift length, inclusive. Must be an int.

    Returns:
        ``(shifted_coords, time_indices)`` where ``shifted_coords`` is a list
        of ``(x, y)`` tuples.
    """
    # Step 1: pick a random heading in [0, 2*pi).
    theta = random.uniform(0, 2 * np.pi)

    # Step 2: pick the shift length (2000-4000 by default) and split it
    # into x/y components along that heading.
    distance = random.randint(min_distance, max_distance)
    dx = distance * np.cos(theta)
    dy = distance * np.sin(theta)

    # Step 3: apply the same offset to all coordinates.
    shifted_coords = [(x + dx, y + dy) for x, y in traj]
    return shifted_coords, time_indices


In [0]:
# NOTE(review): this rebinds `traj6` / `time_indices6`, replacing the
# `large_time_shift` result assigned to the same names in an earlier cell.
traj6, time_indices6 = translate(traj, time_indices)
display_traj(meters2lonlat(traj6), time_indices6)