# Trajectory Reconstruction

The robustness of the proposed approach against adversarial attacks is evaluated by simulating an attacker who uses the same model to reconstruct the real trajectories from the published synthetic trajectories. The quality of this reconstruction is quantified by the Reconstruction Error (RE).

More details can be found in the research paper.


##### **Imports**

In [1]:
import os
# oneDNN warning suppression TF 2.4.1
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import copy

import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import tilemapbase
import statistics
import math

from typing import List, Tuple

import keras
import tensorflow as tf
from keras import Sequential

from utils.data import *
from utils.plots import *
from utils.metrics import *
from models import *
from apu_trajgen import *

import warnings
warnings.filterwarnings('ignore', category=UserWarning)

## **Experiments (Porto dataset)**

In [4]:
selected_dataset = "PORTO"
# Mean distances (MDE) obtained with a fixed k, for k=1, k=2, k=3, and k=4.
# They are computed in the trajgen-fixed-k.py script (for 5000 trajectories).
# These values are used to determine the adaptive k values based on the
# privacy and utility constraints.
mde_k = [76, 257, 3585, 7185] # one entry per k, in the order k=1, k=2, k=3, k=4

#### **Load Model and Test Data** 

In [5]:
# Common path prefix shared by every pickled test artefact of the selected dataset
dataset_prefix = DATA_FOLDER + selected_dataset.lower()

# Load the test data: input trajectories, target trajectories and the
# sequence length of each input trajectory
X_test = load_pickle(dataset_prefix + "_X_test.pkl")
Y_test = load_pickle(dataset_prefix + "_Y_test.pkl")
test_seq_len = load_pickle(dataset_prefix + "_seq_len_test.pkl")

# Load the ranges used to normalize the data and keep only the first two
# entries of "min" and "max" (presumably the two coordinate columns - TODO confirm)
raw_ranges = load_pickle(dataset_prefix + "_normalization_ranges_test.pkl")
normalization_ranges = {bound: raw_ranges[bound][0:2] for bound in ("min", "max")}

# Load the trained model
mdl = load_pickle(MODEL_FOLDER + "mdlgru-" + selected_dataset.lower() + ".pkl")

# Stateful copy of the model working on one time step at a time
# (sequence length 1, batch size 1)
single_step_config = dict(
    GRU_cells=LSTM_CELLS,
    seq_len=1,
    num_feat=NUM_FEATS,
    batch_size=1,
    stateful=True,
    return_seq=RETURN_SEQ,
    num_outputs=NUM_OUTPUTS,
    LR=LR,
    SEED=SEED,
    ragged=False,
)
model_sl = create_GRU_model(**single_step_config)

# Transfer the weights of the loaded model to the single-step model
model_sl.set_weights(mdl.get_weights())

#### **Reconstruct trajectories** 

In [3]:
# Reconstruct the trajectories from the predictions.
#
# For every (privacy, utility) constraint pair, the published trajectories
# (Y_pred_k) are loaded, converted to the model input format and fed to the
# same generation procedure (apu_trajgen_adaptive_k). The result is saved as
# "<dataset>_Y_reconstructed_adaptive_k_mean_min<..>_mean_max<..>_ntrajs_<..>.pkl".

selected_dataset = "PORTO"

mean_min = [80, 170, 250, 350]  # privacy constraint
mean_max = [155, 350, 450, 550]  # utility constraint

# Mean distances for k=1, k=2, k=3, and k=4 (entry i belongs to k = i + 1)
k_means = [76, 257, 3585, 7185]

n_trajs = 10

dataset_prefix = DATA_FOLDER + selected_dataset.lower()

# The normalization ranges do not depend on the constraint pair, so the file
# is read once; every iteration still gets its own copy (see below).
raw_ranges = load_pickle(dataset_prefix + "_normalization_ranges_test.pkl")

for mmin, mmax in zip(mean_min, mean_max):

    print(f"Processing trajectories with mean_min = {mmin} and mean_max = {mmax}")

    # Keep only the first two entries of the ranges. They are deep-copied so
    # that an in-place change made by a helper cannot leak into the next pair.
    normalization_ranges = {
        "min": copy.deepcopy(raw_ranges["min"][0:2]),
        "max": copy.deepcopy(raw_ranges["max"][0:2]),
    }

    # Smallest k whose mean distance exceeds the privacy constraint.
    # Without the explicit check, a constraint above every entry of k_means
    # would either raise a NameError (first pair) or silently reuse the k_min
    # of the previous pair.
    k_min = next((pos + 1 for pos, k_mean in enumerate(k_means) if k_mean > mmin), None)
    if k_min is None:
        raise ValueError(
            f"No entry of k_means {k_means} exceeds mean_min = {mmin}; cannot determine k_min"
        )
    k_max = k_min + round((mmax - mmin) / k_means[0]) + 1

    # File name part shared by the published and the reconstructed trajectories
    run_suffix = f"_mean_min{mmin}_mean_max{mmax}_ntrajs_{n_trajs}.pkl"

    Y_pred_k = load_pickle(dataset_prefix + "_Y_pred_adaptive_k" + run_suffix)

    Y_pred_k = add_speed_column(Y_pred_k, time_diff_seconds=15)  # The sampling rate of 15 seconds

    # One DataFrame per trajectory
    data = [pd.DataFrame(arr, columns=COLUMNS_INPUT) for arr in Y_pred_k]

    # Only the third column is min-max normalized here
    # (presumably the speed column added above - TODO confirm)
    data = min_max_normalize_third_column(data)

    num_of_traj = len(Y_pred_k)
    # Create X and Y from the data
    X, Y = create_X_Y_from_data(data, num_of_traj, k=1)

    # Bring the published trajectories into the same format as the test data
    # (training_size = 0: everything is used as test data)
    X_pred_k, Y_pred_k, test_traj_seq_lengths = test_data_preparation(
        TRAINING_TESTING_SAME_FILE=TRAINING_TESTING_SAME_FILE,
        X=copy.deepcopy(X),
        Y=copy.deepcopy(Y),
        num_of_traj=num_of_traj,
        training_size=0,
        SEQ_LEN=SEQ_LEN,
        NUM_FEATS=NUM_FEATS,
        TESTING_FILE=None,
        data=data,
    )

    # Run the generation procedure on the published trajectories with the
    # single-step model and the same constraints
    Y_reconstructed_k = apu_trajgen_adaptive_k(
        mdl=model_sl,
        X_t=copy.deepcopy(X_pred_k),
        test_traj_seq_lengths=test_traj_seq_lengths,
        SEQ_LENGTH=1,
        NUM_FEATS=NUM_FEATS,
        su_funct=compute_su_score1,
        su_funct_args={"mean_min": mmin, "mean_max": mmax},
        normalization_ranges=normalization_ranges,
        k_min=k_min,
        k_max=k_max,
        save_results=False,
    )

    save_pickle(Y_reconstructed_k, dataset_prefix + "_Y_reconstructed_adaptive_k" + run_suffix)

Processing trajectories with mean_min = 80 and mean_max = 155
Processing trajectories with mean_min = 170 and mean_max = 350
Processing trajectories with mean_min = 250 and mean_max = 450
Processing trajectories with mean_min = 350 and mean_max = 550


#### **Plot trajectories** 

In [7]:
# Index of the trajectory to visualize
traj_idx = 5

n_trajs = 10

mean_min = [80, 170, 250, 350]  # privacy constraint
mean_max = [155, 350, 450, 550]  # utility constraint

dataset_prefix = DATA_FOLDER + selected_dataset.lower()
separator = "##################################"

# Load the real trajectories and their sequence lengths
Y_test = load_pickle(dataset_prefix + f"_Y_test_adaptive_k_ntrajs_{n_trajs}.pkl")
test_seq_len = load_pickle(dataset_prefix + f"_test_seq_len_adaptive_k_ntrajs_{n_trajs}.pkl")

for mmin, mmax in zip(mean_min, mean_max):

    # Ranges used to normalize the data, restricted to their first two entries
    raw_ranges = load_pickle(dataset_prefix + "_normalization_ranges_test.pkl")
    normalization_ranges = {bound: raw_ranges[bound][0:2] for bound in ("min", "max")}

    # Denormalize a copy of the real trajectories so that Y_test stays untouched
    Y_test_dn = denormalize_data(dataset=copy.deepcopy(Y_test), normalization_ranges=normalization_ranges)

    # File name parts shared by the artefacts of this constraint pair
    run_tag = f"_mean_min{mmin}_mean_max{mmax}"
    run_suffix = f"{run_tag}_ntrajs_{n_trajs}.pkl"

    # Published (protected) trajectories, denormalized
    Y_pred_k = load_pickle(dataset_prefix + "_Y_pred_adaptive_k" + run_suffix)
    Y_pred_k_dn = denormalize_data(dataset=Y_pred_k, normalization_ranges=normalization_ranges)

    # Reconstructed trajectories: the first element of the saved result, denormalized
    Y_reconstructed_k = load_pickle(dataset_prefix + "_Y_reconstructed_adaptive_k" + run_suffix)[0]
    Y_reconstructed_k_dn = denormalize_data(dataset=Y_reconstructed_k, normalization_ranges=normalization_ranges)

    real_traj = Y_test_dn[traj_idx]
    n_points = test_seq_len[traj_idx]

    # NOTE(review): the real trajectory is sliced from index 0 for the protected
    # trajectory but from index 1 for the reconstructed one (and for the plot) -
    # presumably the reconstruction is one step shorter; confirm the alignment.
    dists_k = compute_point_to_point_haversine_distances(real_traj[0:n_points], Y_pred_k_dn[traj_idx])

    dists_k_reconstructed = compute_point_to_point_haversine_distances(real_traj[1:n_points],
                                                                       Y_reconstructed_k_dn[traj_idx])

    # NOTE(review): plot_trajectory_map_attack is not defined in this notebook, so it
    # must come from one of the star imports; the recorded run of this cell ended with
    # a NameError for it - confirm that it is defined and exported.
    plot_trajectory_map_attack(
        actual_norm=real_traj[1:n_points],
        predicted_norm=Y_pred_k_dn[traj_idx][1:n_points],
        attack_norm=Y_reconstructed_k_dn[traj_idx],
        dists_p=dists_k,
        dists_a=dists_k_reconstructed,
        dist_min=mmin,
        dist_max=mmax,
        aspect_ratio=2.2,
        adaptive=True,
        traj_id=traj_idx,
        scatter=True,
        savePath=dataset_prefix + "_reconstructed" + run_tag + f"_trajid_{traj_idx}",
    )

    # Textual summary for this constraint pair
    print("Point-to-point distance between the real and protected trajectory:")
    print(dists_k)
    print(separator)
    print("Targeted mean: ")
    print(f"Mean min: {mmin} / Mean max: {mmax}")
    print("Mean distance between the real and protected trajectory:")
    print(np.mean(dists_k))
    print("Mean distance between the real and reconstructed trajectory:")
    print(np.mean(dists_k_reconstructed))
    print(separator)

NameError: name 'plot_trajectory_map_attack' is not defined