In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import keras
import joblib
import utm
import tensorflow as tf
import matplotlib.pyplot as plt
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [2]:
### Enter Sequence Length below that should be used for Validation check

In [3]:
total_seq_len = 33
input_len = 11

In [5]:
#unseen validation data

data = pd.read_csv("data/traj_data_100_33_test.csv")
data.describe()

Unnamed: 0.1,Unnamed: 0,Vehicle_ID,X,Y,X_REL,Y_REL
count,4828.0,4828.0,4828.0,4828.0,4828.0,4828.0
mean,2413.5,18778.873239,151.19388,-33.889892,-8.520204e-07,7e-06
std,1393.86788,9274.585355,0.000503,0.000934,1.876288e-05,2e-05
min,0.0,0.0,151.192849,-33.891898,-5.7886e-05,-7e-05
25%,1206.75,8200.0,151.193588,-33.890393,-1.093717e-05,0.0
50%,2413.5,22650.0,151.194002,-33.88975,3.194999e-08,4e-06
75%,3620.25,26200.0,151.194192,-33.888994,8.72995e-06,1.7e-05
max,4827.0,29900.0,151.194606,-33.888825,0.0001194359,7.8e-05


In [6]:
#global scaler
#scaler = joblib.load("scaler_300len_100seq.save")
#scaler = joblib.load("scaler_100_seq.save")

In [7]:
"""
Preprocess data, removed torch tensors return type, now np arrays
"""
def preprocess_dataset(data, max = 10, max_len = 40, input_len = 30):
    global scaler
    scaler = joblib.load("scaler_100_33_seq.save")
    
    #scaler = MinMaxScaler(feature_range=(0, max))
    #data[['X_REL', 'Y_REL']] = scaler.fit_transform(data[['X_REL', 'Y_REL']])
    #data[['X_REL', 'Y_REL']] = scaler.fit_transform(data[['X_REL', 'Y_REL']])
    data[['X_REL', 'Y_REL']] = scaler.transform(data[['X_REL', 'Y_REL']])
    unique_peds = data['Vehicle_ID'].unique()
    unique_peds = sorted(unique_peds)
    inputs = []
    outputs = []
    indexes = []
    for ped in unique_peds:
        if (len(data[data['Vehicle_ID'] == ped]) >= max_len):
          seq_inner = []
          indexes_inner = []
          i = 0
          for indx, row in data[data['Vehicle_ID'] == ped].iterrows():
            x = round(row['X_REL'])
            y = round(row['Y_REL'])
            ## Cantor pairing function:
            bin = y * max + x
            i += 1
            if i == max_len:
              break
            seq_inner.append(int(bin))
            indexes_inner.append(ped)
          inputs.append([seq_inner[0:input_len]])
          outputs.append([seq_inner[input_len + 1:]])
          indexes.append([indexes_inner])
    train_inputs, test_inputs, train_targets, test_targets, train_indx, test_indx = train_test_split(inputs, outputs, indexes, train_size=0.1, random_state=0)
    return np.array(train_inputs), np.array(test_inputs), np.array(train_targets), np.array(test_targets), train_indx, test_indx

In [8]:
train_data_inputs, test_data_inputs, train_data_targets, test_data_targets, train_indx, test_indx = preprocess_dataset(data, max = 30, max_len = total_seq_len, input_len = input_len)



In [9]:
train_data_inputs[1]

array([[464, 465, 465, 465, 465, 465, 465, 466, 466, 466, 466]])

In [10]:
"""Reshaping data for input into LSTM"""
train_data_inputs = train_data_inputs.reshape(train_data_inputs.shape[0], train_data_inputs.shape[2], 1)
train_data_targets = train_data_targets.reshape(train_data_targets.shape[0], train_data_targets.shape[2], 1)

test_data_inputs = test_data_inputs.reshape(test_data_inputs.shape[0], test_data_inputs.shape[2], 1)
test_data_targets = test_data_targets.reshape(test_data_targets.shape[0], test_data_targets.shape[2], 1)

In [13]:
"""Load Pretrained LSTM Model for correct sequence length"""
model = keras.models.load_model('models/New_LSTM_33seq.h5')


In [14]:
target_len = total_seq_len - input_len - 2
def get_predictions(model, data_source_inputs, data_source_targets):

    predictions = []
    ground_truths = []
    inputs = []
    for i in range(0, data_source_inputs.shape[0], 1):
        data = data_source_inputs[i,:,:]
        res = model.predict(data_source_inputs[i:i+1])
        ground_truths.append(data_source_targets[i].squeeze().tolist())
        predictions.append((tf.argmax(res[0,:,:], axis=1)).numpy().tolist())
        inputs.append(data.squeeze().tolist())
            
    return inputs, ground_truths, predictions

In [15]:
inputs_list, target_list, preds_list = get_predictions(model, test_data_inputs, test_data_targets)

In [16]:
def get_real_coords(target_list, preds_list, inputs_list, test_indx):
    """
    Get the real word coordinates of trajectories starting from bins
    All input parameteres must be lists of lists and be of the same length
    Returns a dataframe with vehicle ID and all of its real word coordinates in Lat/Long
    """
    x_cords_real = []
    y_cords_real = []
    x_cords_pred = []
    y_cords_pred = []
    vehicle_ids = []

    for ind in range(len(test_indx)):

      for inp in test_indx[ind]:
        for inp_nr in inp[:-1]:
          vehicle_ids.append(inp_nr)

      for inp in inputs_list[ind]:
        x = inp % 30
        y = inp / 30 
        x_cords_real.append(x)
        y_cords_real.append(y)

        x_cords_pred.append(x)
        y_cords_pred.append(y)
        
      for inp in target_list[ind]:
        x = inp % 30
        y = inp / 30 
        x_cords_real.append(x)
        y_cords_real.append(y)

      for inp in preds_list[ind]:
        x = inp % 30
        y = inp / 30 
        x_cords_pred.append(x)
        y_cords_pred.append(y)

    dict_pd = {'Vehicle_ID': vehicle_ids, 'X_REAL': x_cords_real, 'Y_REAL': y_cords_real, 'X_PRED': x_cords_pred, 'Y_PRED': y_cords_pred} 
    output_df = pd.DataFrame(dict_pd)
    output_df[['X_REAL', 'Y_REAL']] = scaler.inverse_transform(output_df[['X_REAL', 'Y_REAL']])
    output_df[['X_PRED', 'Y_PRED']] = scaler.inverse_transform(output_df[['X_PRED', 'Y_PRED']])
    
    unique_vehicle_ids = output_df['Vehicle_ID'].unique()
    for id in unique_vehicle_ids:
      subset = data.loc[data['Vehicle_ID'] == id, ['X', 'Y']].head(1)
      x_origin, y_origin = subset.X.item(), subset.Y.item()
      output_df.loc[output_df['Vehicle_ID'] == id, ['X_REAL', 'X_PRED']] = output_df.loc[output_df['Vehicle_ID'] == id, ['X_REAL', 'X_PRED']] + x_origin
      output_df.loc[output_df['Vehicle_ID'] == id, ['Y_REAL', 'Y_PRED']] = output_df.loc[output_df['Vehicle_ID'] == id, ['Y_REAL', 'Y_PRED']] + y_origin
    return output_df

def calc_meter_dist(xt, yt, xp, yp):
    """
    Converts real_world Latitude Longitude coordinates into UTM coordinates
    to get a difference in meters between prediction and target values
    """
    tar_utm = utm.from_latlon(yt, xt)
    pred_utm = utm.from_latlon(yp, xp)

    dist_x = tar_utm[0] - pred_utm[0]
    dist_y = tar_utm[1] - pred_utm[1]
    
    return np.sqrt((dist_x ** 2) + (dist_y ** 2))

def calculate_performance_metrics(df):
    """
    Calculates Average Displacement Error (ADE) and Final Displacement Error (FDE)
    for a given data frame containing the features:
        X_REAL, Y_REAL, X_PRED, Y_PRED
    """
    unique_vehicle_ids = df['Vehicle_ID'].unique()
    res = []
    for id in unique_vehicle_ids:
        res.append(df[df['Vehicle_ID'] == id][input_len:])
    res_df = pd.concat(res)
        
    res_df['DIST'] = res_df.apply(lambda row: calc_meter_dist(row['X_REAL'],row['Y_REAL'],row['X_PRED'],row['Y_PRED']),axis=1) 
    ADE = res_df['DIST'].mean()
    
    FDE = 0
    counter = 0
    for id in unique_vehicle_ids:
        x = res_df[res_df['Vehicle_ID'] == id]
        FDE += x.tail(1).DIST.values
        counter += 1
    FDE = float(FDE/counter)
        
    print("Average Displacement Error (meters): ", ADE)
    print("Average Final Displacement Error: (meters)", FDE)
    return df, res_df

def calculate_bin_acc(target_list, preds_list):
  acc = 0.
  nr_of_perfect_preds = 0
  for ind in range(len(target_list)):
    tmp_acc = sum(1 for x,y in zip(target_list[ind],preds_list[ind]) if x == y) / len(preds_list[0])
    acc += tmp_acc
    if (tmp_acc == 1):
      nr_of_perfect_preds += 1
  print("Accuracy: ", acc / len(target_list))
  print("% of perfect predictions: ", nr_of_perfect_preds / len(target_list))

In [17]:
coords = get_real_coords(target_list, preds_list, inputs_list, test_indx)
result_df, dist_df = calculate_performance_metrics(coords)
calculate_bin_acc(target_list, preds_list)

Average Displacement Error (meters):  0.6456477321198278
Average Final Displacement Error: (meters) 0.925336542950845
Accuracy:  0.3867187500000001
% of perfect predictions:  0.0234375


In [24]:
result_df[result_df['Vehicle_ID'] == 9200]

Unnamed: 0,Vehicle_ID,X_REAL,Y_REAL,X_PRED,Y_PRED
0,9200,151.194101,-33.890129,151.194101,-33.890129
1,9200,151.194101,-33.890129,151.194101,-33.890129
2,9200,151.194101,-33.890129,151.194101,-33.890129
3,9200,151.194101,-33.890129,151.194101,-33.890129
4,9200,151.194101,-33.89012,151.194101,-33.89012
5,9200,151.19411,-33.89012,151.19411,-33.89012
6,9200,151.19411,-33.89012,151.19411,-33.89012
7,9200,151.19411,-33.89012,151.19411,-33.89012
8,9200,151.19411,-33.89012,151.19411,-33.89012
9,9200,151.19411,-33.89012,151.19411,-33.89012


In [None]:
dist_df
a = dist_df[dist_df['Vehicle_ID'].isin(interesting_trajs)] 

In [None]:
a

In [None]:
#interesting_trajs = [11800, 98100, 273800, 110600, 134300, 245700, 160400, 232500, 71600, 39900, 57800, 105900, 243900, 92400, 145000, 1000, 241900, 204700]
interesting_trajs = [257700, 211800, 148800, 71700, 4200]
rslt_df = result_df[result_df['Vehicle_ID'].isin(interesting_trajs)] 

In [None]:
rslt_df

In [18]:
result_df.to_csv("lstm_33_preds.csv")

In [None]:
def vis_trajectory_scatter(input_list, target_list, preds_list, lag_time = 1.0):

  for i in range(1, 100, 1):
    #print("ID: ", test_indx[i][0])
    plt.figure(figsize=(6,6))
    inputs = input_list[i]
    targets = target_list[i]
    preds = preds_list[i]

    xy_inputs = []
    xy_preds = []
    xy_target = []

    for inp in inputs:
      x = inp % 30
      y = inp / 30 
      xy_inputs.append([x, y])
    
    for inp in preds:
      x = inp % 30
      y = inp / 30 
      xy_preds.append([x, y])

    for inp in targets:
      x = inp % 30
      y = inp / 30 
      xy_target.append([x, y])

    x_inp, y_inp = zip(*xy_inputs)
    x_tar, y_tar = zip(*xy_target)
    x_pred, y_pred = zip(*xy_preds)

    plt.scatter(x_inp,y_inp)
    plt.scatter(x_tar,y_tar, color='black')
    plt.scatter(x_pred,y_pred, color='r')

    plt.xlim(0, 30)
    plt.ylim(0, 30)

    plt.show()

In [None]:
vis_trajectory_scatter(inputs_list, target_list, preds_list)

In [None]:
#interesting? 18, 34, 106, 110, 264, 848, 998 #number 59, 56 for 300seq,