In [2]:
import argparse
import gc
import logging
import os
import sys
import time
from torch.utils.data import DataLoader
import logging
import math
import numpy as np
import torch
from torch.utils.data import Dataset
import random
from collections import defaultdict
import torch
import inspect
from contextlib import contextmanager
import subprocess

FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)

if torch.cuda.is_available():
  device = torch.device("cuda")

In [26]:
# DATASET OPTIONS
OBS_LEN = 8
PRED_LEN = 12

SINGLE_TRAIN_DATASET_PATH = "/content/drive/My Drive/Datasets/extrapolation_test"
SINGLE_VAL_DATASET_PATH = "/content/drive/My Drive/Datasets/zara1/val"
SINGLE_TEST_DATASET_PATH = "/content/drive/My Drive/Datasets/extrapolation_test"
CHECKPOINT_NAME = "/content/drive/My Drive/Datasets/Weights/ExtrapolationTest/Mid_test_reproduce.pt"

# NUMBER OF CONDITION FLAG - activate any one of the following flags
SINGLE_CONDITIONAL_MODEL = True  # For single condition
MULTI_CONDITIONAL_MODEL = False  # For multi condition
MODEL_TYPE = 0  # 0 for prediction and 1 for simulation

# MAX SPEEDS FOR ARGOVERSE AND ETH/UCY DATASETS
# for argoverse
AV_MAX_SPEED = 1.6
OTHER_MAX_SPEED = 2.2
AGENT_MAX_SPEED = 2

# for eth/ucy
ETH_MAX_SPEED = 2.0  #2.3
HOTEL_MAX_SPEED = 2.0
UNIV_MAX_SPEED = 2.0
ZARA1_MAX_SPEED = 2.0
ZARA2_MAX_SPEED = 2.0

# PYTORCH DATA LOADER OPTIONS
NUM_WORKERS = 2
BATCH_MULTI_CONDITION = 32
BATCH_SINGLE_CONDITION = 16
BATCH_NORM = False
ACTIVATION_RELU = 'relu'
ACTIVATION_LEAKYRELU = 'leakyrelu'
ACTIVATION_SIGMOID = 'sigmoid'

# Time between consecutive frames
FRAMES_PER_SECOND_SINGLE_CONDITION = 0.4
FRAMES_PER_SECOND_MULTI_CONDITION = 0.1
NORMALIZATION_FACTOR = 10

# ENCODER DECODER HIDDEN DIMENSION OPTIONS FOR SINGLE AND MULTI CONDITION
H_DIM_GENERATOR_MULTI_CONDITION = 32
H_DIM_DISCRIMINATOR_MULTI_CONDITION = 64

H_DIM_GENERATOR_SINGLE_CONDITION = 32
H_DIM_DISCRIMINATOR_SINGLE_CONDITION = 64

MLP_INPUT_DIM_MULTI_CONDITION = 4
MLP_INPUT_DIM_SINGLE_CONDITION = 3

# HYPER PARAMETERS OPTIONS
G_LEARNING_RATE, D_LEARNING_RATE = 1e-3, 1e-3
NUM_LAYERS = 1
DROPOUT = 0
NUM_EPOCHS_MULTI_CONDITION = 50
NUM_EPOCHS_SINGLE_CONDITION = 25
CHECKPOINT_EVERY = 100
MLP_DIM = 64
EMBEDDING_DIM = 16
BOTTLENECK_DIM = 32
NOISE_DIM = (8, )

L2_LOSS_WEIGHT = 1

NUM_ITERATIONS = 3200
POOLING_TYPE = False
AGGREGATION_TYPE = True
ATTENTION_TYPE = False
MAX_CONSIDERED_PED = 5
USE_GPU = 1

# SPEED CONTROL FLAGS
TEST_METRIC = 1  # 0 for ground_truth speed. To simulate trajectories, change the flag to 1. This flag is used during testing and inference phase.
TRAIN_METRIC = 0  # Used for training the model with the ground truth
VERIFY_OUTPUT_SPEED = 1

# ADD_SPEED_EVERY_FRAME, STOP_PED, CONSTANT_SPEED_FOR_ALL_PED, ADD_SPEED_PARTICULAR_FRAME - Only one flag out of the 4 can be activated at once.

# Below flag is set to true if multi condition model on argoverse dataset is set to true.
DIFFERENT_SPEED_MULTI_CONDITION = True
AV_SPEED = 0.4
AGENT_SPEED = 0.9
OTHER_SPEED = 0.0

CONSTANT_SPEED_MULTI_CONDITION = False  # CONSTANT_SPEED flag for multi condition
CS_MULTI_CONDITION = 0.4  # Constant speed multi condition

# Below flag is set to true if single condition model on eth/ucy dataset is set to true.

# Change any one of the below flag to True
STOP_PED_SINGLE_CONDITION = False  # Speed 0 will be imposed if the flag is set to True

CONSTANT_SPEED_SINGLE_CONDITION = True
CS_SINGLE_CONDITION = 0.55  # Constant speed single condition

ANIMATED_VISUALIZATION_CHECK = 0

G_STEPS = 1
D_STEPS = 2
SR_STEPS = 1
BEST_K = 20
PRINT_EVERY = 100
NUM_SAMPLES = 20
NOISE = True
NUM_SAMPLE_CHECK = 100

In [4]:
import torch
import random
import os


def bce_loss(input, target):
    neg_abs = -input.abs()
    loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
    return loss.mean()


def gan_g_loss(scores_fake):
    y_fake = torch.ones_like(scores_fake) * random.uniform(0.7, 1.2)
    return bce_loss(scores_fake, y_fake)


def gan_d_loss(scores_real, scores_fake):
    y_real = torch.ones_like(scores_real) * random.uniform(0.7, 1.2)
    y_fake = torch.zeros_like(scores_fake) * random.uniform(0, 0.3)
    loss_real = bce_loss(scores_real, y_real)
    loss_fake = bce_loss(scores_fake, y_fake)
    return loss_real + loss_fake


def l2_loss(pred_traj, pred_traj_gt, loss_mask, random=0, mode='average', speed_reg=None):
    seq_len, batch, _ = pred_traj.size()
    if speed_reg != None:
        loss = (pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2)) ** 2
    else:
        loss = (loss_mask.unsqueeze(dim=2) *
            (pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2)) ** 2)
    if mode == 'sum':
        return torch.sum(loss)
    elif mode == 'average':
        return torch.sum(loss) / torch.numel(loss_mask.data)
    elif mode == 'raw':
        return loss.sum(dim=2).sum(dim=1)


def mae_loss(pred_traj, pred_traj_gt, random=0, mode='average', speed_reg=None):
    seq_len, batch, _ = pred_traj.size()
    loss = torch.abs(pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2))
    if mode == 'sum':
        return torch.sum(loss)
    elif mode == 'raw':
        return loss.sum(dim=2).sum(dim=1)


def displacement_error(pred_traj, pred_traj_gt, consider_ped=None, mode='sum'):
    seq_len, _, _ = pred_traj.size()
    loss = pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2)
    loss = loss ** 2
    if consider_ped is not None:
        loss = torch.sqrt(loss.sum(dim=2)).sum(dim=1) * consider_ped
    else:
        loss = torch.sqrt(loss.sum(dim=2)).sum(dim=1)
    if mode == 'sum':
        return torch.sum(loss)
    elif mode == 'raw':
        return loss


def final_displacement_error(
        pred_pos, pred_pos_gt, consider_ped=None, mode='sum'
):
    loss = pred_pos_gt - pred_pos
    loss = loss ** 2
    if consider_ped is not None:
        loss = torch.sqrt(loss.sum(dim=1)) * consider_ped
    else:
        loss = torch.sqrt(loss.sum(dim=1))
    if mode == 'raw':
        return loss
    else:
        return torch.sum(loss)


def mean_speed_error(real_speed, fake_speed):
    # Mean speed loss over all timesteps - Used only for feedback and not for training the model
    speed_loss = torch.abs(real_speed - fake_speed)
    add_loss = torch.sum(speed_loss, dim=1)
    add_loss_1 = torch.sum(add_loss)
    return add_loss_1


def final_speed_error(real_speed, fake_speed):
    # Final traj speed loss - Used only for feedback and not for training the model
    speed_loss = torch.abs(real_speed - fake_speed)
    add_loss_1 = torch.sum(speed_loss)
    return add_loss_1


def relative_to_abs(rel_traj, start_pos):
    rel_traj = rel_traj.permute(1, 0, 2)
    displacement = torch.cumsum(rel_traj, dim=1)
    start_pos = torch.unsqueeze(start_pos, dim=1)
    abs_traj = displacement + start_pos
    return abs_traj.permute(1, 0, 2)


def get_dataset_name(path):
    dataset_name = os.path.basename(os.path.dirname(path))
    return dataset_name


In [5]:
# PREPROCESSING

In [27]:
import os
import math

import numpy as np
from torch.utils.data import DataLoader

import torch
from torch.utils.data import Dataset


def data_loader(path, metric, train_or_val):
    dset = TrajectoryDataset(
        path,
        metric, train_or_val)

    if MULTI_CONDITIONAL_MODEL:
        loader = DataLoader(dset, batch_size=BATCH_MULTI_CONDITION, shuffle=True, num_workers=NUM_WORKERS, collate_fn=seq_collate)
    else:
        loader = DataLoader(dset, batch_size=BATCH_SINGLE_CONDITION, shuffle=True, num_workers=NUM_WORKERS, collate_fn=seq_collate)
    return dset, loader


def seq_collate(data):
    if MULTI_CONDITIONAL_MODEL:
        (obs_seq_list, pred_seq_list, obs_seq_rel_list, pred_seq_rel_list, loss_mask_list, obs_obj_abs_speed,
        pred_obj_abs_speed, obs_label, pred_label, obs_obj_rel_speed) = zip(*data)
    else:
        (obs_seq_list, pred_seq_list, obs_seq_rel_list, pred_seq_rel_list, loss_mask_list, obs_obj_abs_speed,
         pred_obj_abs_speed, obs_obj_rel_speed) = zip(*data)

    _len = [len(seq) for seq in obs_seq_list]
    cum_start_idx = [0] + np.cumsum(_len).tolist()
    seq_start_end = [[start, end]
                     for start, end in zip(cum_start_idx, cum_start_idx[1:])]
    obs_traj = torch.cat(obs_seq_list, dim=0).permute(2, 0, 1)
    pred_traj = torch.cat(pred_seq_list, dim=0).permute(2, 0, 1)
    obs_traj_rel = torch.cat(obs_seq_rel_list, dim=0).permute(2, 0, 1)
    pred_traj_rel = torch.cat(pred_seq_rel_list, dim=0).permute(2, 0, 1)
    obs_obj_abs_speed = torch.cat(obs_obj_abs_speed, dim=0).permute(2, 0, 1)
    pred_obj_abs_speed = torch.cat(pred_obj_abs_speed, dim=0).permute(2, 0, 1)
    seq_start_end = torch.LongTensor(seq_start_end)
    loss_mask = torch.cat(loss_mask_list, dim=0)
    obs_obj_rel_speed = torch.cat(obs_obj_rel_speed, dim=0).permute(2, 0, 1)

    if MULTI_CONDITIONAL_MODEL:
        obs_label = torch.cat(obs_label, dim=0).permute(2, 0, 1)
        pred_label = torch.cat(pred_label, dim=0).permute(2, 0, 1)

    if MULTI_CONDITIONAL_MODEL:
        out = [
            obs_traj, pred_traj, obs_traj_rel, pred_traj_rel, loss_mask, seq_start_end, obs_obj_abs_speed,
            pred_obj_abs_speed, obs_label, pred_label, obs_obj_rel_speed
        ]
    else:
        out = [
            obs_traj, pred_traj, obs_traj_rel, pred_traj_rel, loss_mask, seq_start_end, obs_obj_abs_speed,
            pred_obj_abs_speed, obs_obj_rel_speed
        ]

    return tuple(out)


def sigmoid(x):
    return 1 / (1 + math.exp(-x))


def read_file(_path):
    data = []
    i = 0
    if MULTI_CONDITIONAL_MODEL:
        with open(_path, 'r') as f:
            for line in f:
                if i == 0:
                    i += 1
                    continue
                line = line.strip().split(',')
                line = [i for i in line]
                data.append(line)
    else:
        with open(_path, 'r') as f:
            for line in f:
                line = line.strip().split('\t')
                line = [float(i) for i in line]
                data.append(line)
    return np.asarray(data)


def get_min_max_distance(seq_len, all_files):
    ped_speed = []
    for path in all_files:
        data = read_file(path)
        frames = np.unique(data[:, 0]).tolist()
        frame_data = []
        for frame in frames:
            frame_data.append(data[frame == data[:, 0], :5])
        num_sequences = int(math.ceil((len(frames) - seq_len + 1)))

        for idx in range(0, num_sequences):
            curr_seq_data = np.concatenate(frame_data[idx:idx + seq_len], axis=0)
            obj_in_curr_seq = np.unique(curr_seq_data[:, 1])
            for _, obj_id in enumerate(obj_in_curr_seq):
                curr_obj_seq = curr_seq_data[curr_seq_data[:, 1] == obj_id, :]
                pad_front = frames.index(curr_obj_seq[0, 0]) - idx
                pad_end = frames.index(curr_obj_seq[-1, 0]) - idx + 1
                label = curr_obj_seq[0, 2]
                if pad_end - pad_front != seq_len:
                    continue
                curr_obj_x_axis_new = [0.0] + [np.square(t - s) for s, t in
                                               zip(curr_obj_seq[:, 2], curr_obj_seq[1:, 2])]
                curr_obj_y_axis_new = [0.0] + [np.square(t - s) for s, t in
                                               zip(curr_obj_seq[:, 3], curr_obj_seq[1:, 3])]

                curr_obj_dist = np.sqrt(np.add(curr_obj_x_axis_new, curr_obj_y_axis_new))
                curr_obj_speed = curr_obj_dist / 0.4
                ped_speed.append(np.max(curr_obj_speed))
                ped_speed.append(np.min(curr_obj_speed))
    ped_speed = np.array(ped_speed).reshape(-1, 1)
    # Find the domain-wise max and min speed to normalize the speed values
    max_ped_speed = np.amax(ped_speed)
    unique, counts = np.unique(ped_speed, return_counts=True)
    a = np.asarray((unique, counts)).T
    for b in a:
        print(b)
    return max_ped_speed



class TrajectoryDataset(Dataset):
    """Dataloder for the Trajectory datasets"""

    def __init__(
            self, data_dir, metric=0, train_or_val = None
    ):
        super(TrajectoryDataset, self).__init__()

        self.data_dir = data_dir
        SEQ_LEN = OBS_LEN + PRED_LEN
        self.train_or_test = metric
        self.train_or_val = train_or_val

        all_files = os.listdir(self.data_dir)
        all_files = [os.path.join(self.data_dir, _path) for _path in all_files]
        #max_ped_speed = get_min_max_distance(SEQ_LEN, all_files)
        num_obj_in_seq = []
        seq_list = []
        seq_list_rel = []
        obj_abs_speed = []
        obj_rel_speed = []
        obj_label = []
        loss_mask_list = []
        for path in all_files:
            data = read_file(path)
            frames = np.unique(data[:, 0]).tolist()
            frame_data = []
            for frame in frames:
                frame_data.append(data[frame == data[:, 0], :])
            num_sequences = int(math.ceil((len(frames) - SEQ_LEN + 1)))

            for idx in range(0, num_sequences + 1):
                curr_seq_data = np.concatenate(
                    frame_data[idx:idx + SEQ_LEN], axis=0)

                obj_in_curr_seq = np.unique(curr_seq_data[:, 1])
                curr_loss_mask = np.zeros((len(obj_in_curr_seq), SEQ_LEN))
                curr_seq_rel = np.zeros((len(obj_in_curr_seq), 2, SEQ_LEN))
                curr_seq_rel_speed = np.zeros((len(obj_in_curr_seq), SEQ_LEN))
                curr_seq = np.zeros((len(obj_in_curr_seq), 2, SEQ_LEN))
                _curr_obj_abs_speed = np.zeros((len(obj_in_curr_seq), SEQ_LEN))
                _curr_obj_label = np.zeros((len(obj_in_curr_seq), SEQ_LEN))
                num_obj_considered = 0

                for _, obj_id in enumerate(obj_in_curr_seq):
                    curr_obj_seq = curr_seq_data[curr_seq_data[:, 1] == obj_id, :]
                    if MULTI_CONDITIONAL_MODEL:
                        label = curr_obj_seq[0, 2]
                    pad_front = frames.index(curr_obj_seq[0, 0]) - idx
                    pad_end = frames.index(curr_obj_seq[-1, 0]) - idx + 1
                    if pad_end - pad_front != SEQ_LEN:
                        continue
                    if MULTI_CONDITIONAL_MODEL:
                        if len(curr_obj_seq[:, 0]) != SEQ_LEN:
                            continue
                    if MULTI_CONDITIONAL_MODEL:
                        curr_obj_x_axis_new = [0.0] + [np.square(float(t) - float(s)) for s, t in
                                                   zip(curr_obj_seq[:, 3], curr_obj_seq[1:, 3])]
                        curr_obj_y_axis_new = [0.0] + [np.square(float(t) - float(s)) for s, t in
                                                   zip(curr_obj_seq[:, 4], curr_obj_seq[1:, 4])]
                    else:
                        curr_obj_x_axis_new = [np.square(t - s) for s, t in
                                                       zip(curr_obj_seq[:, 2], curr_obj_seq[1:, 2])]
                        curr_obj_y_axis_new = [np.square(t - s) for s, t in
                                                       zip(curr_obj_seq[:, 3], curr_obj_seq[1:, 3])]

                    curr_obj_dist = np.sqrt(np.add(curr_obj_x_axis_new, curr_obj_y_axis_new))
                    curr_obj_dist = np.insert(curr_obj_dist, 0, curr_obj_dist[0])
                    # As 50 records are available, we need to divide by 0.1 and we multiply by 10 as a normalization factor.
                    # For faster computing, we skip that step and directly pass through sigmoid layer
                    if SINGLE_CONDITIONAL_MODEL:
                        curr_obj_abs_speed = curr_obj_dist / FRAMES_PER_SECOND_SINGLE_CONDITION
                    else:
                        curr_obj_abs_speed = curr_obj_dist / (FRAMES_PER_SECOND_MULTI_CONDITION * NORMALIZATION_FACTOR)
                    #if any(0.66 <= i <= 1.32 for i in curr_obj_abs_speed):  #DURING TRAINING PHASE
                    if any(0.66 > i > 1.32 for i in curr_obj_abs_speed):  # DURING TESTING PHASE
                        continue


                    curr_obj_abs_speed = [sigmoid(x) for x in curr_obj_abs_speed]
                    curr_obj_abs_speed = np.around(curr_obj_abs_speed, decimals=4)
                    curr_obj_abs_speed = np.transpose(curr_obj_abs_speed)
                    _idx = num_obj_considered

                    if MULTI_CONDITIONAL_MODEL:
                        if label == 'AV':
                            embedding_label = 0.1
                        elif label == 'OTHERS':
                            embedding_label = 0.2
                        elif label == 'AGENT':
                            embedding_label = 0.3
                        curr_obj_seq = np.transpose(curr_obj_seq[:, 3:5])
                        _curr_obj_label[_idx, pad_front:pad_end] = embedding_label
                    else:
                        curr_obj_seq = np.transpose(curr_obj_seq[:, 2:])
                    curr_obj_seq = curr_obj_seq.astype(float)
                    curr_obj_seq = np.around(curr_obj_seq, decimals=4)

                    rel_curr_obj_seq = np.zeros(curr_obj_seq.shape)
                    rel_curr_obj_seq[:, 1:] = curr_obj_seq[:, 1:] - curr_obj_seq[:, :-1]
                    curr_seq[_idx, :, pad_front:pad_end] = curr_obj_seq
                    curr_seq_rel[_idx, :, pad_front:pad_end] = rel_curr_obj_seq

                    rel_curr_obj_speed = np.zeros(curr_obj_abs_speed.shape)
                    rel_curr_obj_speed[1:] = curr_obj_abs_speed[1:] - curr_obj_abs_speed[:-1]

                    curr_loss_mask[_idx, pad_front:pad_end] = 1
                    _curr_obj_abs_speed[_idx, pad_front:pad_end] = curr_obj_abs_speed
                    curr_seq_rel_speed[_idx, pad_front:pad_end] = rel_curr_obj_speed
                    num_obj_considered += 1

                if num_obj_considered > 1:
                    num_obj_in_seq.append(num_obj_considered)
                    loss_mask_list.append(curr_loss_mask[:num_obj_considered])
                    obj_abs_speed.append(_curr_obj_abs_speed[:num_obj_considered])
                    if MULTI_CONDITIONAL_MODEL:
                        obj_label.append(_curr_obj_label[:num_obj_considered])
                    seq_list.append(curr_seq[:num_obj_considered])
                    seq_list_rel.append(curr_seq_rel[:num_obj_considered])
                    obj_rel_speed.append(curr_seq_rel_speed[:num_obj_considered])

        self.num_seq = len(seq_list)
        seq_list = np.concatenate(seq_list, axis=0)
        seq_list_rel = np.concatenate(seq_list_rel, axis=0)
        obj_abs_speed = np.concatenate(obj_abs_speed, axis=0)
        obj_rel_speed = np.concatenate(obj_rel_speed, axis=0)
        loss_mask_list = np.concatenate(loss_mask_list, axis=0)
        obj_abs_speed = torch.from_numpy(obj_abs_speed).type(torch.float)
        obj_rel_speed = torch.from_numpy(obj_rel_speed).type(torch.float)
        if MULTI_CONDITIONAL_MODEL:
            obj_label = np.concatenate(obj_label, axis=0)

        # Convert numpy -> Torch Tensor
        self.obs_traj = torch.from_numpy(
            seq_list[:, :, :OBS_LEN]).type(torch.float)
        self.pred_traj = torch.from_numpy(
            seq_list[:, :, OBS_LEN:]).type(torch.float)
        self.obs_traj_rel = torch.from_numpy(
            seq_list_rel[:, :, :OBS_LEN]).type(torch.float)
        self.pred_traj_rel = torch.from_numpy(
            seq_list_rel[:, :, OBS_LEN:]).type(torch.float)

        self.obs_obj_abs_speed = obj_abs_speed[:, :OBS_LEN].unsqueeze(dim=1).type(torch.float)
        self.pred_obj_abs_speed = obj_abs_speed[:, OBS_LEN:].unsqueeze(dim=1).type(torch.float)

        self.obs_obj_rel_speed = obj_rel_speed[:, :OBS_LEN].unsqueeze(dim=1).type(torch.float)
        self.loss_mask = torch.from_numpy(loss_mask_list).type(torch.float)

        if MULTI_CONDITIONAL_MODEL:
            self.obs_obj_label = torch.from_numpy(obj_label[:, :OBS_LEN]).unsqueeze(dim=1).type(torch.float)
            self.pred_obj_label = torch.from_numpy(obj_label[:, OBS_LEN:]).unsqueeze(dim=1).type(torch.float)

        cum_start_idx = [0] + np.cumsum(num_obj_in_seq).tolist()
        self.seq_start_end = [
            (start, end)
            for start, end in zip(cum_start_idx, cum_start_idx[1:])
        ]

    def __len__(self):
        return self.num_seq

    def __getitem__(self, index):
        start, end = self.seq_start_end[index]
        if MULTI_CONDITIONAL_MODEL:
            out = [
                self.obs_traj[start:end, :], self.pred_traj[start:end, :],
                self.obs_traj_rel[start:end, :], self.pred_traj_rel[start:end, :],
                self.loss_mask[start:end, :], self.obs_obj_abs_speed[start:end, :],
                self.pred_obj_abs_speed[start:end, :], self.obs_obj_label[start:end, :],
                self.pred_obj_label[start:end, :], self.obs_obj_rel_speed[start:end, :]
            ]
        else:
            out = [
                self.obs_traj[start:end, :], self.pred_traj[start:end, :],
                self.obs_traj_rel[start:end, :], self.pred_traj_rel[start:end, :],
                self.loss_mask[start:end, :], self.obs_obj_abs_speed[start:end, :],
                self.pred_obj_abs_speed[start:end, :], self.obs_obj_rel_speed[start:end, :]
            ]
        return out

In [7]:
# MODEL ARCHITECTURE

In [8]:
import torch
import torch.nn as nn
import math
from scipy.spatial.distance import pdist, squareform
import numpy as np
import torch.nn.functional as F


def make_mlp(dim_list, activation='leakyrelu', batch_norm=True, dropout=0):
    layers = []
    for dim_in, dim_out in zip(dim_list[:-1], dim_list[1:]):
        layers.append(nn.Linear(dim_in, dim_out))
        if batch_norm:
            layers.append(nn.BatchNorm1d(dim_out))
        if activation == 'relu':
            layers.append(nn.ReLU())
        elif activation == 'leakyrelu':
            layers.append(nn.LeakyReLU())
        elif activation == 'sigmoid':
            layers.append(nn.Sigmoid())
        if dropout > 0:
            layers.append(nn.Dropout(p=dropout))
    return nn.Sequential(*layers)


class SpeedEncoderDecoder(nn.Module):
    def __init__(self, h_dim):
        super(SpeedEncoderDecoder, self).__init__()

        self.embedding_dim = EMBEDDING_DIM
        self.num_layers = NUM_LAYERS
        self.h_dim = h_dim

        self.speed_decoder = nn.LSTM(EMBEDDING_DIM, h_dim, NUM_LAYERS, dropout=DROPOUT)
        self.speed_mlp = nn.Linear(h_dim, 1)
        self.speed_embedding = nn.Linear(1, EMBEDDING_DIM)

    def init_hidden(self, batch):
        if USE_GPU:
            c_s, r_s = torch.zeros(self.num_layers, batch, self.h_dim).cuda(), torch.zeros(self.num_layers, batch, self.h_dim).cuda()
        else:
            c_s, r_s = torch.zeros(self.num_layers, batch, self.h_dim), torch.zeros(self.num_layers, batch, self.h_dim)
        return c_s, r_s

    def forward(self, obs_speed, final_enc_h, label=None):
        sig_layer = nn.Sigmoid()
        batch = obs_speed.size(1)
        pred_speed_fake = []
        final_enc_h = final_enc_h.view(-1, self.h_dim)
        next_speed = obs_speed[-1, :, :]
        decoder_input = self.speed_embedding(next_speed.view(-1, 1))
        decoder_input = decoder_input.view(1, batch, self.embedding_dim)

        decoder_h = final_enc_h.unsqueeze(dim=0)  # INITIALIZE THE DECODER HIDDEN STATE
        if USE_GPU:
            decoder_c = torch.zeros(self.num_layers, batch, self.h_dim).cuda()
        else:
            decoder_c = torch.zeros(self.num_layers, batch, self.h_dim)

        state_tuple = (decoder_h, decoder_c)  # INITIALIZE THE STATE TUPLES

        for id in range(PRED_LEN):
            output, state_tuple = self.speed_decoder(decoder_input, state_tuple)
            next_dec_speed = self.speed_mlp(output.view(-1, self.h_dim))
            next_speed = sig_layer(next_dec_speed.view(-1, 1))
            decoder_input = self.speed_embedding(next_speed.view(-1, 1))
            decoder_input = decoder_input.view(1, batch, self.embedding_dim)

            pred_speed_fake.append(next_speed.view(-1, 1))

        pred_speed_fake = torch.stack(pred_speed_fake, dim=0)
        return pred_speed_fake

class Encoder(nn.Module):
    def __init__(self, h_dim, mlp_input_dim):
        super(Encoder, self).__init__()

        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim
        self.embedding_dim = EMBEDDING_DIM
        self.num_layers = NUM_LAYERS
        self.mlp_input_dim = mlp_input_dim

        self.encoder = nn.LSTM(EMBEDDING_DIM, h_dim, NUM_LAYERS, dropout=DROPOUT)

        self.spatial_embedding = nn.Linear(mlp_input_dim, EMBEDDING_DIM)

    def init_hidden(self, batch):
        if USE_GPU:
            c_s, r_s = torch.zeros(self.num_layers, batch, self.h_dim).cuda(), torch.zeros(self.num_layers, batch, self.h_dim).cuda()
        else:
            c_s, r_s = torch.zeros(self.num_layers, batch, self.h_dim), torch.zeros(self.num_layers, batch, self.h_dim)
        return c_s, r_s

    def forward(self, obs_traj, obs_ped_speed, label=None):
        batch = obs_traj.size(1)
        if MULTI_CONDITIONAL_MODEL:
            embedding_input = torch.cat([obs_traj, obs_ped_speed, label], dim=2)
        else:
            embedding_input = torch.cat([obs_traj, obs_ped_speed], dim=2)
        traj_speed_embedding = self.spatial_embedding(embedding_input.contiguous().view(-1, self.mlp_input_dim))
        obs_traj_embedding = traj_speed_embedding.view(-1, batch, self.embedding_dim)
        state_tuple = self.init_hidden(batch)
        output, state = self.encoder(obs_traj_embedding, state_tuple)
        final_h = state[0]
        return final_h


def sigmoid(x):
    return 1 / (1 + math.exp(-x))


class Decoder(nn.Module):
    def __init__(self, h_dim, mlp_input_dim):
        super(Decoder, self).__init__()

        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim
        self.embedding_dim = EMBEDDING_DIM
        self.mlp_input_dim = mlp_input_dim

        self.decoder = nn.LSTM(EMBEDDING_DIM, h_dim, NUM_LAYERS, dropout=DROPOUT)

        self.spatial_embedding = nn.Linear(mlp_input_dim, EMBEDDING_DIM)
        self.hidden2pos = nn.Linear(h_dim, 2)

    def forward(self, last_pos, last_pos_rel, state_tuple, seq_start_end, pred_ped_speed, train_or_test, fake_ped_speed, label=None):
        batch = last_pos.size(0)
        pred_traj_fake_rel = []
        if train_or_test == 0:
            if MULTI_CONDITIONAL_MODEL:
                last_pos_speed = torch.cat([last_pos_rel, pred_ped_speed[0, :, :], label[0, :, :]], dim=1)
            else:
                last_pos_speed = torch.cat([last_pos_rel, pred_ped_speed[0, :, :]], dim=1)
        elif train_or_test == 1:  # USED FOR PREDICTION PURPOSE
            if MULTI_CONDITIONAL_MODEL:
                last_pos_speed = torch.cat([last_pos_rel, fake_ped_speed[0, :, :], label[0, :, :]], dim=1)
            else:
                last_pos_speed = torch.cat([last_pos_rel, fake_ped_speed[0, :, :]], dim=1)
        else:  # USED FOR SIMULATION PURPOSE
            if MULTI_CONDITIONAL_MODEL:
                next_speed = speed_control(pred_ped_speed[0, :, :], seq_start_end, label=label[0, :, :])
                last_pos_speed = torch.cat([last_pos_rel, next_speed, label[0, :, :]], dim=1)
            else:
                next_speed = speed_control(pred_ped_speed[0, :, :], seq_start_end)
                last_pos_speed = torch.cat([last_pos_rel, next_speed], dim=1)
        decoder_input = self.spatial_embedding(last_pos_speed)
        decoder_input = decoder_input.view(1, batch, self.embedding_dim)

        for id in range(PRED_LEN):
            output, state_tuple = self.decoder(decoder_input, state_tuple)
            rel_pos = self.hidden2pos(output.view(-1, self.h_dim))
            curr_pos = rel_pos + last_pos
            if id + 1 != PRED_LEN:
                if train_or_test == 0:
                    speed = pred_ped_speed[id + 1, :, :]
                    if MULTI_CONDITIONAL_MODEL:
                        curr_label = label[0, :, :]
                elif train_or_test == 1:
                    speed = fake_ped_speed[id + 1, :, :]
                    if MULTI_CONDITIONAL_MODEL:
                        curr_label = label[0, :, :]
                else:
                    if SINGLE_CONDITIONAL_MODEL:
                        speed = speed_control(pred_ped_speed[id, :, :], seq_start_end, id=id+1)
                    elif MULTI_CONDITIONAL_MODEL:
                        curr_label = label[0, :, :]
                        speed = speed_control(pred_ped_speed[0, :, :], seq_start_end, label=curr_label)
            if MULTI_CONDITIONAL_MODEL:
                decoder_input = torch.cat([rel_pos, speed, curr_label], dim=1)
            else:
                decoder_input = torch.cat([rel_pos, speed], dim=1)
            decoder_input = self.spatial_embedding(decoder_input)
            decoder_input = decoder_input.view(1, batch, self.embedding_dim)

            pred_traj_fake_rel.append(rel_pos.view(batch, -1))
            last_pos = curr_pos

        pred_traj_fake_rel = torch.stack(pred_traj_fake_rel, dim=0)
        return pred_traj_fake_rel


class PoolingModule(nn.Module):
    """Todo"""

    def __init__(self, h_dim, mlp_input_dim):
        super(PoolingModule, self).__init__()
        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim
        self.bottleneck_dim = BOTTLENECK_DIM
        self.embedding_dim = EMBEDDING_DIM
        self.mlp_input_dim = mlp_input_dim

        mlp_pre_dim = self.embedding_dim + self.h_dim
        mlp_pre_pool_dims = [mlp_pre_dim, 512, BOTTLENECK_DIM]

        self.pos_embedding = nn.Linear(2, EMBEDDING_DIM)
        self.mlp_pre_pool = make_mlp(mlp_pre_pool_dims, activation=ACTIVATION_RELU, batch_norm=BATCH_NORM, dropout=DROPOUT)

    def forward(self, h_states, seq_start_end, train_or_test, last_pos, label=None):
        pool_h = []
        for _, (start, end) in enumerate(seq_start_end):
            start = start.item()
            end = end.item()
            num_ped = end - start
            curr_hidden_ped = h_states.view(-1, self.h_dim)[start:end]
            repeat_hstate = curr_hidden_ped.repeat(num_ped, 1).view(num_ped, num_ped, -1)
            feature = last_pos[start:end]
            curr_end_pos_1 = feature.repeat(num_ped, 1)
            curr_end_pos_2 = feature.unsqueeze(dim=1).repeat(1, num_ped, 1).view(-1, 2)
            social_features = curr_end_pos_1[:, :2] - curr_end_pos_2[:, :2]
            position_feature_embedding = self.pos_embedding(social_features.contiguous().view(-1, 2))
            pos_mlp_input = torch.cat(
                [repeat_hstate.view(-1, self.h_dim), position_feature_embedding.view(-1, self.embedding_dim)], dim=1)
            pos_attn_h = self.mlp_pre_pool(pos_mlp_input)
            curr_pool_h = pos_attn_h.view(num_ped, num_ped, -1).max(1)[0]
            pool_h.append(curr_pool_h)
        pool_h = torch.cat(pool_h, dim=0)
        return pool_h


class AggregationModule(nn.Module):

    def __init__(self, h_dim, mlp_input_dim):
        super(AggregationModule, self).__init__()
        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim
        self.bottleneck_dim = BOTTLENECK_DIM
        self.embedding_dim = EMBEDDING_DIM
        self.mlp_input_dim = mlp_input_dim

        mlp_pre_dim = self.h_dim * MAX_CONSIDERED_PED
        mlp_pre_pool_dims = [mlp_pre_dim, 512, BOTTLENECK_DIM]

        self.pos_embedding = nn.Linear(2, EMBEDDING_DIM)
        self.mlp_pre_pool = make_mlp(mlp_pre_pool_dims, activation=ACTIVATION_RELU, batch_norm=BATCH_NORM, dropout=DROPOUT)

    def forward(self, h_states, seq_start_end, train_or_test, last_pos, label=None):
        pool_h = []
        for _, (start, end) in enumerate(seq_start_end):
            start = start.item()
            end = end.item()
            num_ped = end - start
            curr_hidden_ped = h_states.view(-1, self.h_dim)[start:end]

            feature = last_pos[start:end].cpu().data.numpy()
            dist = squareform(pdist(feature, metric="euclidean"))
            idx = np.argsort(dist)
            req_h_states = []
            for ids in idx:
                req_ids = torch.from_numpy(ids).type(torch.cuda.FloatTensor).view(num_ped, 1)
                new_h_states = torch.cat([curr_hidden_ped, req_ids], dim=1)
                sorted = new_h_states[new_h_states[:, -1].sort()[1]]
                required_h_states = sorted[:, :-1].contiguous().view(1, -1)
                if num_ped >= MAX_CONSIDERED_PED:
                    req_h_states.append(required_h_states[:, :(MAX_CONSIDERED_PED*self.h_dim)])
                else:
                    h_state_zeros = torch.zeros(1, self.h_dim * MAX_CONSIDERED_PED).cuda()
                    h_state_zeros[:, :self.h_dim * num_ped] = required_h_states
                    req_h_states.append(h_state_zeros)
            aggregated_h_states = torch.cat(req_h_states, dim=0)
            agg_h = self.mlp_pre_pool(aggregated_h_states)
            pool_h.append(agg_h)
        pool_h = torch.cat(pool_h, dim=0)
        return pool_h


class AttentionModule(nn.Module):

    def __init__(self, h_dim, mlp_input_dim):
        super(AttentionModule, self).__init__()
        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim
        self.bottleneck_dim = BOTTLENECK_DIM
        self.embedding_dim = EMBEDDING_DIM
        self.mlp_input_dim = mlp_input_dim

        mlp_pre_dim = self.h_dim + self.embedding_dim
        mlp_pre_pool_dims = [mlp_pre_dim, 512, BOTTLENECK_DIM]
        self.attn = nn.Linear(MAX_CONSIDERED_PED*BOTTLENECK_DIM, MAX_CONSIDERED_PED)

        self.pos_embedding = nn.Linear(2, EMBEDDING_DIM)
        self.mlp_pre_pool = make_mlp(mlp_pre_pool_dims, activation=ACTIVATION_RELU, batch_norm=BATCH_NORM, dropout=DROPOUT)

    def forward(self, h_states, seq_start_end, train_or_test, last_pos, label=None):
        pool_h = []
        for _, (start, end) in enumerate(seq_start_end):
            start = start.item()
            end = end.item()
            num_ped = end - start
            curr_hidden_ped = h_states.view(-1, self.h_dim)[start:end]
            repeat_hstate = curr_hidden_ped.repeat(num_ped, 1).view(num_ped, num_ped, -1)
            feature = last_pos[start:end]
            curr_end_pos_1 = feature.repeat(num_ped, 1)
            curr_end_pos_2 = feature.unsqueeze(dim=1).repeat(1, num_ped, 1).view(-1, 2)
            social_features = curr_end_pos_1[:, :2] - curr_end_pos_2[:, :2]
            feature = last_pos[start:end].cpu().data.numpy()
            dist = squareform(pdist(feature, metric="euclidean"))
            idx = np.argsort(dist)
            req_h_states = []
            social_features = social_features.view(num_ped, num_ped, 2)
            if num_ped < MAX_CONSIDERED_PED:
                social_feature_embedding = self.pos_embedding(social_features.contiguous().view(-1, 2))
                h_state_zeros = torch.zeros(num_ped, MAX_CONSIDERED_PED, self.h_dim).cuda()
                feature_zeros = torch.zeros(num_ped, MAX_CONSIDERED_PED, self.embedding_dim).cuda()
                h_state_zeros[:num_ped, :num_ped, :] = repeat_hstate.view(num_ped, num_ped, self.h_dim)
                feature_zeros[:num_ped, :num_ped, :] = social_feature_embedding.view(num_ped, num_ped, self.embedding_dim)
                concat_features = torch.cat([h_state_zeros.view(-1, self.h_dim), feature_zeros.view(-1, self.embedding_dim)], dim=1)
            else:
                for ids, curr_features, curr_h_states in zip(idx, social_features, repeat_hstate):
                    req_ids = torch.from_numpy(ids).type(torch.cuda.FloatTensor).view(num_ped, 1)
                    new_h_states = torch.cat([curr_h_states, req_ids], dim=1)
                    new_features = torch.cat([curr_features, req_ids], dim=1)
                    h_states_sorted = new_h_states[new_h_states[:, -1].sort()[1]][:MAX_CONSIDERED_PED, :]
                    features_sorted = new_features[new_features[:, -1].sort()[1]][:MAX_CONSIDERED_PED, :]
                    required_h_states = h_states_sorted[:, :-1]
                    required_features = features_sorted[:, :-1]
                    social_feature_embedding = self.pos_embedding(required_features.contiguous().view(-1, 2))
                    req_h_states.append(torch.cat([required_h_states, social_feature_embedding], dim=1))
                concat_features = torch.stack(req_h_states, dim=0)
            attn_h = self.mlp_pre_pool(concat_features.view(-1, (self.h_dim+self.embedding_dim)))
            attn_h = attn_h.view(num_ped, MAX_CONSIDERED_PED, -1)
            attn_w = F.softmax(self.attn(attn_h.view(num_ped, -1)), dim=1)
            attn_w = attn_w.view(num_ped, MAX_CONSIDERED_PED, 1)
            attn_h = torch.sum(attn_h * attn_w, dim=1)
            pool_h.append(attn_h)
        pool_h = torch.cat(pool_h, dim=0)
        return pool_h


def speed_control(pred_traj_first_speed, seq_start_end, label=None, id=None):
    """This method acts as speed regulator. Using this method, user can add
    speed at one/more frames, stop the agent and so on"""
    for _, (start, end) in enumerate(seq_start_end):
        start = start.item()
        end = end.item()
        if MULTI_CONDITIONAL_MODEL:
            av_tensor = [1, 0, 0]
            av = torch.FloatTensor(av_tensor)
            other_tensor = [0, 1, 0]
            other = torch.FloatTensor(other_tensor)
            agent_tensor = [0, 0, 1]
            agent = torch.FloatTensor(agent_tensor)
            if DIFFERENT_SPEED_MULTI_CONDITION:
                for a, b in zip(range(start, end), label):
                    if torch.all(torch.eq(b, av)):
                        pred_traj_first_speed[a] = sigmoid(AV_SPEED * AV_MAX_SPEED)
                    elif torch.all(torch.eq(b, other)):
                        pred_traj_first_speed[a] = sigmoid(OTHER_SPEED * OTHER_MAX_SPEED)
                    elif torch.all(torch.eq(b, agent)):
                        pred_traj_first_speed[a] = sigmoid(AGENT_SPEED * AGENT_MAX_SPEED)
            elif CONSTANT_SPEED_MULTI_CONDITION:
                # To make all pedestrians travel at same and constant speed throughout
                for a, b in zip(range(start, end), label):
                    if torch.eq(b, 0.1):
                        pred_traj_first_speed[a] = sigmoid(CS_MULTI_CONDITION * AV_MAX_SPEED)
                    elif torch.eq(b, 0.2):
                        pred_traj_first_speed[a] = sigmoid(CS_MULTI_CONDITION * OTHER_MAX_SPEED)
                    elif torch.eq(b, 0.3):
                        pred_traj_first_speed[a] = sigmoid(CS_MULTI_CONDITION * AGENT_MAX_SPEED)
        elif SINGLE_CONDITIONAL_MODEL:
            if CONSTANT_SPEED_SINGLE_CONDITION:
                dataset_name = get_dataset_name(SINGLE_TEST_DATASET_PATH)
                if dataset_name == 'eth':
                    speed_to_simulate = ZARA1_MAX_SPEED * CS_SINGLE_CONDITION
                elif dataset_name == 'hotel':
                    speed_to_simulate = ETH_MAX_SPEED * CS_SINGLE_CONDITION
                elif dataset_name == 'univ':
                    speed_to_simulate = ZARA1_MAX_SPEED * CS_SINGLE_CONDITION
                elif dataset_name == 'zara1':
                    speed_to_simulate = ETH_MAX_SPEED * CS_SINGLE_CONDITION
                elif dataset_name == 'zara2':
                    speed_to_simulate = ETH_MAX_SPEED * CS_SINGLE_CONDITION

                # To add an additional speed for each pedestrain and every frame
                for a in range(start, end):
                    pred_traj_first_speed[a] = sigmoid(speed_to_simulate)

            elif STOP_PED_SINGLE_CONDITION:
                # To stop all pedestrians
                for a in range(start, end):
                    pred_traj_first_speed[a] = sigmoid(0)
            elif ADD_SPEED_PARTICULAR_FRAME and len(FRAMES_TO_ADD_SPEED) > 0:
                for a in range(start, end):
                    # Add speed to particular frame for all pedestrian
                    sorted_frames = FRAMES_TO_ADD_SPEED
                    for frames in sorted_frames:
                        if id == frames:
                            pred_traj_first_speed[a] = sigmoid(ETH_MAX_SPEED * MAX_SPEED)
                        else:
                            pred_traj_first_speed[a] = pred_traj_first_speed[a]

    return pred_traj_first_speed.view(-1, 1)


class TrajectoryGenerator(nn.Module):
    def __init__(self, mlp_dim, h_dim):
        super(TrajectoryGenerator, self).__init__()

        self.mlp_dim = MLP_DIM
        self.h_dim = h_dim

        self.mlp_input_dim = mlp_dim
        self.h_dim = h_dim

        self.embedding_dim = EMBEDDING_DIM
        self.noise_dim = NOISE_DIM
        self.num_layers = NUM_LAYERS
        self.bottleneck_dim = BOTTLENECK_DIM

        self.encoder = Encoder(h_dim=h_dim, mlp_input_dim=mlp_dim)
        self.decoder = Decoder(h_dim = h_dim, mlp_input_dim=mlp_dim)

        self.noise_first_dim = NOISE_DIM[0]

        if POOLING_TYPE:
            self.conditionalPoolingModule = PoolingModule(h_dim=h_dim, mlp_input_dim=mlp_dim)
            mlp_decoder_context_dims = [h_dim + BOTTLENECK_DIM, MLP_DIM, h_dim - self.noise_first_dim]
        elif AGGREGATION_TYPE:
            self.aggregation_module = AggregationModule(h_dim=h_dim, mlp_input_dim=mlp_dim)
            mlp_decoder_context_dims = [h_dim + BOTTLENECK_DIM, MLP_DIM, h_dim - self.noise_first_dim]
        elif ATTENTION_TYPE:
            self.attention_module = AttentionModule(h_dim=h_dim, mlp_input_dim=mlp_dim)
            mlp_decoder_context_dims = [h_dim + BOTTLENECK_DIM, MLP_DIM, h_dim - self.noise_first_dim]
        else:
            mlp_decoder_context_dims = [h_dim, MLP_DIM, h_dim - self.noise_first_dim]

        self.mlp_decoder_context = make_mlp(mlp_decoder_context_dims, activation=ACTIVATION_RELU, batch_norm=BATCH_NORM,
                                            dropout=DROPOUT)

    def add_noise(self, _input, seq_start_end):
        noise_shape = (seq_start_end.size(0),) + self.noise_dim
        if USE_GPU:
            z_decoder = torch.randn(*noise_shape).cuda()
        else:
            z_decoder = torch.randn(*noise_shape)
        _list = []
        for idx, (start, end) in enumerate(seq_start_end):
            noise = z_decoder[idx].view(1, -1).repeat(end.item() - start.item(), 1)
            _list.append(torch.cat([_input[start:end], noise], dim=1))
        decoder_h = torch.cat(_list, dim=0)
        return decoder_h

    def forward(self, obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed, pred_traj, train_or_test, fake_ped_speed, obs_obj_rel_speed, obs_label=None, pred_label=None, user_noise=None):
        batch = obs_traj_rel.size(1)
        if MULTI_CONDITIONAL_MODEL:
            final_encoder_h = self.encoder(obs_traj_rel, obs_ped_speed, label=obs_label)
        else:
            final_encoder_h = self.encoder(obs_traj_rel, obs_ped_speed, label=None)
        if POOLING_TYPE:
            pm_final_vector = self.conditionalPoolingModule(final_encoder_h, seq_start_end, train_or_test, obs_traj[-1, :, :])
            mlp_decoder_context_input = torch.cat([final_encoder_h.view(-1, self.h_dim), pm_final_vector], dim=1)
        elif AGGREGATION_TYPE:
            agg_final_vector = self.aggregation_module(final_encoder_h, seq_start_end, train_or_test, obs_traj[-1, :, :])
            mlp_decoder_context_input = torch.cat([final_encoder_h.view(-1, self.h_dim), agg_final_vector], dim=1)
        elif ATTENTION_TYPE:
            attn_final_vector = self.attention_module(final_encoder_h, seq_start_end, train_or_test, obs_traj[-1, :, :])
            mlp_decoder_context_input = torch.cat([final_encoder_h.view(-1, self.h_dim), attn_final_vector], dim=1)
        else:
            mlp_decoder_context_input = final_encoder_h.view(-1, self.h_dim)
        noise_input = self.mlp_decoder_context(mlp_decoder_context_input)

        decoder_h = self.add_noise(noise_input, seq_start_end).unsqueeze(dim=0)
        if USE_GPU:
            decoder_c = torch.zeros(self.num_layers, batch, self.h_dim).cuda()
        else:
            decoder_c = torch.zeros(self.num_layers, batch, self.h_dim)

        state_tuple = (decoder_h, decoder_c)

        if MULTI_CONDITIONAL_MODEL:
            decoder_out = self.decoder(obs_traj[-1], obs_traj_rel[-1], state_tuple, seq_start_end, pred_ped_speed,
            train_or_test, fake_ped_speed, label=pred_label)
        else:
            decoder_out = self.decoder(obs_traj[-1], obs_traj_rel[-1], state_tuple, seq_start_end, pred_ped_speed,
            train_or_test, fake_ped_speed)
        pred_traj_fake_rel = decoder_out

        # LOGGING THE OUTPUT FOR MULTI CONDITIONAL MODEL WHEN THE PREDICTED LENGTH IS MORE - useful to check the speed condition
        if train_or_test == 3:
            simulated_trajectories = []
            for _, (start, end) in enumerate(seq_start_end):
                start = start.item()
                end = end.item()
                obs_test_traj = obs_traj[:, start:end, :]
                pred_test_traj_rel = pred_traj_fake_rel[:, start:end, :]
                pred_test_traj = relative_to_abs(pred_test_traj_rel, obs_test_traj[-1])
                speed_added = pred_ped_speed[0, start:end, :]
                print(speed_added)
                print(pred_test_traj)
                simulated_trajectories.append(pred_test_traj)
        return pred_traj_fake_rel, decoder_h.view(-1, self.h_dim)


class TrajectoryDiscriminator(nn.Module):
    def __init__(self, h_dim, mlp_dim):
        super(TrajectoryDiscriminator, self).__init__()

        self.encoder = Encoder(h_dim, mlp_input_dim=mlp_dim)

        real_classifier_dims = [h_dim, MLP_DIM, 1]
        self.real_classifier = make_mlp(real_classifier_dims, activation=ACTIVATION_RELU, batch_norm=BATCH_NORM, dropout=DROPOUT)

    def forward(self, traj, traj_rel, ped_speed, label=None):
        if MULTI_CONDITIONAL_MODEL:
            final_h = self.encoder(traj_rel, ped_speed, label=label)  # final layer of the encoder is returned
        else:
            final_h = self.encoder(traj_rel, ped_speed, label=None)  # final layer of the encoder is returned
        scores = self.real_classifier(final_h.squeeze())  # mlp - 64 --> 1024 --> 1
        return scores


In [9]:
def init_weights(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        nn.init.kaiming_normal_(m.weight)

In [10]:
print("Process Started")
train_path = SINGLE_TRAIN_DATASET_PATH
val_path = SINGLE_VAL_DATASET_PATH
print("Initializing train dataset")
train_dset, train_loader = data_loader(train_path, 0, 'train')
print("Initializing val dataset")
_, val_loader = data_loader(val_path, 0, 'val')

Process Started
Initializing train dataset
Initializing val dataset


In [11]:
iterations_per_epoch = len(train_dset) / BATCH_SINGLE_CONDITION / D_STEPS

NUM_ITERATIONS = int(iterations_per_epoch * NUM_EPOCHS_SINGLE_CONDITION)
generator = TrajectoryGenerator(mlp_dim=MLP_INPUT_DIM_SINGLE_CONDITION,
                                h_dim=H_DIM_GENERATOR_SINGLE_CONDITION)
discriminator = TrajectoryDiscriminator(mlp_dim=MLP_INPUT_DIM_SINGLE_CONDITION,
                                        h_dim=H_DIM_DISCRIMINATOR_SINGLE_CONDITION)
speed_regressor = SpeedEncoderDecoder(h_dim=H_DIM_GENERATOR_SINGLE_CONDITION)
required_epoch = NUM_EPOCHS_SINGLE_CONDITION

print(iterations_per_epoch)
generator.apply(init_weights)
generator.type(torch.cuda.FloatTensor).train()
print('Here is the generator:')
print(generator)

discriminator.apply(init_weights)
discriminator.type(torch.cuda.FloatTensor).train()
print('Here is the discriminator:')
print(discriminator)

speed_regressor.apply(init_weights)
speed_regressor.type(torch.cuda.FloatTensor).train()
print('Here is the Speed Regressor:')
print(speed_regressor)

56.125
Here is the generator:
TrajectoryGenerator(
  (encoder): Encoder(
    (encoder): LSTM(16, 32)
    (spatial_embedding): Linear(in_features=3, out_features=16, bias=True)
  )
  (decoder): Decoder(
    (decoder): LSTM(16, 32)
    (spatial_embedding): Linear(in_features=3, out_features=16, bias=True)
    (hidden2pos): Linear(in_features=32, out_features=2, bias=True)
  )
  (aggregation_module): AggregationModule(
    (pos_embedding): Linear(in_features=2, out_features=16, bias=True)
    (mlp_pre_pool): Sequential(
      (0): Linear(in_features=160, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=32, bias=True)
      (3): ReLU()
    )
  )
  (mlp_decoder_context): Sequential(
    (0): Linear(in_features=64, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=24, bias=True)
    (3): ReLU()
  )
)
Here is the discriminator:
TrajectoryDiscriminator(
  (encoder): Encoder(
    (encoder): LSTM(16, 64)
    (sp

In [12]:
g_loss_fn = gan_g_loss
d_loss_fn = gan_d_loss

optimizer_g = torch.optim.Adam(generator.parameters(), lr=G_LEARNING_RATE)
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=D_LEARNING_RATE)
optimizer_speed_regressor = torch.optim.Adam(speed_regressor.parameters(), lr=D_LEARNING_RATE)

In [13]:
t, epoch = 0, 0
checkpoint = {
    'G_losses': defaultdict(list),
    'D_losses': defaultdict(list),
    'g_state': None,
    'g_optim_state': None,
    'd_state': None,
    'd_optim_state': None,
    'g_best_state': None,
    'd_best_state': None
}
ade_list, fde_list, train_ade, train_fde, avg_speed_error, f_speed_error = [], [], [], [], [], []


In [14]:
def discriminator_step(batch, generator, discriminator, d_loss_fn, optimizer_d):
    """This step is similar to Social GAN Code"""
    if USE_GPU:
        batch = [tensor.cuda() for tensor in batch]
    else:
        batch = [tensor for tensor in batch]
    if MULTI_CONDITIONAL_MODEL:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed, pred_ped_speed, obs_label, pred_label,
         obs_obj_rel_speed) = batch
        generator_out, _ = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                  pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
    else:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed, pred_ped_speed, obs_obj_rel_speed) = batch
        generator_out, _ = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                  pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=None, pred_label=None)

    losses = {}
    loss = torch.zeros(1).to(pred_traj_gt)

    pred_traj_fake_rel = generator_out
    pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1])

    traj_real = torch.cat([obs_traj, pred_traj_gt], dim=0)
    traj_real_rel = torch.cat([obs_traj_rel, pred_traj_gt_rel], dim=0)
    traj_fake = torch.cat([obs_traj, pred_traj_fake], dim=0)
    traj_fake_rel = torch.cat([obs_traj_rel, pred_traj_fake_rel], dim=0)
    ped_speed = torch.cat([obs_ped_speed, pred_ped_speed], dim=0)
    if MULTI_CONDITIONAL_MODEL:
        label_info = torch.cat([obs_label, pred_label], dim=0)
        scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=label_info)
        scores_real = discriminator(traj_real, traj_real_rel, ped_speed, label=label_info)
    else:
        scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=None)
        scores_real = discriminator(traj_real, traj_real_rel, ped_speed, label=None)

    data_loss = d_loss_fn(scores_real, scores_fake)
    losses['D_data_loss'] = data_loss.item()
    loss += data_loss
    losses['D_total_loss'] = loss.item()

    optimizer_d.zero_grad()
    loss.backward()
    optimizer_d.step()

    return losses


def speed_regressor_step(batch, generator, speed_regressor, optimizer_speed_regressor):
    losses = {}
    speed_loss = []

    if USE_GPU:
        batch = [tensor.cuda() for tensor in batch]
    else:
        batch = [tensor for tensor in batch]
    if MULTI_CONDITIONAL_MODEL:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed, pred_ped_speed,
        obs_label, pred_label, obs_obj_rel_speed) = batch
    else:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed,
         pred_ped_speed, obs_obj_rel_speed) = batch

    if MULTI_CONDITIONAL_MODEL:
        _, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                              pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
    else:
        _, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                  pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=None, pred_label=None)

    fake_ped_speed = speed_regressor(obs_ped_speed, final_enc_h)
    loss_mask = loss_mask[:, OBS_LEN:]

    speed_loss.append(L2_LOSS_WEIGHT * mae_loss(
            fake_ped_speed,
            pred_ped_speed,
            mode='raw',
            speed_reg='speed_regressor'))

    total_speed_loss = torch.zeros(1).to(pred_ped_speed)
    speed_loss = torch.stack(speed_loss, dim=1)

    for start, end in seq_start_end.data:
        _speed_loss = speed_loss[start:end]
        _speed_loss = torch.sum(_speed_loss, dim=0)
        _speed_loss = torch.min(_speed_loss) / torch.sum(loss_mask[start:end])
        total_speed_loss += _speed_loss
    losses['Speed_Regression_Loss'] = total_speed_loss.item()

    optimizer_speed_regressor.zero_grad()
    total_speed_loss.backward()
    optimizer_speed_regressor.step()

    return losses


def generator_step(batch, generator, discriminator, g_loss_fn, optimizer_g):
    """This step is similar to Social GAN Code"""
    if USE_GPU:
        batch = [tensor.cuda() for tensor in batch]
    else:
        batch = [tensor for tensor in batch]
    if MULTI_CONDITIONAL_MODEL:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed, pred_ped_speed,
        obs_label, pred_label, obs_obj_rel_speed) = batch
    else:
        (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed, pred_ped_speed, obs_obj_rel_speed) = batch

    losses = {}
    loss = torch.zeros(1).to(pred_traj_gt)
    g_l2_loss_rel = []

    loss_mask = loss_mask[:, OBS_LEN:]

    for _ in range(BEST_K):
        if MULTI_CONDITIONAL_MODEL:
            generator_out, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                  pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
        else:
            generator_out, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                      pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=None, pred_label=None)

        pred_traj_fake_rel = generator_out
        pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1])

        if L2_LOSS_WEIGHT > 0:
            g_l2_loss_rel.append(L2_LOSS_WEIGHT * l2_loss(
                pred_traj_fake_rel,
                pred_traj_gt_rel,
                loss_mask,
                mode='raw'))

    g_l2_loss_sum_rel = torch.zeros(1).to(pred_traj_gt)
    if L2_LOSS_WEIGHT > 0:
        g_l2_loss_rel = torch.stack(g_l2_loss_rel, dim=1)
        for start, end in seq_start_end.data:
            _g_l2_loss_rel = g_l2_loss_rel[start:end]
            _g_l2_loss_rel = torch.sum(_g_l2_loss_rel, dim=0)
            _g_l2_loss_rel = torch.min(_g_l2_loss_rel) / torch.sum(loss_mask[start:end])
            g_l2_loss_sum_rel += _g_l2_loss_rel
        losses['G_l2_loss_rel'] = g_l2_loss_sum_rel.item()
        loss += g_l2_loss_sum_rel
    traj_fake = torch.cat([obs_traj, pred_traj_fake], dim=0)
    traj_fake_rel = torch.cat([obs_traj_rel, pred_traj_fake_rel], dim=0)
    ped_speed = torch.cat([obs_ped_speed, pred_ped_speed], dim=0)
    if MULTI_CONDITIONAL_MODEL:
        label_info = torch.cat([obs_label, pred_label], dim=0)
        scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=label_info)
    else:
        scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=None)
    discriminator_loss = g_loss_fn(scores_fake)

    loss += discriminator_loss
    losses['G_discriminator_loss'] = discriminator_loss.item()
    losses['G_total_loss'] = loss.item()

    optimizer_g.zero_grad()
    loss.backward()
    optimizer_g.step()

    return losses


def check_accuracy(loader, generator, discriminator, d_loss_fn, speed_regressor):
    d_losses = []
    metrics = {}
    g_l2_losses_abs, g_l2_losses_rel = ([],) * 2
    disp_error, f_disp_error, mean_speed_disp_error, final_speed_disp_error = [], [], [], []
    total_traj = 0
    loss_mask_sum = 0
    generator.eval()
    with torch.no_grad():
        for batch in loader:
            if USE_GPU:
                batch = [tensor.cuda() for tensor in batch]
            else:
                batch = [tensor for tensor in batch]
            if MULTI_CONDITIONAL_MODEL:
                (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed,
                 pred_ped_speed, obs_label, pred_label, obs_obj_rel_speed) = batch
            else:
                (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed,
                 pred_ped_speed, obs_obj_rel_speed) = batch

            if MULTI_CONDITIONAL_MODEL:
                pred_traj_fake_rel, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                  pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
            else:
                pred_traj_fake_rel, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                      pred_traj_gt, TRAIN_METRIC, None, obs_obj_rel_speed, obs_label=None, pred_label=None)

            fake_ped_speed = speed_regressor(obs_ped_speed, final_enc_h)

            pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1])
            loss_mask = loss_mask[:, OBS_LEN:]

            g_l2_loss_abs, g_l2_loss_rel = cal_l2_losses(
                pred_traj_gt, pred_traj_gt_rel, pred_traj_fake,
                pred_traj_fake_rel, loss_mask
            )

            abs_speed_los = cal_mae_speed_loss(pred_ped_speed, fake_ped_speed)
            ade = displacement_error(pred_traj_gt, pred_traj_fake)
            fde = final_displacement_error(pred_traj_gt, pred_traj_fake)

            traj_real = torch.cat([obs_traj, pred_traj_gt], dim=0)
            traj_real_rel = torch.cat([obs_traj_rel, pred_traj_gt_rel], dim=0)
            traj_fake = torch.cat([obs_traj, pred_traj_fake], dim=0)
            traj_fake_rel = torch.cat([obs_traj_rel, pred_traj_fake_rel], dim=0)
            ped_speed = torch.cat([obs_ped_speed, pred_ped_speed], dim=0)
            if MULTI_CONDITIONAL_MODEL:
                label_info = torch.cat([obs_label, pred_label], dim=0)
                scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=label_info)
                scores_real = discriminator(traj_real, traj_real_rel, ped_speed, label=label_info)
            else:
                scores_fake = discriminator(traj_fake, traj_fake_rel, ped_speed, label=None)
                scores_real = discriminator(traj_real, traj_real_rel, ped_speed, label=None)

            d_loss = d_loss_fn(scores_real, scores_fake)
            d_losses.append(d_loss.item())

            g_l2_losses_abs.append(g_l2_loss_abs.item())
            g_l2_losses_rel.append(g_l2_loss_rel.item())
            disp_error.append(ade.item())
            f_disp_error.append(fde.item())
            mean_speed_disp_error.append(abs_speed_los.item())

            loss_mask_sum += torch.numel(loss_mask.data)
            total_traj += pred_traj_gt.size(1)
            if total_traj >= NUM_SAMPLE_CHECK:
                break

    metrics['d_loss'] = sum(d_losses) / len(d_losses)
    metrics['g_l2_loss_abs'] = sum(g_l2_losses_abs) / loss_mask_sum
    metrics['g_l2_loss_rel'] = sum(g_l2_losses_rel) / loss_mask_sum
    metrics['ade'] = sum(disp_error) / (total_traj * PRED_LEN)
    metrics['fde'] = sum(f_disp_error) / total_traj
    metrics['mean_l2_speed'] = sum(mean_speed_disp_error) / len(mean_speed_disp_error)

    generator.train()
    return metrics


def cal_l2_losses(pred_traj_gt, pred_traj_gt_rel, pred_traj_fake, pred_traj_fake_rel, loss_mask):
    g_l2_loss_abs = l2_loss(pred_traj_fake, pred_traj_gt, loss_mask, mode='sum')
    g_l2_loss_rel = l2_loss(pred_traj_fake_rel, pred_traj_gt_rel, loss_mask, mode='sum')
    return g_l2_loss_abs, g_l2_loss_rel


def cal_mae_speed_loss(pred_speed_gt, pred_speed_fake):
    g_l2_speed_loss = mae_loss(pred_speed_gt, pred_speed_fake, speed_reg='speed_reg', mode='sum')
    return g_l2_speed_loss


def cal_msae(real_speed, fake_traj):
    fake_output_speed = fake_speed(fake_traj)
    real_speed = real_speed.permute(1, 0, 2)
    msae = mean_speed_error(real_speed, fake_output_speed)
    return msae


def fake_speed(fake_traj):
    output_speed = []
    sigmoid_speed = nn.Sigmoid()
    for a, b in zip(fake_traj[:, :], fake_traj[1:, :]):
        dist = torch.pairwise_distance(a, b)
        speed = sigmoid_speed(dist)
        output_speed.append(speed.view(1, -1))
    output_fake_speed = torch.cat(output_speed, dim=0).unsqueeze(dim=2).permute(1, 0, 2)
    return output_fake_speed


def cal_fse(real_speed, fake_traj):
    last_two_traj_info = fake_traj[-2:, :, :]
    fake_output_speed = fake_speed(last_two_traj_info)
    fse = final_speed_error(real_speed.unsqueeze(dim=2), fake_output_speed)
    return fse


In [15]:
val_ade_list, val_fde_list, train_ade, train_fde, train_avg_speed_error, val_avg_speed_error, val_msae_list = [], [], [], [], [], [], []
train_ade_list, train_fde_list = [], []
while epoch < required_epoch:
    gc.collect()
    d_steps_left, g_steps_left, speed_regression_steps_left = D_STEPS, G_STEPS, SR_STEPS
    epoch += 1
    print('Starting epoch {}'.format(epoch))
    disc_loss, gent_loss, sr_loss = [], [], []
    for batch in train_loader:
        if d_steps_left > 0:
            losses_d = discriminator_step(batch, generator, discriminator, d_loss_fn, optimizer_d)
            disc_loss.append(losses_d['D_total_loss'])
            d_steps_left -= 1
        elif g_steps_left > 0:
            losses_g = generator_step(batch, generator, discriminator, g_loss_fn, optimizer_g)
            speed_regression_loss = speed_regressor_step(batch, generator, speed_regressor, optimizer_speed_regressor)
            losses_g['Speed_Regression_Loss'] = speed_regression_loss['Speed_Regression_Loss']
            sr_loss.append(speed_regression_loss['Speed_Regression_Loss'])
            gent_loss.append(losses_g['G_discriminator_loss'])
            g_steps_left -= 1

        if d_steps_left > 0 or g_steps_left > 0:
            continue

        #gen_writer.add_scalar('loss', gent_loss[0], epoch)
        #dis_writer.add_scalar('loss', disc_loss[0], epoch)
        #gen_writer.close()
        #dis_writer.close()
        #disc_loss.clear()
        #gent_loss.clear()

        if t > 0 and t % CHECKPOINT_EVERY == 0:

            print('t = {} / {}'.format(t + 1, NUM_ITERATIONS))
            for k, v in sorted(losses_d.items()):
                print('  [D] {}: {:.3f}'.format(k, v))
            for k, v in sorted(losses_g.items()):
                print('  [G] {}: {:.3f}'.format(k, v))

            print('Checking stats on val ...')
            metrics_val = check_accuracy(val_loader, generator, discriminator, d_loss_fn, speed_regressor)
            print('Checking stats on train ...')
            metrics_train = check_accuracy(train_loader, generator, discriminator, d_loss_fn, speed_regressor)

            for k, v in sorted(metrics_val.items()):
                print('  [val] {}: {:.3f}'.format(k, v))
            for k, v in sorted(metrics_train.items()):
                print('  [train] {}: {:.3f}'.format(k, v))

            val_ade_list.append(metrics_val['ade'])
            val_fde_list.append(metrics_val['fde'])

            train_ade_list.append(metrics_train['ade'])
            train_fde_list.append(metrics_train['fde'])
            val_msae_list.append(metrics_val['mean_l2_speed'])

            if metrics_val.get('ade') == min(val_ade_list) or metrics_val['ade'] < min(val_ade_list) or metrics_val.get('fde') == min(val_fde_list) or metrics_val['fde'] < min(val_fde_list):
                checkpoint['g_best_state'] = generator.state_dict()
            if metrics_val.get('ade') == min(val_ade_list) or metrics_val['ade'] < min(val_ade_list):
                print('New low for avg_disp_error')
                checkpoint['best_g_state'] = generator.state_dict()
            if metrics_val.get('fde') == min(val_fde_list) or metrics_val['fde'] < min(val_fde_list):
                print('New low for final_disp_error')
            if metrics_val.get('mean_l2_speed') == min(val_msae_list) or metrics_val['mean_l2_speed'] < min(val_msae_list):
                print('New low for Speed regressor model')
                checkpoint['best_regressor_state'] = speed_regressor.state_dict()

            checkpoint['g_state'] = generator.state_dict()
            checkpoint['g_optim_state'] = optimizer_g.state_dict()
            checkpoint['d_state'] = discriminator.state_dict()
            checkpoint['d_optim_state'] = optimizer_d.state_dict()
            checkpoint['regressor_state'] = speed_regressor.state_dict()
            torch.save(checkpoint, CHECKPOINT_NAME)
            print('Done.')

        t += 1
        d_steps_left = D_STEPS
        g_steps_left = G_STEPS
        if t >= NUM_ITERATIONS:
            break


Starting epoch 1
Starting epoch 2
Starting epoch 3
t = 101 / 1403
  [D] D_data_loss: 1.386
  [D] D_total_loss: 1.386
  [G] G_discriminator_loss: 0.693
  [G] G_l2_loss_rel: 0.072
  [G] G_total_loss: 0.765
  [G] Speed_Regression_Loss: 0.385
Checking stats on val ...
Checking stats on train ...
  [val] ade: 0.820
  [val] d_loss: 1.386
  [val] fde: 1.710
  [val] g_l2_loss_abs: 1.312
  [val] g_l2_loss_rel: 1.312
  [val] mean_l2_speed: 20.275
  [train] ade: 0.883
  [train] d_loss: 1.386
  [train] fde: 1.215
  [train] g_l2_loss_abs: 1.212
  [train] g_l2_loss_rel: 1.212
  [train] mean_l2_speed: 76.526
New low for avg_disp_error
New low for final_disp_error
New low for Speed regressor model
Done.
Starting epoch 4
Starting epoch 5
Starting epoch 6
t = 201 / 1403
  [D] D_data_loss: 1.386
  [D] D_total_loss: 1.386
  [G] G_discriminator_loss: 0.693
  [G] G_l2_loss_rel: 0.095
  [G] G_total_loss: 0.788
  [G] Speed_Regression_Loss: 0.480
Checking stats on val ...
Checking stats on train ...
  [val] ad

In [16]:

def get_traj(trajectories, sequences, labels=None):
    print("Enter the sequence you want to visualize from:", sequences)
    seq_start = int(input("Enter the sequence start: "))
    seq_end = int(input("Enter the sequence end:"))
    positions = trajectories[:, seq_start:seq_end, :]
    if MULTI_CONDITIONAL_MODEL:
        label = labels[:, seq_start:seq_end, :]
        return positions, label
    else:
        return positions


def get_distance(trajectories):
    euclid_distance = []
    for a, b in zip(trajectories[:, :], trajectories[1:, :]):
        dist = torch.pairwise_distance(a, b)
        dist = dist.cpu().detach().numpy()
        euclid_distance.append(dist.reshape(1, -1))
    euclid_distance = torch.from_numpy(np.concatenate(euclid_distance, axis=0)).type(torch.float)
    return euclid_distance


def inverse_sigmoid(speeds, max_speed=None, labels=None):
    simulated_speed = []
    inv = torch.log((speeds / (1 - speeds)))
    if SINGLE_CONDITIONAL_MODEL:
        print("The current speeds are: ", inv / max_speed)
    else:
        labels = labels.view(PRED_LEN, -1)
        for speed, agent in zip(inv, labels[:PRED_LEN-1, :]):
            for a, b, in zip(speed, agent):
                if torch.eq(b, 0.1):
                    s = a / AV_MAX_SPEED
                    simulated_speed.append(s.view(1, 1))
                elif torch.eq(b, 0.2):
                    s = a / OTHER_MAX_SPEED
                    simulated_speed.append(s.view(1, 1))
                elif torch.eq(b, 0.3):
                    s = a / AGENT_MAX_SPEED
                    simulated_speed.append(s.view(1, 1))
        simulated_speed = torch.cat(simulated_speed, dim=0)
        print('the labels are: ', labels)
        print("The current speeds are: ", simulated_speed.view(PRED_LEN-1, -1))


def get_speed_from_distance(distance):
    # Since we skip the speed calculation (see trajectories.py for more explanation), we directly pass the distance through sigmoid layer
    if MULTI_CONDITIONAL_MODEL:
        sigmoid_speed = torch.sigmoid(distance)
    else:
        speed = distance / FRAMES_PER_SECOND_SINGLE_CONDITION
        sigmoid_speed = torch.sigmoid(speed)
    return sigmoid_speed


def get_max_speed(path):
    if path == "eth":
        return ZARA1_MAX_SPEED
    elif path == "hotel":
        return ETH_MAX_SPEED
    elif path == "zara1":
        return ETH_MAX_SPEED
    elif path == "zara2":
        return ETH_MAX_SPEED
    elif path == "univ":
        return ETH_MAX_SPEED


def verify_speed(traj, sequences, labels=None):
    if MULTI_CONDITIONAL_MODEL:
        traj, label = get_traj(traj, sequences, labels=labels)
    else:
        dataset_name = get_dataset_name(SINGLE_TEST_DATASET_PATH)
        traj = get_traj(traj, sequences, labels=None)
    dist = get_distance(traj)
    speed = get_speed_from_distance(dist)
    # We calculate inverse sigmoid to verify the speed
    if MULTI_CONDITIONAL_MODEL:
        inverse_sigmoid(speed, labels=label)
    else:
        maxspeed= get_max_speed(dataset_name)
        inverse_sigmoid(speed, max_speed=maxspeed)


In [17]:
hi

NameError: ignored

In [28]:

from scipy.spatial.distance import pdist, squareform


def collisionPercentage(traj, sequences):
    collided_or_not = []
    no_of_frames = 0
    for (start, end) in sequences:
        curr_Traj = traj[:, start:end, :]
        curr_Traj = traj[:, start:end, :].cpu().data.numpy()
        # no_of_frames += curr_frame
        curr_collided_peds = 0
        peds = 0
        for trajectories in curr_Traj:
            peds += trajectories.shape[0]
            dist = squareform(pdist(trajectories, metric="euclidean"))
            np.fill_diagonal(dist, np.nan)
            for rows in dist:
                if any(i <= 0.1 for i in rows):
                    curr_collided_peds += 1

        percentage_of_collision_in_curr_frame = curr_collided_peds / peds
        collided_or_not.append(percentage_of_collision_in_curr_frame)

    collision = sum(collided_or_not) / len(collided_or_not)
    a = sum(collided_or_not)

    return torch.tensor(collision)


def evaluate_helper(error, traj, seq_start_end):
    sum_ = []
    curr_best_traj = []
    for (start, end) in seq_start_end:
        sum_.append(torch.min(torch.sum(error[start.item():end.item()], dim=0)))
        idx = torch.argmin(torch.sum(error[start.item():end.item()], dim=0))
        curr_best_traj.append(traj[idx, :, start:end, :])
    return torch.cat(curr_best_traj, dim=1), sum(sum_)


def evaluate(loader, generator, num_samples, speed_regressor):
    ade_outer, fde_outer, simulated_output, total_traj, sequences, labels, observed_traj = [], [], [], [], [], [], []
    with torch.no_grad():
        for batch in loader:
            if USE_GPU:
                batch = [tensor.cuda() for tensor in batch]
            else:
                batch = [tensor for tensor in batch]
            if MULTI_CONDITIONAL_MODEL:
                (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed,
                 pred_ped_speed, obs_label, pred_label, obs_obj_rel_speed) = batch
            else:
                (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, loss_mask, seq_start_end, obs_ped_speed,
                 pred_ped_speed, obs_obj_rel_speed) = batch

            ade, fde, traj_op, traj_obs = [], [], [], []
            total_traj.append(pred_traj_gt.size(1))
            sequences.append(seq_start_end)
            if MULTI_CONDITIONAL_MODEL:
                labels.append(pred_label)

            for _ in range(num_samples):
                if MULTI_CONDITIONAL_MODEL:
                    #fake_pred_speed = speed_regressor()
                    _, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                                   pred_traj_gt, 2, None, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
                    fake_speed = speed_regressor(obs_ped_speed, final_enc_h)
                    pred_traj_fake_rel, _ = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                                   pred_traj_gt,
                                                   TEST_METRIC, fake_speed, obs_obj_rel_speed, obs_label=obs_label, pred_label=pred_label)
                else:
                    _, final_enc_h = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                                   pred_traj_gt, 0, None, obs_obj_rel_speed, obs_label=None, pred_label=None)
                    fake_speed = speed_regressor(obs_ped_speed, final_enc_h)
                    pred_traj_fake_rel, _ = generator(obs_traj, obs_traj_rel, seq_start_end, obs_ped_speed, pred_ped_speed,
                                                   pred_traj_gt,
                                                   TEST_METRIC, fake_speed, obs_obj_rel_speed, obs_label=None, pred_label=None)

                    #for a, b in zip(fake_speed, pred_ped_speed):
                    #    print(a, b)


                pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1])
                ade.append(displacement_error(pred_traj_fake, pred_traj_gt, mode='raw'))
                fde.append(final_displacement_error(pred_traj_fake[-1], pred_traj_gt[-1], mode='raw'))
                traj_op.append(pred_traj_fake.unsqueeze(dim=0))
                traj_obs.append(obs_traj.unsqueeze(dim=0))
                #print('obs_traj', obs_traj)
                #print('pred_traj', pred_traj_fake)
                #print('pred_traj_GT', pred_traj_gt)

            best_traj, min_ade_error = evaluate_helper(torch.stack(ade, dim=1), torch.cat(traj_op, dim=0),
                                                       seq_start_end)
            #print('best', best_traj)
            staked_obs = torch.cat(traj_obs, dim=0)
            obs = staked_obs[0]
            observed_traj.append(obs)
            _, min_fde_error = evaluate_helper(torch.stack(fde, dim=1), torch.cat(traj_op, dim=0), seq_start_end)
            ade_outer.append(min_ade_error)
            fde_outer.append(min_fde_error)
            simulated_output.append(best_traj)

        ade = sum(ade_outer) / (sum(total_traj) * PRED_LEN)
        fde = sum(fde_outer) / (sum(total_traj))
        simulated_traj = torch.cat(simulated_output, dim=1)
        total_obs = torch.cat(observed_traj, dim=1).permute(1, 0, 2)
        if MULTI_CONDITIONAL_MODEL:
            all_labels = torch.cat(labels, dim=1)
        last_items_in_sequences = []
        curr_sequences = []
        i = 0
        for sequence_list in sequences:
            last_sequence = sequence_list[-1]
            if i > 0:
                last_items_sum = sum(last_items_in_sequences)
                curr_sequences.append(last_items_sum + sequence_list)
            last_items_in_sequences.append(last_sequence[1])
            if i == 0:
                curr_sequences.append(sequence_list)
                i += 1
                continue

        sequences = torch.cat(curr_sequences, dim=0)
        colpercent = collisionPercentage(simulated_traj, sequences)
        #create_data(simulated_traj.permute(1, 0, 2), sequences)
        print(colpercent * 100)

        if TEST_METRIC == 2:
            if SINGLE_CONDITIONAL_MODEL:
                # The speed can be verified for different sequences and this method runs for n number of batches.
                verify_speed(simulated_traj, sequences, labels=None)
            else:
                verify_speed(simulated_traj, sequences, labels=all_labels)

        return ade, fde, colpercent * 100


def main():
    checkpoint = torch.load(CHECKPOINT_NAME)
    if MULTI_CONDITIONAL_MODEL:
        generator = TrajectoryGenerator(mlp_dim=MLP_INPUT_DIM_MULTI_CONDITION,
                                        h_dim=H_DIM_GENERATOR_MULTI_CONDITION)
        speed_regressor = SpeedEncoderDecoder(h_dim=H_DIM_GENERATOR_MULTI_CONDITION)
    else:
        generator = TrajectoryGenerator(mlp_dim=MLP_INPUT_DIM_SINGLE_CONDITION,
                                        h_dim=H_DIM_GENERATOR_SINGLE_CONDITION)
        speed_regressor = SpeedEncoderDecoder(h_dim=H_DIM_GENERATOR_SINGLE_CONDITION)
    generator.load_state_dict(checkpoint['g_state'])
    speed_regressor.load_state_dict(checkpoint['regressor_state'])
    if USE_GPU:
        generator.cuda()
    generator.train()
    speed_regressor.cuda()
    speed_regressor.train()

    if MULTI_CONDITIONAL_MODEL:
        test_dataset = MULTI_TEST_DATASET_PATH
    else:
        test_dataset = SINGLE_TEST_DATASET_PATH
    print('Initializing Test dataset')
    _, loader = data_loader(test_dataset, TEST_METRIC, 'test')
    print('Test dataset preprocessing done')
    if TEST_METRIC == 2:
        num_samples = 20
    else:
        num_samples = NUM_SAMPLES
    cm, ade_final, fde_final = [], [], []
    for _ in range(20):
        ade, fde, ca = evaluate(loader, generator, NUM_SAMPLES, speed_regressor)
        cm.append(ca)
        ade_final.append(ade)
        fde_final.append(fde)
        print('Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(PRED_LEN, ade, fde))
    print('average collision: ', sum(cm)/len(cm))
    print('average ade: ', sum(ade_final) / len(ade_final))
    print('average fde: ', sum(fde_final) / len(fde_final))


if __name__ == '__main__':
    main()



Initializing Test dataset
Test dataset preprocessing done
tensor(0.2669)
Pred Len: 12, ADE: 0.48, FDE: 0.99
tensor(0.2457)
Pred Len: 12, ADE: 0.48, FDE: 0.98


KeyboardInterrupt: ignored