In [18]:
from utils import *

In [19]:
from tqdm import tqdm

In [20]:
import lutorpy as lua
require("nn")
require("optim")
require("cutorch")
require("cunn")

True

In [21]:
# Seed NumPy's global random generator so any NumPy-side randomness is repeatable across runs.
# NOTE(review): this does not touch Torch's RNG; if layer initialisation draws from it, seed it as well -- confirm.
np.random.seed(0)

In [22]:
# Candidate values of curr_time for the training loop: 80, 100, ..., 111980 (the stop value is exclusive).
# The stride of 20 matches the step used by get_t and the division by 20 used to index `location`.
time_range = np.arange(80, 112000, 20)

In [23]:
# Precomputed index arrays. train_indeces selects entries of time_range in the training loop;
# test_indeces is not used in the visible cells.
# NOTE(review): presumably a train/test split of the time points -- how it was generated is not shown here.
train_indeces = np.load('src/time_train_indeces.npy')
test_indeces = np.load('src/time_test_indeces.npy')

In [32]:
M = M_star = 5 # M observed time points (curr_time and the M-1 before it) and M_star time points after it, see get_t
N = NUMBER_OF_PEDESTRIANS # presumably provided by `from utils import *`; N is not used in the visible cells
X = Y = 256 # grid size: coordinates are rescaled to this range and the displacement volumes have X x Y spatial cells

In [25]:
# NOTE(review): this path is absolute (filesystem root) while every other array is loaded from 'src/' -- confirm it is intended.
location = np.load('/pedestrians_location.npy') # [NUMBER_OF_PEDESTRIANS x NUMBER_OF_FRAMES x 2]
# Rescale coordinates from the FRAME_WIDTH x FRAME_HEIGHT range to the X x Y grid and truncate to whole cells.
# The result is assigned back into `location`, so the array keeps whatever dtype it was saved with.
# NOTE(review): a coordinate equal to FRAME_WIDTH / FRAME_HEIGHT would map to X / Y, one past the last grid index -- confirm it cannot occur.
location[:,:,0] = (X * location[:,:,0] / FRAME_WIDTH).astype(np.int64)
location[:,:,1] = (Y * location[:,:,1] / FRAME_HEIGHT).astype(np.int64)
# location[i,t] = [x,y] -- location of i-th pedestrian at time point t (aka frame 20*t) -- [0,0] for absent pedestrians

paths = np.load('src/all_not_ext_paths_with_ids.npy') # paths[j] = [i,t,x,y] -- all pedestrians' coordinates at all frames
# Same rescaling for the x (column 2) and y (column 3) entries of every path record.
paths[:,2] = (X * paths[:,2] / FRAME_WIDTH).astype(np.int64)
paths[:,3] = (Y * paths[:,3] / FRAME_HEIGHT).astype(np.int64)

In [26]:
def get_t(curr_time, M, M_star, step=20):
    """Return the time points of one training window around ``curr_time``.

    The window holds the M-1 time points before ``curr_time``, ``curr_time``
    itself and the M_star time points after it, all spaced ``step`` apart,
    i.e. M + M_star values in ascending order.

    Parameters
    ----------
    curr_time : int
        The "current" time point; it ends up at index M-1 of the result.
    M : int
        Number of observed time points (including ``curr_time``).
    M_star : int
        Number of future time points.
    step : int, optional
        Spacing between consecutive time points. Defaults to 20, the value
        that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D array of length M + M_star.
    """
    first = curr_time - step * (M - 1)
    # np.arange excludes its stop value, hence M_star + 1 to include the last future point
    stop = curr_time + step * (M_star + 1)
    return np.arange(first, stop, step)

In [27]:
# Hyper-parameters shared by the convolutions below.
filters_num = 64
kW = kH = 3
kT = 1


def _hidden_conv():
    """Build one filters_num -> filters_num convolution (used for conv2 .. conv6).

    Positional arguments follow the Torch documentation linked below:
    nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH.
    """
    return nn.VolumetricConvolution(filters_num, filters_num, kT, kW, kH, 1, 1, 1, 0, 1, 1)


model = nn.Sequential()

# https://github.com/torch/nn/blob/master/doc/convolution.md#nn.VolumetricConvolution
# conv1: one input plane, temporal kernel of size 2*M
model._add(nn.VolumetricConvolution(1, filters_num, 2*M, kW, kH, 1, 1, 1, 0))
for _ in range(2): # conv2, conv3
    model._add(_hidden_conv())

# https://github.com/torch/nn/blob/master/doc/convolution.md#nn.VolumetricMaxPooling
# kernel 1 x 2 x 2 with stride 1 x 2 x 2
model._add(nn.VolumetricMaxPooling(1, 2, 2, 1, 2, 2))

# https://github.com/torch/nn/blob/master/doc/simple.md#nn.CAdd
# additive bias of size 1 x int((Y-1)/2) x int((X-1)/2)
model._add(nn.CAdd(1, int((Y-1)/2), int((X-1)/2)))

for _ in range(3): # conv4, conv5, conv6
    model._add(_hidden_conv())

# https://github.com/torch/nn/blob/master/doc/convolution.md#nn.VolumetricFullConvolution
# deconv: back to a single output plane, temporal kernel of size 2*M_star
model._add(nn.VolumetricFullConvolution(filters_num, 1, 2*M_star, 4, 4, 1, 2, 2))

# Loss and step size used by the training loop.
criterion = nn.MSECriterion()

learning_rate = 0.01

In [28]:
def from_numpy(array):
    """Wrap ``array`` with ``torch.fromNumpyArray`` and return the result.

    ``torch`` here is the module-level name available after the lutorpy
    imports at the top of the notebook, not PyTorch.
    """
    tensor = torch.fromNumpyArray(array)
    return tensor

def to_numpy(array):
    """Return ``array.asNumpyArray()``.

    ``array`` is expected to expose an ``asNumpyArray`` method (presumably a
    Torch tensor obtained through lutorpy).
    """
    converted = array.asNumpyArray()
    return converted

In [31]:
# Training loop over the first 10 training time points. For each one: encode the observed paths
# (M time points) into the input volume and the future paths (M_star time points) into the target
# volume, then take two gradient steps on that single sample.
for curr_time in tqdm(time_range[train_indeces[:10]]):
    t = get_t(curr_time, M, M_star) # M + M_star time points; t[M-1] == curr_time
    ind_t = (t/20).astype(int) # for indexing in location

    pedestrians_in_scene = paths[paths[:,1] == t[M-1]] # getting all pedestrians who located on map at curr_time

    # One 2*M (resp. 2*M_star) displacement vector per grid cell; cells without a pedestrian stay zero.
    disp_volume = np.zeros((X, Y, 2*M))
    disp_volume_star = np.zeros((X, Y, 2*M_star))

    for p in pedestrians_in_scene: # p = [i, t, x, y]
        # Indexing with the array ind_t[...] yields copies, so the fills below do not modify `location`.
        l = location[p[0],ind_t[:M]] # locations of pedestrian at t_1, t_2, ..., t_M time points
        l_star = location[p[0],ind_t[M:]] # locations at the M_star future time points

        # filling zeros absent pedestrians
        # An all-zero row marks an absent pedestrian; it is replaced by (last row + [X, Y]) so that
        # the displacement computed below evaluates to 0 for that time point.
        for i, row in enumerate(l):
            if (row == 0).all():
                l[i] = l[-1] + np.array([X, Y]) # to get zero in displacement vector

        # NOTE(review): if the pedestrian is absent at the last future time point, l_star[-1] is all
        # zeros: earlier fills use that zero row, the final iteration then overwrites l_star[-1]
        # itself, and the filled rows encode as 1 rather than 0 -- confirm this is intended.
        for i, row in enumerate(l_star):
            if (row == 0).all():
                l_star[i] = l_star[-1] + np.array([X, Y])

        # Displacement of every time point relative to the last row, scaled by the grid size and shifted by +1.
        d = (l[-1] - l) / np.array([X, Y]) + 1
        d_star = (l_star[-1] - l_star) / np.array([X, Y]) + 1
        disp_volume[p[2], p[3]] = d.ravel() # put in [X, Y]-th cell of disp_volume displacement vector
        # The target vector is stored at the cell of the pedestrian's location at the last future time point.
        # NOTE(review): this reads the unmodified `location`, so a pedestrian absent at that time is written to cell [0, 0].
        disp_volume_star[location[p[0],ind_t[-1]][0], location[p[0],ind_t[-1]][1]] = d_star.ravel()    

    # 1 where the target is non-zero, 0 elsewhere; used to zero the prediction outside target entries.
    mask = (disp_volume_star != 0).astype(int)
    
    # Two forward / backward / update passes on the same sample.
    for _ in np.arange(2):
        # Volumes are transposed and given a leading axis of size 1 before being handed to the model.
        pred = to_numpy(model._forward(from_numpy(disp_volume.T[np.newaxis])))
        # MSE between the masked prediction and the target (the returned loss value is not stored).
        criterion._forward(from_numpy(pred*(mask.T[np.newaxis])), from_numpy((disp_volume_star).T[np.newaxis]))
        model._zeroGradParameters()
        # Back-propagate the criterion's gradient through the model for the same input.
        model._backward(from_numpy(disp_volume.T[np.newaxis]),
                        criterion._backward(from_numpy(pred*(mask.T[np.newaxis])), from_numpy(disp_volume_star.T[np.newaxis])))
        model._updateParameters(learning_rate)

100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


In [None]:
# THERE IS A PROBLEM HERE:
# pedestrians' ids are lost once the paths are encoded into disp_volume, so to decode we
# need to somehow match each predicted path with the previous path it continues

# Displacement volume decode.
# Encoding is d = (cell - l) / [X, Y] + 1, so the inverse is l = cell - (d - 1) * [X, Y].
# NOTE(review): this decodes `disp_volume` (the input volume of the last training sample); to decode
# predictions the same steps would have to be applied to the model output instead -- confirm intent.
# NOTE(review): time points that were encoded as 0 (absent pedestrian) decode to cell + [X, Y],
# i.e. outside the grid, and have to be filtered by the caller.
xs, ys = disp_volume[:,:,0].nonzero() # indices of cells whose first channel is non-zero
decoded_paths = [] # one (n_points x 2) array of grid coordinates per selected cell, in (xs, ys) order
for i, j in zip(xs,ys):
    # previously this expression was evaluated and thrown away; keep the result
    decoded_paths.append(np.array([i,j]) - (disp_volume[i,j].reshape(-1,2) - 1) * np.array([X, Y]))