In [1]:
from utils import *

In [2]:
from tqdm import tqdm

In [3]:
import torch
from torch.autograd import Variable
from torch.nn import Parameter
import torch.nn as nn
import torch.nn.functional as F

In [4]:
from math import ceil

In [5]:
np.random.seed(0)

In [6]:
time_range = np.arange(80, 112000, 20)

In [7]:
train_indeces = np.load('src/time_train_indeces.npy')
validation_indeces = np.load('src/time_validation_indeces.npy')
test_indeces = np.load('src/time_test_indeces.npy')

In [8]:
M = M_star = 5
N = NUMBER_OF_PEDESTRIANS
X = Y = 256

In [9]:
location = np.load('src/pedestrians_location.npy') # [NUMBER_OF_PEDESTRIANS x NUMBER_OF_FRAMES x 2]
location[:,:,0] = (X * location[:,:,0] / FRAME_WIDTH).astype(np.int64)
location[:,:,1] = (Y * location[:,:,1] / FRAME_HEIGHT).astype(np.int64)
# location[i,t] = [x,y] -- location of i-th pedestrian at time point t (aka frame 20*t) -- [0,0] for absent pedestians

ped_paths = np.load('src/all_not_ext_paths_with_ids.npy')
# ped_paths[j] = [i,t,x,y] -- all pedestrians' coordinates at all frames
ped_paths[:,2] = (X * ped_paths[:,2] / FRAME_WIDTH).astype(np.int64)
ped_paths[:,3] = (Y * ped_paths[:,3] / FRAME_HEIGHT).astype(np.int64)

In [10]:
# M-1 time points before curr_time and M_star after
def get_t(curr_time, M, M_star):
    step = 20
    return np.arange(curr_time - step*(M-1), curr_time + step*(M_star+1), step)

In [11]:
def to_cuda(arr):
    return Variable(torch.from_numpy(arr).cuda())

In [12]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
    
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=64, kernel_size=(2*M, 3, 3), bias=False)
        self.conv2 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(1, 3, 3), padding=(0, 1, 1), bias=False)
        self.conv3 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(1, 3, 3), padding=(0, 1, 1), bias=False)
        
        self.maxpool = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        
        self.bias_map = Parameter(torch.rand(int((X-1)/2), int((Y-1)/2)))
        
        self.conv4 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(1, 3, 3), padding=(0, 1, 1), bias=False)
        self.conv5 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(1, 3, 3), padding=(0, 1, 1), bias=False)
        self.conv6 = nn.Conv3d(in_channels=64, out_channels=64, kernel_size=(1, 3, 3), padding=(0, 1, 1), bias=False)
        
        self.deconv = nn.ConvTranspose3d(in_channels=64, out_channels=1, kernel_size=(2*M_star, 4, 4), stride=(1, 2, 2),
                                         bias=False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x = self.maxpool(x)
        
        x = x + self.bias_map.unsqueeze(0).expand_as(x)
        
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        
        x = self.deconv(x)
        
        return x
    

model = Net()
model = model.double()
model.cuda()

criterion = nn.MSELoss(size_average=False).cuda()

In [13]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

In [14]:
batch_size = 8
fails = []
train_losses = []
validation_losses = []

In [15]:
train_indeces = train_indeces[:3200]

In [16]:
def get_arrays(indeces):
    
    dv = np.zeros((0, X, Y, 2*M))
    dv_star = np.zeros((0, X, Y, 2*M_star))
    m = np.zeros((0, X, Y, 2*M))

    for curr_time in time_range[indeces]:
        
        t = get_t(curr_time, M, M_star)
        ind_t = (t/20).astype(int) # for indexing in location

        pedestrians_in_scene = ped_paths[ped_paths[:,1] == t[M-1]] # getting all pedestrians who located on map at curr_time

        disp_volume = np.zeros((X, Y, 2*M))
        disp_volume_star = np.zeros((X, Y, 2*M_star))

        for p in pedestrians_in_scene:
            l = location[p[0],ind_t[:M]] # locations of pedestrian at t_1, t_2, ..., t_M time points
            l_star = location[p[0],ind_t[M:]]

            # filling zeros absent pedestrians
            for i, row in enumerate(l):
                if (row == 0).all():
                    l[i] = l[-1] + np.array([X, Y]) # to get zero in displacement vector

            for i, row in enumerate(l_star):
                if (row == 0).all():
                    l_star[i] = l_star[-1] + np.array([X, Y])

            d = (l[-1] - l) / np.array([X, Y]) + 1
            d_star = (l_star[-1] - l_star) / np.array([X, Y]) + 1
            disp_volume[p[2], p[3]] = d.ravel() # put in [X, Y]-th cell of disp_volume displacement vector
            disp_volume_star[location[p[0],ind_t[-1]][0], location[p[0],ind_t[-1]][1]] = d_star.ravel()    

        mask = (disp_volume_star != 0).astype(int)

        dv = np.concatenate((dv, [disp_volume]))
        dv_star = np.concatenate((dv_star, [disp_volume_star]))
        m = np.concatenate((m, [mask]))

    dv = np.transpose(dv, axes=(0,3,1,2))
    dv_star = np.transpose(dv_star, axes=(0,3,1,2))
    m = np.transpose(m, axes=(0,3,1,2))

    dv = dv[:, np.newaxis]
    dv_star = dv_star[:, np.newaxis]
    m = m[:, np.newaxis]
    sum_m = m.sum(axis=-1).sum(axis=-1).sum(axis=-1).sum(axis=-1)
    
    return dv, dv_star, m, sum_m

In [None]:
for it in range(100):

    for i in tqdm(range(ceil(len(train_indeces) / batch_size) + 1)):

        dv, dv_star, m, sum_m = get_arrays(train_indeces[i * batch_size : (i+1) * batch_size])

        try:
            pred = model(to_cuda(dv))
            sum_m_cuda = to_cuda(sum_m).unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1).expand_as(pred).double()
            loss = criterion(pred * to_cuda(m) / sum_m_cuda, to_cuda(dv_star) / sum_m_cuda)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        except:
            fails.append(i)
            continue
            
    train_loss = 0

    for i in tqdm(range(ceil(len(train_indeces) / batch_size) + 1)):

        dv, dv_star, m, sum_m = get_arrays(train_indeces[i * batch_size : (i+1) * batch_size])

        try:
            pred = model(to_cuda(dv))
            sum_m_cuda = to_cuda(sum_m).unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1).expand_as(pred).double()
            loss = criterion(pred * to_cuda(m) / sum_m_cuda, to_cuda(dv_star) / sum_m_cuda)
            train_loss = (train_loss * i + loss.data.cpu()) / (i + 1)
        except:
            fails.append(i)
            continue
            
    train_losses.append(train_loss)
            
    validation_loss_prev = validation_losses[-1]
    validation_loss = 0

    for i in tqdm(range(ceil(len(validation_indeces) / batch_size) + 1)):

        dv, dv_star, m, sum_m = get_arrays(train_indeces[i * batch_size : (i+1) * batch_size])

        try:
            pred = model(to_cuda(dv))
            sum_m_cuda = to_cuda(sum_m).unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1).expand_as(pred).double()
            loss = criterion(pred * to_cuda(m) / sum_m_cuda, to_cuda(dv_star) / sum_m_cuda)
            validation_loss = (validation_loss * i + loss.data.cpu()) / (i + 1)
        except:
            fails.append(i)
            continue
            
    validation_losses.append(validation_loss)
    
    if validation_loss_prev[0] < validation_loss[0]:
        validation_losses.append(validation_loss)
        break
    
    print(train_loss, validation_loss)
    
    torch.save(model.state_dict(), 'src/model.pth')

  1%|          | 3/401 [00:02<05:16,  1.26it/s]

In [17]:
model.load_state_dict(torch.load('src/model.pth'))

In [None]:
# THERE IS A PROBLEM HERE:
# we lose pedestrians' ids after coding into disp_volume so to decode we need somehow match previous path and predicted

# displacement volume decode
xs, ys = disp_volume[:,:,0].nonzero() # getting indices of non-zero predicted coordinates at first (t_M+1) timepoint
for i, j in zip(xs,ys):
    np.array([i,j]) - (disp_volume[i,j].reshape(-1,2) - 1) * np.array([X, Y])

In [19]:
validation_losses[-10:]

[
 1.00000e-04 *
   5.1165
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1162
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1159
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1156
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1153
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1150
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1148
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1145
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1142
 [torch.DoubleTensor of size 1], 
 1.00000e-04 *
   5.1139
 [torch.DoubleTensor of size 1]]