In [1]:
import numpy as np

import matplotlib
from matplotlib import pyplot as plt

from IPython.display import HTML
from IPython.display import display

from skeleton_models import ntu_rgbd, get_kernel_by_group, ntu_ss_1, ntu_ss_2, ntu_ss_3, partial

from zoo_pose_embedding import TwoLayersGCNPoseEmbedding
from zoo_action_encoder_units import AttentionWithGCNEncoder
from zoo_action_decoder_units import AttentionWithGCNDecoder
from zoo_upsampling import StepByStepUpsampling

from model import ActionEmbeddingTransformer
from layers import subsequent_mask

from render import animate
import math, copy, time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import os, glob

In [2]:
# Load one raw skeleton recording from the NTURGB-D dataset folder.
# NOTE(review): allow_pickle=True makes NumPy unpickle object arrays, which can
# execute arbitrary code -- only load files from a trusted source.
# The trailing [()] indexes with an empty tuple; presumably the file stores a
# single Python object (e.g. a dict) inside a 0-d object array -- TODO confirm.
sample_data = np.load('../datasets/NTURGB-D/Python/raw_npy/S003C001P008R002A023.skeleton.npy', allow_pickle=True)[()]

In [3]:
# A skeleton with 5 vertices: vertex 0 is linked to every other vertex and
# every vertex has a self-loop (ones on the diagonal).
A = np.array([
    [1 if (row == 0 or col == 0 or row == col) else 0 for col in range(5)]
    for row in range(5)
])
print(A)
print(A.shape)

[[1 1 1 1 1]
 [1 1 0 0 0]
 [1 0 1 0 0]
 [1 0 0 1 0]
 [1 0 0 0 1]]
(5, 5)


In [4]:
# Adjacency tensor of a multigraph of size 2: the same matrix stacked twice
# along a new leading axis.
Am = np.stack([A] * 2, axis=0)
print(Am)
print(Am.shape)  # (K, V, V)

[[[1 1 1 1 1]
  [1 1 0 0 0]
  [1 0 1 0 0]
  [1 0 0 1 0]
  [1 0 0 0 1]]

 [[1 1 1 1 1]
  [1 1 0 0 0]
  [1 0 1 0 0]
  [1 0 0 1 0]
  [1 0 0 0 1]]]
(2, 5, 5)


In [5]:
# One pose: 5 vertices placed in 2D space, one coordinate pair per vertex.
X = np.array(list(zip(
    (0, 1, -2, 1, -1),   # first coordinate of each vertex
    (0, 1, 1, -1, -2),   # second coordinate of each vertex
)))
print(X)
print(X.shape)

[[ 0  0]
 [ 1  1]
 [-2  1]
 [ 1 -1]
 [-1 -2]]
(5, 2)


In [6]:
# A batch of 7 copies of the same pose, stacked along a new leading axis.
Xm = np.stack([X] * 7, axis=0)
print(Xm.shape)  # (N, V, C)

(7, 5, 2)


In [7]:
# A batch (7 samples) of temporal sequences (3 time steps) of poses.
# The time axis is inserted at position 1, right after the batch axis.
Xmt = np.stack([Xm] * 3, axis=1)
Xmt.shape

(7, 3, 5, 2)

In [8]:
# Convert the NumPy arrays into float tensors.
Ain = torch.from_numpy(Am).float()
Xin = torch.from_numpy(Xmt).float()

print(Ain.shape)
print(Xin.shape)


torch.Size([2, 5, 5])
torch.Size([7, 3, 5, 2])


In [9]:
# Hyper-parameters for the toy model.
conf_kernel_size = 2
conf_num_nodes = 5
conf_heads = 1
conf_encoding_per_node = 20
# Per-node width used inside the attention blocks: the encoding width divided
# by the number of heads, truncated to an integer.
conf_internal_per_node = conf_encoding_per_node // conf_heads

class BetterThatBestModel(nn.Module):
    """Toy model that wires four project components into an ActionEmbeddingTransformer.

    The components are, in the order they are handed to the transformer:
    a TwoLayersGCNPoseEmbedding, an AttentionWithGCNEncoder, an
    AttentionWithGCNDecoder and a StepByStepUpsampling module. All sizes come
    from the module-level ``conf_*`` constants; the literal ``2`` used for the
    embedding input and the upsampling output presumably matches the 2D
    coordinates of the toy poses (TODO confirm against the component signatures).
    """

    def __init__(self):
        super().__init__()

        # Keyword arguments that the encoder and the decoder have in common.
        shared_attention_args = dict(
            heads=conf_heads,
            node_channel_in=conf_encoding_per_node,
            node_channel_out=conf_encoding_per_node,
            num_nodes=conf_num_nodes,
            kernel_size=conf_kernel_size,
        )

        # Build the parts in the same order in which the transformer receives them.
        pose_embedding = TwoLayersGCNPoseEmbedding(
            2, conf_encoding_per_node, conf_kernel_size
        )
        encoder = AttentionWithGCNEncoder(
            node_channel_mid=conf_internal_per_node,
            **shared_attention_args
        )
        decoder = AttentionWithGCNDecoder(
            memory_channel_in=conf_encoding_per_node,
            node_channel_mid=(conf_internal_per_node, conf_internal_per_node),
            **shared_attention_args
        )
        upsampling = StepByStepUpsampling(
            conf_num_nodes, conf_encoding_per_node, node_channel_out=2
        )

        self.model = ActionEmbeddingTransformer(
            pose_embedding, encoder, decoder, upsampling
        )

    def forward(self, x_in, x_out, A, mask):
        """Delegate to the wrapped transformer, passing all arguments through unchanged."""
        return self.model(x_in, x_out, A, mask)

In [10]:
# Instantiate the toy model and run one forward pass, feeding the same pose
# batch as both input and target sequence. The 3 handed to subsequent_mask
# presumably matches the 3 time steps of Xin -- TODO confirm.
a = BetterThatBestModel()
out = a(Xin, Xin, Ain, subsequent_mask(3))

# Show the size of the first three entries of the result.
for part_idx in range(3):
    print(out[part_idx].size())

NameError: name 'TwoLayersGCNPoseEncoder' is not defined

In [None]:
# Sizes of the four axes of the probe tensor.
# NOTE: these names are reused by the following cell; be aware that `A` also
# rebinds the adjacency matrix defined earlier in the notebook.
A = 3
B = 4
C = 5
D = 6

# Probe tensor whose value encodes its own index:
#     teste[a, b, c, d] = a + 10*b + 100*c + 1000*d
# so each decimal digit of an element reveals the position it came from, which
# makes permute/view experiments easy to read.
# The fill is a broadcast sum of one index vector per axis. It is driven by
# A..D (so the tensor shape and the fill can never disagree) and it introduces
# no loop variables, so no other notebook name gets overwritten.
teste = torch.zeros((A, B, C, D))
teste += (
    torch.arange(A).view(A, 1, 1, 1)
    + 10 * torch.arange(B).view(1, B, 1, 1)
    + 100 * torch.arange(C).view(1, 1, C, 1)
    + 1000 * torch.arange(D).view(1, 1, 1, D)
)

# Display the (B, C) slice at a = 0, d = 0.
teste[0,:,:,0]

In [None]:
# Experiment: follow the probe tensor through a chain of permute / view calls,
# printing a slice after every step to see how elements move.
# Each .contiguous() materialises the current axis order in memory before
# .view() reinterprets the shape.
t = teste.permute(0,3,2,1) # [A,B,C,D] -> [A, D, C, B]
print(t[0,:,:,0])
# Merge the D and C axes: [A, D, C, B] -> [A, D*C, B]
t = t.contiguous().view(A, D*C, B)
print(t[0,:,0])
# Regroup the same data, now merging C and B instead: [A, D*C, B] -> [A, D, C*B]
t = t.contiguous().view(A, D, C*B)
print(t[0,:,0])
t = t.permute(0,2,1) # [A,D,C*B] -> [A,C*B,D]
print(t[0,:,0])
# Regroup the trailing axes: [A, C*B, D] -> [A, C, B*D]
t = t.contiguous().view(A, C, B*D)
print(t[0,:,0])
t = t.permute(0,2,1) # [A,C,B*D] -> [A,B*D,C]
print(t[0,:,0])
# Split the merged axis again: [A, B*D, C] -> [A, B, D, C]
t = t.contiguous().view(A, B, D, C)
print(t[0,:,0, 0])
t = t.permute(0,1,3,2) # [A,B,D,C] -> [A,B,C,D]
# Back in the original [A, B, C, D] axis order; compare with teste[0,:,:,0].
print(t[0,:,:,0])

In [None]:
def calculate_params(heads, num_nodes, scale_factor):
    """Derive the input and output channel counts from the attention layout.

    Args:
        heads: number of attention heads.
        num_nodes: number of graph nodes.
        scale_factor: multiplier applied to ``num_nodes``.

    Returns:
        A ``(channel_in, channel_out)`` tuple where
        ``channel_out = scale_factor * num_nodes`` and
        ``channel_in = heads * channel_out``.
    """
    per_head_channels = scale_factor * num_nodes
    return heads * per_head_channels, per_head_channels

In [None]:
# Example: 8 heads, 25 nodes, scale factor 4.
calculate_params(8, 25, 4)