<a href="https://colab.research.google.com/github/alikipanou/GeoTransformer/blob/main/multi_modal_hybrid_architecture_for_pedestrian_action_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch 

class MapEncoding(torch.nn.Module):
  """Convolutional encoder that downsamples a multi-channel map tensor.

  Three unpadded 3x3 convolutions are chained (strides 4, 2, 2) while the
  channel count grows ``in_channels -> 32 -> 64 -> 128``. There are no
  activation or normalisation layers between the convolutions.
  """

  def __init__(self,in_channels):
    """Create the convolution stack.

    Args:
      in_channels: number of channels of the input map tensor.
    """
    super().__init__()

    # (input channels, output channels, stride) of each 3x3 convolution, in order.
    layer_specs = (
        (in_channels, 32, 4),
        (32, 64, 2),
        (64, 128, 2),
    )
    self.net = torch.nn.Sequential(*[
        torch.nn.Conv2d(c_in, c_out, kernel_size=3, stride=step)
        for c_in, c_out, step in layer_specs
    ])

  def forward(self,x):
    """Run ``x`` (batch, in_channels, H, W) through the stack; output has 128 channels."""
    return self.net(x)

# Smoke test: encode a random batch of 50 maps with 20 channels and 500x200 pixels.
m = MapEncoding(20)
d = torch.rand(50,20,500,200)
o = m(d)
# Inspect the encoded shape (the three strided convolutions shrink H and W).
o.shape


torch.Size([50, 128, 30, 11])

In [None]:
class SceneEncoding(torch.nn.Module):
  """Convolutional encoder for a multi-channel scene tensor.

  Four unpadded 3x3 convolutions are chained with strides 1, 4, 2, 2 and
  channel widths ``in_channels -> 64 -> 64 -> 128 -> 256``. No activation or
  normalisation layers are placed between them.
  """

  def __init__(self,in_channels):
    """Create the convolution stack.

    Args:
      in_channels: number of channels of the input scene tensor.
    """
    super().__init__()

    # widths[i] -> widths[i + 1] is the channel change of the i-th convolution.
    widths = [in_channels, 64, 64, 128, 256]
    strides = [1, 4, 2, 2]
    convs = [
        torch.nn.Conv2d(widths[i], widths[i + 1], kernel_size=3, stride=step)
        for i, step in enumerate(strides)
    ]
    self.net = torch.nn.Sequential(*convs)

  def forward(self,x):
    """Run ``x`` (batch, in_channels, H, W) through the stack; output has 256 channels."""
    return self.net(x)

# Smoke test: encode a random batch of 50 scenes with 10 channels and 500x200 pixels.
m = SceneEncoding(10)
d = torch.rand(50,10,500,200)
o = m(d)
# Inspect the encoded shape.
o.shape

torch.Size([50, 256, 30, 11])

In [None]:
class VisualAttentionModule(torch.nn.Module):
  """Square linear projection followed by a softmax over dimension 1.

  For a 2-D input of shape (batch, in_channels) every output row is a
  probability distribution over the ``in_channels`` features.
  """

  def __init__(self,in_channels):
    """Create the projection.

    Args:
      in_channels: size of the feature dimension; the linear layer maps
        ``in_channels -> in_channels``.
    """
    super().__init__()
    self.net = torch.nn.Sequential(
        torch.nn.Linear(in_channels, in_channels),
        # dim=1 is the feature axis for (batch, in_channels) inputs.
        torch.nn.Softmax(dim=1),
    )

  def forward(self,x):
    """Return the softmax-normalised projection of ``x``."""
    return self.net(x)

# Smoke test: 50 random feature vectors of size 20; each output row is a softmax distribution.
d = torch.randn(50,20)
v = VisualAttentionModule(20)
v(d)

tensor([[0.0599, 0.0412, 0.0329, 0.1556, 0.0478, 0.0400, 0.0757, 0.0096, 0.0237,
         0.0561, 0.0922, 0.0559, 0.0114, 0.0730, 0.0215, 0.0348, 0.0584, 0.0398,
         0.0458, 0.0246],
        [0.0465, 0.0406, 0.0286, 0.0538, 0.0702, 0.0605, 0.0432, 0.0484, 0.0257,
         0.0357, 0.0272, 0.0189, 0.1780, 0.0231, 0.0401, 0.0613, 0.0497, 0.0356,
         0.0532, 0.0597],
        [0.1071, 0.0597, 0.0410, 0.0558, 0.0131, 0.0470, 0.0230, 0.0123, 0.0453,
         0.1468, 0.0446, 0.0378, 0.0223, 0.0358, 0.0409, 0.0673, 0.0597, 0.0293,
         0.0511, 0.0601],
        [0.0791, 0.0461, 0.0348, 0.0564, 0.0339, 0.0609, 0.0343, 0.0400, 0.0319,
         0.0561, 0.0380, 0.0185, 0.0409, 0.0312, 0.0299, 0.1575, 0.0732, 0.0471,
         0.0358, 0.0545],
        [0.0888, 0.0518, 0.0465, 0.0412, 0.0287, 0.0617, 0.0245, 0.0405, 0.0184,
         0.0216, 0.0433, 0.0689, 0.0679, 0.0295, 0.0823, 0.0313, 0.0864, 0.0505,
         0.0517, 0.0645],
        [0.0534, 0.0968, 0.0604, 0.0490, 0.0516, 0.0638, 0.0

In [None]:
class DynamicsAttentionModule(torch.nn.Module):
  """Attention over the time axis of a feature sequence.

  For an input ``x`` of shape (B, hidden_size, T) the module computes::

    scores  = sigmoid(x^T @ Wa @ x)        # (B, T, T)
    context = x @ scores^T                 # (B, hidden_size, T)
    output  = tanh(Wc @ [context ; x])     # (B, hidden_size, T)

  where ``[context ; x]`` is concatenation along the feature axis.

  The context is computed with one batched matmul instead of tiling the
  scores and the input into (B, T, hidden_size, T) tensors. This keeps all
  intermediates on the device and dtype of ``x`` and the parameters, avoids
  the large temporaries, and lets the sequence length vary between calls.
  """

  def __init__(self,length,hidden_size):
    """Create the two learnable projection matrices.

    Args:
      length: nominal sequence length. Stored as ``self.t`` for callers that
        read it; ``forward`` takes the actual length from its input.
      hidden_size: size of the feature axis of the input.
    """
    super(DynamicsAttentionModule, self).__init__()

    # nn.Parameter already has requires_grad=True, so no explicit flag is needed.
    # Wa is created before Wc to keep the random-initialisation order stable.
    self.Wa = torch.nn.Parameter(torch.randn(hidden_size,hidden_size))
    self.Wc = torch.nn.Parameter(torch.randn(hidden_size,2*hidden_size))

    self.t = length
    self.hid_size = hidden_size

  def forward(self,x):
    """Apply the attention block.

    Args:
      x: tensor of shape (B, hidden_size, T).

    Returns:
      Tensor of shape (B, hidden_size, T) with values in (-1, 1).
    """
    # (hidden, hidden) @ (B, hidden, T) broadcasts over the batch -> (B, hidden, T)
    projected = torch.matmul(self.Wa, x)

    # scores[b, i, j] = sigmoid(<x[b, :, i], (Wa x)[b, :, j]>) -> (B, T, T)
    scores = torch.sigmoid(torch.matmul(x.transpose(1, 2), projected))

    # context[b, h, i] = sum_j scores[b, i, j] * x[b, h, j] -> (B, hidden, T)
    context = torch.matmul(x, scores.transpose(1, 2))

    # Stack context and input along the feature axis -> (B, 2*hidden, T)
    combined = torch.cat((context, x), dim=1)

    return torch.tanh(torch.matmul(self.Wc, combined))

# Smoke test: batch of 50 sequences, 500 features, 10 time steps (layout: batch, hidden, time).
d = torch.rand((50,500,10))
model =  DynamicsAttentionModule(10,500)
out = model(d)


In [None]:

class HybridModel(torch.nn.Module):
  """Unfinished multi-modal model combining map/scene images with motion sequences.

  The visual branch encodes the map and scene tensors, concatenates them on
  the channel axis, and reduces them to a 512-d vector that is passed through
  the visual attention module. The motion branch runs one LSTM over the
  pedestrian sequence and one over the ego-vehicle sequence.

  NOTE: ``forward`` is a work in progress. It prints the pedestrian LSTM
  output shape and returns ``None``; the visual attention output and
  ``self.dam`` are not used in any result yet.
  """

  def __init__(self,map_channels,scene_channels,ped_size,ev_size,t):
    """Build all sub-modules.

    Args:
      map_channels: channel count of the map input.
      scene_channels: channel count of the scene input.
      ped_size: feature size per time step of the pedestrian motion input.
      ev_size: feature size per time step of the ego-vehicle motion input.
      t: sequence length handed to the dynamics attention module.
    """
    super().__init__()

    # Sub-modules are created in a fixed order so random initialisation is reproducible.
    self.map_encoding_module = MapEncoding(map_channels)
    self.scene_encoding_module = SceneEncoding(scene_channels)
    self.vam = VisualAttentionModule(512)
    self.dam = DynamicsAttentionModule(t,512)

    # The scene encoder emits 256 channels and the map encoder 128.
    fused_channels = 256 + 128
    self.c = torch.nn.Conv2d(fused_channels, 512, kernel_size=3, stride=1)
    # 243200 = 512 * 19 * 25, the flattened size of self.c's output for
    # 350x450 inputs; other spatial sizes will not fit this layer.
    self.l = torch.nn.Linear(243200, 512)

    self.ped_lstm = torch.nn.LSTM(
        input_size=ped_size, hidden_size=256, batch_first=True)
    self.ev_lstm = torch.nn.LSTM(
        input_size=ev_size, hidden_size=256, batch_first=True)

  def forward(self,map,scene,ped_motion,ev_motion):
    """Run both branches; currently only prints a shape and returns ``None``.

    Args:
      map: map tensor, (batch, map_channels, H, W).
      scene: scene tensor, (batch, scene_channels, H, W).
      ped_motion: pedestrian sequence, (batch, time, ped_size).
      ev_motion: ego-vehicle sequence, (batch, time, ev_size).
    """
    map_features = self.map_encoding_module(map)
    scene_features = self.scene_encoding_module(scene)

    # Fuse on the channel axis, then conv -> flatten -> linear to a 512-d vector.
    fused = torch.cat((map_features, scene_features), 1)
    fused = self.c(fused)
    visual_vector = self.l(torch.flatten(fused, start_dim=1))

    # TODO: the attention output is computed but not consumed yet.
    out_vam = self.vam(visual_vector)

    # Only the per-step outputs are kept; the final (h_n, c_n) states are discarded.
    ped_seq, _ = self.ped_lstm(ped_motion)
    ev_seq, _ = self.ev_lstm(ev_motion)

    print(ped_seq.shape)


# Smoke test with batch size 10: 15-channel 350x450 map and scene tensors,
# plus 10-step motion sequences (6 pedestrian features, 2 ego-vehicle features).
m  = HybridModel(15,15,6,2,10)
d1 = torch.rand(10,15,350,450)
d2 = torch.rand(10,15,350,450)
d3 = torch.randn(10,10,6)
d4 = torch.randn(10,10,2)


# forward() prints the pedestrian LSTM output shape and returns None.
m(d1,d2,d3,d4)

torch.Size([10, 10, 256])


In [None]:
# Scratch cell: experiments with permute / repeat / element-wise multiplication.
import torch
 
# Define a 3-D integer tensor of shape (3, 3, 3)
tens = torch.tensor([[[1, 2, 3], [4, 5, 6], [47, 38, 29]],
                     [[7, 8, 9], [10, 11, 12], [5, 3, 1]],
                     [[13, 14, 15], [16, 17, 18], [2, 4, 3]]])
# display original tensor
print("\n Original Tensor: \n", tens)
 
# swap the last two dimensions (the result is not used below)
tens_transpose = tens.permute(0,2,1)
#tens2 = torch.split(tens,1,dim = 0)
# Insert a singleton axis at dim 2, copy `tens` into it (values are cast to float),
# then tile that axis 3 times -> shape (3, 3, 3, 3).
tens2 = torch.zeros(3,3,1,3)
tens2[:,:,0,:] = tens
tens2 = tens2.repeat(1,1,3,1)
t1 = torch.randn(50,10,500,10)

# repeat(1,2,1) tiles `tens` twice along dim 1 -> shape (3, 6, 3)
print(tens.repeat(1,2,1))
t2 = torch.randn(50,500,10)
print(tens2.shape)
print(t2.shape)
# Element-wise product of the index-1 slice of t1 along dim 1 with t2; both are (50, 500, 10).
t = torch.mul(t1[:,1,:,:],t2)
# NOTE: the product `t` is computed but never printed.



 Original Tensor: 
 tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [47, 38, 29]],

        [[ 7,  8,  9],
         [10, 11, 12],
         [ 5,  3,  1]],

        [[13, 14, 15],
         [16, 17, 18],
         [ 2,  4,  3]]])
tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [47, 38, 29],
         [ 1,  2,  3],
         [ 4,  5,  6],
         [47, 38, 29]],

        [[ 7,  8,  9],
         [10, 11, 12],
         [ 5,  3,  1],
         [ 7,  8,  9],
         [10, 11, 12],
         [ 5,  3,  1]],

        [[13, 14, 15],
         [16, 17, 18],
         [ 2,  4,  3],
         [13, 14, 15],
         [16, 17, 18],
         [ 2,  4,  3]]])
torch.Size([3, 3, 3, 3])
torch.Size([50, 500, 10])


In [None]:
# Scratch cell: check concatenation and tiling shapes on small tensors
# (batch B, 20 features, t time steps).
B  = 10 
t = 4
x = torch.rand(B,20,t)
c = torch.rand(B,20,t)

# Concatenating along dim 1 gives shape (B, 40, t); the value is neither stored nor printed.
torch.cat((x,c),1).shape



# Add a singleton axis at dim 1 and tile it t times -> (B, t, 20, t);
# every copy along dim 1 holds the same (20, t) slice.
X = torch.zeros(B,1,20,t)
X[:,0,:,:] = x
# NOTE: `x` is rebound to the tiled 4-D tensor here.
x = X.repeat(1,t,1,1)

print(x)

tensor([[[[0.7450, 0.2218, 0.8524, 0.8619],
          [0.9971, 0.0833, 0.6543, 0.5368],
          [0.2814, 0.1802, 0.0624, 0.7050],
          ...,
          [0.7421, 0.0260, 0.1662, 0.5063],
          [0.7589, 0.4727, 0.6168, 0.8142],
          [0.7658, 0.1250, 0.3186, 0.5399]],

         [[0.7450, 0.2218, 0.8524, 0.8619],
          [0.9971, 0.0833, 0.6543, 0.5368],
          [0.2814, 0.1802, 0.0624, 0.7050],
          ...,
          [0.7421, 0.0260, 0.1662, 0.5063],
          [0.7589, 0.4727, 0.6168, 0.8142],
          [0.7658, 0.1250, 0.3186, 0.5399]],

         [[0.7450, 0.2218, 0.8524, 0.8619],
          [0.9971, 0.0833, 0.6543, 0.5368],
          [0.2814, 0.1802, 0.0624, 0.7050],
          ...,
          [0.7421, 0.0260, 0.1662, 0.5063],
          [0.7589, 0.4727, 0.6168, 0.8142],
          [0.7658, 0.1250, 0.3186, 0.5399]],

         [[0.7450, 0.2218, 0.8524, 0.8619],
          [0.9971, 0.0833, 0.6543, 0.5368],
          [0.2814, 0.1802, 0.0624, 0.7050],
          ...,
          