In [1]:
require 'nn';
require 'Attention';
require 'LSTM';
require 'nngraph';
require 'RNN';

### Encoder

In [2]:
-- Encoder definition: temporal convolution + max-pooling front end feeding two
-- RNNs whose outputs are concatenated along the feature dimension.
-- Variables are left global on purpose: later notebook cells read them.
batchSize = 4          -- sequences per batch in the batched smoke tests
seqLength = 24         -- frames per input sequence
inputFrameSize = 10    -- features per input frame
outputFrameSize = 5    -- feature maps produced by the temporal convolution
kW = 5                 -- temporal convolution kernel width

enc_inp = nn.Identity()()

convlayer = nn.Sequential()
convlayer:add(nn.TemporalConvolution(inputFrameSize,outputFrameSize,kW))
convlayer:add(nn.ReLU())
convlayer:add(nn.TemporalMaxPooling(2,2))
-- Frames remaining after the stride-1 convolution (seqLength - kW + 1) and the
-- window-2 / stride-2 pooling. math.floor keeps L an integer when the
-- convolution output length is odd (the pooling layer drops the last frame).
L = math.floor((seqLength - kW + 1)/2)

-- Create the convolution node once and fan it out to both RNNs. Calling
-- convlayer(enc_inp) twice would create two graph nodes sharing one module
-- instance: the conv stack would run twice, and its single gradInput buffer
-- would be overwritten by the second backward call before the two gradients
-- are summed at enc_inp.
local conv_out = convlayer(enc_inp)

-- `false` (lower case) is the Lua boolean; `False` is an undefined global that
-- evaluates to nil. The last nn.RNN argument differs between the two branches;
-- presumably it selects reverse-time processing -- confirm in RNN.lua.
fRNN = nn.RNN(nn.LSTM(outputFrameSize,outputFrameSize,false),L,false)(conv_out)
bRNN = nn.RNN(nn.LSTM(outputFrameSize,outputFrameSize,false),L,true)(conv_out)

-- JoinTable(2,2): join along dim 2 of 2-D (unbatched) inputs, giving
-- 2*outputFrameSize features per frame.
concat = nn.JoinTable(2,2)({fRNN,bRNN})

encoder = nn.gModule({enc_inp},{concat})


In [3]:
-- Encoder smoke test on one unbatched sequence: run a forward pass, then
-- back-propagate an all-ones gradient shaped like the encoder output
-- (L frames x 2*outputFrameSize features).
inp = torch.randn(seqLength, inputFrameSize)
enc_out = encoder:forward(inp)
local unit_grad = torch.ones(L, 2 * outputFrameSize)
enc_grad = encoder:backward(inp, unit_grad)

In [4]:
-- Encoder smoke test on a batch: same as the single-sequence check, with a
-- leading batch dimension on both the input and the all-ones gradient.
inp = torch.randn(batchSize, seqLength, inputFrameSize)
enc_out = encoder:forward(inp)
local unit_grad = torch.ones(batchSize, L, 2 * outputFrameSize)
enc_grad = encoder:backward(inp, unit_grad)

### Decoder

In [5]:
-- Attention decoder set-up. All names stay global so later cells can use them.

-- Layer sizes.
stateDepth      = 4                      -- size of the recurrent state s_t
annotationDepth = 2 * outputFrameSize    -- size of one encoder output frame
outputDepth     = 2                      -- number of output classes
mlpDepth        = 7                      -- hidden width of the output MLP
T               = 9                      -- passed to nn.Attention; the decoder output has T rows

-- Settings forwarded to nn.Attention (semantics defined in Attention.lua).
scoreDepth              = 5
hybridAttendFilterSize  = 3
hybridAttendFeatureMaps = 4

peepholes = false

-- L is inherited from the encoder cell; with the settings used there it is 10,
-- which is the value of the override that used to be hard-coded here.

decoder_recurrent = nn.LSTM(stateDepth, stateDepth, peepholes)

------------------ decoder_mlp ------------------
-- input : table {s_t, c_t}
--   s_t ~ stateDepth
--   c_t ~ annotationDepth
-- output:
--   y_t ~ outputDepth (log-probabilities, via LogSoftMax)
dec_mlp_inp = nn.Identity()()
-- JoinTable(1,1) concatenates the table entries along the feature dimension.
mlp_inp = nn.JoinTable(1, 1)(dec_mlp_inp)
mlp = nn.Sequential()
    :add(nn.Linear(stateDepth + annotationDepth, mlpDepth))
    :add(nn.ReLU())
    :add(nn.Linear(mlpDepth, outputDepth))
    :add(nn.LogSoftMax())
decoder_mlp = nn.gModule({dec_mlp_inp}, {mlp(mlp_inp)})


decoder = nn.Attention(
    decoder_recurrent, decoder_mlp,
    scoreDepth, hybridAttendFilterSize, hybridAttendFeatureMaps,
    stateDepth, annotationDepth, outputDepth,
    L, T
)


In [6]:
-- Decoder smoke test on one unbatched annotation sequence (L x annotationDepth):
-- forward, then backward with an all-ones gradient of size T x outputDepth.
h = torch.ones(L, annotationDepth)
decoder:forward(h)
local unit_grad = torch.ones(T, outputDepth)
decoder:backward(h, unit_grad)

In [8]:
-- Decoder smoke test on a batch of annotation sequences: same as the
-- single-sequence check, with a leading batch dimension.
h = torch.ones(batchSize, L, annotationDepth)
decoder:forward(h)
local unit_grad = torch.ones(batchSize, T, outputDepth)
decoder:backward(h, unit_grad)

### Encoder - Decoder

In [9]:
-- Chain the encoder and the attention decoder into a single graph module:
-- input -> encoder -> decoder -> output.
autoenc_inp = nn.Identity()()
local annotations = encoder(autoenc_inp)
local decoded = decoder(annotations)
autoencoder = nn.gModule({autoenc_inp}, {decoded})

In [96]:
-- Draw T random class labels in 1..outputDepth and show their one-hot
-- encoding: scatter writes a 1 into column target[t] of row t.
target = torch.ceil(torch.rand(T) * outputDepth)
local class_index = target:reshape(T, 1):long()   -- scatter needs a LongTensor index
local one_hot = torch.zeros(T, outputDepth):scatter(2, class_index, 1)
print(target)
print(one_hot)

 2
 1
 2
 2
 1
 2
 1
 2
 2
[torch.DoubleTensor of size 9]

 0  1
 1  0
 0  1
 0  1
 1  0
 0  1
 1  0
 0  1
 0  1
[torch.DoubleTensor of size 9x2]



In [99]:
-- Batched version of the one-hot demo: labels are batchSize x T and the 1s are
-- scattered along the third (class) dimension.
target = torch.ceil(torch.rand(batchSize, T) * outputDepth)
local class_index = target:reshape(batchSize, T, 1):long()   -- LongTensor index for scatter
local one_hot = torch.zeros(batchSize, T, outputDepth):scatter(3, class_index, 1)
print(target)
print(one_hot)

 2  2  1  1  1  2  2  1  1
 2  1  1  1  2  1  2  2  2
 2  1  1  1  2  2  2  2  1
 1  2  2  1  1  1  1  1  2
[torch.DoubleTensor of size 4x9]

(1,.,.) = 
  0  1
  0  1
  1  0
  1  0
  1  0
  0  1
  0  1
  1  0
  1  0

(2,.,.) = 
  0  1
  1  0
  1  0
  1  0
  0  1
  1  0
  0  1
  0  1
  0  1

(3,.,.) = 
  0  1
  1  0
  1  0
  1  0
  0  1
  0  1
  0  1
  0  1
  1  0

(4,.,.) = 
  1  0
  0  1
  0  1
  1  0
  1  0
  1  0
  1  0
  1  0
  0  1
[torch.DoubleTensor of size 4x9x2]



In [111]:
-- Single-sequence check: compare a hand-rolled summed NLL (and its gradient)
-- against nn.ClassNLLCriterion, then back-propagate through the autoencoder.
inp = torch.randn(seqLength,inputFrameSize)
target = torch.ceil(torch.rand(T)*outputDepth)
autoenc_out  = autoencoder:forward(inp)

-- One-hot mask of the targets: multiplying it with the log-probabilities keeps
-- only the log-probability of the correct class at every step.
labelmask = torch.zeros(T,outputDepth):scatter(2,target:reshape(T,1):long(),1)
nll = -torch.cmul(autoenc_out,labelmask)
print('my nll ' .. nll:sum())

-- Reference value: sum the criterion over the T steps. One criterion instance
-- is reused instead of constructing a new module on every iteration.
local criterion = nn.ClassNLLCriterion()
nll = 0
for t=1,T do
    nll = criterion:forward(autoenc_out[t],target[t]) + nll
end
print('nn nll ' .. nll)

-- Gradient of the summed NLL w.r.t. the log-probabilities is -labelmask;
-- print its distance from the criterion's own gradient.
crit_grad1 = -labelmask
crit_grad2 = nn.ClassNLLCriterion():backward(autoenc_out,target)
print((crit_grad1-crit_grad2):norm())

-- The gradient passed to backward must have the shape of autoenc_out
-- (T x outputDepth). The previous torch.ones(L,outputDepth) had L rows, one
-- per encoder frame, which does not match the T decoder steps. Back-propagate
-- the actual loss gradient instead.
autoenc_grad = autoencoder:backward(inp,crit_grad1)
print(autoenc_out:size())

my nll 6.0380990025673	
nn nll 6.0380990025673	
0	
 9
 2
[torch.LongStorage of size 2]



In [122]:
-- Batched check: batch-averaged NLL computed with a one-hot mask versus the
-- same quantity accumulated step by step with nn.ClassNLLCriterion, followed
-- by a backward pass using the hand-derived gradient.
inp = torch.randn(batchSize, seqLength, inputFrameSize)

target = torch.ceil(torch.rand(batchSize, T) * outputDepth)
local class_index = target:reshape(batchSize, T, 1):long()
labelmask = torch.zeros(batchSize, T, outputDepth):scatter(3, class_index, 1)

autoenc_out = autoencoder:forward(inp)

-- Masked log-probabilities, negated and divided by the batch size.
nll = -torch.cmul(labelmask, autoenc_out) / batchSize
print('my nll ' .. nll:sum())

-- Reference: one criterion evaluation per (sequence, step) pair.
nll = 0
for seq = 1, batchSize do
    for step = 1, T do
        local log_probs = autoenc_out[{seq, step}]
        local label = target[{seq, step}]
        nll = nn.ClassNLLCriterion():forward(log_probs, label) + nll
    end
end
print('nn nll ' .. nll / batchSize)

-- Gradient of the batch-averaged NLL w.r.t. the log-probabilities.
crit_grad1 = -labelmask / batchSize

autoenc_grad = autoencoder:backward(inp, crit_grad1)
print(autoenc_out:size())

my nll 6.2530219316573	
nn nll 6.2530219316573	
 4
 9
 2
[torch.LongStorage of size 3]



In [123]:
-- Inspect the shape of the most recent autoencoder output.
autoenc_out:size()

 4
 9
 2
[torch.LongStorage of size 3]



In [127]:
-- Shape of the zero tensor that the one-hot label mask is scattered into.
torch.zeros(batchSize,T,outputDepth):size()

 4
 9
 2
[torch.LongStorage of size 3]



In [125]:
-- Tensor type of the scatter index built from the targets (after :long()).
target:reshape(batchSize,T,1):long():type()

torch.LongTensor	


In [146]:
-- Predicted class per step: index of the largest log-probability along the
-- class dimension (dim 3). max keeps that dimension with size 1.
_, pred = autoenc_out:max(3)
-- Drop only the class dimension. A bare squeeze() removes every singleton
-- dimension, so it would also drop the batch or time axis when batchSize == 1
-- or T == 1 and break the later comparison with the batchSize x T targets.
pred = pred:squeeze(3)

In [145]:
-- Inspect the shape of pred. NOTE(review): the recorded output below still has
-- a trailing singleton dimension, so this cell appears to have been run before
-- pred was squeezed -- confirm against the execution counters.
pred:size()

 4
 9
 1
[torch.LongStorage of size 3]



In [150]:
-- Element-wise comparison of predicted and target classes (1 where they agree).
-- pred is converted to a DoubleTensor so both operands have the same type.
torch.eq(pred:double(),target)

 0  1  0  1  0  1  1  0  0
 0  1  0  1  1  1  1  1  1
 0  0  0  0  0  0  1  0  1
 0  1  0  0  1  1  0  1  1
[torch.ByteTensor of size 4x9]



In [159]:
-- Scatter experiment: use the targets both as the index and as the source, so
-- each row holds its class label (rather than 1) in the column of that class.
-- The batch size is read from target:size(1) instead of the batchSize global.
torch.zeros(target:size(1),T,outputDepth):scatter(3,target:view(target:size(1),T,1):long(),target:view(target:size(1),T,1))

(1,.,.) = 
  0  2
  1  0
  0  2
  1  0
  0  2
  1  0
  1  0
  0  2
  0  2

(2,.,.) = 
  0  2
  1  0
  0  2
  1  0
  1  0
  1  0
  1  0
  1  0
  1  0

(3,.,.) = 
  0  2
  0  2
  0  2
  0  2
  0  2
  0  2
  1  0
  0  2
  1  0

(4,.,.) = 
  0  2
  1  0
  0  2
  0  2
  1  0
  1  0
  0  2
  1  0
  1  0
[torch.DoubleTensor of size 4x9x2]



In [158]:
-- Shape of the scatter index obtained by viewing the targets with a trailing
-- singleton dimension and converting to long.
target:view(target:size(1),T,1):long():size()

 4
 9
 1
[torch.LongStorage of size 3]



In [134]:
-- Report the elapsed time in minutes, rounded to one decimal place.
-- NOTE(review): `start` is not assigned anywhere in the visible cells; it is
-- presumably a sys.clock() timestamp taken before training -- confirm.
local elapsed_tenths = torch.round(10*(sys.clock()-start)/60)
print('training time  =', elapsed_tenths/10 .. ' minutes')

training time  =	0.6 minutes	


In [141]:
-- Scatter experiment: build the one-hot mask with an explicit tensor of ones as
-- the source instead of the scalar 1.
torch.zeros(batchSize,T,outputDepth):scatter(3,target:reshape(batchSize,T,1):long(),torch.ones(batchSize,T,1))

(1,.,.) = 
  0  1
  1  0
  0  1
  1  0
  0  1
  1  0
  1  0
  0  1
  0  1

(2,.,.) = 
  0  1
  1  0
  0  1
  1  0
  1  0
  1  0
  1  0
  1  0
  1  0

(3,.,.) = 
  0  1
  0  1
  0  1
  0  1
  0  1
  0  1
  1  0
  0  1
  1  0

(4,.,.) = 
  0  1
  1  0
  0  1
  0  1
  1  0
  1  0
  0  1
  1  0
  1  0
[torch.DoubleTensor of size 4x9x2]



In [137]:
-- Shape of the targets after reshaping them to carry a trailing singleton
-- dimension.
target:reshape(batchSize,T,1):size()

 4
 9
 1
[torch.LongStorage of size 3]



In [138]:
-- Display the all-ones tensor used as the scatter source in the experiment
-- with an explicit source tensor.
torch.ones(batchSize,T,1)

(1,.,.) = 
  1
  1
  1
  1
  1
  1
  1
  1
  1

(2,.,.) = 
  1
  1
  1
  1
  1
  1
  1
  1
  1

(3,.,.) = 
  1
  1
  1
  1
  1
  1
  1
  1
  1

(4,.,.) = 
  1
  1
  1
  1
  1
  1
  1
  1
  1
[torch.DoubleTensor of size 4x9x1]

