In [1]:
include "/home/ubuntu/commitgen/model/utils.lua"
local Decoder, parent = torch.class('Decoder', 'nn.Module')
include "/home/ubuntu/commitgen/model/MaskedLoss.lua"

--- Construct the decoder: build the prototype cell, initialise its flattened
-- parameters, unroll it over time and pre-allocate the recurrent state buffers.
-- @param config table providing `emb_dim` and `emb_out`
-- @param params table of hyper-parameters; this constructor reads
--        `init_weight`, `max_nl_length`, `layers`, `batch_size` and `rnn_size`
function Decoder:__init(config, params)
	parent.__init(self)

	self.emb_dim = config.emb_dim
	self.emb_out = config.emb_out
	self.params = params

	-- Note: after this assignment `self.cell` is the built gModule and shadows
	-- the `Decoder:cell` builder method on this instance.
	self.cell = self:cell()
	self.paramx, self.paramdx = self.cell:getParameters()

	local init_range = self.params.init_weight
	self.paramx:uniform(-init_range, init_range)
	self.paramdx:zero()

	-- One clone of the cell per timestep.
	self.cells = cloneManyTimes(self.cell, self.params.max_nl_length)

	-- Two state tensors (c and h) per layer, each batch_size x rnn_size, on the GPU.
	local states_per_step = 2 * self.params.layers
	local function new_zero_states()
		local states = {}
		for d = 1, states_per_step do
			states[d] = torch.zeros(self.params.batch_size, self.params.rnn_size):cuda()
		end
		return states
	end

	-- self.s[0] is the initial state; self.s[j] holds the state after step j.
	self.s = {}
	for j = 0, self.params.max_nl_length do
		self.s[j] = new_zero_states()
	end

	-- Gradient fed to the loss output of each cell during backward.
	self.cudaones = torch.ones(1):cuda()

	-- Gradient w.r.t. the recurrent state, carried backwards through time.
	self.ds = new_zero_states()
end

--- Build the nngraph module for ONE decoding timestep.
-- Graph inputs, in order:  {x, y, pos, prev_s, all_h, infmask}
-- Graph outputs, in order: {err, next_s}
--   * x / y   : fed with batch.y[i] / batch.y[i + 1] by Decoder:forward
--   * pos     : fed with the timestep index by Decoder:forward
--   * prev_s  : table of 2 * layers state tensors (c, h per layer)
--   * all_h   : encoder states attended over
--   * infmask : additive mask applied to the attention scores before the softmax
--   * err     : output of MaskedLoss; Decoder:forward reads err[2] and err[3]
-- The returned gModule is moved to the GPU with :cuda().
-- NOTE(review): the statement order below determines the nngraph node order
-- and the layout of the flattened parameters; reorder with care.
function Decoder:cell()

	  -- decoder with attention cell
	  -- with position input

	  local  all_h = nn.Identity()()
	  local infmask = nn.Identity()()

	  -- infmask is seq x minibatch (it is transposed before being added to
	  -- the attention scores below)


    -- regular decoder cell
	  local x                = nn.Identity()()
 		local y                = nn.Identity()()
		local prev_s           = nn.Identity()()

		-- position embedding: a lookup table with 101 entries of width 50
		local pos              = nn.Identity()()
		local pos_lookup       = nn.LookupTable(101, 50)(pos)

		-- i[0] is the input to the first LSTM layer: the position embedding
		-- joined (along dim 2) with the embedding of x. emb_dim is used as the
		-- lookup-table size here and as the output size of h2y below.
		local i                = {[0] = nn.JoinTable(2)({pos_lookup, nn.LookupTable(self.emb_dim, self.emb_out)(x)}) }

		local next_s           = {}
		local splitted         = {prev_s:split(2 * self.params.layers)}
		for layer_idx = 1, self.params.layers do
			local prev_c         = splitted[2 * layer_idx - 1]
			local prev_h         = splitted[2 * layer_idx]
			local dropped        = nn.Dropout(self.params.dropout)(i[layer_idx - 1])
			-- NOTE(review): 450 is hard-coded. Presumably it is the LSTM input
			-- width (50 position dims + emb_out, i.e. emb_out == 400) -- confirm
			-- against `lstm` in utils.lua. If so, it does not follow
			-- config.emb_out and would not fit layers above the first.
			local next_c, next_h = lstm(dropped, prev_c, prev_h, 450) -- self.emb_out)
			table.insert(next_s, next_c)
			table.insert(next_s, next_h)
			i[layer_idx] = next_h
		end


		-- ok, now we have the decoder state in i[self.params.layers]
		-- (the hidden output of the top LSTM layer)
    -- size of all_h is  minibatch x seq x 400
    -- size of the decoder state is minibatch x 400
    -- (400 presumably stands for rnn_size -- TODO confirm)
    -- we want minibatch x seq x 1
    -- Do not use log soft max here.. since we have to take a weighted combination
    -- using these weights
    -- Remember that before doing the softmax, we have to convert those weights
    -- which correspond to the padding to -infinity so that softmax assigns them
    -- a weight of zero
    --
    -- Raw attention scores: batched product of all_h with the decoder state
    -- (given a trailing singleton dim by Replicate), then Select drops that dim.
    local attention_weights = nn.Select(3, 1)(nn.MM()({all_h, nn.Replicate(1, 3)(i[self.params.layers])}))


    -- Add the (transposed) mask to the scores, then normalise with a softmax.
    local masked_attention_weights = nn.SoftMax()(nn.CAddTable()({attention_weights, nn.Transpose({1,2})(infmask)}))

		-- attention is minibatch x seq --> make this minibatch x seq x 1
		-- all_h is minibatch x seq x 400
		-- we want minibatch x 400 x 1
		-- Then reduce it to minibatch x 400
		
    -- MM(true, false) transposes all_h before multiplying. The 'ATTENTION'
    -- annotation is attached to this Select node (the context vector).
    local context_vector = nn.Select(3, 1)(nn.MM(true, false)({all_h, nn.Replicate(1, 3)(masked_attention_weights)})):annotate{name = 'ATTENTION'}


		-- Separate linear maps (rnn_size -> rnn_size) for the decoder state and
		-- the context vector; their outputs are summed and passed through tanh.
 		local W1 = nn.Linear(self.params.rnn_size, self.params.rnn_size)
 		local W2 = nn.Linear(self.params.rnn_size, self.params.rnn_size)

		-- The final vector. We then softmax this
 		local h_att = nn.Tanh()(nn.CAddTable()({W1(i[self.params.layers]), W2(context_vector)}))

		local dropped          = nn.Dropout(self.params.dropout)(h_att)

		-- Project to emb_dim outputs, take log-probabilities and score them
		-- against y with MaskedLoss.
 		local h2y              = nn.Linear(self.params.rnn_size, self.emb_dim)
 		local pred             = nn.LogSoftMax()(h2y(dropped))
 		local err              = MaskedLoss()({pred, y})

    -- next_s is wrapped in an Identity node so the state table is a graph output.
    return  nn.gModule({x, y, pos, prev_s, all_h, infmask}, {err, nn.Identity()(next_s)}):cuda()
end

--- Run the unrolled cells forward over one batch.
-- For each step i in 1 .. batch.maxY - 1, cell i receives batch.y[i] as input
-- and batch.y[i + 1] as target, together with the step index, the previous
-- state self.s[i - 1], the encoder states and the attention mask. The new
-- state is stored in self.s[i].
-- @param state table with running totals; `count` and `normal` are
--        incremented and `acc` is overwritten with count / normal
-- @param batch table providing `y`, `maxY` and `infmask`
-- @param all_h encoder states passed unchanged to every cell
function Decoder:forward(state, batch, all_h)
	for i = 1, (batch.maxY - 1) do
		-- Position input for this step: one copy of the step index per batch row.
		local pos = torch.ones(self.params.batch_size):cuda() * i

		-- `err` must be local; the original assignment leaked it as a global.
		local err
		err, self.s[i] = unpack(self.cells[i]:forward(
			{batch.y[i], batch.y[i + 1], pos, self.s[i - 1], all_h, batch.infmask}))

		-- NOTE(review): err[2] / err[3] are presumably the number of correct
		-- predictions and the number of unmasked targets -- confirm in MaskedLoss.
		state.count = state.count + err[2]
		state.normal = state.normal + err[3]
	end
	state.acc = state.count / state.normal
end

--- Back-propagate through the unrolled cells, last step first.
-- Parameter gradients accumulate inside the cells. The gradient w.r.t. the
-- recurrent state is carried from step to step in self.ds.
-- @param batch table providing `y`, `maxY` and `infmask` (same batch as forward)
-- @param all_h encoder states that were passed to forward
-- @return tensor shaped like all_h holding the gradient w.r.t. all_h summed
--         over all timesteps, for the encoder's backward pass
function Decoder:backward(batch, all_h)
	-- Start with a zero gradient flowing into the final state.
	for d = 1, #self.ds do
		self.ds[d]:zero()
	end

	local sum_d_all_h = all_h:clone():zero()

	for i = (batch.maxY - 1), 1, -1 do
		local pos = torch.ones(self.params.batch_size):cuda() * i

		-- Output gradients are {ones for the loss, carried state gradient}.
		local grads = self.cells[i]:backward(
			{batch.y[i], batch.y[i + 1], pos, self.s[i - 1], all_h, batch.infmask},
			{self.cudaones, self.ds})

		-- Input gradients follow the cell's input order
		-- {x, y, pos, prev_s, all_h, infmask}; only prev_s and all_h are needed.
		-- Indexing replaces the old unpack, which leaked `d_pos` as a global.
		local d_s, d_all_h = grads[4], grads[5]

		sum_d_all_h:add(d_all_h)

		-- The gradient w.r.t. this step's input state is the output-state
		-- gradient for step i - 1.
		copy_table(self.ds, d_s)
	end

	-- we also need to return the d's that have added up for the encoder
	return sum_d_all_h

end

--- Put the unrolled cells into training mode by enabling dropout.
-- The dropout value comes from this decoder's own hyper-parameters. The
-- original read the global `params`, which only worked when a matching global
-- happened to be defined.
function Decoder:training()
	g_enable_dropout_all(self.cells, self.params.dropout)
end

--- Put the unrolled cells into evaluation mode.
-- Delegates to the external helper g_disable_dropout_all for every timestep clone.
function Decoder:evaluate()
	local unrolled_cells = self.cells
	g_disable_dropout_all(unrolled_cells)
end

--- Apply one optimisation step to the flattened parameters, then clear the
-- accumulated gradients so the next batch starts from zero.
function Decoder:updateParameters()
	local hp = self.params
	local weights, grads = self.paramx, self.paramdx

	update_params(weights, grads, hp.max_grad_norm, hp.learningRate, hp.normalize)
	grads:zero()
end


In [2]:
-- Hyper-parameters for the experiment. `params` and `decoder` are left global,
-- as in the original cell; later notebook cells reference `decoder`.
params = {
    -- read by the Decoder methods in this notebook
    layers = 1,
    rnn_size = 400,
    batch_size = 100,
    max_nl_length = 100,
    init_weight = 0.5,
    dropout = 0.5,
    learningRate = 0.1,
    max_grad_norm = 5,
    normalize = 1,

    -- not referenced by the Decoder methods visible in this notebook
    gpu = 1,
    decay = 0.8,
    max_length = 20,
    beam_size = 10,
    max_code_length = 100,
}

-- Tiny embedding sizes, used here only to build the graph for inspection.
local decoder_config = {emb_dim = 3, emb_out = 4}
decoder = Decoder(decoder_config, params)

In [3]:
#decoder.cell.forwardnodes

In [4]:
decoder.cell.forwardnodes[50].data.mapindex[1]

61	
{
  annotations : 
    {
      _debugLabel : [[string "include "/home/ubuntu/commitgen/model/utils.l..."]]:89_
    }
  module : 
    nn.MM
    {
      _type : torch.CudaTensor
      output : CudaTensor - empty
      gradInput : 
        {
          1 : CudaTensor - empty
          2 : CudaTensor - empty
        }
      transA : true
      transB : false
    }
  reverseMap : 
    {
      nngraph.Node : true
    }
  forwardNodeId : 46
  mapindex : 
    {
      1 : 
        {
          annotations : 
            {
              _debugLabel : [[string "include "/home/ubuntu/commitgen/model/utils.l..."]]:40_
            }
          module : 
            nn.Identity
            {
              gradInput : CudaTensor - empty
              _type : torch.CudaTensor
              output : CudaTensor - empty
            }
          reverseMap : 
            {
              nngraph.Node : true
              nngraph.Node : true
            }
          forwardNodeId : 49
          mapindex : 
  

In [None]:
decoder.cell.forwardnodes[43].data.mapindex[1].input

In [57]:
JSON = (loadfile "commitgen/model/JSON.lua")() 


In [40]:
-- Parse a dump of ASCII-serialized torch objects. Records are separated by
-- blank lines; each record is deserialized and appended to `tensors`.
-- `tensors` is left global on purpose so later notebook cells can read it.
tensors = {}

-- Lines of the record being assembled. They are joined once per record,
-- which avoids repeated string concatenation inside the loop.
local record_lines = {}

-- Deserialize the buffered record, if there is one, and reset the buffer.
-- Skipping empty buffers makes leading or consecutive blank lines harmless.
local function flush_record()
    if #record_lines > 0 then
        local serialized = table.concat(record_lines, "\n")
        tensors[#tensors + 1] = torch.deserialize(serialized, "ascii")
        record_lines = {}
    end
end

for line in io.lines("/tmp/lua_SsOb8o") do
    if #line > 0 then
        record_lines[#record_lines + 1] = line
    else
        flush_record()
    end
end

-- The last record may not be followed by a blank line; do not drop it.
flush_record()

In [46]:
tt = torch.totable(tensors[1])

In [50]:
JSON:encode(tt)

[[0.039528504014015,0.039528504014015,0.062131494283676,0.044466331601143,0.018017549067736,0.030503196641803,0.035669844597578],[0.034676723182201,0.034676723182201,0.039691116660833,0.025204978883266,0.030837003141642,0.018189964815974,0.02582111954689],[0.0019375256961212,0.0019375256961212,0.0012629881966859,0.0043391864746809,0.0018298089271411,0.0011462797410786,0.002435308881104],[0.056111969053745,0.056111969053745,0.067134208977222,0.042234148830175,0.030622810125351,0.035720609128475,0.029433626681566],[0.0045833084732294,0.0045833084732294,0.0027174670249224,0.0048443539999425,0.0031658185180277,0.0013434329302981,0.0045913867652416],[0.051353637129068,0.051353637129068,0.04918610304594,0.050211042165756,0.033918872475624,0.017015498131514,0.038635216653347],[0.010183644481003,0.010183644481003,0.0058755348436534,0.010339170694351,0.0073149777017534,0.0075591756030917,0.012034345418215],[0.019561149179935,0.019561149179935,0.036798965185881,0.029140489175916,0.0291514005512,

93565,0.072061620652676,0.095677800476551,0.041510213166475],[0.0019375256961212,0.0019375256961212,0.0012629881966859,0.0043391864746809,0.0018298089271411,0.0011462797410786,0.002435308881104],[0.053517259657383,0.053517259657383,0.059730984270573,0.026241784915328,0.047115460038185,0.026427395641804,0.029263831675053],[0.0045833084732294,0.0045833084732294,0.0027174670249224,0.0048443539999425,0.0031658185180277,0.0013434329302981,0.0045913867652416],[0.0028534086886793,0.0028534086886793,0.0013481815112755,0.0058964216150343,0.0033006470184773,0.0018656726460904,0.005618941038847],[0.065339758992195,0.065339758992195,0.046260997653008,0.033174198120832,0.041776467114687,0.041298326104879,0.051148656755686],[0.0063073271885514,0.0063073271885514,0.0045462027192116,0.010365139693022,0.0076512494124472,0.010335328988731,0.014941040426493],[0.029353685677052,0.029353685677052,0.019236953929067,0.017091820016503,0.028875024989247,0.032210290431976,0.03286237642169],[0.0063073271885514,0

In [48]:
-- Print every entry of the converted table: its key followed by the row.
for row_index, row in pairs(tt) do
    print(row_index, row)
end

1	{
  1 : 0.039528504014015
  2 : 0.039528504014015
  3 : 0.062131494283676
  4 : 0.044466331601143
  5 : 0.018017549067736
  6 : 0.030503196641803
  7 : 0.035669844597578
}
2	{
  1 : 0.034676723182201
  2 : 0.034676723182201
  3 : 0.039691116660833
  4 : 0.025204978883266
  5 : 0.030837003141642
  6 : 0.018189964815974
  7 : 0.02582111954689
}
3	{
  1 : 0.0019375256961212
  2 : 0.0019375256961212
  3 : 0.0012629881966859
  4 : 0.0043391864746809
  5 : 0.0018298089271411
  6 : 0.0011462797410786
  7 : 0.002435308881104
}
4	{
  1 : 0.056111969053745
  2 : 0.056111969053745
  3 : 0.067134208977222
  4 : 0.042234148830175
  5 : 0.030622810125351
  6 : 0.035720609128475
  7 : 0.029433626681566
}
5	{
  1 : 0.0045833084732294
  2 : 0.0045833084732294
  3 : 0.0027174670249224
  4 : 0.0048443539999425
  5 : 0.0031658185180277
  6 : 0.0013434329302981
  7 : 0.0045913867652416
}
6	{
  1 : 0.051353637129068
  2 : 0.051353637129068
  3 : 0.04918610304594
  4 : 0.050211042165756
  5 : 0.03391887247

.034676723182201
  3 : 0.039691116660833
  4 : 0.025204978883266
  5 : 0.030837003141642
  6 : 0.018189964815974
  7 : 0.02582111954689
}
25	{
  1 : 0.0019375256961212
  2 : 0.0019375256961212
  3 : 0.0012629881966859
  4 : 0.0043391864746809
  5 : 0.0018298089271411
  6 : 0.0011462797410786
  7 : 0.002435308881104
}
26	{
  1 : 0.056111969053745
  2 : 0.056111969053745
  3 : 0.067134208977222
  4 : 0.042234148830175
  5 : 0.030622810125351
  6 : 0.035720609128475
  7 : 0.029433626681566
}
27	{
  1 : 0.0045833084732294
  2 : 0.0045833084732294
  3 : 0.0027174670249224
  4 : 0.0048443539999425
  5 : 0.0031658185180277
  6 : 0.0013434329302981
  7 : 0.0045913867652416
}
28	{
  1 : 0.051353637129068
  2 : 0.051353637129068
  3 : 0.04918610304594
  4 : 0.050211042165756
  5 : 0.033918872475624
  6 : 0.017015498131514
  7 : 0.038635216653347
}
29	{
  1 : 0.010183644481003
  2 : 0.010183644481003
  3 : 0.0058755348436534
  4 : 0.010339170694351
  5 : 0.0073149777017534
  6 : 0.007559175603091

374959
  3 : 0.025626182556152
  4 : 0.022476680576801
  5 : 0.023303288966417
  6 : 0.026778364554048
  7 : 0.027257092297077
}
48	{
  1 : 0.0034229347947985
  2 : 0.0034229347947985
  3 : 0.0024786402937025
  4 : 0.006160918623209
  5 : 0.0034672704059631
  6 : 0.0027901714202017
  7 : 0.0061246887780726
}
49	{
  1 : 0.024624677374959
  2 : 0.024624677374959
  3 : 0.025626182556152
  4 : 0.022476680576801
  5 : 0.023303288966417
  6 : 0.026778364554048
  7 : 0.027257092297077
}
50	{
  1 : 0.0028534086886793
  2 : 0.0028534086886793
  3 : 0.0013481815112755
  4 : 0.0058964216150343
  5 : 0.0033006470184773
  6 : 0.0018656726460904
  7 : 0.005618941038847
}
51	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}
52	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}
53	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}
54	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}
55	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}
56	{
  1 : 0
  2 : 


100	{
  1 : 0
  2 : 0
  3 : 0
  4 : 0
  5 : 0
  6 : 0
  7 : 0
}


nil	


In [None]:
decoder.cell.forwardnodes[56].data.mapindex

In [None]:
#decoder.cell.fg.nodes