In [1]:
require 'nn'
require 'hdf5'
require 'optim'

### Loading the data:

In [76]:
myFile = hdf5.open('6-grams.hdf5','r')
data = myFile:all()
myFile:close()

In [92]:
data

{
  valid_output : LongTensor - size: 3370x50
  train_1000 : LongTensor - size: 696825x7
  test : LongTensor - size: 3761x55
  nwords : LongTensor - size: 1
  train : LongTensor - size: 772670x7
  valid : LongTensor - size: 3370x55
}


### Creating the training set:

We need to remove the counts (used for the Count based language model)

In [81]:
nwords = 10001
train = data['train']:narrow(2,1,6)

In [82]:
train_input = train:narrow(2,1,5)
train_output = train:narrow(2,6,1)

In [89]:
N = 6
dwin = N-1
hid1 = 30.
hid2 = 100
nnlm = nn.Sequential()

tanh = nn.Sequential()
tanh:add(nn.LookupTable(nwords,hid1/dwin))
tanh:add(nn.View(1,-1,hid1))
tanh:add(nn.Squeeze()) -- this layer is to go from a 1xAxB tensor to AxB dimensional tensor (https://groups.google.com/forum/#!topic/torch7/u4OEc0GB74k)
tanh:add(nn.Linear(hid1,hid2))
tanh:add(nn.Tanh())

nnlm:add(tanh)
nnlm:add(nn.Linear(hid2, nwords))
nnlm:add(nn.LogSoftMax())

In [84]:
nnlm:forward(train_input:narrow(1,1,1)):size()

 10001
[torch.LongStorage of size 1]



In [90]:
criterion = nn.ClassNLLCriterion()

In [86]:
dataset={};
for i=1,train:size(1) do 
  dataset[i] = {train_input[i]:view(1,5), train_output[i]}
end
function dataset:size() return train:size(1) end -- 100 examples

In [91]:
--with regularisation:

sq_mat = torch.zeros(10001,6)
norm_mat = torch.zeros(10001,6)

for i = 1, 20 do
    
    timer = torch.Timer()
    if i ~= 1 then
        sq_mat:copy(torch.pow(nnlm:findModules('nn.LookupTable')[1].weight,2))
        norm_mat:copy(torch.expandAs(sq_mat:sum(2), sq_mat))
        nnlm:findModules('nn.LookupTable')[1].weight:cdiv(norm_mat)
    end
    
    trainer = nn.StochasticGradient(nnlm, criterion)
    trainer.learningRate = 0.01
    trainer.maxIteration = 1
    trainer:train(dataset)
    print('Epoch '..i..': '..timer:time().real)
    
end


# StochasticGradient: training	


# current error = 6.4245431634655	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 6.4245431634655	
Epoch 1: 1867.2106969357	


# StochasticGradient: training	


# current error = 5.9158270506484	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.9158270506484	


Epoch 2: 1851.8256881237	


# StochasticGradient: training	


# current error = 5.8087271387059	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.8087271387059	
Epoch 3: 2067.7188940048	


# StochasticGradient: training	


# current error = 5.6064206043748	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.6064206043748	
Epoch 4: 1839.1438748837	


# StochasticGradient: training	


# current error = 5.5844903921627	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.5844903921627	
Epoch 5: 1832.6268959045	


# StochasticGradient: training	


# current error = 5.4698433440264	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4698433440264	
Epoch 6: 1833.8597950935	


# StochasticGradient: training	


# current error = 5.4624980810907	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4624980810907	
Epoch 7: 1834.5379061699	


# StochasticGradient: training	


# current error = 5.3855606170359	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.3855606170359	
Epoch 8: 1838.4308679104	


# StochasticGradient: training	


# current error = 5.3781181681479	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.3781181681479	
Epoch 9: 1839.4745910168	


# StochasticGradient: training	


# current error = 5.3245020409265	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.3245020409265	
Epoch 10: 1820.8121240139	


# StochasticGradient: training	


# current error = 5.3180288904676	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.3180288904676	
Epoch 11: 1806.8632180691	


# StochasticGradient: training	


# current error = 5.2783573349033	


# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.2783573349033	
Epoch 12: 1801.1834139824	


# StochasticGradient: training	


# current error = 5.2723746036031	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.2723746036031	
Epoch 13: 1806.0853638649	


# StochasticGradient: training	


# current error = 5.2412188396928	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.2412188396928	
Epoch 14: 1801.4268000126	


# StochasticGradient: training	


# current error = 5.234915767834	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.234915767834	
Epoch 15: 1803.8201138973	


# StochasticGradient: training	


# current error = 5.2105918915854	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.2105918915854	
Epoch 16: 1804.5267899036	


# StochasticGradient: training	


# current error = 5.2061212614106	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.2061212614106	
Epoch 17: 1807.6521818638	


# StochasticGradient: training	


# current error = 5.1854116802198	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.1854116802198	
Epoch 18: 1804.7857210636	


# StochasticGradient: training	


# current error = 5.1802477525104	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.1802477525104	
Epoch 19: 1804.3937339783	


# StochasticGradient: training	


# current error = 5.163963303637	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.163963303637	
Epoch 20: 1804.8741080761	


In [137]:
--save
torch.save('nnlm1',nnlm)




In [75]:
-- Without regularisation:

trainer = nn.StochasticGradient(nnlm, criterion)
trainer.learningRate = 0.01
trainer.maxIteration = 5
trainer:train(dataset)

# StochasticGradient: training	


# current error = 4.3886357784844	


# current error = 4.1514940863395	


# current error = 4.0717284582465	


# current error = 4.0314867838883	


# current error = 4.00779912249	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 4.00779912249	


### Tentative de Validation:

In [212]:
--load:
nnlm = torch.load('../../nnlm1')

In [244]:
kag = data['valid_output']

In [246]:
kag = data['valid_output']
valid = data['valid']
valid_topredict = valid:narrow(2,1,50)
valid_input = valid:narrow(2,51,5)
preds_valid = nnlm:forward(valid_input)
kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
for i = 1,preds_valid:size(1) do
    kag_pred_valid[i]:copy(preds_valid[i]:index(1,valid_topredict[i])):exp()
end
norm_mat_ = torch.zeros(preds_valid:size(1),50)
norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
kag_pred_valid:cdiv(norm_mat_)
perp = 0
for i = 1,preds_valid:size(1) do
    mm,aa = kag[i]:max(1)
    perp = perp + math.log(kag_pred_valid[i][aa[1]])
end
perp = math.exp(-perp/preds_valid:size(1))
print(perp)

In [248]:
preds_valid = nnlm:forward(valid_input)

In [249]:
kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
for i = 1,preds_valid:size(1) do
    kag_pred_valid[i]:copy(preds_valid[i]:index(1,valid_topredict[i])):exp()
end

In [250]:
norm_mat_ = torch.zeros(preds_valid:size(1),50)
norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
kag_pred_valid:cdiv(norm_mat_)

In [251]:
acc = 0
for i = 1,preds_valid:size(1) do
    m,a = kag_pred_valid[i]:max(1)
    mm,aa = kag[i]:max(1)
    if aa[1] == a[1] then
        acc = acc + 1
    end
end
print(acc/3370.)

0.42581602373887	


In [138]:
function perplexity(distribution)
    local perp = torch.DoubleTensor(distribution:size(1), distribution:size(2))
    perp:copy(distribution)
    n = distribution:size(1) * distribution:size(2)
    return math.exp(-(1/n)* perp:log():sum())
end

In [252]:
perp = 0
for i = 1,preds_valid:size(1) do
    mm,aa = kag[i]:max(1)
    perp = perp + math.log(kag_pred_valid[i][aa[1]])
end
perp = math.exp(-perp/preds_valid:size(1))
print(perp)

26.309662260245	


In [253]:
kag_pred_valid[preds_valid:size(1)][aa[1]]

0.49076318656613	


In [156]:
dataset[1]

{
  1 : LongTensor - size: 1x5
  2 : LongTensor - size: 1
}


### Deep:

In [206]:
N = 6
dwin = N-1
hid1 = 30.
hid2 = 100
dnnlm = nn.Sequential()

LT = nn.Sequential()
LT:add(nn.LookupTable(nwords,hid1/dwin))
LT:add(nn.View(1,-1,hid1))
LT:add(nn.Squeeze()) 

dnnlm:add(LT)

concat = nn.ConcatTable()

lin_tanh = nn.Sequential()
lin_tanh:add(nn.Linear(hid1,hid2))
lin_tanh:add(nn.Tanh())

id = nn.Identity()

concat:add(lin_tanh)
concat:add(id)

dnnlm:add(concat)
dnnlm:add(nn.JoinTable(1))
dnnlm:add(nn.Linear(hid2+hid1, nwords))
dnnlm:add(nn.LogSoftMax())

In [207]:
t = train_input[8000]

In [208]:
tt = dnnlm:forward(t)

In [189]:
nn.JoinTable(1):forward(ttt):size()

 130
[torch.LongStorage of size 1]



In [None]:
criterion = nn.ClassNLLCriterion()

In [272]:
sq_mat:sum(2):max()

38.705445706632	


In [214]:
--with regularisation:

sq_mat = torch.zeros(10001,6)
norm_mat = torch.zeros(10001,6)


for i = 2, 15 do
    
    timer = torch.Timer()
    if i ~= 1 then
        sq_mat:copy(torch.pow(dnnlm:findModules('nn.LookupTable')[1].weight,2))
        norm_mat:copy(torch.expandAs(sq_mat:sum(2), sq_mat))
        nnlm:findModules('nn.LookupTable')[1].weight:cdiv(norm_mat)
    end
    
    trainer = nn.StochasticGradient(dnnlm, criterion)
    trainer.learningRate = 0.01
    trainer.maxIteration = 1
    trainer:train(dataset)
    print('Epoch '..i..': '..timer:time().real)
    
    
end

# StochasticGradient: training	


# current error = 6.0753078968396	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 6.0753078968396	
Epoch 2: 2378.5694739819	


# StochasticGradient: training	


# current error = 5.9075009795102	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.9075009795102	
Epoch 3: 2383.4352579117	


# StochasticGradient: training	


# current error = 5.7951605307499	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.7951605307499	
Epoch 4: 2360.0455019474	


# StochasticGradient: training	


# current error = 5.7137464722437	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.7137464722437	
Epoch 5: 2359.0405631065	


# StochasticGradient: training	


# current error = 5.6505833421658	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.6505833421658	
Epoch 6: 2364.4075038433	


# StochasticGradient: training	


# current error = 5.6016661823111	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.6016661823111	
Epoch 7: 2357.3720500469	


# StochasticGradient: training	


# current error = 5.5607049309392	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.5607049309392	
Epoch 8: 2360.9850211143	


# StochasticGradient: training	


# current error = 5.5264242272571	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.5264242272571	
Epoch 9: 2362.8160278797	


# StochasticGradient: training	


# current error = 5.4981025271048	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4981025271048	
Epoch 10: 2359.8637549877	


# StochasticGradient: training	


# current error = 5.4717365583531	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4717365583531	
Epoch 11: 2363.4240169525	


# StochasticGradient: training	


# current error = 5.4497181814615	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4497181814615	
Epoch 12: 2363.7275400162	


# StochasticGradient: training	


# current error = 5.4319642996491	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4319642996491	
Epoch 13: 4948.130671978	


# StochasticGradient: training	


# current error = 5.4142337359248	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.4142337359248	
Epoch 14: 2365.0248579979	


# StochasticGradient: training	


# current error = 5.3984644474863	
# StochasticGradient: you have reached the maximum number of iterations	
# training error = 5.3984644474863	
Epoch 15: 2369.2235422134	


In [254]:
kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
for i = 1,preds_valid:size(1) do
    kag_pred_valid[i]:copy(dnnlm:forward(valid_input[i]):index(1,valid_topredict[i])):exp()
end

In [255]:
norm_mat_ = torch.zeros(preds_valid:size(1),50)
norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
kag_pred_valid:cdiv(norm_mat_)

In [228]:
acc = 0
for i = 1,preds_valid:size(1) do
    m,a = kag_pred_valid[i]:max(1)
    mm,aa = kag[i]:max(1)
    if aa[1] == a[1] then
        acc = acc + 1
    end
end
print(acc/3370.)

0.57833827893175	


In [258]:
CE = 0
for i = 1,preds_valid:size(1) do
    mm,aa = kag[i]:max(1)
    CE = CE + math.log(kag_pred_valid[i][aa[1]])
end

In [259]:
print('Perplexity is equal to: '.. math.exp(-CE/preds_valid:size(1)))

Perplexity is equal to: 7.3734290599113	


### Testing on test data:

In [238]:
test_data = data['test']
test_topredict = test_data:narrow(2,1,50)
test_input = test_data:narrow(2,51,5)


In [241]:
kag_pred_test = torch.Tensor(test_data:size(1),50)
for i = 1,test_data:size(1) do
    kag_pred_test[i]:copy(dnnlm:forward(test_input[i]):index(1,test_topredict[i])):exp()
end

In [242]:
norm_mat_ = torch.zeros(test_data:size(1),50)
norm_mat_:copy(torch.expandAs(kag_pred_test:sum(2), kag_pred_test))
kag_pred_test:cdiv(norm_mat_)

In [243]:
filename = 'tocsv_1.f5'
myFile = hdf5.open(filename, 'w')
myFile:write(filename, kag_pred_test)
myFile:close()  

### Trying with 4-grams and changing embeddings size:

In [2]:
myFile = hdf5.open('../../5-grams.hdf5','r')
data = myFile:all()
myFile:close()

In [3]:
train5 = data['train_nocounts']
train_input5 = train5:narrow(2,1,4)
train_output5 = train5:narrow(2,5,1)

In [11]:
dataset5={};
for i=1,train5:size(1) do 
  dataset5[i] = {train_input5[i]:view(1,4), train_output5[i]}
end
function dataset5:size() return train5:size(1) end -- 100 examples

In [13]:
kag5 = data['valid_output']
valid5 = data['valid']
valid_topredict5 = valid5:narrow(2,1,50)
valid_input5 = valid5:narrow(2,51,4)

In [42]:
nwords = 10001
N = 5
dwin = N-1
hid1 = 30.
hid2 = 80
dnnlm2 = nn.Sequential()

LT2 = nn.Sequential()
LT1 = nn.LookupTable(nwords,hid1)
LT2:add(LT1)
LT2:add(nn.View(1,-1,hid1*dwin))
LT2:add(nn.Squeeze()) 

dnnlm2:add(LT2)

concat2 = nn.ConcatTable()

lin_tanh2 = nn.Sequential()
lin_tanh2:add(nn.Linear(hid1*dwin,hid2))
lin_tanh2:add(nn.Tanh())

id2 = nn.Identity()

concat2:add(lin_tanh2)
concat2:add(id2)

dnnlm2:add(concat2)
dnnlm2:add(nn.JoinTable(1))
dnnlm2:add(nn.Linear(hid2+hid1*dwin, nwords))
dnnlm2:add(nn.LogSoftMax())

In [25]:
criterion = nn.ClassNLLCriterion()

In [26]:
kag_pred_valid5 = torch.Tensor(valid_input5:size(1),50)
norm_mat_5 = torch.zeros(valid_input5:size(1),50)
sq_mat = torch.zeros(10001,30)

val_res = torch.Tensor(10)

In [None]:
--with regularisation:

for i = 1, 1 do
    
    timer = torch.Timer()
    
    LT1.weight:renorm(2,2,1)
    
    trainer2 = nn.StochasticGradient(dnnlm2, criterion)
    trainer2.learningRate = 0.005
    trainer2.maxIteration = 1
    trainer2:train(dataset5)
    print('Epoch '..i..': '..timer:time().real)
    
    kag_pred_valid5:zero()

    for i = 1,valid5:size(1) do
        kag_pred_valid5[i]:copy(dnnlm2:forward(valid_input5[i]):index(1,valid_topredict5[i])):exp()
    end
    
    norm_mat_5:zero()
    norm_mat_5:copy(torch.expandAs(kag_pred_valid5:sum(2), kag_pred_valid5))
    kag_pred_valid5:cdiv(norm_mat_5)
    
    CE = 0
    for i = 1,kag_pred_valid5:size(1) do
        mm,aa = kag5[i]:max(1)
        CE = CE + math.log(kag_pred_valid5[i][aa[1]])
    end
    
    val_res[i] = math.exp(-CE/kag_pred_valid5:size(1))
    print('Perplexity on valid: '..val_res[i])
    
end

# StochasticGradient: training	


 0.7071  0.7071
 0.7071  0.7071
[torch.DoubleTensor of size 2x2]



### Using Optim

In [2]:
myFile = hdf5.open('../../5-grams.hdf5','r')
data = myFile:all()
myFile:close()

In [3]:
data

{
  train_nocounts : DoubleTensor - size: 887522x5
  train_1000 : DoubleTensor - size: 887522x5
  test : LongTensor - size: 3761x54
  train : LongTensor - size: 754037x6
  nwords : LongTensor - size: 1
  valid : LongTensor - size: 3370x54
  train_1000_nocounts : LongTensor - size: 620208x6
  valid_output : LongTensor - size: 3370x50
}


In [4]:
train5 = data['train_nocounts']
train_input5 = train5:narrow(2,1,4)
train_output5 = train5:narrow(2,5,1)

In [5]:
train_output5:max()

10001	


In [6]:
criterion = nn.ClassNLLCriterion()

In [7]:
nwords = 10001
N = 5
dwin = N-1
hid1 = 30.
hid2 = 80
dnnlm2 = nn.Sequential()

LT2 = nn.Sequential()
LT1 = nn.LookupTable(nwords,hid1)
LT2:add(LT1)
LT2:add(nn.View(1,-1,hid1*dwin))
LT2:add(nn.Squeeze()) 

dnnlm2:add(LT2)

concat2 = nn.ConcatTable()

lin_tanh2 = nn.Sequential()
lin_tanh2:add(nn.Linear(hid1*dwin,hid2))
lin_tanh2:add(nn.Tanh())

id2 = nn.Identity()

concat2:add(lin_tanh2)
concat2:add(id2)

dnnlm2:add(concat2)
dnnlm2:add(nn.JoinTable(2))
dnnlm2:add(nn.Linear(hid2+hid1*dwin, nwords))
dnnlm2:add(nn.LogSoftMax())

In [8]:
model = dnnlm2

In [9]:
param, gradparam = model:getParameters()
batchsize = 64

In [16]:
for e = 1,5 do
    timer = torch.Timer()
    avLoss = 0
    
    LT1.weight:renorm(2,2,1)
    
    for t = 1,train5:size(1),batchsize do
    -- for t = 1,1,1 do
        inputs = torch.zeros(math.min(batchsize,train5:size(1)-t+1),dwin)
        targets = torch.zeros(math.min(batchsize,train5:size(1)-t+1))
        k = 1

        for i = t,math.min(t+batchsize-1,train5:size(1)) do
            targets[k] = train_output5[i]
            inputs[k] = train_input5[i]
            k = k + 1
        end

        function feval(x)
            collectgarbage()

            if x ~= param then
                param:copy(x)
            end

            gradparam:zero()

            outputs = model:forward(inputs)
            L = criterion:forward(outputs,targets)
            dL = criterion:backward(outputs,targets)
            model:backward(inputs,dL)

            avLoss = avLoss + L

            return L,gradparam

        end

        sgdState = {learningRate = 0.005}

        optim.sgd(feval,param,sgdState)

    end
    print('Epoch '.. e ..'completed in (sec): '..timer:time().real)
    print('Average Loss on train: '.. avLoss/math.floor(train5:size(1)/batchsize))
end

Epoch 1completed in (sec): 575.05744695663	
Average Loss on train: 7.922858046091	


Epoch 2completed in (sec): 559.13870000839	
Average Loss on train: 6.9411545443104	


Epoch 3completed in (sec): 554.5067191124	
Average Loss on train: 6.7439821096093	


Epoch 4completed in (sec): 549.67982292175	
Average Loss on train: 6.6323669364111	


Epoch 5completed in (sec): 554.31065011024	
Average Loss on train: 6.5619067452224	


In [17]:
for e = 6,20 do
    timer = torch.Timer()
    avLoss = 0
    
    LT1.weight:renorm(2,2,1)
    
    for t = 1,train5:size(1),batchsize do
    -- for t = 1,1,1 do
        inputs = torch.zeros(math.min(batchsize,train5:size(1)-t+1),dwin)
        targets = torch.zeros(math.min(batchsize,train5:size(1)-t+1))
        k = 1

        for i = t,math.min(t+batchsize-1,train5:size(1)) do
            targets[k] = train_output5[i]
            inputs[k] = train_input5[i]
            k = k + 1
        end

        function feval(x)
            collectgarbage()

            if x ~= param then
                param:copy(x)
            end

            gradparam:zero()

            outputs = model:forward(inputs)
            L = criterion:forward(outputs,targets)
            dL = criterion:backward(outputs,targets)
            model:backward(inputs,dL)

            avLoss = avLoss + L

            return L,gradparam

        end

        sgdState = {learningRate = 0.005}

        optim.sgd(feval,param,sgdState)

    end
    print('Epoch '.. e ..'completed in (sec): '..timer:time().real)
    print('Average Loss on train: '.. avLoss/math.floor(train5:size(1)/batchsize))
end

Epoch 6completed in (sec): 549.36122202873	
Average Loss on train: 6.5131466153525	


Epoch 7completed in (sec): 551.05811500549	


Average Loss on train: 6.4747375418067	


Epoch 8completed in (sec): 550.92656683922	
Average Loss on train: 6.4417819511198	


Epoch 9completed in (sec): 551.57784795761	
Average Loss on train: 6.4115803367962	


Epoch 10completed in (sec): 554.43027997017	
Average Loss on train: 6.3828083099609	


Epoch 11completed in (sec): 626.0440788269	
Average Loss on train: 6.3552663748153	


Epoch 12completed in (sec): 553.7746090889	
Average Loss on train: 6.3291940073103	


Epoch 13completed in (sec): 574.50339698792	
Average Loss on train: 6.3046952082911	


Epoch 14completed in (sec): 549.67145800591	
Average Loss on train: 6.2816738423971	


Epoch 15completed in (sec): 552.563601017	
Average Loss on train: 6.259960740982	


Epoch 16completed in (sec): 545.59943294525	
Average Loss on train: 6.2394098589757	


Epoch 17completed in (sec): 545.86253595352	
Average Loss on train: 6.2199206628517	


Epoch 18completed in (sec): 546.13499903679	
Average Loss on train: 6.2014229691107	


Epoch 19completed in (sec): 545.99924588203	
Average Loss on train: 6.1838587273178	


Epoch 20completed in (sec): 545.85558891296	
Average Loss on train: 6.1671739215651	


In [19]:
kag = data['valid_output']
valid = data['valid']

In [21]:
valid_topredict = valid:narrow(2,1,50)
valid_input = valid:narrow(2,51,4)
preds_valid = model:forward(valid_input)

In [22]:
preds_valid = model:forward(valid_input)
kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
for i = 1,preds_valid:size(1) do
    kag_pred_valid[i]:copy(preds_valid[i]:index(1,valid_topredict[i])):exp()
end
norm_mat_ = torch.zeros(preds_valid:size(1),50)
norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
kag_pred_valid:cdiv(norm_mat_)
perp = 0
for i = 1,preds_valid:size(1) do
    mm,aa = kag[i]:max(1)
    perp = perp + math.log(kag_pred_valid[i][aa[1]])
end
perp = math.exp(-perp/preds_valid:size(1))
print(perp)

7.2295951783231	


### 6-gram & optim:

In [29]:
myFile = hdf5.open('../../6-grams.hdf5','r')
data6 = myFile:all()
myFile:close()

train6 = data6['train_nocounts']
train_input6 = train6:narrow(2,1,5)
train_output6 = train6:narrow(2,5,1)

kag = data6['valid_output']
valid = data6['valid']

valid_topredict = valid:narrow(2,1,50)
valid_input = valid:narrow(2,51,5)

criterion = nn.ClassNLLCriterion()

nwords = 10001
N = 6
dwin = N-1
hid1 = 30.
hid2 = 100
dnnlm3 = nn.Sequential()

LT2_3 = nn.Sequential()
LT1_3 = nn.LookupTable(nwords,hid1)
LT2_3:add(LT1_3)
LT2_3:add(nn.View(1,-1,hid1*dwin))
LT2_3:add(nn.Squeeze()) 

dnnlm3:add(LT2_3)

concat2_3 = nn.ConcatTable()

lin_tanh2_3 = nn.Sequential()
lin_tanh2_3:add(nn.Linear(hid1*dwin,hid2))
lin_tanh2_3:add(nn.Tanh())

id2_3 = nn.Identity()

concat2_3:add(lin_tanh2_3)
concat2_3:add(id2_3)

dnnlm3:add(concat2_3)
dnnlm3:add(nn.JoinTable(2))
dnnlm3:add(nn.Linear(hid2+hid1*dwin, nwords))
dnnlm3:add(nn.LogSoftMax())

In [30]:
model2 = dnnlm3
param2, gradparam2 = model2:getParameters()
batchsize = 32

In [31]:
for e = 1,20 do
    timer = torch.Timer()
    avLoss = 0
    
    LT1_3.weight:renorm(2,2,1)
    
    for t = 1,train6:size(1),batchsize do
    -- for t = 1,1,1 do
        inputs = torch.zeros(math.min(batchsize,train6:size(1)-t+1),dwin)
        targets = torch.zeros(math.min(batchsize,train6:size(1)-t+1))
        k = 1

        for i = t,math.min(t+batchsize-1,train6:size(1)) do
            targets[k] = train_output6[i]
            inputs[k] = train_input6[i]
            k = k + 1
        end

        function feval(x)
            collectgarbage()

            if x ~= param2 then
                param2:copy(x)
            end

            gradparam2:zero()

            outputs = model2:forward(inputs)
            L = criterion:forward(outputs,targets)
            dL = criterion:backward(outputs,targets)
            model2:backward(inputs,dL)

            avLoss = avLoss + L

            return L,gradparam2

        end

        sgdState = {learningRate = 0.01}

        optim.sgd(feval,param2,sgdState)

    end
    print('Epoch '.. e ..'completed in (sec): '..timer:time().real)
    print('Average Loss on train: '.. avLoss/math.floor(train6:size(1)/batchsize))
    
--     preds_valid = model2:forward(valid_input)
--     kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
--     for i = 1,preds_valid:size(1) do
--         kag_pred_valid[i]:copy(preds_valid[i]:index(1,valid_topredict[i])):exp()
--     end
--     norm_mat_ = torch.zeros(preds_valid:size(1),50)
--     norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
--     kag_pred_valid:cdiv(norm_mat_)
--     perp = 0
--     for i = 1,preds_valid:size(1) do
--         mm,aa = kag[i]:max(1)
--         perp = perp + math.log(kag_pred_valid[i][aa[1]])
--     end
--     perp = math.exp(-perp/preds_valid:size(1))
--     print('Perpelixty on validation: '..perp)
    
end

Epoch 1completed in (sec): 776.11843895912	
Average Loss on train: 5.6922936051666	


Perpelixty on validation: 26.189493239985	


Epoch 2completed in (sec): 753.64157891273	
Average Loss on train: 4.4914361966547	


Perpelixty on validation: 62.840384796315	


Epoch 3completed in (sec): 750.57159090042	


Average Loss on train: 4.119245002276	


Perpelixty on validation: 104.2369303411	


In [None]:
kag = data6['valid_output']
valid = data6['valid']

valid_topredict = valid:narrow(2,1,50)
valid_input = valid:narrow(2,51,5)

criterion = nn.ClassNLLCriterion()

nwords = 10001
N = 6
dwin = N-1
hid1 = 30.
hid2 = 100
dnnlm4 = nn.Sequential()

LT2_4 = nn.Sequential()
LT1_4 = nn.LookupTable(nwords,hid1)
LT2_4:add(LT1_4)
LT2_4:add(nn.View(1,-1,hid1*dwin))
LT2_4:add(nn.Squeeze()) 

dnnlm4:add(LT2_4)

concat2_4 = nn.ConcatTable()

lin_tanh2_4 = nn.Sequential()
lin_tanh2_4:add(nn.Linear(hid1*dwin,hid2))
lin_tanh2_4:add(nn.Tanh())

id2_4 = nn.Identity()

concat2_4:add(lin_tanh2_4)
concat2_4:add(id2_4)

dnnlm4:add(concat2_4)
dnnlm4:add(nn.JoinTable(2))
dnnlm4:add(nn.Linear(hid2+hid1*dwin, nwords))
dnnlm4:add(nn.LogSoftMax())

In [None]:
model3 = dnnlm4
param3, gradparam3 = model3:getParameters()
batchsize = 32

In [None]:
for e = 1,20 do
    timer = torch.Timer()
    avLoss = 0
    
    LT1_4.weight:renorm(2,2,1)
    
    for t = 1,train6:size(1),batchsize do
    -- for t = 1,1,1 do
        inputs = torch.zeros(math.min(batchsize,train6:size(1)-t+1),dwin)
        targets = torch.zeros(math.min(batchsize,train6:size(1)-t+1))
        k = 1

        for i = t,math.min(t+batchsize-1,train6:size(1)) do
            targets[k] = train_output6[i]
            inputs[k] = train_input6[i]
            k = k + 1
        end

        function feval(x)
            collectgarbage()

            if x ~= param3 then
                param3:copy(x)
            end

            gradparam3:zero()

            outputs = model3:forward(inputs)
            L = criterion:forward(outputs,targets)
            dL = criterion:backward(outputs,targets)
            model3:backward(inputs,dL)

            avLoss = avLoss + L

            return L,gradparam3

        end

        sgdState = {learningRate = 0.01}

        optim.sgd(feval,param3,sgdState)

    end
    print('Epoch '.. e ..'completed in (sec): '..timer:time().real)
    print('Average Loss on train: '.. avLoss/math.floor(train6:size(1)/batchsize))
    
--     preds_valid = model3:forward(valid_input)
--     kag_pred_valid = torch.Tensor(preds_valid:size(1),50)
--     for i = 1,preds_valid:size(1) do
--         kag_pred_valid[i]:copy(preds_valid[i]:index(1,valid_topredict[i])):exp()
--     end
--     norm_mat_ = torch.zeros(preds_valid:size(1),50)
--     norm_mat_:copy(torch.expandAs(kag_pred_valid:sum(2), kag_pred_valid))
--     kag_pred_valid:cdiv(norm_mat_)
--     perp = 0
--     for i = 1,preds_valid:size(1) do
--         mm,aa = kag[i]:max(1)
--         perp = perp + math.log(kag_pred_valid[i][aa[1]])
--     end
--     perp = math.exp(-perp/preds_valid:size(1))
--     print('Perpelixty on validation: '..perp)
    
end