In [1]:
-- setup some useful stuff
require 'nn'
require 'cunn'

-- setting the random generator seed
torch.manualSeed(42)

-- default tensor type
torch.setdefaulttensortype('torch.FloatTensor')

-- plotting
Plot = require 'itorch.Plot'

-- help function to print in green
function cprint(str)  print(sys.COLORS.green..str..'\27[0m') end
function dprint(item, debug) if debug then print(item) end end

-- tests
precision = 1e-4
tester = torch.Tester()
function runTest(test)
    tester:add(test)
    tester:run()
    tester = torch.Tester()
end

##Lookup Table

In [2]:
lookupTable = function(vocabSize, embeddingSize)
    -- module to build
    local this = {}
    
    -- standard deviation for initialization
    local stdv =  1./math.sqrt(embeddingSize)
    
    -- weight matrix
    this.weight = torch.Tensor(vocabSize, embeddingSize):uniform(-stdv, stdv)
    
    -- forward operation
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        input = (input:dim() == 1) and input:reshape(1, input:size(1)) or input
        -- how many words? how many samples?
        local batchSize = input:size(1)
        local contextSize = input:size(2)
        -- view matrix as vector to be used as index
        local vectorView = torch.view(input:long(), -1)
        local output = this.weight:index(1, vectorView)
        return torch.view(output, batchSize, contextSize, embeddingSize)
    end
    
    return this
end

-- test
runTest(function()
    print()
    -- 5 words with embeddings of size 3
    local dictionary = lookupTable(5,3)
    local lkt = nn.LookupTable(5,3)
    dictionary.weight = torch.FloatTensor():range(1,15):reshape(5,3)
    lkt.weight = torch.FloatTensor():range(1,15):reshape(5,3)
    cprint('LookupTable weight is now:')
    print(dictionary.weight)
    cprint('That\'s right, LookupTable has no bias!\n')
    tester:assert(dictionary.bias == nil)

    local inputVector = torch.FloatTensor{1,3}
    cprint('Testing input vector is:')
    print(inputVector)
    --local expected = torch.Tensor{{{1,2,3},{7,8,9}}}
    local expected = lkt:forward(inputVector)
    cprint('Expected output is:')
    print(expected)
    local output = dictionary.forward(inputVector)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

    local inputMatrix = torch.FloatTensor{{1,3},{2,4},{3,5}}
    cprint('Testing input matrix is:')
    print(inputMatrix)
    --local expected = torch.Tensor{{1,2,3},{7,8,9},{4,5,6},{10,11,12}}
    local expected = lkt:forward(inputMatrix)
    cprint('Expected output is:')
    print(expected)
    local output = dictionary.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

Running 1 tests	
_|  ==> unknown
[0;32mLookupTable weight is now:[0m	
  1   2   3
  4   5   6
  7   8   9
 10  11  12
 13  14  15
[torch.FloatTensor of dimension 5x3]

[0;32mThat's right, LookupTable has no bias!
[0m	
[0;32mTesting input vector is:[0m	
 1
 3
[torch.FloatTensor of dimension 2]

[0;32mExpected output is:[0m	
 1  2  3
 7  8  9
[torch.FloatTensor of dimension 2x3]

[0;32mActual output is:[0m	
(1,.,.) = 
  1  2  3
  7  8  9
[torch.FloatTensor of dimension 1x2x3]

[0;32mTesting input matrix is:[0m	


 1  3
 2  4
 3  5
[torch.FloatTensor of dimension 3x2]

[0;32mExpected output is:[0m	
(1,.,.) = 
   1   2   3
   7   8   9

(2,.,.) = 
   4   5   6
  10  11  12

(3,.,.) = 
   7   8   9
  13  14  15
[torch.FloatTensor of dimension 3x2x3]

[0;32mActual output is:[0m	
(1,.,.) = 
   1   2   3
   7   8   9

(2,.,.) = 
   4   5   6
  10  11  12

(3,.,.) = 
   7   8   9
  13  14  15
[torch.FloatTensor of dimension 3x2x3]



              _  ==> Done 

Completed 3 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Concatenate


In [3]:
-- input:  batchSize x contextSize x embeddingSize tensor
-- output: (contextSize * embeddingSize) x batchSize matrix
concatenate = function()
    local this = {}
    this.forward = function(input)
        --local dim = (input:size(1) == 1) and 1 or input:dim()-1
        local batchSize = input:size(1)
        local contextSize = input:size(2)
        local embeddingSize = input:size(3)
        return torch.view(input, batchSize, contextSize*embeddingSize):t()
    end
    return this
end

-- test
runTest(function()
    print()
    local reshaper = concatenate()
    local inputTensor = torch.Tensor{{{1,2,3},{4,5,6}},{{7,8,9},{10,11,12}}}
    cprint('Testing input tensor is:')
    print(inputTensor)
    local expected = torch.Tensor{{1,2,3,4,5,6},{7,8,9,10,11,12}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = reshaper.forward(inputTensor)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
--[[
    local inputMatrix = torch.Tensor{{1,2,3},{2,3,4},{3,4,5}}
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = torch.Tensor{{1,2,3,4,5,6}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = reshaper.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
--]]
end)


Running 1 tests	
_|  ==> unknown
[0;32mTesting input tensor is:[0m	
(1,.,.) = 
   1   2   3
   4   5   6

(2,.,.) = 
   7   8   9
  10  11  12
[torch.FloatTensor of dimension 2x2x3]

[0;32mExpected output is:[0m	
  1   7
  2   8
  3   9
  4  10
  5  11
  6  12
[torch.FloatTensor of dimension 6x2]

[0;32mActual output is:[0m	
  1   7
  2   8
  3   9
  4  10
  5  11
  6  12
[torch.FloatTensor of dimension 6x2]



              _  ==> Done 

Completed 1 asserts in 1 tests with 0 errors	


##Linear Module

In [4]:
linearModule = function(inputSize, outputSize)
    
    -- module to build
    local this = {}
    
    -- standard deviation for initialization
    local stdv =  1./math.sqrt(outputSize)
    
    -- weight matrix
    this.weight = torch.Tensor(outputSize, inputSize):uniform(-stdv, stdv)
    
    -- bias vector
    this.bias = torch.Tensor(outputSize, 1):uniform(-stdv, stdv)
    
    -- forward operation
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        input = (input:dim() == 1) and input:reshape(input:size(1), 1) or input
        -- multiply the input and weight matrix
        local output = this.weight * input
        -- add the expanded bias vector and return
        return output + this.bias:expand(output:size())
    end

    return this
end

-- test
runTest(function()
    print()
    -- takes N inputs of size 2 and produces N outputs of size 3
    local layer = linearModule(2,3)
    layer.weight:fill(2)
    layer.bias:fill(1)
    cprint('LinearModule weight is now:')
    print(layer.weight)
    cprint('LinearModule bias is now:')
    print(layer.bias)

    local inputVector = torch.Tensor{{1,2}}:t()
    cprint('Testing input vector is:')
    print(inputVector)
    local expected = torch.Tensor{{7,7,7}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = layer.forward(inputVector)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

    local inputMatrix = torch.Tensor{{1, 2}, {3,4}}:t()
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = torch.Tensor{{7,7,7}, {15,15,15}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = layer.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

Running 1 tests	
_|  ==> unknown
[0;32mLinearModule weight is now:[0m	
 2  2
 2  2
 2  2
[torch.FloatTensor of dimension 3x2]

[0;32mLinearModule bias is now:[0m	
 1
 1
 1
[torch.FloatTensor of dimension 3x1]

[0;32mTesting input vector is:[0m	
 1
 2
[torch.FloatTensor of dimension 2x1]

[0;32mExpected output is:[0m	
 7
 7
 7
[torch.FloatTensor of dimension 3x1]



[0;32mActual output is:[0m	
 7
 7
 7
[torch.FloatTensor of dimension 3x1]

[0;32mTesting input matrix is:[0m	
 1  3
 2  4
[torch.FloatTensor of dimension 2x2]

[0;32mExpected output is:[0m	
  7  15
  7  15
  7  15
[torch.FloatTensor of dimension 3x2]

[0;32mActual output is:[0m	
  7  15
  7  15
  7  15
[torch.FloatTensor of dimension 3x2]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Sigmoid
\begin{equation*}
    Sigmoid(x_i) = \frac{1}{1 + e^{-x_i}}
\end{equation*}

In [5]:
sigmoid = function()
    local this = {}
    this.forward = function(input)
        return torch.exp(input):pow(-1):add(1):pow(-1)
    end
    return this
end

--plot
local max = 7
local step = 1e-2
local x = torch.FloatTensor():range(-max, max, step)
local y = sigmoid().forward(x)
Plot():line(x, y):title('Sigmoid'):draw()

-- test
runTest(function()
    print()
    local inputVector = torch.FloatTensor():range(1,3)
    cprint('Testing input vector is:')
    print(inputVector)
    local expected = nn.Sigmoid():forward(inputVector)
    cprint('Expected output is:')
    print(expected)
    --local output = sigmoid().forward(inputVector)
    cprint('Actual output is:')
    --print(output)
    --tester:assertTensorEq(expected, output, precision)
        
    local inputMatrix = torch.FloatTensor():range(1,9):reshape(3,3)
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = nn.Sigmoid():forward(inputMatrix)
    cprint('Expected output is:')
    print(expected)
    --local output = sigmoid().forward(inputMatrix)
    cprint('Actual output is:')
    --print(output)
    --tester:assertTensorEq(expected, output, precision)
end)



Running 1 tests	
_|  ==> unknown
[0;32mTesting input vector is:[0m	
 1
 2
 3
[torch.FloatTensor of dimension 3]

[0;32mExpected output is:[0m	
 0.7311
 0.8808
 0.9526
[torch.FloatTensor of dimension 3]

[0;32mActual output is:[0m	
[0;32mTesting input matrix is:[0m	
 1  2  3
 4  5  6
 7  8  9
[torch.FloatTensor of dimension 3x3]

[0;32mExpected output is:[0m	


 0.7311  0.8808  0.9526
 0.9820  0.9933  0.9975
 0.9991  0.9997  0.9999
[torch.FloatTensor of dimension 3x3]

[0;32mActual output is:[0m	


              _  ==> Done 

Completed 0 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


## LogSoftMax
\begin{equation*}
   LogSoftMax(x_i) = -\ln \Bigl(\frac{1}{e^{x_i}} \sum_j e^{x_j}\Bigr)
\end{equation*}

In [6]:
logSoftMax = function(input)
    local this = {}
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        local inp = (input:dim() == 1) and input:reshape(1, input:size(1)) or input
    
        -- calculate sum of e^x_i and expand it to the right size
        local sumOfExp = torch.exp(inp):sum(2):expand(inp:size())
    
        -- calculate the rest of the formula and return
        local ret = torch.exp(inp):pow(-1):cmul(sumOfExp):log():mul(-1)
        return (input:dim() == 1) and ret:t():view(-1) or ret
    end
    return this
end

-- test
runTest(function()
    print()
    input = torch.range(1,12)
    cprint('Tensting input vector is:')
    print(input)
    local lms = nn.LogSoftMax()
    local expected = lms:forward(input)
    cprint('Expected output is:')
    print(expected)
    local output = logSoftMax().forward(input)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
        
    input = torch.range(1,12):reshape(3,4)
    cprint('Tensting input matrix is:')
    print(input)
    local lms = nn.LogSoftMax()
    local expected = lms:forward(input)
    cprint('Expected output is:')
    print(expected)
    local output = logSoftMax().forward(input)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

--plot
local max = 10
local step = 1
local x = torch.Tensor():range(-max, max, step)
local y = logSoftMax().forward(x)
Plot():line(x, y):title('LogSoftMax'):draw()

Running 1 tests	
_|  ==> unknown
[0;32mTensting input vector is:[0m	
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
[torch.FloatTensor of dimension 12]



[0;32mExpected output is:[0m	
-11.4588
-10.4588
 -9.4588
 -8.4588
 -7.4588
 -6.4588
 -5.4588
 -4.4588
 -3.4588
 -2.4588
 -1.4588
 -0.4588
[torch.FloatTensor of dimension 12]



[0;32mActual output is:[0m	
-11.4587
-10.4587
 -9.4587
 -8.4587
 -7.4587
 -6.4587
 -5.4587
 -4.4587
 -3.4587
 -2.4587
 -1.4587
 -0.4587
[torch.FloatTensor of dimension 12]

[0;32mTensting input matrix is:[0m	
  1   2   3   4
  5   6   7   8
  9  10  11  12
[torch.FloatTensor of dimension 3x4]



[0;32mExpected output is:[0m	
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
[torch.FloatTensor of dimension 3x4]

[0;32mActual output is:[0m	
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
[torch.FloatTensor of dimension 3x4]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Negative Log-Likelihood

\begin{equation*}
    C = -\ln a^L_y
\end{equation*}

In [7]:
negativeLogLikelihood = function()
    local this = {}
    this.forward = function(input, class)
        return input:log():mul(-1)[class]
    end
    return this
end

-- test

##Sequential Model

In [8]:
sequentialModel = function()
    local this = {}
    this.modules = {}
    -- add a module to the existing chain
    this.add = function(mod)
        table.insert(this.modules, mod)
    end
    -- forward the input through the whole network
    this.forward = function(input, debug)
        local output = input
        for _,mod in ipairs(this.modules) do
            output = mod.forward(output)
            if debug then print(output) end
        end
        return output
    end
    return this
end

## Fixed-context Language Model

In [None]:
-- network parameters
local vocabSize = 7
local embeddingSize = 2
local contextLength = 3
local hiddenSize = 12

torch.setdefaulttensortype('torch.FloatTensor')

makeNNLM = function(vocabSize, embeddingSize, contextLength, hiddenSize, debug)

    -- Index To Embedding
    local lookupTable = lookupTable(vocabSize, embeddingSize)
    dprint('LookupTable of '..vocabSize..' words, each is a vector of size '..embeddingSize, debug)
    dprint(lookupTable.weight, debug)

    -- Concatenate Context
    local reshaper = concatenate()

    -- Context To Hidden
    local contextToHidden = linearModule(contextLength * embeddingSize, hiddenSize)
    dprint('Context-To-Hidden matrix is:', debug)
    dprint(contextToHidden.weight, debug)
    dprint(contextToHidden.bias, debug)

    -- Hidden To Embedding
    local hiddenToEmbedding = linearModule(hiddenSize, embeddingSize)
    dprint('Hidden-To-Embedding matrix is:', debug)
    dprint(hiddenToEmbedding.weight, debug)
    dprint(hiddenToEmbedding.bias, debug)

    -- Embedding To Vocabulary
    local embeddingToVocabulary = linearModule(embeddingSize, vocabSize)
    dprint('Embedding-To-Vocabulary matrix is:', debug)
    dprint(embeddingToVocabulary.weight, debug)
    dprint(embeddingToVocabulary.bias, debug)

    -- NonLinearity
    local nonLinearity = sigmoid()

    -- Softmax
    local softMax = logSoftMax()

    -- Network
    network = sequentialModel()
    network.add(lookupTable)
    network.add(reshaper)
    network.add(contextToHidden)
    network.add(nonLinearity)
    network.add(hiddenToEmbedding)
    network.add(embeddingToVocabulary)
    network.add(softMax)
    
    return network
end

nnlm = makeNNLM(vocabSize, embeddingSize, contextLength, hiddenSize, true)

-- Forward
cprint('Vector forward')
local input = torch.Tensor{1,3,5}
network.forward(input, true)

cprint('Matrix forward')
local inputMatrix = torch.Tensor{{1,3,5},{2,4,6},{3,5,7}}
network.forward(inputMatrix, true)

##GPU Processing and Benchmarking

In [None]:
-- network parameters
local vocabSize = 1e4
local embeddingSize = 256
local contextLength = 10
local hiddenSize = 1024
local nBatches = 1e2
local batchSize = 1e2

-- create random input data
local randomIndexes = torch.FloatTensor():rand(contextLength * batchSize * nBatches)
randomIndexes = randomIndexes:mul(vocabSize):add(1):int():reshape(nBatches, batchSize, contextLength)

-- benchmark
benchmarkModel = function(model, data)
    sys.tic()
    local nBatches = data:size(1)
    for i=1,nBatches do
        model.forward(data[i])
    end
    local totalTime = sys.toc()
    print(string.format('nBatches:\t%s', data:size(1)))
    print(string.format('batchSize:\t%s', data:size(2)))
    print(string.format('Total time:\t%.3f s', totalTime))
    print(string.format('Avg time:\t%.3f ms', 1e3*totalTime/data:size(1)))
    return totalTime
end

-- tell the model to use DoubleTensor and run on CPU
torch.setdefaulttensortype('torch.DoubleTensor')

-- model using doubles
cprint('\nBenchmaking DoubleTensor model')
doubleModel = makeNNLM(vocabSize, embeddingSize, contextLength, hiddenSize, false)
local timeDouble = benchmarkModel(doubleModel, randomIndexes)

-- the following line transforms our CPU models into a GPU ones
torch.setdefaulttensortype('torch.CudaTensor')

-- model using GPU
cprint('\nBenchmaking CudaTensor model')
cudaModel = makeNNLM(vocabSize, embeddingSize, contextLength, hiddenSize, false)
local timeCuda = benchmarkModel(cudaModel, randomIndexes)

-- report speedup
local speedUp = timeDouble/timeCuda
cprint(string.format('\nUsing CudaTensor yielded a speedup of %.2fx', speedUp))


[0;32m
Benchmaking DoubleTensor model[0m	
