In [27]:
-- setup some useful stuff
require 'nn'

-- setting the random generator seed
torch.manualSeed(42)

-- plotting
Plot = require 'itorch.Plot'

-- help function to print in green
function cprint(str) print(sys.COLORS.green..str..'\27[0m') end

-- tests
precision = 1e-4
tester = torch.Tester()
function runTest(test)
    tester:add(test)
    tester:run()
    tester = torch.Tester()
end

##Lookup Table

In [78]:
lookupTable = function(vocabSize, embeddingSize)
    -- module to build
    local this = {}
    
    -- standard deviation for initialization
    local stdv =  1./math.sqrt(embeddingSize)
    
    -- weight matrix
    this.weight = torch.Tensor(vocabSize, embeddingSize):uniform(-stdv, stdv)
    
    -- forward operation
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        input = (input:dim() == 1) and input:reshape(1, input:size(1)) or input
        -- how many words? how many samples?
        local batchSize = input:size(1)
        local contextSize = input:size(2)
        -- view matrix as vector to be used as index
        local vectorView = torch.view(input:long(), -1)
        local output = this.weight:index(1, vectorView)
        return torch.view(output, batchSize, contextSize, embeddingSize)
    end
    
    return this
end

-- test
runTest(function()
    print()
    -- 5 words with embeddings of size 3
    local dictionary = lookupTable(5,3)
    dictionary.weight = torch.range(1,15):reshape(5,3)
    cprint('LookupTable weight is now:')
    print(dictionary.weight)
    cprint('That\'s right, LookupTable has no bias!\n')
    tester:assert(dictionary.bias == nil)

    local inputVector = torch.Tensor{1,3}
    cprint('Testing input vector is:')
    print(inputVector)
    local expected = torch.Tensor{{{1,2,3},{7,8,9}}}
    cprint('Expected output is:')
    print(expected)
    local output = dictionary.forward(inputVector)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

    local inputMatrix = torch.Tensor{{1,3},{2,4}}
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = torch.Tensor{{1,2,3},{7,8,9},{4,5,6},{10,11,12}}
    cprint('Expected output is:')
    print(expected)
    local output = dictionary.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

Running 1 tests	
_|  ==> unknown
[0;32mLookupTable weight is now:[0m	
  1   2   3
  4   5   6
  7   8   9
 10  11  12
 13  14  15
[torch.DoubleTensor of dimension 5x3]

[0;32mThat's right, LookupTable has no bias!
[0m	
[0;32mTesting input vector is:[0m	
 1
 3
[torch.DoubleTensor of dimension 2]

[0;32mExpected output is:[0m	
(1,.,.) = 
  1  2  3
  7  8  9
[torch.DoubleTensor of dimension 1x2x3]

[0;32mActual output is:[0m	
(1,.,.) = 
  1  2  3
  7  8  9
[torch.DoubleTensor of dimension 1x2x3]

[0;32mTesting input matrix is:[0m	
 1  3
 2  4
[torch.DoubleTensor of dimension 2x2]

[0;32mExpected output is:[0m	
  1   2   3
  7   8   9
  4   5   6
 10  11  12
[torch.DoubleTensor of dimension 4x3]

[0;32mActual output is:[0m	
(1,.,.) = 
   1   2   3
   7   8   9

(2,.,.) = 
   4   5   6
  10  11  12
[torch.DoubleTensor of dimension 2x2x3]



              _  ==> Done 

Completed 3 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Concatenate


In [106]:
-- input:  batchSize x contextSize x embeddingSize tensor
-- output: (contextSize * embeddingSize) x batchSize matrix
concatenate = function()
    local this = {}
    this.forward = function(input)
        local dim = (input:size(1) == 1) and 1 or input:dim()-1
        return torch.view(input, dim, -1):t()
    end
    return this
end

-- test
runTest(function()
    print()
    local reshaper = concatenate()
    local inputTensor = torch.Tensor{{{1,2,3},{4,5,6}},{{7,8,9},{10,11,12}}}
    cprint('Testing input tensor is:')
    print(inputTensor)
    local expected = torch.Tensor{{1,2,3,4,5,6},{7,8,9,10,11,12}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = reshaper.forward(inputTensor)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

    local inputMatrix = torch.Tensor{{1,2,3},{4,5,6}}
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = torch.Tensor{{1,2,3,4,5,6}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = reshaper.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

end)


Running 1 tests	
_|  ==> unknown
[0;32mTesting input tensor is:[0m	
(1,.,.) = 
   1   2   3
   4   5   6

(2,.,.) = 
   7   8   9
  10  11  12
[torch.DoubleTensor of dimension 2x2x3]

[0;32mExpected output is:[0m	
  1   7
  2   8
  3   9
  4  10
  5  11
  6  12
[torch.DoubleTensor of dimension 6x2]

[0;32mActual output is:[0m	
  1   7
  2   8
  3   9
  4  10
  5  11
  6  12
[torch.DoubleTensor of dimension 6x2]

[0;32mTesting input matrix is:[0m	
 1  2  3
 4  5  6
[torch.DoubleTensor of dimension 2x3]

[0;32mExpected output is:[0m	
 1
 2
 3
 4
 5
 6
[torch.DoubleTensor of dimension 6x1]

[0;32mActual output is:[0m	
 1
 2
 3
 4
 5
 6
[torch.DoubleTensor of dimension 6x1]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Linear Module

In [107]:
linearModule = function(inputSize, outputSize)
    
    -- module to build
    local this = {}
    
    -- standard deviation for initialization
    local stdv =  1./math.sqrt(outputSize)
    
    -- weight matrix
    this.weight = torch.Tensor(outputSize, inputSize):uniform(-stdv, stdv)
    
    -- bias vector
    this.bias = torch.Tensor(outputSize, 1):uniform(-stdv, stdv)
    
    -- forward operation
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        input = (input:dim() == 1) and input:reshape(input:size(1), 1) or input
        -- multiply the input and weight matrix
        local output = this.weight * input
        -- add the expanded bias vector and return
        return output + this.bias:expand(output:size())
    end

    return this
end

-- test
runTest(function()
    print()
    -- takes N inputs of size 2 and produces N outputs of size 3
    local layer = linearModule(2,3)
    layer.weight:fill(2)
    layer.bias:fill(1)
    cprint('LinearModule weight is now:')
    print(layer.weight)
    cprint('LinearModule bias is now:')
    print(layer.bias)

    local inputVector = torch.Tensor{{1,2}}:t()
    cprint('Testing input vector is:')
    print(inputVector)
    local expected = torch.Tensor{{7,7,7}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = layer.forward(inputVector)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)

    local inputMatrix = torch.Tensor{{1, 2}, {3,4}}:t()
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = torch.Tensor{{7,7,7}, {15,15,15}}:t()
    cprint('Expected output is:')
    print(expected)
    local output = layer.forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

Running 1 tests	
_|  ==> unknown
[0;32mLinearModule weight is now:[0m	
 2  2
 2  2
 2  2
[torch.DoubleTensor of dimension 3x2]

[0;32mLinearModule bias is now:[0m	
 1
 1
 1
[torch.DoubleTensor of dimension 3x1]

[0;32mTesting input vector is:[0m	
 1
 2
[torch.DoubleTensor of dimension 2x1]

[0;32mExpected output is:[0m	
 7
 7
 7
[torch.DoubleTensor of dimension 3x1]

[0;32mActual output is:[0m	
 7
 7
 7
[torch.DoubleTensor of dimension 3x1]

[0;32mTesting input matrix is:[0m	
 1  3
 2  4
[torch.DoubleTensor of dimension 2x2]

[0;32mExpected output is:[0m	
  7  15
  7  15
  7  15
[torch.DoubleTensor of dimension 3x2]

[0;32mActual output is:[0m	
  7  15
  7  15
  7  15
[torch.DoubleTensor of dimension 3x2]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Sigmoid
\begin{equation*}
    Sigmoid(x_i) = \frac{1}{1 + e^{-x_i}}
\end{equation*}

In [21]:
sigmoid = function()
    local this = {}
    this.forward = function(input)
        return torch.exp(input):pow(-1):add(1):pow(-1)
    end
    return this
end

--plot
local max = 7
local step = 1e-2
local x = torch.Tensor():range(-max, max, step)
local y = sigmoid().forward(x)
Plot():line(x, y):title('Sigmoid'):draw()

-- test
runTest(function()
    print()
    local inputVector = torch.range(1,3)
    cprint('Testing input vector is:')
    print(inputVector)
    local expected = nn.Sigmoid():forward(inputVector)
    cprint('Expected output is:')
    print(expected)
    local output = sigmoid().forward(inputVector)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
        
    local inputMatrix = torch.range(1,9):reshape(3,3)
    cprint('Testing input matrix is:')
    print(inputMatrix)
    local expected = nn.Sigmoid():forward(inputMatrix)
    cprint('Expected output is:')
    print(expected)
    local output = sigmoid().forward(inputMatrix)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)



Running 1 tests	
_|  ==> unknown
[0;32mTesting input vector is:[0m	
 1
 2
 3
[torch.DoubleTensor of dimension 3]

[0;32mExpected output is:[0m	
 0.7311
 0.8808
 0.9526
[torch.DoubleTensor of dimension 3]

[0;32mActual output is:[0m	
 0.7311
 0.8808
 0.9526
[torch.DoubleTensor of dimension 3]



[0;32mTesting input matrix is:[0m	
 1  2  3
 4  5  6
 7  8  9
[torch.DoubleTensor of dimension 3x3]

[0;32mExpected output is:[0m	
 0.7311  0.8808  0.9526
 0.9820  0.9933  0.9975
 0.9991  0.9997  0.9999
[torch.DoubleTensor of dimension 3x3]

[0;32mActual output is:[0m	
 0.7311  0.8808  0.9526
 0.9820  0.9933  0.9975
 0.9991  0.9997  0.9999
[torch.DoubleTensor of dimension 3x3]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


## LogSoftMax
\begin{equation*}
   LogSoftMax(x_i) = -\ln \Bigl(\frac{1}{e^{x_i}} \sum_j e^{x_j}\Bigr)
\end{equation*}

In [115]:
logSoftMax = function(input)
    local this = {}
    this.forward = function(input)
        -- quirk to make it work with vectors and matrices
        local inp = (input:dim() == 1) and input:reshape(1, input:size(1)) or input
    
        -- calculate sum of e^x_i and expand it to the right size
        local sumOfExp = torch.exp(inp):sum(2):expand(inp:size())
    
        -- calculate the rest of the formula and return
        local ret = torch.exp(inp):pow(-1):cmul(sumOfExp):log():mul(-1)
        return (input:dim() == 1) and ret:t():view(-1) or ret
    end
    return this
end

-- test
runTest(function()
    print()
    input = torch.range(1,12)
    cprint('Tensting input vector is:')
    print(input)
    local lms = nn.LogSoftMax()
    local expected = lms:forward(input)
    cprint('Expected output is:')
    print(expected)
    local output = logSoftMax().forward(input)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
        
    input = torch.range(1,12):reshape(3,4)
    cprint('Tensting input matrix is:')
    print(input)
    local lms = nn.LogSoftMax()
    local expected = lms:forward(input)
    cprint('Expected output is:')
    print(expected)
    local output = logSoftMax().forward(input)
    cprint('Actual output is:')
    print(output)
    tester:assertTensorEq(expected, output, precision)
end)

--plot
local max = 10
local step = 1
local x = torch.Tensor():range(-max, max, step)
local y = logSoftMax().forward(x)
Plot():line(x, y):title('LogSoftMax'):draw()

Running 1 tests	
_|  ==> unknown
[0;32mTensting input vector is:[0m	
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
[torch.DoubleTensor of dimension 12]

[0;32mExpected output is:[0m	
-11.4588
-10.4588
 -9.4588
 -8.4588
 -7.4588
 -6.4588
 -5.4588
 -4.4588
 -3.4588
 -2.4588
 -1.4588
 -0.4588
[torch.DoubleTensor of dimension 12]

[0;32mActual output is:[0m	
-11.4587
-10.4587
 -9.4587
 -8.4587
 -7.4587
 -6.4587
 -5.4587
 -4.4587
 -3.4587
 -2.4587
 -1.4587
 -0.4587
[torch.DoubleTensor of dimension 12]

[0;32mTensting input matrix is:[0m	


  1   2   3   4
  5   6   7   8
  9  10  11  12
[torch.DoubleTensor of dimension 3x4]

[0;32mExpected output is:[0m	
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
[torch.DoubleTensor of dimension 3x4]

[0;32mActual output is:[0m	
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
-3.4402 -2.4402 -1.4402 -0.4402
[torch.DoubleTensor of dimension 3x4]



              _  ==> Done 

Completed 2 asserts in 1 tests with 0 errors	

--------------------------------------------------------------------------------	


##Negative Log-Likelihood

\begin{equation*}
    C = -\ln a^L_y
\end{equation*}

In [14]:
negativeLogLikelihood = function()
    local this = {}
    this.forward = function(input, class)
        return input:log():mul(-1)[class]
    end
    return this
end

-- test

##Sequential Model

In [121]:
sequentialModel = function()
    local this = {}
    this.modules = {}
    -- add a module to the existing chain
    this.add = function(mod)
        table.insert(this.modules, mod)
    end
    -- forward the input through the whole network
    this.forward = function(input, debug)
        local output = input
        print(#this.modules)
        for _,mod in ipairs(this.modules) do
            output = mod.forward(output)
            if debug then print(output) end
        end
        return output
    end
    return this
end

## Fixed-context Language Model

In [124]:
-- network parameters
local embeddingSize = 2
local contextLength = 3
local vocabSize = 5
local hiddenSize = 12
local stdv = 1

-- Index To Embedding
local lookupTable = lookupTable(vocabSize, embeddingSize)
cprint('LookupTable of '..vocabSize..' words, each is a vector of size '..embeddingSize)
print(lookupTable.weight)

-- Concatenate Context
local reshaper = concatenate()

-- Context To Hidden
local contextToHidden = linearModule(contextLength * embeddingSize, hiddenSize)
cprint('Context-To-Hidden matrix is:')
print(contextToHidden.weight)
print(contextToHidden.bias)

-- Hidden To Embedding
local hiddenToEmbedding = linearModule(hiddenSize, embeddingSize)
cprint('Hidden-To-Embedding matrix is:')
print(hiddenToEmbedding.weight)
print(hiddenToEmbedding.bias)

-- Embedding To Vocabulary
local embeddingToVocabulary = linearModule(embeddingSize, vocabSize)
cprint('Embedding-To-Vocabulary matrix is:')
print(embeddingToVocabulary.weight)
print(embeddingToVocabulary.bias)

-- NonLinearity
local nonLinearity = sigmoid()

-- Softmax
local softMax = logSoftMax()

-- Network
local network = sequentialModel()
network.add(lookupTable)
network.add(reshaper)
network.add(contextToHidden)
network.add(nonLinearity)
network.add(hiddenToEmbedding)
network.add(embeddingToVocabulary)
network.add(softMax)

-- Forward
local input = torch.Tensor{1,2,3}
local output = network.forward(input, true)

cprint('Glorious output!')
print(output)

[0;32mLookupTable of 5 words, each is a vector of size 2[0m	
-0.5425  0.0562
 0.4489  0.1809
-0.1688  0.2465
 0.5345  0.0459
 0.5205  0.3131
[torch.DoubleTensor of dimension 5x2]

[0;32mContext-To-Hidden matrix is:[0m	


 0.1766 -0.2071  0.1674 -0.1663 -0.1128  0.1840
-0.2420 -0.1025 -0.0560 -0.2755 -0.1885 -0.1999
 0.1126 -0.1596 -0.0889  0.2640  0.2746  0.0422
 0.0814  0.2498  0.1862  0.0434 -0.2122 -0.0119
 0.2090 -0.1075  0.2441  0.0614 -0.0075 -0.2083
 0.0613  0.2299  0.1529  0.1901 -0.1877 -0.1053
 0.0015  0.2614 -0.0585  0.1878 -0.2042  0.0960
-0.0765 -0.2184 -0.2493 -0.0126 -0.2738  0.0035
-0.2106 -0.1544  0.2674 -0.2425  0.0286  0.1598
 0.2689 -0.2182 -0.0390  0.0544 -0.1086  0.2520
 0.0035  0.2171 -0.0349 -0.1283 -0.2277  0.2029
 0.0813 -0.2885 -0.1639 -0.2785  0.0690  0.0592
[torch.DoubleTensor of dimension 12x6]

 0.0867
-0.0781
-0.2009
-0.0656
-0.2533
 0.0372
 0.1621
-0.1779
-0.0232
-0.1664
-0.2551
-0.0089
[torch.DoubleTensor of dimension 12x1]

[0;32mHidden-To-Embedding matrix is:[0m	
Columns 1 to 10
 0.6998  0.3596 -0.6254  0.1690  0.2758 -0.2866  0.6840 -0.0985 -0.3689 -0.4658
-0.5354  0.3998 -0.2782 -0.3318 -0.5642 -0.4557  0.2718  0.2710 -0.6190  0.1670

Columns 11 to 12
-0.5059 -0.

 0.7024
-0.3834
[torch.DoubleTensor of dimension 2x1]

[0;32mEmbedding-To-Vocabulary matrix is:[0m	
 0.2808 -0.2391
 0.1031 -0.2497
-0.1733 -0.2918
 0.1108  0.1393
 0.0242  0.3113
[torch.DoubleTensor of dimension 5x2]

-0.0661
 0.2006
-0.3303
 0.2613
 0.3458
[torch.DoubleTensor of dimension 5x1]

7	
(1,.,.) = 
 -0.5425  0.0562
  0.4489  0.1809
 -0.1688  0.2465
[torch.DoubleTensor of dimension 1x3x2]

-0.5425
 0.0562
 0.4489
 0.1809
-0.1688
 0.2465
[torch.DoubleTensor of dimension 6x1]

 0.0887
-0.0450
-0.2990
 0.0286
-0.3021
 0.1256
 0.2418
-0.2158
 0.1931
-0.2518
-0.1953
-0.1903
[torch.DoubleTensor of dimension 12x1]

 0.5222
 0.4888
 0.4258
 0.5072
 0.4250
 0.5314
 0.5602
 0.4462
 0.5481
 0.4374
 0.4513
 0.4526
[torch.DoubleTensor of dimension 12x1]

 0.4607
-1.1381
[torch.DoubleTensor of dimension 2x1]



 0.3354
 0.5323
-0.0780
 0.1538
 0.0027
[torch.DoubleTensor of dimension 5x1]

1e-16 *
 -0.0000
 -0.0000
  1.1102
  1.1102
 -0.0000
[torch.DoubleTensor of dimension 5x1]

[0;32mGlorious output![0m	
1e-16 *
 -0.0000
 -0.0000
  1.1102
  1.1102
 -0.0000
[torch.DoubleTensor of dimension 5x1]

