# Highway Neural Networks 

Highway Neural Networks were introduced by __Rupesh Kumar Srivastava, Klaus Greff and Jürgen Schmidhuber__ in [this paper](https://arxiv.org/pdf/1505.00387v2.pdf).

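The goal of this paper is to make very deep neural networks easier to train. In plain deep networks the gradient tends to vanish as it is propagated back through many layers; highway networks counter this by adding learned gates that let information pass through a layer unchanged, preventing the signal from being attenuated at every layer.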

In [2]:
require 'nn';
require 'nngraph'

# Loading data 

In [1]:
require 'david_tools'

{
  mnist_pipeline : function: 0x072bc070
  mnist_munging : function: 0x06fbd290
  train : 
    {
      data : ByteTensor - size: 60000x28x28
      size : 60000
      label : ByteTensor - size: 60000
    }
  convert_to_tensor : function: 0x0751d2f0
  classify_training_examples : function: 0x0700f188
  reshaped_data : ByteTensor - size: 60000x784
  get_target_classes : function: 0x0751ed48
}


In [4]:
-- Run the mnist_pipeline helper made available by require 'david_tools'.
-- Judging by the output printed below, the call keeps the examples of
-- classes 1 and 2 -- TODO confirm the meaning of the two arguments against
-- david_tools.
training_set = mnist_pipeline(1,2)

times : 	
{
  reshaping : 1.0438530445099
}


times : 	
{
  classifying : 0.1857168674469
}
# of example of class 1 : 6742	
# of example of class 2 : 5958	


times : 	
{
  look_up_building : 0.014348983764648
  global : 0.014357089996338
  shuffling : 0.12929487228394
}


In [None]:
-- Build a single fully connected layer with 50 output units.
-- NOTE(review): the input width is read from the FIRST dimension of
-- training_set.data. If that tensor is laid out as examples x features
-- (like the 60000x784 reshaped_data shown above), this is the number of
-- examples rather than the number of features -- confirm whether the second
-- dimension was intended.
local input_width = training_set.data:size(1)
model = nn.Linear(input_width, 50)

In [None]:
-- LeNet-style convolutional classifier for single-channel 28x28 images
-- (the MNIST tensor loaded above is 60000x28x28).
--
-- Spatial size through the feature extractor (stride-1 convolutions, no padding):
--   28x28 -> conv 5x5 -> 24x24 -> pool 2x2 -> 12x12
--         -> conv 5x5 ->  8x8  -> pool 2x2 ->  4x4
-- so the flattened feature vector holds 16*4*4 values. The former 16*5*5 is
-- only valid for 32x32 inputs and makes nn.View fail on 28x28 images.
local n_features = 16 * 4 * 4

net = nn.Sequential()
net:add(nn.SpatialConvolution(1, 6, 5, 5))   -- 1 input image channel, 6 output channels, 5x5 convolution kernel
net:add(nn.ReLU())                           -- non-linearity
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))    -- max-pooling over 2x2 windows with stride 2
net:add(nn.SpatialConvolution(6, 16, 5, 5))  -- 6 input channels, 16 output channels, 5x5 convolution kernel
net:add(nn.ReLU())                           -- non-linearity
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))    -- max-pooling over 2x2 windows with stride 2
net:add(nn.View(n_features))                 -- reshapes the 3D tensor of 16x4x4 into a 1D tensor of 16*4*4
net:add(nn.Linear(n_features, 120))          -- fully connected layer (matrix multiplication between input and weights)
net:add(nn.ReLU())                           -- non-linearity
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())                           -- non-linearity
-- 15 output units, one log-probability each.
-- NOTE(review): MNIST has 10 digit classes and this notebook selects classes
-- 1 and 2 -- confirm the intended number of outputs.
net:add(nn.Linear(84, 15))
net:add(nn.LogSoftMax())                     -- converts the output to log-probabilities. Useful for classification problems

In [None]:
-- linear units
-- Starts a new, empty sequential container and rebinds the global `model`
-- to it.
-- NOTE(review): add() is called without a module argument, so this cell looks
-- like an unfinished placeholder -- supply the intended layer (presumably an
-- nn.Linear, given the heading) before running it.

model = nn.Sequential()
model:add()

In [8]:
x1 = nn.Identity()() -- first graph input: calling the nn.Identity module yields an nngraph node wrapping it
x2 = nn.Identity()() -- second graph input node, built the same way

In [9]:
a = nn.CAddTable()({x1, x2}) -- node applying nn.CAddTable (element-wise sum of its inputs) to the two input nodes
m = nn.gModule({x1, x2}, {a}) -- wrap the graph as one module: inputs {x1, x2}, output {a}

In [10]:
a

nngraph.Node
{
  data : 
    {
      annotations : 
        {
          _debugLabel : [[string "a = nn.CAddTable()({x1, x2})..."]]:1_
        }
      module : 
        nn.CAddTable
        {
          gradInput : table: 0x56589718
          _type : torch.DoubleTensor
          output : DoubleTensor - empty
        }
      reverseMap : 
        {
          nngraph.Node : true
        }
      forwardNodeId : 1
      mapindex : 
        {
          1 : 
            {
              annotations : 
                {
                  _debugLabel : [[string "x1 = nn.Identity()()..."]]:1_
                }
              module : 
                nn.Identity
                {
                  gradInput : DoubleTensor - empty
                  _type : torch.DoubleTensor
                  output : DoubleTensor - empty
                }
              reverseMap : 
                {
                  nngraph.Node : true
                }
              forwardNodeId : 3
              mapindex : 
  

              {
                  _debugLabel : [[string "x1 = nn.Identity()()..."]]:2_
                }
              module : 
                nn.Identity
                {
                  gradInput : DoubleTensor - empty
                  _type : torch.DoubleTensor
                  output : DoubleTensor - empty
                }
              reverseMap : 
                {
                  nngraph.Node : true
                }
              forwardNodeId : 4
              mapindex : 
                {
                  1 : table: 0x5658b2d8
                  table: 0x5658b2d8 : 1
                }
            }
          table: 0x56586b88 : 2
          table: 0x56586300 : 1
        }
    }
  visited : false
  id : 2
  marked : false
  children : 
    {
      1 : 
        nngraph.Node
        {
          data : 
            {
              annotations : 
                {
                  _debugLabel : [[string "x1 = nn.Identity()()..."]]:1_
                }
              mod

        {
                  1 : table: 0x5658b2d8
                  table: 0x5658b2d8 : 1
                }
            }
          visited : false
          id : 4
          marked : false
          children : 
            {
              1 : 
                nngraph.Node
                {
                  data : table: 0x5658b2d8
                  visited : false
                  id : 6
                  marked : false
                  children : table: 0x5658b7e8
                }
              nngraph.Node : 1
            }
        }
      nngraph.Node : 1
      nngraph.Node : 2
    }
}
