In [None]:
local torch = require 'torch'
require 'hdf5'
require 'nn'
require 'cunn';
require 'cutorch';
-- require 'cudnn';
require 'paths'
require 'xlua'    -- xlua provides useful tools, like progress bars
require 'optim'   -- an optimization package, for online and batch methods
require 'image'
color = require 'trepl.colorize'

-- Filesystem roots: datasets are loaded from below base_path and the final
-- classifier is written below model_path.
base_path = '/media/wei/DATA/datasets/vlm/'
model_path = '/home/wei/Dropbox/research/gesture & sl & action/my_hand_detector/models/'

-- Suffix that selects which training-set file is loaded and that is also
-- embedded in the name of the saved classifier.
dataset_size = 'tiny'

-- Presumably the height/width of one sample image in pixels; not referenced
-- anywhere else in the visible part of this file -- TODO confirm.
img_size = { H = 58, W = 58 }

----test nn:
--m = nn.SpatialConvolution(1,3,2,2) -- learn 3 2x2 kernels
--print(m.weight) -- initially, the weights are randomly initialized

---- test cuda:
--a = torch.Tensor(5,3) -- construct a 5x3 matrix, uninitialized
--b = torch.rand(3,4)
--c = torch.Tensor(5,4)
--c:mm(a,b) -- store the result of a*b in c
--a = a:cuda()
--b = b:cuda()
--c = c:cuda()
--c:mm(a,b) -- done on GPU

---- test with table
--classes = {key1='Hand', key2='Non-hand'}
--print(classes.key1)
--print(classes.key2)

-- Human-readable class names, indexed by the 1-based label value.
classes = { 'Non-hand', 'Hand' }
-- Display smoke test: renders three copies of the bundled sample image
-- (requires the itorch notebook front end).
itorch.image({image.lena(), image.lena(), image.lena()})
print('Classes: 1-' .. classes[1] .. ' 2-' .. classes[2])

In [None]:
--[[ Test on original hdf5 dataset:
--loading hdf5 data of training samples
train_file = hdf5.open(base_path .. 'subgestures/ASL_hand_train_tiny.hdf5', 'r')
X_train = train_file:read('X_train'):all()
y_train = train_file:read('y_train'):all()
train_file:close()
--loading hdf5 data of test examples
test_file = hdf5.open(base_path .. 'subgestures/ASL_hand_test.hdf5', 'r')
X_test = test_file:read('X_test'):all()
y_test = test_file:read('y_test'):all()
test_file:close()

N_train = (#X_train)[1]
N_test = (#X_test)[1]
print('Number of training samples:' .. tostring(N_train))
print('Number of test samples:' .. tostring(N_test))
---- let's display an image first: (only works with qlua or itorch)
itorch.image(X_train[100]) -- display the 100-th image in dataset
print(classes[y_train[100]+1])
]]

-- Load the serialized training set; dataset_size picks the file variant.
local trainset_file = base_path .. 'subgestures/' .. 'ASL_torch_hand_train_' .. dataset_size .. '.t7'
trainset = torch.load(trainset_file)
print('Number of training samples:' .. trainset.data:size(1))

In [None]:
---- Visual sanity check (only works with qlua or itorch): show two training
---- samples, each followed by its class name.
-- NOTE(review): index 25000 assumes the loaded set has at least that many
-- samples -- confirm for the 'tiny' variant.
for _, sample_idx in ipairs({1, 25000}) do
  itorch.image(trainset.data[sample_idx])
  print(classes[trainset.label[sample_idx][1]])
end

In [None]:
-- Cast the samples to FloatTensor so mean/std arithmetic is done in floating point.
trainset.data = trainset.data:float()
-- Let trainset[i] yield the pair {sample, label} for any key that is not a
-- real field of the table.
setmetatable(trainset, {
    __index = function(self, idx)
        return {self.data[idx], self.label[idx]}
    end
});
-- Per-channel statistics, kept so that the validation and test sets can be
-- normalized with the training-set values later on.
mean, stdv = {}, {}
for c = 1, 3 do -- over each image channel
    -- All in-place operations below go through this one sub-tensor of channel c.
    local channel = trainset.data[{ {}, {c}, {}, {}  }]

    mean[c] = channel:mean() -- mean estimation
    print('Channel ' .. c .. ', Mean: ' .. mean[c])
    channel:add(-mean[c]) -- mean subtraction

    -- The std is measured after centering, then used to rescale the channel.
    stdv[c] = channel:std() -- std estimation
    print('Channel ' .. c .. ', Standard Deviation: ' .. stdv[c])
    channel:div(stdv[c]) -- std scaling
end

In [None]:
--[[ A simple Lenet and Stochastic gradient trainer
model:add(nn.SpatialConvolution(3, 6, 5, 5, 1, 1, 2, 2)) 
model:add(nn.ReLU())                
model:add(nn.SpatialMaxPooling(2,2,2,2))    
model:add(nn.SpatialConvolution(6, 16, 5, 5, 1, 1, 2, 2))
model:add(nn.ReLU())                      
model:add(nn.SpatialMaxPooling(2,2,2,2))
model:add(nn.View(16*14*14))                   
model:add(nn.Linear(16*14*14, 120))            
model:add(nn.ReLU())                     
model:add(nn.Linear(120, 84))
model:add(nn.ReLU())                     
model:add(nn.Linear(84, 2))              
model:add(nn.LogSoftMax())              

print('Lemodel\n' .. model:__tostring());

criterion = nn.ClassNLLCriterion()

criterion = criterion:cuda()
trainset.data = trainset.data:cuda()
trainset.label = trainset.label:cuda()

trainer = nn.StochasticGradient(model, criterion)
trainer.learningRate = 0.01
trainer.learningRateDecay = 0.95
trainer.maxIteration = 25 

trainer:train(trainset)
]]

In [None]:
-- Training options shared by the cells below.
opt = {
  save = 'Logs',                -- directory for logs, plots and checkpoints
  batchSize = 250,
  learningRateDecay = 0.95,
  learningRate = 0.003,
  weightDecay = 0.0001,
  momentum = 0.9,
  model = 'customized_model',   -- file name (without .lua) under models/
  epoch_step = 25,              -- the learning rate is halved every epoch_step epochs
  max_epoch = 300,
  backend = 'nn',
}

print(color.blue('==>') .. ' configuring model')
model = nn.Sequential()

-- Module 1: casts the incoming FloatTensor batch to a CudaTensor.
local cast_layer = nn.Copy('torch.FloatTensor','torch.CudaTensor'):cuda()
model:add(cast_layer)

-- Module 2: the network returned by the model definition script.
local network = dofile('models/'..opt.model..'.lua')
model:add(network:cuda())

-- The Copy layer must not propagate gradients back to the input, so its
-- updateGradInput is replaced by a function that returns nothing.
model:get(1).updateGradInput = function(input) return end

if opt.backend == 'cudnn' then
   require 'cudnn'
   -- swap the modules of the network for their cudnn counterparts
   cudnn.convert(model:get(2), cudnn)
end


print('Customized model\n' .. model:__tostring());
--print(model)

-- Parameter and gradient tensors handed to the optimizer by train().
parameters,gradParameters = model:getParameters()


In [None]:
-- Two-class confusion matrix shared by train() and test().
confusion = optim.ConfusionMatrix(2)

print('Will save at '..opt.save)
paths.mkdir(opt.save)

-- Logger that receives one (train, val) accuracy pair per epoch from test().
local log_file = paths.concat(opt.save, 'test.log')
testLogger = optim.Logger(log_file)
testLogger:setNames({'% mean class accuracy (train set)', '% mean class accuracy (val set)'})
testLogger.showPlot = true

In [None]:
print(color.blue('==>') .. ' setting criterion')
-- Loss used by train(); moved to the GPU like the model.
criterion = nn.CrossEntropyCriterion()
criterion = criterion:cuda()

print(color.blue('==>') .. ' configuring optimizer')
-- State table passed to optim.sgd on every batch; seeded from opt.
-- NOTE(review): confirm that learningRateDecay = 0.95 matches the decay
-- semantics of optim.sgd, since train() additionally halves the rate itself.
optimState = {
  learningRate      = opt.learningRate,
  weightDecay       = opt.weightDecay,
  momentum          = opt.momentum,
  learningRateDecay = opt.learningRateDecay,
}
print(optimState)

In [None]:
--- Run one epoch of mini-batch SGD over `trainset`.
-- Reads the globals model, criterion, parameters, gradParameters, opt,
-- optimState, trainset and confusion. Updates the globals `epoch`
-- (incremented once per call) and `train_acc` (accuracy of this epoch, in
-- percent), and resets `confusion` before returning.
function train()
  -- sets the mode of the Module (or sub-modules) to train=true
  model:training()
  epoch = epoch or 1

  -- drop learning rate every "epoch_step" epochs
  if epoch % opt.epoch_step == 0 then optimState.learningRate = optimState.learningRate/2 end

  print(color.blue '==>'.." online epoch # " .. epoch .. ' [batchSize = ' .. opt.batchSize .. ']')

  local targets = torch.CudaTensor(opt.batchSize)
  local indices = torch.randperm(trainset.data:size(1)):long():split(opt.batchSize)
  -- `targets` has a fixed size, so every batch must hold exactly batchSize
  -- samples. Only a short trailing batch is dropped; a full-sized last batch
  -- (dataset size divisible by batchSize) is kept instead of being discarded.
  local last_batch = indices[#indices]
  if last_batch and last_batch:size(1) ~= opt.batchSize then
    indices[#indices] = nil
  end

  local tic = torch.tic()
  for t,v in ipairs(indices) do
    xlua.progress(t, #indices)

    local inputs = trainset.data:index(1,v)
    targets:copy(trainset.label:index(1,v))

    -- Closure evaluated by optim.sgd: returns the loss and its gradient with
    -- respect to the flattened parameters for the current batch.
    local feval = function(x)
      if x ~= parameters then parameters:copy(x) end
      gradParameters:zero()

      local outputs = model:forward(inputs)
      local f = criterion:forward(outputs, targets)
      local df_do = criterion:backward(outputs, targets)
      model:backward(inputs, df_do)

      -- accumulate training predictions for the epoch accuracy
      confusion:batchAdd(outputs, targets)

      return f,gradParameters
    end
    optim.sgd(feval, parameters, optimState)
  end

  confusion:updateValids()
  print(('Train accuracy: '..color.cyan'%.2f'..' %%\t time: %.2f s'):format(
        confusion.totalValid * 100, torch.toc(tic)))

  -- kept in a global so that test() can log it next to the validation accuracy
  train_acc = confusion.totalValid * 100

  confusion:zero()
  epoch = epoch + 1
end

In [None]:
--- Evaluate the model on `valset`, log the accuracies and write an HTML report.
-- Reads the globals model, valset, confusion, opt, optimState, testLogger,
-- train_acc and epoch. Writes test.png, test.base64 and report.html into
-- opt.save, stores a checkpoint when `epoch` is a multiple of 50, and resets
-- `confusion` before returning.
function test()
  -- disable flips, dropouts and batch normalization
  -- sets the mode of the Module (or sub-modules) to train=false
  model:evaluate()

  print(color.blue '==>'.." testing")

  local bs = 125
  local n_samples = valset.data:size(1)
  for i=1,n_samples,bs do
    -- Clamp the last batch so narrow() never reaches past the final sample
    -- when the set size is not a multiple of bs.
    local len = math.min(bs, n_samples - i + 1)
    local outputs = model:forward(valset.data:narrow(1,i,len))
    confusion:batchAdd(outputs, valset.label:narrow(1,i,len))
  end

  confusion:updateValids()
  print('Test accuracy:', confusion.totalValid * 100)

  if testLogger then
    paths.mkdir(opt.save)
    testLogger:add{train_acc, confusion.totalValid * 100}
    testLogger:style{'-','-'}
    testLogger:plot()

    -- Convert the plot to PNG and base64 so it can be embedded in the report.
    -- Falls back to an empty image when the external tools produced no file.
    local base64im = ''
    do
      os.execute(('convert -density 200 %s/test.log.eps %s/test.png'):format(opt.save,opt.save))
      os.execute(('openssl base64 -in %s/test.png -out %s/test.base64'):format(opt.save,opt.save))
      local f = io.open(opt.save..'/test.base64')
      if f then
        base64im = f:read'*all'
        f:close()
      end
    end

    -- Fail with the system error message if the report cannot be created.
    local file = assert(io.open(opt.save..'/report.html','w'))
    file:write(([[
    <!DOCTYPE html>
    <html>
    <body>
    <title>%s - %s</title>
    <img src="data:image/png;base64,%s">
    <h4>optimState:</h4>
    <table>
    ]]):format(opt.save,tostring(epoch),base64im))
    for k,v in pairs(optimState) do
      if torch.type(v) == 'number' then
        file:write('<tr><td>'..k..'</td><td>'..v..'</td></tr>\n')
      end
    end
    file:write'</table><pre>\n'
    file:write(tostring(confusion)..'\n')
    file:write(tostring(model)..'\n')
    file:write'</pre></body></html>'
    file:close()
  end

  -- save model every 50 epochs
  if epoch and epoch % 50 == 0 then
    local filename = paths.concat(opt.save, 'model.net')
    print('==> saving model to '..filename)
    -- The container holds two modules (the Copy cast and the network); the
    -- network is module 2, there is no module 3.
    torch.save(filename, model:get(2):clearState())
  end

  confusion:zero()
end

In [None]:
-- Load the validation set and cast its samples to FloatTensor.
local valset_file = base_path .. 'subgestures/' .. 'ASL_torch_hand_val.t7'
valset = torch.load(valset_file)
valset.data = valset.data:float()

-- Inspect example 100: statistics before normalization, class name, image.
sample = valset.data[100]
local sample_mean, sample_std = sample:mean(), sample:std()
print(sample_mean, sample_std)
-- each label row holds a single value, hence the extra [1]
local sample_label = valset.label[100][1]
print(classes[sample_label])
itorch.image(valset.data[100])

In [None]:
-- Normalize the validation samples with the statistics stored in `mean` and
-- `stdv` (one entry per image channel).
for c = 1, 3 do
    local channel = valset.data[{ {}, {c}, {}, {}  }]
    channel:add(-mean[c]) -- mean subtraction
    channel:div(stdv[c])  -- std scaling
end
-- Move the samples to the GPU; without this an error occurs:
-- (cannot convert 'struct THCudaTensor *' to 'struct THDoubleTensor *')
valset.data = valset.data:cuda()
-- valset.label = valset.label:cuda() -- shouldn't be converted into cuda tensor

In [None]:
-- Main loop: one training epoch followed by one validation pass,
-- repeated opt.max_epoch times.
for _ = 1, opt.max_epoch do
  train()
  test()
end

In [None]:
-- Class probabilities for validation sample 100.
-- The model is trained with nn.CrossEntropyCriterion, so its outputs are not
-- guaranteed to be log-probabilities; a softmax gives valid probabilities for
-- raw scores and for log-probabilities alike. The computation runs on a
-- FloatTensor copy so the model's own output tensor is left untouched.
local scores = model:forward(valset.data[100])
local probs = torch.FloatTensor(scores:size()):copy(scores)
probs:add(-probs:max()):exp() -- subtract the maximum first for numerical stability
probs:div(probs:sum())
predicted = probs
print(predicted)
--To make it clearer, let us tag each probability with its class-name:
for i=1,predicted:size(1) do
    print(classes[i] .. ' ' .. predicted[i])
end

In [None]:
-- Per-sample evaluation on the validation set: overall accuracy plus
-- specificity (class 1, 'Non-hand') and sensitivity (class 2, 'Hand').
-- The sample count is taken from the data tensor, because no size() method is
-- defined on the valset table in this file.
N_val = valset.data:size(1)

N_val_pos = 0
N_val_neg = 0
correct = 0
class_performance = {0, 0}

-- make sure the model is in inference mode even if this cell is run on its own
model:evaluate()

-- A single pass collects the class counts and all hit counters, so every
-- sample goes through the network only once.
for i=1,N_val do
    ---- disp progress
    --xlua.progress(i, N_val)

    local groundtruth = valset.label[i][1]
    if groundtruth == 1 then
        N_val_neg = N_val_neg + 1
    else
        N_val_pos = N_val_pos + 1
    end

    local prediction = model:forward(valset.data[i])
    local confidences, indices = torch.sort(prediction, true)  -- true means sort in descending order
    if groundtruth == indices[1] then
        correct = correct + 1
        class_performance[groundtruth] = class_performance[groundtruth] + 1
    end
end
print('N_val_neg: ' .. N_val_neg)
print('N_val_pos: ' .. N_val_pos)

print('Accuracy: ', 100*correct/N_val .. '% ')

-- scaled by 100 so the printed values really are percentages, like Accuracy
print('Specificity: ', 100*class_performance[1] / N_val_neg .. '%')
print('Sensitivity: ', 100*class_performance[2] / N_val_pos .. '%')

In [None]:
-- space collection: drop every field of the training and validation tables,
-- then run the garbage collector so the released tensors are actually
-- reclaimed before the test set is loaded.
for k in pairs(trainset) do trainset[k]=nil end
for k in pairs(valset) do valset[k]=nil end
collectgarbage()
collectgarbage()

In [None]:
-- Evaluate on test set: overall accuracy plus specificity (class 1,
-- 'Non-hand') and sensitivity (class 2, 'Hand').
testset = torch.load(base_path .. 'subgestures/' .. 'ASL_torch_hand_test.t7')
testset.data = testset.data:float() -- convert the data from a ByteTensor to a FloatTensor.
-- normalize test samples with the statistics stored in `mean` and `stdv`
for i=1,3 do -- over each image channel
    testset.data[{ {}, {i}, {}, {}  }]:add(-mean[i]) -- mean subtraction
    testset.data[{ {}, {i}, {}, {}  }]:div(stdv[i]) -- std scaling
end
testset.data = testset.data:cuda() -- or error occurs: (cannot convert 'struct THCudaTensor *' to 'struct THDoubleTensor *')

-- The sample count is taken from the data tensor, because no size() method is
-- defined on the testset table in this file.
N_test = testset.data:size(1)
N_test_pos = 0
N_test_neg = 0
correct = 0
class_performance = {0, 0}

-- make sure the model is in inference mode even if this cell is run on its own
model:evaluate()

-- A single pass collects the class counts and all hit counters, so every
-- sample goes through the network only once.
for i=1,N_test do
    local groundtruth = testset.label[i][1]
    if groundtruth == 1 then
        N_test_neg = N_test_neg + 1
    else
        N_test_pos = N_test_pos + 1
    end

    local prediction = model:forward(testset.data[i])
    local confidences, indices = torch.sort(prediction, true)  -- true means sort in descending order
    if groundtruth == indices[1] then
        correct = correct + 1
        class_performance[groundtruth] = class_performance[groundtruth] + 1
    end
end
print('N_test_neg: ' .. N_test_neg)
print('N_test_pos: ' .. N_test_pos)

print('Accuracy: ', 100*correct/N_test .. '% ')

-- scaled by 100 so the printed values really are percentages, like Accuracy
print('Specificity: ', 100*class_performance[1] / N_test_neg .. '%')
print('Sensitivity: ', 100*class_performance[2] / N_test_pos.. '%')

In [None]:
-- Persist the whole trained container; the file name carries dataset_size.
classifier = model
model_out_path = table.concat({model_path, 'ASL_hand_classifier_', dataset_size, '.t7'})
print('Writing model to file ' .. model_out_path)
torch.save(model_out_path, classifier)