In [1]:
require 'torch'
require 'nn'
require 'image'
require 'optim'

In [2]:
loadcaffe_wrap = require 'loadcaffe_wrapper'
json = require 'json'

In [10]:
cmd = torch.CmdLine()

-- Basic options
cmd:option('-style_dir', 'in/', 'Style input directory')
cmd:option('-tmp_dir', 'tmp/', 'Directory to store vectors on disk')
cmd:option('-gpu', -1, 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')

-- Other options
cmd:option('-pooling', 'max', 'max|avg')
cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt')
cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel')

cmd:option('-content_layers', 'relu4_2', 'layers for content')
cmd:option('-style_layers', 'relu1_1,relu1_2,relu2_1,relu2_2,relu3_1,relu3_2,relu3_3,relu3_4,relu4_1,relu4_2,relu4_3,relu4_4,relu5_1,relu5_2,relu5_3,relu5_4', 'layers for style') -- tbh all but relu6 and relu7, which cause size mismatches

In [4]:
-- Preprocess an image before passing it to a Caffe model.
-- We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
-- and subtract the mean pixel. [jcjohnson]
function preprocess(img)
  local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
  local perm = torch.LongTensor{3, 2, 1}
  img = img:index(1, perm):mul(256.0)
  mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
  img:add(-1, mean_pixel)
  return img
end

In [5]:
-- Returns a network that computes the CxC Gram matrix from inputs
-- of size C x H x W – jcjohnson's version
function GramMatrix()
    local net = nn.Sequential()
    net:add(nn.View(-1):setNumInputDims(2))
    local concat = nn.ConcatTable()
    concat:add(nn.Identity())
    concat:add(nn.Identity())
    net:add(concat)
    net:add(nn.MM(false, true))
    return net
end

In [7]:
function Style2Vec(img, cnn, style_layers, params)
    --[[ runs img through cnn, saving the output tensor at each of style_layers

    relu1_1 : FloatTensor - size: 64x64
    relu1_2 : FloatTensor - size: 64x64
    relu2_1 : FloatTensor - size: 128x128
    relu2_2 : FloatTensor - size: 128x128
    relu3_1 : FloatTensor - size: 256x256
    relu3_2 : FloatTensor - size: 256x256
    relu3_3 : FloatTensor - size: 256x256
    relu3_4 : FloatTensor - size: 256x256
    relu4_1 : FloatTensor - size: 512x512
    relu4_2 : FloatTensor - size: 512x512
    relu4_3 : FloatTensor - size: 512x512
    relu4_4 : FloatTensor - size: 512x512
    relu5_1 : FloatTensor - size: 512x512
    relu5_2 : FloatTensor - size: 512x512
    relu5_3 : FloatTensor - size: 512x512
    relu5_4 : FloatTensor - size: 512x512
    
    Returns a Lua table with the above key-value pairs.
    
    --]]
    
    local next_style_idx = 1
    local net = nn.Sequential()
    local style_vec = {}
    
    -- Build up net from cnn
    
    for i = 1, #cnn do
        if next_style_idx <= #style_layers then
            local layer = cnn:get(i)
            local layer_name = layer.name
            local layer_type = torch.type(layer)
            local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
            
            -- add layers to net from cnn, replacing max-pooling if necessary [jcjohnson]
            if is_pooling and params.pooling == 'avg' then
                local msg = 'Replacing max pooling at layer %d with average pooling'
                print(string.format(msg, i))
                assert(layer.padW == 0 and layer.padH == 0)
                -- kWxkH regions by step size dWxdH
                local kW, kH = layer.kW, layer.kH
                local dW, dH = layer.dW, layer.dH
                local avg_pool_layer = nn.SpatialAveragePooling(kW, kH, dW, dH):float()
                if params.gpu >= 0 then avg_pool_layer:cuda() end
                net:add(avg_pool_layer)
            else
                net:add(layer)
            end
            
            -- now to grab style layers
            
            if (layer_name == style_layers[next_style_idx]) then
                    
                local gram = GramMatrix():float()
                if params.gpu >= 0 then gram = gram:cuda() end
                local target_features = net:forward(img)
                local target_i = gram:forward(target_features)
                
                target_i:div(target_features:nElement())

                style_vec[layer_name] = target_i
                -- itorch.image(target_i) -- YA THIS IS THE VECTOR!!!
                
                maybe_save(params)
                
                next_style_idx = next_style_idx + 1
            end
        end
    end
        
    return style_vec
end

In [23]:
function maybe_save(params)
    local tmp = params.tmp_dir
    assert(paths.dir(tmp), 'Could not access ./' .. tmp)
    
    for f in paths.files(tmp) do
        print(f)
    end
    
    print(params.label)
    
    torch.save(params.tmp_dir .. 'params.json')
end

params.label = 'x'
maybe_save(params)




[string "function maybe_save(params)..."]:3: Could not access ./tmp/
stack traceback:
	[C]: in function 'assert'
	[string "function maybe_save(params)..."]:3: in function 'maybe_save'
	[string "function maybe_save(params)..."]:15: in main chunk
	[C]: in function 'xpcall'
	./itorch/main.lua:179: in function <./itorch/main.lua:143>
	/Users/razi/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	/Users/razi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	/Users/razi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/Users/razi/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	./itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	[string "arg={'/Users/razi/.ipython/profile_default/se..."]:1: in main chunk: 

In [11]:
-- let's get started

local arg = {} -- when running from cli, this will be defined
params = cmd:parse(arg)
print(params)

{
  gpu : -1
  pooling : max
  content_layers : relu4_2
  style_dir : in/
  style_layers : relu1_1,relu1_2,relu2_1,relu2_2,relu3_1,relu3_2,relu3_3,relu3_4,relu4_1,relu4_2,relu4_3,relu4_4,relu5_1,relu5_2,relu5_3,relu5_4
  tmp_dir : tmp/
  proto_file : models/VGG_ILSVRC_19_layers_deploy.prototxt
  model_file : models/VGG_ILSVRC_19_layers.caffemodel
}


In [9]:
-- load caffe network image
cnn = loadcaffe_wrap.load(params.proto_file, params.model_file, params.backend):float()

if params.gpu >= 0 then
    cnn:cuda()
end

Successfully loaded models/VGG_ILSVRC_19_layers.caffemodel


conv1_1: 64 3 3 3
conv1_2: 64 64 3 3
conv2_1: 128 64 3 3


conv2_2: 128 128 3 3
conv3_1: 256 128 3 3


conv3_2: 256 256 3 3


conv3_3: 256 256 3 3


conv3_4: 256 256 3 3


conv4_1: 512 256 3 3


conv4_2: 512 512 3 3


conv4_3: 512 512 3 3


conv4_4: 512 512 3 3


conv5_1: 512 512 3 3


conv5_2: 512 512 3 3


conv5_3: 512 512 3 3


conv5_4: 512 512 3 3


fc6: 1 1 25088 4096


fc7: 1 1 4096 4096


fc8: 1 1 4096 1000


In [10]:
-- load images

style_images = {}

for f in paths.iterfiles(params.style_dir) do
    local img = image.load(params.style_dir .. f)
    img = preprocess(img):float()
    if params.gpu >= 0 then 
        img = img:cuda()
    end
    
    -- itorch.image(img)
    style_images[f] = img
end

print(style_images)

{
  haring_color.jpg : FloatTensor - size: 3x474x475
  haring_bw.jpg : FloatTensor - size: 3x450x338
}


In [11]:
-- Run Style2Vec

style_vecs = {}
-- grab our params (all relu layers except 6 and 7)
local style_layers = params.style_layers:split(',')

for label, image in pairs(style_images) do
    io.write('Grabbing style from ' .. label .. '...')
    params.label = label
    local vec = Style2Vec(image, cnn, style_layers, params)
    style_vecs[label] = vec
    io.write(' Done!\n')
end

-- the code above gives us style_vecs["haring_bw.jpg"]["relu2_1 "] = torch.FloatTensor
-- print(style_vecs)

Grabbing style from haring_color.jpg...


 64
 64
[torch.LongStorage of size 2]


 224676
[torch.LongStorage of size 1]




 64
 64
[torch.LongStorage of size 2]


 224676
[torch.LongStorage of size 1]


 128
 128
[torch.LongStorage of size 2]


 56169
[torch.LongStorage of size 1]




 128
 128
[torch.LongStorage of size 2]


 56169
[torch.LongStorage of size 1]




 256
 256
[torch.LongStorage of size 2]


 14161
[torch.LongStorage of size 1]


 256
 256
[torch.LongStorage of size 2]


 14161
[torch.LongStorage of size 1]


 256
 256
[torch.LongStorage of size 2]


 14161
[torch.LongStorage of size 1]


 256
 256
[torch.LongStorage of size 2]


 14161
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 3600
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 3600
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 3600
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 3600
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 900
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 900
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 900
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 900
[torch.LongStorage of size 1]

 Done!
Grabbing style from haring_bw.jpg...
 64
 64
[torch.LongStorage of size 2]


 202500
[torch.LongStorage of size 1]




 64
 64
[torch.LongStorage of size 2]


 202500
[torch.LongStorage of size 1]




 128
 128
[torch.LongStorage of size 2]




 50625
[torch.LongStorage of size 1]




 128
 128
[torch.LongStorage of size 2]


 50625
[torch.LongStorage of size 1]




 256
 256
[torch.LongStorage of size 2]


 12769
[torch.LongStorage of size 1]




 256
 256
[torch.LongStorage of size 2]


 12769
[torch.LongStorage of size 1]




 256
 256
[torch.LongStorage of size 2]


 12769
[torch.LongStorage of size 1]


 256
 256
[torch.LongStorage of size 2]


 12769
[torch.LongStorage of size 1]


 512
 512
[torch.LongStorage of size 2]


 3249
[torch.LongStorage of size 1]


 512
 512
[torch.LongStorage of size 2]


 3249
[torch.LongStorage of size 1]


 512
 512
[torch.LongStorage of size 2]


 3249
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 3249
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 841
[torch.LongStorage of size 1]


 512
 512
[torch.LongStorage of size 2]


 841
[torch.LongStorage of size 1]


 512
 512
[torch.LongStorage of size 2]


 841
[torch.LongStorage of size 1]




 512
 512
[torch.LongStorage of size 2]


 841
[torch.LongStorage of size 1]



 Done!


In [None]:
-- clean up a little
cnn = nil
style_images = nil
collectgarbage()

In [110]:
function CosineSimilarity(x, y)
    local net = nn.Sequential()
    net:add(nn.CosineDistance())
    return net:forward({x, y})
end

function StyleDistance(x, y, sorted_layers)
    -- this function will return the distance from each layer, assuming x and y
    -- x["relu2_1 "] = torch.FloatTensor
    
    for _, i in ipairs(sorted_layers) do -- can you tell I'm new to Lua?
        local distance_vector = CosineSimilarity(x[i]:double(), y[i]:double())
        local avg_distance = torch.mean(distance_vector)
        
        local msg ='Distance at layer %s is: %f'
        print(string.format(msg, i, avg_distance))
    end
    
end
    
-- this is a little embarassing, no?
local labels = params.style_layers:split(',')
table.sort(labels)

StyleDistance(style_vecs['haring_bw.jpg'], style_vecs['haring_bw.jpg'], labels)
-- x = torch.Tensor({1, 2, 3})
-- y = torch.Tensor({4, 5, 6})
-- print(CosineSimilarity(x, y))

Distance at layer relu1_1 is: 1.000000	


Distance at layer relu1_2 is: 1.000000	


Distance at layer relu2_1 is: 0.984375	


Distance at layer relu2_2 is: 1.000000	


Distance at layer relu3_1 is: 0.996094	


Distance at layer relu3_2 is: 1.000000	


Distance at layer relu3_3 is: 1.000000	


Distance at layer relu3_4 is: 0.996094	


Distance at layer relu4_1 is: 0.968750	


Distance at layer relu4_2 is: 0.953125	


Distance at layer relu4_3 is: 0.984375	


Distance at layer relu4_4 is: 0.972656	


Distance at layer relu5_1 is: 0.984375	


Distance at layer relu5_2 is: 0.958984	


Distance at layer relu5_3 is: 0.925781	


Distance at layer relu5_4 is: 0.642368	
