# C-SWM Baseline Model

I have prepared this notebook to demonstrate data loading, batching, testing operations. I will compile them at a seperate utils.jl file.

In [1]:
using HDF5
using Knet
using Statistics: mean
using Random

In [2]:
atype = KnetArray{Float32}

SAVE_FOLDER = "./checkpoints"
NUM_STEPS = 1
TRAIN_DATASET_PATH = "/home/cagan/dev/datasets/C-SWM/shapes_train.h5"
BATCH_SIZE = 100
SEED = 0
NUM_OBJECTS = 5

5

In [3]:
function loadh5file(DATASET_PATH)
    f_e = h5open(DATASET_PATH,"r")
    dict = read(f_e)
    close(f_e)
    return dict
end

loadh5file (generic function with 1 method)

In [4]:
struct StateTransitionDataset
    """Create dataset of (o_t, a_t, o_{t+1}) transitions from replay buffer."""
   
    experience_buffer;
    # Build table for conversion between linear idx -> episode/step idx
    idx2episode;
    
    #Container to hold total number of steps
    num_steps;
    
    #Read array
    batch_idxs;
    
end

In [5]:
function buildDataset(DATASET_PATH, d_shuffle)
    
    experience_buffer = loadh5file(DATASET_PATH)   
    step = 0
    
    println("Dataset loaded. Building dataset indexing.")
    
    idx2episode = []
    
    
    for ep in 1:length(experience_buffer)
        
        ep_key = string(ep-1)
        num_steps = length(experience_buffer[ep_key]["action"])
        
        for i in 1:num_steps
           
            push!(idx2episode,(ep_key,i))
            
        end 
        
        step += num_steps
        
    end
         
    batch_idxs = collect(1:step)
    
    if d_shuffle
        batch_idxs = shuffle(batch_idxs)
    end
        
    
    return  StateTransitionDataset(experience_buffer,idx2episode,step, batch_idxs)
    
end

buildDataset (generic function with 1 method)

In [6]:
dtrn = buildDataset(TRAIN_DATASET_PATH,true);

Dataset loaded. Building dataset indexing.


In [7]:
TOTAL_SET_SIZE = dtrn.num_steps
net_threshold = TOTAL_SET_SIZE ÷ BATCH_SIZE

1000

In [8]:
function getitem(s,idx)
    
    ep_key, step = s.idx2episode[idx]
    obs = s.experience_buffer[ep_key]["obs"][:,:,:,step]
    action = s.experience_buffer[ep_key]["action"][step]
    next_obs = s.experience_buffer[ep_key]["next_obs"][:,:,:,step]
    
    return obs,action,next_obs
    
end

(s::StateTransitionDataset)(idx) = getitem(s,idx)

In [9]:
function prepareBatch(s,idx_1, idx_2)
    """Lazy loader to GPU."""
    
    minibatch = s.batch_idxs[idx_1:idx_2]
    minibatch_batch_size = size(minibatch,1)
    
    #Read
    b_obs = zeros(minibatch_batch_size,3,50,50)
    b_next_obs = zeros(minibatch_batch_size,3,50,50)
    b_action = zeros(minibatch_batch_size)
    
    for i in 1:length(minibatch_batch_size)
        
        idx = minibatch[i]
        obs, action, next_obs = s(idx) 
        
        #Insert obs
        obs = permutedims(obs, [3, 1, 2])
        b_obs[i,:,:,:] = reshape(obs, (1,size(obs)...))
        
        #Assign action
        b_action[i] = action
        
        #Insert next_obs
        next_obs = permutedims(next_obs, [3, 1, 2])
        b_next_obs[i,:,:,:] = reshape(next_obs, (1,size(obs)...))
        
    end
    
    return atype(b_obs), b_action, atype(b_next_obs)
    
end

prepareBatch (generic function with 1 method)

In [10]:
function batchIdxHelper(batch_idx)

    net_threshold = TOTAL_SET_SIZE ÷ BATCH_SIZE
    remainder = TOTAL_SET_SIZE % BATCH_SIZE
    
    idx_1 = -1
    idx_2 = -1
    
    if batch_idx <= net_threshold
        
        idx_1 = 1 + BATCH_SIZE*(batch_idx-1)
        idx_2 = BATCH_SIZE*batch_idx
        
    elseif batch_idx == net_threshold
        
        idx_1 = 1 + BATCH_SIZE*(batch_idx-1)
        idx_2 = TOTAL_SET_SIZE
        
    end

    return idx_1, idx_2
    
end

batchIdxHelper (generic function with 1 method)

In [11]:
idx_1, idx_2 = batchIdxHelper(1000)

(99901, 100000)

In [12]:
obs, b_action, next_obs = prepareBatch(dtrn,idx_1,idx_2);

In [13]:
println(size(obs))
println(size(b_action))
println(size(next_obs))

(100, 3, 50, 50)
(100,)
(100, 3, 50, 50)


In [14]:
function getBatch(dataset,idx)
    
    idx_1, idx_2 = batchIdxHelper(idx)
    
    if idx_1 == -1 && idx_2 == -1
    
        println("Invalid batch index") 
        return
        
    end
    
    obs, action, next_obs = prepareBatch(dataset,idx_1,idx_2)
    
    return obs, action, next_obs
end

getBatch (generic function with 1 method)

In [15]:
obs, action, next_obs = getBatch(dtrn,1)

(K32(100,3,50,50)[0.0⋯], [11.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], K32(100,3,50,50)[0.0⋯])

In [16]:
println(size(obs))
println(size(b_action))
println(size(next_obs))

(100, 3, 50, 50)
(100,)
(100, 3, 50, 50)


# ConvNet

I have get a ConvNet from examples as a baseline.

In [23]:
frame_diff = next_obs - obs;

In [25]:
# define model
function initmodel(; atype=KnetArray, dtype=Float32, winit=xavier, binit=zeros)
    w(dims...)=atype(winit(dtype,dims...))
    b(dims...)=atype(binit(dtype,dims...))
    return Any[
        w(3,3,3,50), b(1,1,50,1),
        w(3,3,50,50), b(1,1,50,1),
        w(3,3,50,100), b(1,1,100,1),
        w(3,3,100,100), b(1,1,100,1),
        w(512,6400), b(512,1),
        w(20,512), b(20,1)
    ]
end;

In [26]:
# define loss and its gradient
function predict(w,x; pdrop=(0,0))
    convbias(x,w,b) = conv4(w,x;padding=1) .+ b
    fc(x,w,b) = w * mat(x) .+ b;
    x = relu.(convbias(x,w[1],w[2]))
    x = relu.(pool(convbias(x,w[3],w[4])))
    x = dropout(x,pdrop[1])
    x = relu.(convbias(x,w[5],w[6]))
    x = relu.(pool(convbias(x,w[7],w[8])))
    x = dropout(x,pdrop[1])
    x = relu.(fc(x,w[9],w[10]))
    x = dropout(x,pdrop[2])
    return fc(x,w[11],w[12])
end

loss(w,x,y;o...)=nll(predict(w,x;o...),y) # nll: negative log likelihood
lossgradient = grad(loss);

In [27]:
# prepare for training
model = optim = nothing; Knet.gc() # Clear memory from last run
model = initmodel()

12-element Array{Any,1}:
 K32(3,3,3,50)[0.08358852⋯]     
 K32(1,1,50,1)[0.0⋯]            
 K32(3,3,50,50)[-0.05785168⋯]   
 K32(1,1,50,1)[0.0⋯]            
 K32(3,3,50,100)[-0.05171234⋯]  
 K32(1,1,100,1)[0.0⋯]           
 K32(3,3,100,100)[-0.035115782⋯]
 K32(1,1,100,1)[0.0⋯]           
 K32(512,6400)[0.0017206036⋯]   
 K32(512,1)[0.0⋯]               
 K32(20,512)[-0.051471993⋯]     
 K32(20,1)[0.0⋯]                

In [34]:
function toOneHot(action_dim, idx)
   
    vec = zeros(action_dim)
    
    vec[idx] = 1.0
    
    return atype(vec)
    
end

toOneHot (generic function with 1 method)

In [36]:
toOneHot(20,3)

20-element KnetArray{Float32,1}:
 0.0
 0.0
 1.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0