In [3]:
import pickle
import numpy as np
import tensorflow as tf

In [4]:
# Load the pickled object stored in hw2_q2.pkl into `data`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open hw2_q2.pkl if it comes from a trusted source.
with open('hw2_q2.pkl', 'rb') as f:
    data = pickle.load(f)

In [8]:
# The 'train' entry becomes the training set; the 'test' entry is used as the
# validation set in this notebook.
data_trn, data_val = data['train'], data['test']
# Print both shapes as a quick sanity check.
print(data_trn.shape, data_val.shape)

(20000, 32, 32, 3) (6838, 32, 32, 3)


In [10]:
def resnet(layer_in, scope, num_channels, output_dim, num_filters=256, num_blocks=8):
    """Build a small residual CNN on top of ``layer_in``.

    Structure:
      1. a 3x3 convolution mapping ``num_channels`` -> ``num_filters``;
      2. ``num_blocks`` residual blocks, each
         1x1 conv -> ReLU -> 3x3 conv -> ReLU -> 1x1 conv, whose output is
         added to the block input and passed through ReLU;
      3. a 3x3 convolution mapping ``num_filters`` -> ``output_dim``.

    All convolutions go through ``conv2d`` with stride (1, 1).

    Args:
        layer_in: input tensor fed to the first convolution.
        scope: name of the variable scope that holds every weight created here.
        num_channels: number of channels in ``layer_in``.
        output_dim: number of channels in the returned tensor.
        num_filters: number of channels used inside the residual stack.
        num_blocks: number of residual blocks.

    Returns:
        The output tensor of the final convolution.
    """
    # TODO: BatchNorm & WeightNormalization

    with tf.variable_scope(scope):
        h = conv2d(layer_in, scope="conv2d", kernel=(3, 3), stride=(1, 1),
                   in_channels=num_channels, out_channels=num_filters)
        for idx in range(num_blocks):
            block_scope = "conv2d_" + str(idx)
            _h = conv2d(h, scope=block_scope + "_0", kernel=(1, 1), stride=(1, 1),
                        in_channels=num_filters, out_channels=num_filters)
            _h = tf.nn.relu(_h)
            _h = conv2d(_h, scope=block_scope + "_1", kernel=(3, 3), stride=(1, 1),
                        in_channels=num_filters, out_channels=num_filters)
            _h = tf.nn.relu(_h)
            # The closing 1x1 convolution needs its own scope and channel
            # counts, otherwise the conv2d call fails with a TypeError.
            _h = conv2d(_h, scope=block_scope + "_2", kernel=(1, 1), stride=(1, 1),
                        in_channels=num_filters, out_channels=num_filters)
            # Residual connection: the activation is applied to the sum, not to
            # the branch output alone (which would discard the skip path).
            h = tf.nn.relu(h + _h)
        layer_out = conv2d(h, scope="resnet_layer_out", kernel=(3, 3), stride=(1, 1),
                           in_channels=num_filters, out_channels=output_dim)
    return layer_out
        
def conv2d(layer_in, scope, kernel, stride, in_channels, out_channels):
    """Apply a bias-free 2-D convolution with 'SAME' padding.

    A variable named "weights" of shape
    [kernel_h, kernel_w, in_channels, out_channels] is fetched (or created with
    a Xavier initializer) inside ``scope`` and convolved with ``layer_in``.

    Args:
        layer_in: input tensor passed to ``tf.nn.conv2d``.
        scope: variable scope under which the filter variable lives.
        kernel: pair (kernel_h, kernel_w).
        stride: pair (stride_h, stride_w); used as [1, stride_h, stride_w, 1].
        in_channels: third dimension of the filter variable.
        out_channels: fourth dimension of the filter variable.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    with tf.variable_scope(scope):
        (k_rows, k_cols), (s_rows, s_cols) = kernel, stride
        filter_shape = [k_rows, k_cols, in_channels, out_channels]
        weights = tf.get_variable(
            name="weights",
            shape=filter_shape,
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(),
        )
        return tf.nn.conv2d(
            input=layer_in,
            filter=weights,
            strides=[1, s_rows, s_cols, 1],
            padding='SAME',
            name='conv2d_layer_out',
        )

$$
y=b \odot x+(1-b) \odot(x \odot \exp (s(b \odot x))+t(b \odot x))
$$

In [14]:
class Layer(object):
    """Abstract interface for the layers defined in this notebook.

    Subclasses override both methods; the base versions only raise
    ``NotImplementedError``.
    """

    def forward_and_jacobian(self, x, sum_log_det_jacobians, z):
        """Forward pass; must be overridden by subclasses.

        Args:
            x: layer input.
            sum_log_det_jacobians: running sum of log-determinants.
            z: accumulated factored-out values (may be None).

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def backward(self, y, z):
        """Inverse pass; must be overridden by subclasses.

        Args:
            y: layer output to invert.
            z: accumulated factored-out values (may be None).

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

In [13]:
class CouplingLayer(Layer):
    """Affine coupling layer.

    Forward:  y = b * x + (1 - b) * (x * exp(log_s(b * x)) + t(b * x))
    Inverse:  x = b * y + (1 - b) * (y - t(b * y)) * exp(-log_s(b * y))

    ``b`` is a binary mask selected by ``mask_type``:
      * "checkerboard0" / "checkerboard1": complementary spatial checkerboards;
      * "channel0" / "channel1": complementary masks over the channel axis.

    ``log_s`` and ``t`` are produced by ``resnet`` applied to the masked input.
    """

    _MASK_TYPES = ("checkerboard0", "checkerboard1", "channel0", "channel1")

    def __init__(self, scope, mask_type):
        self.scope = scope
        self.mask_type = mask_type

    def _get_mask(self, shape):
        """Return a float32 mask that broadcasts against a tensor of ``shape``.

        Args:
            shape: static shape of a rank-4 tensor; the last three dimensions
                must be known (the first one may be None).

        Returns:
            A constant tensor of shape [1, H, W, 1] (checkerboard masks) or
            [1, 1, 1, C] (channel masks).

        Raises:
            ValueError: if ``self.mask_type`` is not one of the supported names
                or a required dimension is unknown.
        """
        if self.mask_type not in self._MASK_TYPES:
            raise ValueError("Unknown mask_type: %r" % (self.mask_type,))

        _, height, width, channels = tf.TensorShape(shape).as_list()
        mask_name = "mask_" + self.mask_type

        if self.mask_type.startswith("checkerboard"):
            if height is None or width is None:
                raise ValueError("checkerboard masks need a static height and width")
            rows = np.arange(height).reshape(-1, 1)
            cols = np.arange(width).reshape(1, -1)
            # (row + col) % 2 has [[0, 1], [1, 0]] in its top-left corner,
            # i.e. the "checkerboard0" pattern tiled over the full image.
            board = ((rows + cols) % 2).astype(np.float32)
            if self.mask_type == "checkerboard1":
                board = 1.0 - board
            return tf.constant(board.reshape(1, height, width, 1),
                               dtype=tf.float32, name=mask_name)

        if channels is None:
            raise ValueError("channel masks need a static channel count")
        # "channel0" keeps the first half of the channels, "channel1" the
        # complementary set. Built per channel so the batch dimension (which
        # may be None) is never needed and odd channel counts still work.
        keep = np.zeros(channels, dtype=np.float32)
        keep[:channels // 2] = 1.0
        if self.mask_type == "channel1":
            keep = 1.0 - keep
        return tf.constant(keep.reshape(1, 1, 1, channels),
                           dtype=tf.float32, name=mask_name)

    def _build_log_s_t(self, masked_in, scope="_build_log_s_t"):
        """Run ``resnet`` on ``masked_in`` and split its output into (log_s, t).

        The network emits twice as many channels as ``masked_in`` has; the
        first half is ``log_s`` and the second half is ``t``.
        """
        num_channels = masked_in.get_shape().as_list()[-1]
        with tf.variable_scope(scope):
            resnet_out = resnet(masked_in, scope="resnet",
                                num_channels=num_channels, output_dim=num_channels * 2)
            log_s, t = tf.split(resnet_out, 2, axis=-1)
        return log_s, t

    def forward_and_jacobian(self, x, sum_log_det_jacobian, z):
        '''
        Apply the coupling transform and accumulate its log-determinant.

        sum_log_det_jacobian = (None,), i.e. one value per batch element.
        Returns (y, updated sum_log_det_jacobian, z); z is passed through.
        '''
        with tf.variable_scope(self.scope):
            mask = self._get_mask(x.get_shape())
            masked_x = mask * x
            log_s, t = self._build_log_s_t(masked_x)
            s = tf.check_numerics(tf.exp(log_s), "exp has NaN")
            y = masked_x + (1 - mask) * (x * s + t)
            # Only the transformed (unmasked) positions contribute to the
            # Jacobian; masked positions are copied through unchanged.
            sum_log_det_jacobian = sum_log_det_jacobian + tf.reduce_sum(
                (1 - mask) * log_s, axis=[1, 2, 3])

        # TODO: z changed? (currently returned untouched)
        return y, sum_log_det_jacobian, z

    def backward(self, y, z):
        """Invert ``forward_and_jacobian``, reusing the variables it created.

        Returns (x, z); z is passed through unchanged.
        """
        with tf.variable_scope(self.scope, reuse=True):
            mask = self._get_mask(y.get_shape())
            masked_y = mask * y
            log_s, t = self._build_log_s_t(masked_y)
            inv_s = tf.check_numerics(tf.exp(-log_s), "exp has NaN")
            # Undo y = x * exp(log_s) + t on the unmasked positions.
            x = masked_y + (1 - mask) * (y - t) * inv_s

        return x, z

In [15]:
class SqueezingLayer(Layer):
    """Trade spatial resolution for channels with block size 2.

    The forward pass applies ``tf.space_to_depth`` and the backward pass the
    matching ``tf.depth_to_space``. The same rearrangement is applied to ``z``
    when it is not None. The log-determinant sum is returned unchanged.
    """

    def __init__(self, scope):
        self.scope = scope
        # The original assigned `self.name`; keep that attribute available too.
        self.name = scope

    def forward_and_jacobian(self, x, sum_log_det_jacobians, z):
        """Return (space_to_depth(x), sum_log_det_jacobians, squeezed z)."""
        # The op takes the tensor itself, not its static shape.
        y = tf.space_to_depth(x, 2)
        if z is not None:
            z = tf.space_to_depth(z, 2)
        return y, sum_log_det_jacobians, z

    def backward(self, y, z):
        """Return (depth_to_space(y), unsqueezed z)."""
        x = tf.depth_to_space(y, 2)
        if z is not None:
            z = tf.depth_to_space(z, 2)
        return x, z

In [17]:
class FactorOutLayer(Layer):
    """Move half of the channels out of the flow and into ``z``.

    Forward: the first half of the channels of ``x`` is appended to ``z``
    (along the channel axis) and the second half continues through the network.
    Backward: the trailing channels of ``z`` are taken back and concatenated in
    front of ``y``.
    """

    def __init__(self, scale, scope):
        self.scale = scale
        self.scope = scope

    def forward_and_jacobian(self, x, sum_log_det_jacobians, z):
        """Return (remaining x, sum_log_det_jacobians, extended z)."""
        # Use a plain int so it can be used directly as a slice bound.
        split = x.get_shape().as_list()[-1] // 2

        new_z = x[:, :, :, :split]
        x = x[:, :, :, split:]
        if z is not None:
            z = tf.concat([z, new_z], axis=3)
        else:
            z = new_z
        return x, sum_log_det_jacobians, z

    def backward(self, y, z):
        """Return (x, remaining z) by factoring channels of ``z`` back in."""
        # At scale 0, 1/2 of the original dimensions are factored out
        # At scale 1, 1/4 of the original dimensions are factored out
        # ....
        # At scale s, (1/2)^(s+1) are factored out
        # Hence, at backward pass of scale s, (1/2)^(s) of z should be factored in

        if y is not None:
            split = y.get_shape().as_list()[-1]
        else:
            # The computed size must be stored, otherwise `split` is undefined
            # on this branch.
            split = z.get_shape().as_list()[-1] // (2 ** self.scale)

        new_y = z[:, :, :, -split:]
        z = z[:, :, :, :-split]

        if y is not None:
            x = tf.concat([new_y, y], axis=3)
        else:
            x = new_y

        return x, z


# The helpers below are module-level functions. They take no `self`, and `loss`
# calls `compute_log_prob_x` by its bare name, so they cannot live in the class.

# Given the output of the network and all jacobians,
# compute the log probability.
# Equation (3) of the RealNVP paper
def compute_log_prob_x(z, sum_log_det_jacobians):
    """Return the per-example log probability of the inputs.

    Args:
        z: rank-4 network output with static dimensions 1..3.
        sum_log_det_jacobians: accumulated log-determinants, one per example.

    Returns:
        Tensor with one log probability per example.
    """
    # z is assumed to be in standard normal distribution
    # 1/sqrt(2*pi)*exp(-0.5*x^2)
    zs = z.get_shape().as_list()
    K = zs[1] * zs[2] * zs[3]  # dimension of the Gaussian distribution

    log_density_z = -0.5 * tf.reduce_sum(tf.square(z), [1, 2, 3]) - 0.5 * K * np.log(2 * np.pi)

    log_density_x = log_density_z + sum_log_det_jacobians

    # to go from density to probability, one can
    # multiply the density by the width of the
    # discrete probability area, which is 1/256.0, per dimension.
    # The calculation is performed in the log space.
    log_prob_x = log_density_x - K * tf.log(256.0)

    return log_prob_x


# Computes the loss of the network.
# It is chosen so that the probability P(x) of the
# natural images is maximized.
def loss(z, sum_log_det_jacobians):
    """Return the negative mean log probability over the batch."""
    return -tf.reduce_mean(compute_log_prob_x(z, sum_log_det_jacobians))


# Adam optimizer.
# Adapted from the PixelCNN++ implementation by OpenAI.
# https://github.com/openai/pixel-cnn
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    """Build the Adam update op for ``params``.

    Args:
        params: list of variables to update.
        cost_or_grads: either a scalar cost (gradients are derived with
            ``tf.gradients``) or a list of gradients aligned with ``params``.
        lr: learning rate.
        mom1: first-moment decay; when not > 0 the raw gradient is used.
        mom2: second-moment decay.

    Returns:
        A single op grouping all variable assignments.
    """
    updates = []
    if type(cost_or_grads) is not list:
        grads = tf.gradients(cost_or_grads, params)
    else:
        grads = cost_or_grads
    # tf.Variable's second positional argument is `trainable`, not `name`, so
    # both are passed by keyword; optimizer state must not be trainable.
    t = tf.Variable(1., trainable=False, name='adam_t')
    for p, g in zip(params, grads):
        # p.name ends in ":0", which is not valid inside a variable name;
        # p.op.name is the same path without the output index.
        mg = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                         name=p.op.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), trainable=False,
                            name=p.op.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g
            v_hat = v_t / (1. - tf.pow(mom1, t))
            updates.append(v.assign(v_t))
        else:
            v_hat = g
        mg_t = mom2 * mg + (1. - mom2) * tf.square(g)
        mg_hat = mg_t / (1. - tf.pow(mom2, t))
        g_t = v_hat / tf.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    updates.append(t.assign_add(1))
    return tf.group(*updates)

<function tensorflow.python.ops.array_ops.depth_to_space(input, block_size, name=None, data_format='NHWC')>