In [78]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random

In [92]:
#subtract by the mean and divide by the standard deviation
#normalize data
# X_train = (X_train - np.mean(X_train)) / np.std(X_train)

#unused method to scale from 0 to 1~ish
def preproc(unclean_batch_x):
    """Scale a batch by its own maximum.

    Every element of ``unclean_batch_x`` is divided by the largest element
    of the batch, and the scaled result is returned.
    """
    peak = unclean_batch_x.max()
    scaled = unclean_batch_x / peak
    return scaled
#X_train = preproc(X_train)

def shuffle(Xtrain, ytrain):
    """Shuffle samples and labels together using one random row permutation.

    Both inputs are indexed with the same permutation instead of being
    stacked into a single array, so each keeps its own dtype and label
    arrays with more than one column (e.g. one-hot labels) are returned
    whole rather than truncated to their first column.

    Args:
        Xtrain: array of samples; the first axis indexes samples.
        ytrain: array-like of labels with the same number of rows as Xtrain.

    Returns:
        Tuple ``(X_shuffled, y_shuffled)`` with rows in matching random order.

    Raises:
        ValueError: if Xtrain and ytrain have a different number of rows.
    """
    features = np.asarray(Xtrain)
    labels = np.asarray(ytrain)
    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            "Xtrain and ytrain must have the same number of rows, got %d and %d"
            % (features.shape[0], labels.shape[0]))
    # One shared permutation keeps every sample paired with its label.
    order = np.random.permutation(features.shape[0])
    return features[order], labels[order]

def get_kernel_size(factor):
    """
    Return the kernel size to use for the requested upsampling factor:
    twice the factor, minus one when the factor is odd.
    """
    parity = factor % 2
    return factor * 2 - parity


def upsample_filt(size):
    """
    Build a square (size x size) 2D bilinear kernel suitable for upsampling.
    """
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    # 1D triangular profile; the 2D kernel is its product along rows and columns
    ramp = 1 - abs(np.arange(size) - center) / factor
    return ramp[:, np.newaxis] * ramp[np.newaxis, :]

def maxpool2d(x, k=2):
    """Max-pool `x` with a k x k window, stride k, and 'SAME' padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

# wrapper for applying spatial conv, batchnorm, reLU
def conv2d(x, W, b, stride):
    """Apply convolution, bias, batch normalization and ReLU, in that order.

    `stride` is used for both spatial dimensions and padding is 'SAME'.
    """
    strides = [1, stride, stride, 1]
    conv_out = tf.nn.conv2d(x, W, strides=strides, padding='SAME')
    biased = tf.nn.bias_add(conv_out, b)
    normalized = tf.contrib.layers.batch_norm(biased, center=True, scale=True)
    return tf.nn.relu(normalized)

def deconv2d(x, W, b, stride):
    """Convolution, bias and batch normalization followed by 2x bilinear upsampling.

    No ReLU is applied. The output height and width are twice the static
    height and width of the batch-normalized tensor.
    """
    conv_out = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
    normalized = tf.contrib.layers.batch_norm(
        tf.nn.bias_add(conv_out, b), center=True, scale=True)

    # double the spatial size with bilinear interpolation
    upscale = 2
    height, width = normalized.get_shape().as_list()[1:3]
    target_size = [height * upscale, width * upscale]
    return tf.image.resize_images(normalized, target_size,
                                  method=tf.image.ResizeMethod.BILINEAR)

# Stage identifiers; get_weight_name / get_bias_name embed them in parameter names
ENCODER = 'encoder'
MID = 'mid'
DECODER = 'decoder'

def get_weight_name(stage, layer_num):
    """Return the weight key for a layer, e.g. 'w_encoder_0'."""
    return '_'.join(('w', stage, str(layer_num)))
    
def get_bias_name(stage, layer_num):
    """Return the bias key for a layer, e.g. 'b_decoder_6'."""
    return '_'.join(('b', stage, str(layer_num)))

In [93]:
class IntrinsicNetwork(object):
    """Encoder -> mid -> decoder convolutional network with skip connections.

    Constructing an instance creates the input placeholder, all layer
    variables and the output tensor.

    Attributes:
        params: dict mapping the names produced by get_weight_name /
            get_bias_name to the tf.Variable created for each layer.
        input: float32 placeholder of shape [None] + input_dimensions.
        output: tensor produced by the last decoder layer.
    """

    # Number of layers built in each stage.
    NUM_ENCODER_LAYERS = 6
    NUM_MID_LAYERS = 4
    NUM_DECODER_LAYERS = 7

    def __init__(self, input_dimensions, encoder_dimensions, mid_dimensions, decoder_dimensions):
        """Build the whole network.

        Args:
            input_dimensions: per-sample input shape, e.g. (256, 256, 3).
            encoder_dimensions, mid_dimensions, decoder_dimensions: dicts keyed
                by get_weight_name(stage, layer_num); each value is a dict with
                'input_depth', 'output_depth', 'stride' and 'filter_size'.
        """
        self.params = {}
        # in the paper each input sample is 256x256x3
        self.input = tf.placeholder(tf.float32, shape=[None,] + list(input_dimensions))

        encoder_layers = self.get_encoder_layers(self.input, encoder_dimensions)
        mid_output = self.get_mid_output(encoder_layers[-1], mid_dimensions)
        self.output = self.get_decoder_output(mid_output, encoder_layers, decoder_dimensions)

    def _create_layer_params(self, stage, layer_num, dimensions):
        """Create and register one layer's filter and bias; return (W, b, stride)."""
        dims = dimensions[get_weight_name(stage, layer_num)]
        input_depth, output_depth, stride, filter_size = \
            (dims[key] for key in ['input_depth', 'output_depth', 'stride', 'filter_size'])
        # initialize weights and bias from a standard normal distribution
        W = tf.Variable(tf.random_normal([filter_size, filter_size, input_depth, output_depth]))
        b = tf.Variable(tf.random_normal([output_depth]))
        self.params[get_weight_name(stage, layer_num)] = W
        self.params[get_bias_name(stage, layer_num)] = b
        return W, b, stride

    def get_encoder_layers(self, input, dimensions):
        """Stack the encoder conv layers and return the output of every layer.

        All outputs are returned (not only the last one) because the decoder
        concatenates them with its own activations.
        """
        prev = input
        encoder_layers = []
        for layer_num in range(self.NUM_ENCODER_LAYERS):
            W, b, stride = self._create_layer_params(ENCODER, layer_num, dimensions)
            prev = conv2d(prev, W, b, stride)
            encoder_layers.append(prev)
        return encoder_layers

    def get_mid_output(self, encoder_output, dimensions):
        """Apply the mid-stage conv layers to the last encoder output."""
        prev = encoder_output
        for layer_num in range(self.NUM_MID_LAYERS):
            W, b, stride = self._create_layer_params(MID, layer_num, dimensions)
            prev = conv2d(prev, W, b, stride)
        return prev

    def get_decoder_output(self, mid_output, encoder_layers, dimensions):
        """Build the decoder and return its final output tensor.

        Decoder layer i (for i up to the number of encoder layers) first
        concatenates its input, along the channel axis, with the output of
        the encoder layer counted from the end. All but the last two decoder
        layers use deconv2d (which upsamples); the last two use conv2d.
        """
        prev = mid_output
        last_encoder = self.NUM_ENCODER_LAYERS - 1
        for layer_num in range(self.NUM_DECODER_LAYERS):
            W, b, stride = self._create_layer_params(DECODER, layer_num, dimensions)

            if layer_num <= last_encoder:  # skip connection from the mirrored encoder layer
                encoder_layer_input = encoder_layers[last_encoder - layer_num]
                prev = tf.concat([prev, encoder_layer_input], axis=3)
            if layer_num < last_encoder:  # upsampling layers
                prev = deconv2d(prev, W, b, stride)
            else:  # plain conv on the final layers
                prev = conv2d(prev, W, b, stride)
        return prev

In [94]:
# Layer configuration passed to IntrinsicNetwork
FILTER_SIZE = 3
INPUT_DEPTH = 3

# output depths of the layers
ENCODER_DEPTHS = [16, 32, 64, 128, 256, 256]
# the mid layers keep the depth of the last encoder layer
MID_DEPTH = ENCODER_DEPTHS[-1]
DECODER_DEPTHS = [256, 128, 64, 32, 16, 16, 3]

# defines the sizes for each of the conv / deconv layers
encoder_dimensions = {}
prev_depth = INPUT_DEPTH
for i, output_depth in enumerate(ENCODER_DEPTHS):
    weight_name = get_weight_name(ENCODER, i)
    stride = 1 if i == 0 else 2 # stride 1 for the first conv layer only
    encoder_dimensions[weight_name] = {
        'input_depth' : prev_depth,
        'output_depth' : output_depth,
        'stride' : stride,
        'filter_size' : FILTER_SIZE
    }
    prev_depth = output_depth

mid_dimensions = {}
for i in range(4):
    weight_name = get_weight_name(MID, i)
    mid_dimensions[weight_name] = {
        'input_depth' : MID_DEPTH,
        'output_depth' : MID_DEPTH,
        'stride' : 1,
        'filter_size' : FILTER_SIZE
    }

decoder_dimensions = {}
prev_depth = MID_DEPTH
for i, output_depth in enumerate(DECODER_DEPTHS):
    # the ith deconv layer's input is the concatenation of the previous output and the output of (5-i)th encoder conv layer
    # so the depth is the sum of the depths
    # the last two layers in this loop (5th and 6th layer) are conv layers, not deconv
    if i < 6:
        prev_encoder_depth = ENCODER_DEPTHS[5 - i]
        input_depth = prev_depth + prev_encoder_depth
    else: # 6th layer doesn't have concatenation
        input_depth = prev_depth
    weight_name = get_weight_name(DECODER, i)
    decoder_dimensions[weight_name] = {
        'input_depth' : input_depth,
        'output_depth' : output_depth,
        'stride' : 1,
        'filter_size' : FILTER_SIZE
    }
    prev_depth = output_depth

In [95]:
# Build the network for 256 x 256 inputs with INPUT_DEPTH channels.
# NOTE(review): the instance is not bound to a name, so only its repr is
# displayed and the network cannot be referenced by later cells.
IntrinsicNetwork((256, 256, INPUT_DEPTH), encoder_dimensions, mid_dimensions, decoder_dimensions)

<__main__.IntrinsicNetwork at 0x229c04a1588>

In [157]:
import re

# Parse a text dump of tensors into a list of numpy arrays (`params`).
# Lines are buffered in `current` until a "[torch.CudaTensor of size ...]"
# footer is reached; the footer's dimensions decide how the buffer is parsed.
params = []
# Footer line; group 1 is the size string (e.g. "16" or "16x3x3x3").
# The "." after "torch" is unescaped, so it matches any character.
end_re = re.compile(r'^\[torch.CudaTensor of size (\w+)\]')
# Slice header beginning with "(<first>,<second>"; indices are 1-based.
index_re = re.compile(r'^\((\d+),(\d+)')
with open('shapenet_intrinsics/train/model_weights.txt') as f:
    current = []
    # NOTE(review): linenum is never read or incremented.
    linenum = 1
    for line in f:
        end = end_re.match(line)
        if end:
            end_dims = tuple(map(int, end.group(1).split('x')))
            if len(end_dims) == 1:
                # 1-D tensor: one value per buffered line.
                # NOTE(review): a leading "0.01 *" line is dropped but the values
                # are not multiplied by 0.01; presumably this line is a common
                # scale factor of the dump format. Confirm whether the parsed
                # values need rescaling. Other scale factors are not handled.
                # NOTE(review): raises IndexError if `current` is empty.
                if current[0].startswith('0.01 *'):
                    current = current[1:]
                array = np.array(tuple(map(float, current)))
                assert(len(array) == end_dims[0])
            else:
                # Multi-dimensional tensor: the buffer is consumed in groups of
                # five lines, a slice header followed by three rows of values
                # (the fifth line of each group is skipped).
                # NOTE(review): assumes every slice has exactly 3 rows and that
                # each group starts with a header matching index_re; otherwise
                # match is None and .group raises AttributeError. TODO confirm
                # against the file format.
                array = np.zeros(end_dims)
                i = 0
                while i < len(current):
                    match = index_re.match(current[i])
                    # convert 1-based indices from the dump to 0-based
                    first_index = int(match.group(1)) - 1
                    second_index = int(match.group(2)) - 1
                    for j in range(3):
                        array[first_index][second_index][j] = np.array(tuple(map(float, current[i + 1 + j].lstrip().split())))
                    i += 5
            params.append(array)
            # start buffering the next tensor
            current = []
            continue
        current.append(line)

In [None]:
# Define loss and optimizer
# NOTE(review): `pred` is not defined in any cell shown here, `learning_rate`
# appears only in a commented-out line, and `y` is created by a cell further
# down. This cell will fail on Restart & Run All unless those names are
# defined first.
# Softmax cross-entropy between the logits `pred` and the labels `y`, averaged.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Evaluate model
# A prediction counts as correct when the argmax of `pred` equals the argmax of `y` along axis 1.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

In [None]:
# batch size of one = stochastic gradient descent, mini-batch gradient descent
# should use 10 at once
# mini batch neural nets tensorflow
# fc, relu, maybe a few repetitions, softmax

M = 70 #M is the input size factor

input_size = 32*M # 32 * 70 = 2240 values per sample
output_size = 32*M

# NOTE(review): dropout, dropout2 and learning_rate are only present as
# commented-out assignments below, but the training and loss cells read them.
# dropout = 0.8 # probability of keeping that data point
# dropout2 = 0.9

# learning_rate = 0.5

# data placeholders

# input data
x = tf.placeholder(tf.float32, shape=[None, input_size])
# correct answers
y = tf.placeholder(tf.float32, [None, output_size])
# dropout variable
# NOTE(review): no op in the visible cells consumes keep_prob / keep_prob2;
# they are only fed in the training cell.
keep_prob = tf.placeholder(tf.float32)
keep_prob2 = tf.placeholder(tf.float32)

In [None]:
# Mini-batch training loop: runs `optimizer` on consecutive slices of the
# training data for `epochs` passes, records the mean batch loss per epoch in
# `mean_losses`, then prints the accuracy on the validation arrays.
# NOTE(review): x_training, labels_training, x_valid, labels_valid, dropout and
# dropout2 are not defined in any cell shown here.
# more parameters
batch_size = 30
epochs = 5

display_step = 1

n_samples = x_training.shape[0]
#rounds_number = data_size/ batch_size

mean_losses = []
# NOTE(review): mean_accs is never appended to; the accuracy bookkeeping below is commented out.
mean_accs = []

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Keep training until reach max iterations
    for epoch in range(epochs):
        print("running epoch: ", epoch)
        # integer division: samples beyond the last full batch are not used
        total_iter = n_samples // batch_size #total_iter
        total_loss = 0
#         indeces = np.random.permutations(np.arange(len(n_samples))

        for i in range(total_iter):
#             x = X[i * batch_size : (i + 1) * batch_size]
            start = i * batch_size
            end = (i + 1) * batch_size
            batch_x, batch_y = x_training[start:end], labels_training[start:end]

            # Run optimization op (backprop)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout, keep_prob2: dropout2})

            # Calculate batch loss and accuracy
            # (re-evaluated on the same batch after the update, with both keep probabilities fed as 1)
#             loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y, keep_prob: 1., keep_prob2: 1.})
            # NOTE(review): acc is fetched but never used.
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1., keep_prob2: 1.})
#             loss = self.fit_batch(x)
            total_loss += loss
#             total_acc += acc

            if i%100 == 0:
                print("epoch: ",epoch,", iteration: ",i,", loss: ",loss," total loss: ",total_loss)


        # NOTE(review): raises ZeroDivisionError when n_samples < batch_size (total_iter == 0).
        mean_loss = total_loss / total_iter
        mean_losses.append(mean_loss)

#         mean_acc = total_acc / total_iter
#         mean_accs.append(mean_acc)

        if (epoch + 1) % display_step == 0:
            print('epoch %s: loss=%.4f' % (epoch + 1, mean_loss))


    # Calculate accuracy for test images
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: x_valid, y: labels_valid, keep_prob: 1., keep_prob2: 1.}))

In [None]:
# Unused implementation of fully connected layer
# NOTE(review): conv2, weights, biases and dropout are not defined in any cell
# shown here, so this cell cannot run on a fresh kernel as written.

# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
# (flattened to rows whose length is the first dimension of weights['wd1'])
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
print("fc1 shape", fc1.shape)
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
print("fc1 shape after weights and biases", fc1.shape)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)

# Output, class prediction (dense layer)
fc2 = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
print("fc2 shape after weights and biases", fc2.shape)
#     fc2 = tf.nn.dropout(fc2, dropout2)