In [1]:
import numpy as np
import tensorflow as tf

import skimage.io
from skimage.transform import resize
import matplotlib.pyplot as plt
%matplotlib inline

In [111]:
# Pretrained AlexNet parameters: a pickled dict (layer name -> [weights, biases])
# stored inside a .npy file.
# Passing the path (instead of an `open(...)` handle that is never closed) lets
# NumPy open and close the file itself. `allow_pickle=True` is required on NumPy
# versions where it defaults to False; unpickling can execute arbitrary code, so
# only load a weights file you trust.
data = np.load("bvlc_alexnet.npy", encoding="bytes", allow_pickle=True).item()

In [124]:
def weights(layer_name):
    """Look up the stored parameters of one layer.

    Reads the module-level ``data`` dict and returns the first two entries
    stored under ``layer_name``.

    Args:
        layer_name: key into ``data`` (e.g. ``'conv1'``).

    Returns:
        Tuple ``(W, b)``: entries 0 and 1 of ``data[layer_name]``.
    """
    params = data[layer_name]
    return params[0], params[1]

In [166]:
def conv2d(prev_layer, layer_name, strides=[1,1,1,1], groups = 1, relu = True):
    """Build a convolution layer from the stored parameters of ``layer_name``.

    The kernel and bias returned by ``weights(layer_name)`` are embedded in the
    graph as constants. The convolution uses ``'SAME'`` padding.

    Args:
        prev_layer: input tensor; it is split along axis 3 when ``groups > 1``.
        layer_name: name passed to ``weights`` to fetch ``(W, b)``.
        strides: strides forwarded to ``tf.nn.conv2d``. The default list is
            only read, never mutated.
        groups: number of groups. With ``groups != 1`` the input and the
            kernel are each split into ``groups`` parts along axis 3,
            convolved pairwise and concatenated back along axis 3.
        relu: when truthy, apply ReLU to the biased output.

    Returns:
        ``relu(conv + b)`` when ``relu`` is truthy, otherwise ``conv + b``.
    """
    W, b = weights(layer_name)
    kernel = tf.constant(W)
    # Flatten the bias to 1-D before handing it to bias_add.
    bias = tf.constant(np.reshape(b, -1))

    def convolve(inputs, filters):
        return tf.nn.conv2d(inputs, filters, strides, padding='SAME')

    if groups == 1:
        conv = convolve(prev_layer, kernel)
    else:
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=prev_layer)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=kernel)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]
        conv = tf.concat(axis=3, values=output_groups)

    conv_bias = tf.nn.bias_add(conv, bias)
    # Fix: the non-ReLU path used to return `conv`, silently dropping the bias.
    return tf.nn.relu(conv_bias) if relu else conv_bias

In [192]:
def maxpool(prev_layer):
    """Max-pool ``prev_layer`` with a [1, 2, 2, 1] window, [1, 2, 2, 1] strides and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(prev_layer, ksize=window, strides=step, padding='SAME')
    return pooled

In [187]:
def export_graph(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNEL):
    """Build the five-layer convolutional graph and return its tensors by name.

    The input is a float32 ``tf.Variable`` initialised with zeros of shape
    ``(1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL)``. Each of conv1..conv4 is
    followed by ``maxpool``; conv5 is the last node.

    Args:
        IMG_WIDTH: width of the input variable.
        IMG_HEIGHT: height of the input variable.
        IMG_CHANNEL: number of channels of the input variable.

    Returns:
        dict with keys ``'input'``, ``'conv1'``..``'conv5'`` and
        ``'pool1'``..``'pool4'``.
    """
    # (layer name, extra keyword arguments for conv2d), in build order
    conv_specs = (
        ('conv1', {'strides': [1,4,4,1]}),
        ('conv2', {'groups': 2}),
        ('conv3', {}),
        ('conv4', {'groups': 2}),
        ('conv5', {'groups': 2}),
    )

    graph = {}
    blank = np.zeros((1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNEL))
    graph['input'] = tf.Variable(blank, dtype=tf.float32)

    current = graph['input']
    last_index = len(conv_specs) - 1
    for index, (layer_name, extra) in enumerate(conv_specs):
        current = graph[layer_name] = conv2d(current, layer_name, **extra)
        # Every conv layer except the last one feeds a pooling layer.
        if index != last_index:
            pool_name = 'pool' + layer_name[len('conv'):]
            current = graph[pool_name] = maxpool(current)
    return graph

In [188]:
# Build the convolutional graph for a width-360, height-360, 3-channel input.
graph = export_graph(360, 360, 3)
# The bare string below is the last expression of the cell, so it is echoed as
# the cell output. It is a note listing, per conv layer, the weight shape
# followed by the bias shape.
'''
conv1 (11, 11, 3, 96) (96,)
conv2 (5, 5, 48, 256) (256,)
conv3 (3, 3, 256, 384) (384,)
conv4 (3, 3, 192, 384) (384,)
conv5 (3, 3, 192, 256) (256,)
'''

'\nconv1 (11, 11, 3, 96) (96,)\nconv2 (5, 5, 48, 256) (256,)\nconv3 (3, 3, 256, 384) (384,)\nconv4 (3, 3, 192, 384) (384,)\nconv5 (3, 3, 192, 256) (256,)\n'

In [191]:
# Display the layer-name -> tensor mapping returned by export_graph.
graph

{'conv1': <tf.Tensor 'Relu_21:0' shape=(1, 90, 90, 96) dtype=float32>,
 'conv2': <tf.Tensor 'Relu_22:0' shape=(1, 45, 45, 256) dtype=float32>,
 'conv3': <tf.Tensor 'Relu_23:0' shape=(1, 23, 23, 384) dtype=float32>,
 'conv4': <tf.Tensor 'Relu_24:0' shape=(1, 12, 12, 384) dtype=float32>,
 'conv5': <tf.Tensor 'Relu_25:0' shape=(1, 6, 6, 256) dtype=float32>,
 'input': <tensorflow.python.ops.variables.Variable at 0x1c23b28da0>,
 'pool1': <tf.Tensor 'MaxPool_19:0' shape=(1, 45, 45, 96) dtype=float32>,
 'pool2': <tf.Tensor 'MaxPool_20:0' shape=(1, 23, 23, 256) dtype=float32>,
 'pool3': <tf.Tensor 'MaxPool_21:0' shape=(1, 12, 12, 384) dtype=float32>,
 'pool4': <tf.Tensor 'MaxPool_22:0' shape=(1, 6, 6, 384) dtype=float32>}

In [2]:
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Create a (optionally grouped) convolution layer followed by ReLU.

    Two variables, ``'weights'`` and ``'biases'``, are created inside the
    variable scope ``name``.

    Args:
        x: input tensor; its last dimension is taken as the channel count and
            it is split along axis 3 when ``groups != 1``.
        filter_height: kernel height.
        filter_width: kernel width.
        num_filters: number of output channels.
        stride_y: stride used at position 1 of the ``strides`` list.
        stride_x: stride used at position 2 of the ``strides`` list.
        name: variable-scope name; also used as the name of the ReLU op.
        padding: padding mode forwarded to ``tf.nn.conv2d``.
        groups: number of groups. The input and the kernel are each split into
            ``groups`` parts along axis 3, convolved pairwise and concatenated.

    Returns:
        The ReLU-activated output tensor.

    Raises:
        ValueError: if ``groups`` is smaller than 1 or does not evenly divide
            both the input channel count and ``num_filters``.
    """
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    if groups < 1 or input_channels % groups != 0 or num_filters % groups != 0:
        raise ValueError(
            "groups=%r must be >= 1 and evenly divide both the input channels (%d) "
            "and num_filters (%d)" % (groups, input_channels, num_filters))

    def convolve(inputs, kernel):
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)

    with tf.variable_scope(name) as scope:
        # Integer division: `/` yields a float in Python 3, which is not a
        # valid shape dimension.
        kernel = tf.get_variable('weights',
                                 shape=[filter_height, filter_width,
                                        input_channels // groups, num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        if groups == 1:
            conv_out = convolve(x, kernel)
        else:
            # With multiple groups, split the input and the kernel along the
            # last axis, convolve each pair separately and concatenate again.
            input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
            weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=kernel)
            output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]
            conv_out = tf.concat(axis=3, values=output_groups)

        # bias_add keeps the shape of its input, so no reshape is needed.
        biased = tf.nn.bias_add(conv_out, biases)

        return tf.nn.relu(biased, name=scope.name)

In [3]:
def fc(x, num_in, num_out, name, relu = True):
    """Create a fully connected layer inside the variable scope ``name``.

    Args:
        x: input passed to ``tf.nn.xw_plus_b``.
        num_in: first dimension of the ``'weights'`` variable.
        num_out: second dimension of ``'weights'`` and size of ``'biases'``.
        name: variable-scope name; also used as the name of the matmul op.
        relu: when equal to ``True``, ReLU is applied to the result.

    Returns:
        ``relu(x @ weights + biases)`` or, without ReLU, ``x @ weights + biases``.
    """
    with tf.variable_scope(name) as scope:
        # Trainable parameters of the layer
        kernel = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
        offset = tf.get_variable('biases', [num_out], trainable=True)

        # Affine transform of the input
        pre_activation = tf.nn.xw_plus_b(x, kernel, offset, name=scope.name)

        apply_relu = (relu == True)
        return tf.nn.relu(pre_activation) if apply_relu else pre_activation

In [4]:
def max_pool(x, filter_height, filter_width, stride_y, stride_x,
             name, padding='SAME'):
    """Thin wrapper around ``tf.nn.max_pool``.

    The window is ``[1, filter_height, filter_width, 1]`` and the strides are
    ``[1, stride_y, stride_x, 1]``; ``padding`` and ``name`` are forwarded.
    """
    window = [1, filter_height, filter_width, 1]
    step = [1, stride_y, stride_x, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding, name=name)

In [5]:
def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Thin wrapper around ``tf.nn.local_response_normalization``.

    ``radius`` is passed as ``depth_radius``; ``alpha``, ``beta``, ``bias`` and
    ``name`` are forwarded under their own names.
    """
    lrn_kwargs = dict(depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name)
    return tf.nn.local_response_normalization(x, **lrn_kwargs)

In [6]:
def dropout(x, keep_prob):
    """Apply ``tf.nn.dropout`` to ``x``, passing ``keep_prob`` as its second argument."""
    dropped = tf.nn.dropout(x, keep_prob)
    return dropped

In [75]:
class AlexNet(object):
    """TF1-style AlexNet graph built on top of an input tensor.

    The graph is created in ``__init__``; the layer outputs are kept as
    attributes (``conv1``..``conv5``, ``fc6``..``fc8``) and can be collected
    with ``export()``. Stored weights are only assigned when
    ``load_initial_weights`` is called explicitly.
    """

    def __init__(self, x, keep_prob, num_classes, weights_path = 'DEFAULT'):
        """
        Inputs:
        - x: tf.placeholder, for the input images
        - keep_prob: tf.placeholder, keep probability forwarded to the dropout layers
        - num_classes: int, number of output units of fc8
        - weights_path: path string, path to the pretrained weights;
                        'DEFAULT' means 'bvlc_alexnet.npy' in the working directory
        """
        # Parse input arguments
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB = keep_prob
        self.WEIGHTS_PATH = 'bvlc_alexnet.npy' if weights_path == 'DEFAULT' else weights_path

        # Call the create function to build the computational graph of AlexNet
        self.create()

    def create(self):
        """Build the graph and store the layer outputs on the instance."""

        # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
        conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
        norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
        pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

        # 2nd Layer: Conv (w ReLu) -> Lrn -> Pool, with 2 groups
        conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
        norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
        pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

        # 3rd Layer: Conv (w ReLu)
        conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

        # 4th Layer: Conv (w ReLu), split into two groups
        conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # 5th Layer: Conv (w ReLu) -> Pool, split into two groups
        conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
        pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

        # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
        # The flattened size is derived from pool5's static shape, so it
        # depends on the spatial size of the input.
        sz = int(np.prod(pool5.get_shape()[1:]))
        flattened = tf.reshape(pool5, [-1, sz])
        fc6 = fc(flattened, sz, 4096, name='fc6')
        dropout6 = dropout(fc6, self.KEEP_PROB)

        # 7th Layer: FC (w ReLu) -> Dropout
        fc7 = fc(dropout6, 4096, 4096, name = 'fc7')
        dropout7 = dropout(fc7, self.KEEP_PROB)

        # 8th Layer: FC and return unscaled activations
        # (for tf.nn.softmax_cross_entropy_with_logits)
        fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu = False, name='fc8')

        # Keep the layer outputs for later inspection / export
        self.input = self.X
        self.conv1 = conv1
        self.conv2 = conv2
        self.conv3 = conv3
        self.conv4 = conv4
        self.conv5 = conv5
        self.fc6 = fc6
        self.fc7 = fc7
        self.fc8 = fc8

    def load_initial_weights(self, session=None):
        """Assign the stored weights to the graph variables.

        Args:
            session: tf.Session used to run the assign ops. When omitted, the
                current default session is used.

        Raises:
            RuntimeError: if no session is given and there is no default session.

        NOTE(review): every layer stored in the file is assigned; presumably
        the assign fails when a stored array does not match the variable shape
        (e.g. fc6 for a different input size, fc8 for a different
        ``num_classes``) -- confirm before relying on this with a modified graph.
        """
        # The original body used an undefined name `session`; resolve it here.
        if session is None:
            session = tf.get_default_session()
        if session is None:
            raise RuntimeError(
                "load_initial_weights needs a tf.Session: pass one explicitly "
                "or call it inside a `with tf.Session():` block")

        # Load the weights into memory. The file holds a pickled dict, so
        # allow_pickle=True is required on NumPy versions where it defaults to
        # False; only load a weights file you trust.
        weights_dict = np.load(self.WEIGHTS_PATH, encoding = 'bytes',
                               allow_pickle = True).item()

        # Loop over all layer names stored in the weights dict
        for op_name in weights_dict:

            with tf.variable_scope(op_name, reuse = True):

                # 1-D arrays are biases, everything else is a weight tensor
                for param in weights_dict[op_name]:
                    var_name = 'biases' if len(param.shape) == 1 else 'weights'
                    var = tf.get_variable(var_name, trainable = False)
                    session.run(var.assign(param))

    def export(self):
        """Return a dict mapping layer names to the stored layer outputs."""
        layer_names = ('input', 'conv1', 'conv2', 'conv3', 'conv4', 'conv5',
                       'fc6', 'fc7', 'fc8')
        return {layer_name: getattr(self, layer_name) for layer_name in layer_names}

In [76]:
# Path of the single image that is fed through the network
train_file = './llama.jpeg'

# Hyperparameters
learning_rate = 0.001
num_classes = 1000
# NOTE: despite its name this value is fed to the `keep_prob` placeholder,
# i.e. it is used as a keep probability.
dropout_rate = 0.5
batch_size = 1

# Read the image and resize it to 360x360.
# `mode='constant'` pins the behaviour that used to be the implicit default, so
# the result does not change when the library default switches, and the
# deprecation warning is no longer emitted.
img_train = skimage.io.imread(train_file)
img_train = resize(img_train, (360,360), mode='constant')

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [77]:
# Clear the default graph before (re)building the model.
tf.reset_default_graph()
# Image batch placeholder of shape [batch_size, 360, 360, 3]
x = tf.placeholder(tf.float32, [batch_size, 360, 360, 3])
# Placeholder with one value per class for every image in the batch
y = tf.placeholder(tf.float32, [batch_size, num_classes])
# Keep probability handed to the model's dropout layers
keep_prob = tf.placeholder(tf.float32)

In [78]:
# Build the AlexNet graph on the placeholders; `score` is the output of the
# last fully connected layer (fc8, no ReLU applied).
model = AlexNet(x, keep_prob, num_classes)
score = model.fc8

In [79]:
# Run one forward pass and fetch the fc8 scores.
# NOTE(review): the variables only receive their initializer values here; in
# the cells visible in this notebook `load_initial_weights` is never called, so
# these scores are not produced by the pretrained weights. `keep_prob` is fed
# with dropout_rate (0.5), so dropout is active during this pass.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    s = sess.run(score, feed_dict={x:[img_train], keep_prob:dropout_rate})

In [80]:
# Select the conv4 activation as the next tensor to evaluate.
out = model.conv4

In [81]:
# Evaluate conv4 in a new session; the variables are initialized again, so this
# run does not share variable values with the previous session.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o = sess.run(out, feed_dict={x:[img_train], keep_prob:dropout_rate})

In [82]:
# Sanity check: shapes of the fc8 scores and of the conv4 activation.
print(s.shape, o.shape)

(1, 1000) (1, 21, 21, 384)


In [97]:
# Inspect the conv1 parameters stored in the `data` weights dict
# (requires the cell that loads `data` to have been run first).
w = data['conv1'][0]
b = data['conv1'][1]
# len() only reports the size of the first axis of each array.
print(len(w), len(b))

11 96


In [100]:
# Full shapes of the conv1 weights and biases.
print(w.shape, b.shape)

(11, 11, 3, 96) (96,)


In [105]:
# List the layer names stored in the weights dict.
data.keys()

dict_keys(['fc6', 'fc7', 'fc8', 'conv3', 'conv2', 'conv1', 'conv5', 'conv4'])

In [107]:
# Print the weight and bias shapes of every stored layer.
# Note: this loop rebinds the notebook-level names `layer`, `w` and `b`.
for key in data.keys():
    layer = data[key]
    w = layer[0]
    b = layer[1]
    print(key, w.shape, b.shape)

fc6 (9216, 4096) (4096,)
fc7 (4096, 4096) (4096,)
fc8 (4096, 1000) (1000,)
conv3 (3, 3, 256, 384) (384,)
conv2 (5, 5, 48, 256) (256,)
conv1 (11, 11, 3, 96) (96,)
conv5 (3, 3, 192, 256) (256,)
conv4 (3, 3, 192, 384) (384,)
