# Pass Images through network to get Bottleneck Values

In [1]:
#Import all the required packages

import numpy as np
import pandas as pd

from os import listdir
from os.path import join, basename
import struct
import json
from scipy import misc
import datetime as dt

from bigdl.nn.layer import *
from optparse import OptionParser
from bigdl.nn.criterion import *
from bigdl.optim.optimizer import *
from bigdl.util.common import *
from bigdl.dataset.transformer import *
from bigdl.nn.initialization_method import *
from transformer import *
from imagenet import *
from transformer import Resize

# if you want to train on whole imagenet
#from bigdl.dataset import imagenet
%matplotlib inline 

In [2]:
def scala_T(input_T):
    """
    Helper function for building Inception layers. Transforms a sequence of numbers into a dictionary
    with ascending integer keys and a placeholder 0 stored at key 0, so the real data starts at key 1.
    Inputs that are not a list/tuple (e.g. an already-built dictionary) are returned unchanged.

    :param input_T: list (or tuple) of values, or a dict
    :return: {0: 0, 1: realdata_1, 2: realdata_2, ...} for sequences; otherwise input_T itself
    """
    # isinstance (rather than an exact type check) also accepts list subclasses and tuples
    if isinstance(input_T, (list, tuple)):
        # prepend 0 so that the real data starts from index 1
        return dict(enumerate([0] + list(input_T)))
    # if dictionary (or anything else), return it back untouched
    return input_T

In [3]:
# `config` is the nested, 1-indexed table described in the docstring below.
def Inception_Layer_v1(input_size, config, name_prefix=""):
    """
    Builds the inception-v1 submodule: four parallel branches that all receive the same input and
    whose outputs are concatenated. The full model stacks several of these modules.

    :param input_size: input size handed to the first convolution of every branch (and to the
                       pool-projection convolution)
    :param config: nested table indexed from 1 (as produced by scala_T). The entries read here are:
                   config[1][1] -> output size of the "1x1" convolution
                   config[2][1] -> output size of "3x3_reduce", config[2][2] -> output size of "3x3"
                   config[3][1] -> output size of "5x5_reduce", config[3][2] -> output size of "5x5"
                   config[4][1] -> output size of "pool_proj"
    :param name_prefix: string prepended to the name of every layer in this module
    :return: the Concat container holding the four Sequential branches
    """
    def named(layer, suffix):
        # Give the layer its prefixed name and hand back whatever set_name returns.
        return layer.set_name(name_prefix + suffix)

    # Concat is a container that feeds the same input to all of its submodules and concatenates
    # their outputs along the provided dimension (2 here).
    branches = Concat(2)

    # Each branch is a Sequential container; layers run in the order they are added.

    # Branch 1: 1x1 convolution (SpatialConvolution applies a 2D convolution over the input).
    branch_1x1 = Sequential()
    branch_1x1.add(named(SpatialConvolution(input_size, config[1][1], 1, 1, 1, 1), "1x1"))
    branch_1x1.add(named(ReLU(True), "relu_1x1"))
    branches.add(branch_1x1)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    branch_3x3 = Sequential()
    branch_3x3.add(named(SpatialConvolution(input_size, config[2][1], 1, 1, 1, 1), "3x3_reduce"))
    branch_3x3.add(named(ReLU(True), "relu_3x3_reduce"))
    branch_3x3.add(named(SpatialConvolution(config[2][1], config[2][2], 3, 3, 1, 1, 1, 1), "3x3"))
    branch_3x3.add(named(ReLU(True), "relu_3x3"))
    branches.add(branch_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    branch_5x5 = Sequential()
    branch_5x5.add(named(SpatialConvolution(input_size, config[3][1], 1, 1, 1, 1), "5x5_reduce"))
    branch_5x5.add(named(ReLU(True), "relu_5x5_reduce"))
    branch_5x5.add(named(SpatialConvolution(config[3][1], config[3][2], 5, 5, 1, 1, 2, 2), "5x5"))
    branch_5x5.add(named(ReLU(True), "relu_5x5"))
    branches.add(branch_5x5)

    # Branch 4: 3x3 max pooling followed by a 1x1 projection.
    branch_pool = Sequential()
    branch_pool.add(named(SpatialMaxPooling(3, 3, 1, 1, 1, 1, to_ceil=True), "pool"))
    branch_pool.add(named(SpatialConvolution(input_size, config[4][1], 1, 1, 1, 1), "pool_proj"))
    branch_pool.add(named(ReLU(True), "relu_pool_proj"))

    # set_name is applied to whatever add() returns -- presumably the Concat itself (TODO confirm).
    branches.add(branch_pool).set_name(name_prefix + "output")
    return branches

In [4]:
def Inception_v1_NoAuxClassifier(class_num):
    """
    Builds the Inception-v1 network without auxiliary classifiers and without the final classifier.

    The closing Linear/LogSoftMax layers are commented out, so the network ends at the
    View([1024]) layer and `class_num` is currently unused.

    :param class_num: number of target classes; only referenced by the commented-out classifier
    :return: the Sequential model, after model.reset() has been called on it
    """
    def inception(input_size, conv1x1, conv3x3, conv5x5, pool_proj, prefix):
        # Pack the four branch specs into the nested 1-indexed table Inception_Layer_v1 reads.
        table = scala_T([scala_T(conv1x1), scala_T(conv3x3), scala_T(conv5x5), scala_T(pool_proj)])
        return Inception_Layer_v1(input_size, table, prefix)

    net = Sequential()

    # Stem: 7x7 convolution, pooling, LRN, then the 1x1 -> 3x3 convolution pair.
    net.add(SpatialConvolution(3, 64, 7, 7, 2, 2, 3, 3, 1, False).set_name("conv1/7x7_s2"))
    net.add(ReLU(True).set_name("conv1/relu_7x7"))
    net.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True).set_name("pool1/3x3_s2"))
    net.add(SpatialCrossMapLRN(5, 0.0001, 0.75).set_name("pool1/norm1"))
    net.add(SpatialConvolution(64, 64, 1, 1, 1, 1).set_name("conv2/3x3_reduce"))
    net.add(ReLU(True).set_name("conv2/relu_3x3_reduce"))
    net.add(SpatialConvolution(64, 192, 3, 3, 1, 1, 1, 1).set_name("conv2/3x3"))
    net.add(ReLU(True).set_name("conv2/relu_3x3"))
    net.add(SpatialCrossMapLRN(5, 0.0001, 0.75).set_name("conv2/norm2"))
    net.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True).set_name("pool2/3x3_s2"))

    # Inception group 3
    net.add(inception(192, [64], [96, 128], [16, 32], [32], "inception_3a/"))
    net.add(inception(256, [128], [128, 192], [32, 96], [64], "inception_3b/"))
    net.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True))

    # Inception group 4
    net.add(inception(480, [192], [96, 208], [16, 48], [64], "inception_4a/"))
    net.add(inception(512, [160], [112, 224], [24, 64], [64], "inception_4b/"))
    net.add(inception(512, [128], [128, 256], [24, 64], [64], "inception_4c/"))
    net.add(inception(512, [112], [144, 288], [32, 64], [64], "inception_4d/"))
    net.add(inception(528, [256], [160, 320], [32, 128], [128], "inception_4e/"))
    net.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True))

    # Inception group 5
    net.add(inception(832, [256], [160, 320], [32, 128], [128], "inception_5a/"))
    net.add(inception(832, [384], [192, 384], [48, 128], [128], "inception_5b/"))

    # Head: average pooling, dropout, then flatten to a 1024-wide vector.
    net.add(SpatialAveragePooling(7, 7, 1, 1).set_name("pool5/7x7_s1"))
    net.add(Dropout(0.4).set_name("pool5/drop_7x7_s1"))
    net.add(View([1024], num_input_dims=3))
    # Classifier intentionally left out so the network output is the 1024-wide vector above:
#     model.add(Linear(1024, class_num).set_name("loss3/classifier_flowers"))
#     model.add(LogSoftMax().set_name("loss3/loss3"))
    net.reset()
    return net

# Create the Model

In [5]:
# Initialize the BigDL engine (this cell runs before the model is instantiated in the next cell)
init_engine()

In [6]:
# Number of categories (count of unique bh_name values)
categoryNum = 9

# Instantiate the model.
# Alternative kept for reference (not used here): the main inception-v1 model
#inception_model = Inception_v1(classNum)  #-- main inception-v1 model
inception_model = Inception_v1_NoAuxClassifier(categoryNum)

creating: createSequential
creating: createSpatialConvolution
creating: createReLU
creating: createSpatialMaxPooling
creating: createSpatialCrossMapLRN
creating: createSpatialConvolution
creating: createReLU
creating: createSpatialConvolution
creating: createReLU
creating: createSpatialCrossMapLRN
creating: createSpatialMaxPooling
creating: createConcat
creating: createSequential
creating: createSpatialConvolution
creating: createReLU
creating: createSequential
creating: createSpatialConvolution
creating: createReLU
creating: createSpatialConvolution
creating: createReLU
creating: createSequential
creating: createSpatialConvolution
creating: createReLU
creating: createSpatialConvolution
creating: createReLU
creating: createSequential
creating: createSpatialMaxPooling
creating: createSpatialConvolution
creating: createReLU
creating: createConcat
creating: createSequential
creating: createSpatialConvolution
creating: createReLU
creating: createSequential
creating: createSpatialConvolution

## Import weights from Caffe Model


In [7]:
import urllib
# path.exists is used below; import it explicitly rather than relying on a star import to supply it
from os import path

# paths/names of the downloaded pre-trained caffe model files
# (only the .caffemodel is fetched below; the .prototxt is expected to be present already)
caffe_prototxt = 'bvlc_googlenet.prototxt'
caffe_model = 'bvlc_googlenet.caffemodel'

# download the pre-trained weights once; skipped when the file already exists locally
if not path.exists(caffe_model):
    model_loader = urllib.URLopener()  # Python 2 urllib API
    model_loader.retrieve("http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", caffe_model)

# loading the weights to the BigDL inception model, EXCEPT the weights for the last fc layer (classification layer)
model = Model.load_caffe(inception_model, caffe_prototxt, caffe_model, match_all=False, bigdl_type="float")

# if we want to export the whole caffe model including definition, this can be used.
#model = Model.load_caffe_model(inception_model, caffe_prototxt, caffe_model, match_all=True)

## Get bottleneck values for imgs
Images have been cropped to square, then cropped to 224x224 and saved in "pickle_imgs.dat"

In [8]:
# GOAL: convert the processed vegnonveg imgs into a 2d array of bottlenecks of shape (# of imgs, 1024)
# and save it into the pickle file named by PIK.
from PIL import Image
import cv2
from os import listdir
import pickle

PIK = "pickle_bottleneck.dat"

# NOTE(review): pickle.load can execute arbitrary code -- only load pickle_imgs.dat from a trusted source.
# The with-block closes the file handle once the images are loaded.
with open("pickle_imgs.dat", "rb") as imgs_file:
    imgs_pickle = pickle.load(imgs_file)
    

In [9]:
# Convert every loaded image to an rgb array and stack them into a single ndarray.
# A list comprehension replaces map + the Python-2-only `lambda(x):` form; it always yields a
# list, so np.array sees the same input whether map returns a list or an iterator.
img_data = np.array([np.array(img) for img in imgs_pickle])
# Single-argument print: valid on Python 2 and 3, and prints the same text as before.
print("All rgb images:  %s" % (img_data.shape,))

All rgb images:  (2611, 224, 224, 3)


In [11]:
# Calculate means and stds along rgb channels for normalizer
def calc_mean_and_std(channel, img_data):
    """
    Compute the mean and standard deviation of one colour channel.

    :param channel: pass 'r', 'g' or 'b'
    :param img_data: the np array of rgb values, indexed as [:, :, :, channel]
    :return: (mean, std) tuple computed over every value of the selected channel
    :raises ValueError: if channel is not 'r', 'g' or 'b'
    """
    try:
        # position of the channel along the last axis: r -> 0, g -> 1, b -> 2
        place = ('r', 'g', 'b').index(channel)
    except ValueError:
        raise ValueError("Specify 'r', 'g' or 'b' for channel")
    # flatten the selected channel so the statistics cover every pixel of every image
    vals = np.reshape(img_data[:, :, :, place], -1)
    return np.mean(vals), np.std(vals)

In [12]:
# Per-channel (mean, std) pairs over the whole image set, computed in r, g, b order.
(r_mean, r_std), (g_mean, g_std), (b_mean, b_std) = (
    calc_mean_and_std('r', img_data),
    calc_mean_and_std('g', img_data),
    calc_mean_and_std('b', img_data),
)

In [15]:
# Pre-processing pipelines for the images; the train and test variants differ only in the
# mode string passed to Flip.
img_rows, img_cols = 224, 224


def _make_input_transformer(mode):
    """Build the center-crop -> channel-normalize -> flip -> transpose pipeline for `mode`."""
    steps = [
        Crop(img_rows, img_cols, "center"),
        ChannelNormalizer(r_mean, g_mean, b_mean, r_std, g_std, b_std),
        Flip(500, mode),
        TransposeToTensor(False),
    ]
    return Transformer(steps)


transform_input_train = _make_input_transformer("train")
transform_input_test = _make_input_transformer("test")

## Create Test and Training Set with Normalized Values

In [None]:
# The first `train_size` images form the training set; the remainder is the test set.
train_size = 2176
train_data = img_data[0:train_size]
test_data = img_data[train_size:]

# Run every image through its pipeline. List comprehensions replace map + the Python-2-only
# `lambda(imgs):` form and always hand np.array a list.
train_data_norm = np.array([transform_input_train(img) for img in train_data])
test_data_norm = np.array([transform_input_test(img) for img in test_data])
#print train_data_norm.shape, test_data_norm.shape

In [None]:
# Show both shapes on one line. The original `print (x).shape, y` only looked like a function
# call; this single-argument form prints the same text and is valid on Python 2 and 3.
print("%s %s" % (train_data_norm.shape, test_data_norm.shape))

In [None]:
# Push every normalized training image through the network, one image at a time, and collect
# the model output (the bottleneck array) for each.
bottlenecks = []
for img in train_data_norm:
    # The sample is built with a placeholder label of -1.
    sample = Sample.from_ndarray(img, np.array(-1))
    # Each iteration parallelizes a one-element RDD, predicts on it and collects the result.
    single_image_rdd = sc.parallelize([sample])
    bottlenecks.append(model.predict(single_image_rdd).collect()[0])

In [None]:
# dump to pickle_bottleneck.dat; the with-block closes (and flushes) the file once writing is done
with open(PIK, "wb") as bottleneck_file:
    pickle.dump(bottlenecks, bottleneck_file)
