# VegNonVeg Transfer Learning using Inception V1

The goal of this notebook is to use BigDL to retrain the last layer of an Inception v1 model imported from Caffe and originally trained on ImageNet. It aims to use transfer learning to classify a dataset of flower images among 5 categories of flowers.

In [1]:
# Verify if the spark context was initialized 
sc

<pyspark.context.SparkContext at 0x103e6dc90>

In [2]:
#Import all the required packages

import numpy as np
import pandas as pd

from os import listdir
from os.path import join, basename
import struct
import json
from scipy import misc
import datetime as dt

from bigdl.nn.layer import *
from optparse import OptionParser
from bigdl.nn.criterion import *
from bigdl.optim.optimizer import *
from bigdl.util.common import *
from bigdl.dataset.transformer import *
from bigdl.nn.initialization_method import *
from transformer import *
from imagenet import *
from transformer import Resize

# if you want to train on whole imagenet
#from bigdl.dataset import imagenet
#%pylab inline
import matplotlib.pyplot as plt
%matplotlib inline 
from PIL import Image

## Model Definition and helper functions

In [3]:
# helper func to read the files from disk
def read_local_path(folder, has_label=True):
    """
    List the image files under a local directory together with their labels.

    With ``has_label=True`` every sub-directory of ``folder`` is treated as one
    class: the sub-directories are sorted by name and numbered from 1, and each
    entry inside a sub-directory gets that number as its label. Entries of
    ``folder`` that are not directories (for example a stray ``.DS_Store``) are
    skipped instead of making ``listdir`` fail.

    With ``has_label=False`` every entry of ``folder`` is returned with the
    placeholder label -1.

    :param folder: local directory (str)
    :param has_label: does image have label (bool)
    :return: list of (image path , label) tuples
    """
    # local import: the notebook's import cell only pulls join/basename from os.path
    from os.path import isdir

    entries = listdir(folder)
    if not has_label:
        return [(join(folder, name), -1) for name in entries]

    class_dirs = sorted(name for name in entries if isdir(join(folder, name)))
    image_paths = []
    # enumerate(..., 1) yields the 1-based class index directly, avoiding a
    # list.index() scan for every file
    for label, class_dir in enumerate(class_dirs, 1):
        class_path = join(folder, class_dir)
        for file_name in listdir(class_path):
            image_paths.append((join(class_path, file_name), label))
    return image_paths

In [4]:
# helper func to read the files from disk
def read_local(sc, folder, normalize=255.0, has_label=True):
    """
    Load the images under a local directory into an RDD of (feature, label) pairs.

    Each image is read with ``misc.imread``, passed through the
    ``Resize(256, 256)`` transformer, masked with 0xff, divided by
    ``normalize`` and cast to float32. Its label is wrapped in a numpy array.

    :param sc: spark context
    :param folder: local directory
    :param normalize: normalization value (divisor applied to the pixel values)
    :param has_label: whether the image folder contains label
    :return: RDD of (float32 image array, label array) tuples
    """
    def load_image(path_label):
        # (path, label) -> (decoded image, label as ndarray)
        return misc.imread(path_label[0]), np.array(path_label[1])

    def resize(img_label):
        return Resize(256, 256)(img_label[0]), img_label[1]

    def scale(feature_label):
        scaled = (feature_label[0] & 0xff) / normalize
        return scaled.astype("float32"), feature_label[1]

    # list the files on the driver, then distribute the (path, label) tuples
    paths_rdd = sc.parallelize(read_local_path(folder, has_label))
    return paths_rdd.map(load_image).map(resize).map(scale)

The following function takes an input; if the input is a list, it inserts a 0 at index 0 so that the real data starts from index 1, and returns a dictionary whose keys are the indices and whose values are the list elements. Any other input is returned unchanged.

In [5]:
def scala_T(input_T):
    """
    Helper function for building Inception layers: turn a list into a 1-based lookup table.

    A list ``[a, b, ...]`` becomes ``{0: 0, 1: a, 2: b, ...}``; a placeholder 0
    is put in front so that the real values start at key 1. Any input whose
    type is not exactly ``list`` (e.g. a dict) is returned unchanged.

    :param input_T: either list or dict
    :return: dictionary with ascending keys and 0 at the front {0: 0, 1: realdata_1, 2: realdata_2, ...}
    """
    if type(input_T) is not list:
        # not a list: hand the object back untouched
        return input_T
    padded = [0] + input_T
    return {position: value for position, value in enumerate(padded)}

The following functions are used to create and initialize the Inception v1 model.

In [6]:
def Inception_Layer_v1(input_size, config, name_prefix=""):
    """
    Build one Inception-v1 module: four parallel branches whose outputs are concatenated.

    Every branch is a ``Sequential`` container; the branches are collected in a
    ``Concat(2)`` container, so they all receive the same input and their
    outputs are concatenated along dimension 2.

    :param input_size: first argument of each branch's entry convolution
        (the number of input planes coming into the module)
    :param config: 1-based nested lookup, as produced by ``scala_T``, holding the
        second argument (output plane count) of the convolutions:
        ``config[1][1]`` for the 1x1 branch,
        ``config[2][1]`` / ``config[2][2]`` for the 3x3 reduce / 3x3 convolutions,
        ``config[3][1]`` / ``config[3][2]`` for the 5x5 reduce / 5x5 convolutions,
        ``config[4][1]`` for the pooling projection
    :param name_prefix: string prepended to the name of every layer in the module
    :return: the Concat container holding the four branches
    """
    def named(layer, suffix):
        # layer names are the module prefix followed by a fixed per-layer suffix
        return layer.set_name(name_prefix + suffix)

    # all branches see the same input; their outputs are joined along dimension 2
    concat = Concat(2)

    # branch 1: 1x1 convolution
    branch_1x1 = Sequential()
    branch_1x1.add(named(SpatialConvolution(input_size, config[1][1], 1, 1, 1, 1), "1x1"))
    branch_1x1.add(named(ReLU(True), "relu_1x1"))
    concat.add(branch_1x1)

    # branch 2: 1x1 reduction followed by a 3x3 convolution
    reduce_3x3, out_3x3 = config[2][1], config[2][2]
    branch_3x3 = Sequential()
    branch_3x3.add(named(SpatialConvolution(input_size, reduce_3x3, 1, 1, 1, 1), "3x3_reduce"))
    branch_3x3.add(named(ReLU(True), "relu_3x3_reduce"))
    branch_3x3.add(named(SpatialConvolution(reduce_3x3, out_3x3, 3, 3, 1, 1, 1, 1), "3x3"))
    branch_3x3.add(named(ReLU(True), "relu_3x3"))
    concat.add(branch_3x3)

    # branch 3: 1x1 reduction followed by a 5x5 convolution
    reduce_5x5, out_5x5 = config[3][1], config[3][2]
    branch_5x5 = Sequential()
    branch_5x5.add(named(SpatialConvolution(input_size, reduce_5x5, 1, 1, 1, 1), "5x5_reduce"))
    branch_5x5.add(named(ReLU(True), "relu_5x5_reduce"))
    branch_5x5.add(named(SpatialConvolution(reduce_5x5, out_5x5, 5, 5, 1, 1, 2, 2), "5x5"))
    branch_5x5.add(named(ReLU(True), "relu_5x5"))
    concat.add(branch_5x5)

    # branch 4: 3x3 max pooling followed by a 1x1 projection
    branch_pool = Sequential()
    branch_pool.add(named(SpatialMaxPooling(3, 3, 1, 1, 1, 1, to_ceil=True), "pool"))
    branch_pool.add(named(SpatialConvolution(input_size, config[4][1], 1, 1, 1, 1), "pool_proj"))
    branch_pool.add(named(ReLU(True), "relu_pool_proj"))
    # the object returned by add() is given the module's output name
    concat.add(branch_pool).set_name(name_prefix + "output")
    return concat

In [7]:
def Inception_v1_Bottleneck(class_num):
    """
    Build the Inception-v1 network up to, but not including, the classifier.

    The stack is: the convolution/pooling/LRN stem, inception modules 3a-3b,
    max pooling, modules 4a-4e, max pooling, modules 5a-5b, then 7x7 average
    pooling, dropout and a ``View([1024], num_input_dims=3)``.

    :param class_num: not used by this builder; kept so the signature matches
        the builders that add a classifier
    :return: the Sequential model, after ``reset()`` has been called on it
    """
    def inception(input_size, n_1x1, n_3x3, n_5x5, n_pool_proj, prefix):
        # pack the per-branch plane counts into the 1-based tables the module builder reads
        config = scala_T([scala_T([n_1x1]), scala_T(n_3x3), scala_T(n_5x5), scala_T([n_pool_proj])])
        return Inception_Layer_v1(input_size, config, prefix)

    # (input_size, 1x1, [3x3 reduce, 3x3], [5x5 reduce, 5x5], pool projection, name prefix)
    stage_3 = [
        (192, 64, [96, 128], [16, 32], 32, "inception_3a/"),
        (256, 128, [128, 192], [32, 96], 64, "inception_3b/"),
    ]
    stage_4 = [
        (480, 192, [96, 208], [16, 48], 64, "inception_4a/"),
        (512, 160, [112, 224], [24, 64], 64, "inception_4b/"),
        (512, 128, [128, 256], [24, 64], 64, "inception_4c/"),
        (512, 112, [144, 288], [32, 64], 64, "inception_4d/"),
        (528, 256, [160, 320], [32, 128], 128, "inception_4e/"),
    ]
    stage_5 = [
        (832, 256, [160, 320], [32, 128], 128, "inception_5a/"),
        (832, 384, [192, 384], [48, 128], 128, "inception_5b/"),
    ]

    model = Sequential()

    # stem
    model.add(SpatialConvolution(3, 64, 7, 7, 2, 2, 3, 3, 1, False).set_name("conv1/7x7_s2"))
    model.add(ReLU(True).set_name("conv1/relu_7x7"))
    model.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True).set_name("pool1/3x3_s2"))
    model.add(SpatialCrossMapLRN(5, 0.0001, 0.75).set_name("pool1/norm1"))
    model.add(SpatialConvolution(64, 64, 1, 1, 1, 1).set_name("conv2/3x3_reduce"))
    model.add(ReLU(True).set_name("conv2/relu_3x3_reduce"))
    model.add(SpatialConvolution(64, 192, 3, 3, 1, 1, 1, 1).set_name("conv2/3x3"))
    model.add(ReLU(True).set_name("conv2/relu_3x3"))
    model.add(SpatialCrossMapLRN(5, 0.0001, 0.75).set_name("conv2/norm2"))
    model.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True).set_name("pool2/3x3_s2"))

    # inception stages, separated by (unnamed) max-pooling layers
    for spec in stage_3:
        model.add(inception(*spec))
    model.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True))
    for spec in stage_4:
        model.add(inception(*spec))
    model.add(SpatialMaxPooling(3, 3, 2, 2, to_ceil=True))
    for spec in stage_5:
        model.add(inception(*spec))

    # pooled, flattened feature vector ("bottleneck")
    model.add(SpatialAveragePooling(7, 7, 1, 1).set_name("pool5/7x7_s1"))
    model.add(Dropout(0.4).set_name("pool5/drop_7x7_s1"))
    model.add(View([1024], num_input_dims=3))
    model.reset()
    return model

In [8]:
def Inception_v1_NoAuxClassifier(class_num):
    """
    Build the bottleneck network plus a classification head.

    Appends ``Linear(1024, class_num)`` and ``LogSoftMax`` to the model
    returned by ``Inception_v1_Bottleneck`` and calls ``reset()`` on the result.

    :param class_num: second argument of the final Linear layer (number of classes)
    :return: the extended Sequential model
    """
    network = Inception_v1_Bottleneck(class_num)

    classifier = Linear(1024, class_num).set_name("loss3/classifier_flowers")
    network.add(classifier)

    log_softmax = LogSoftMax().set_name("loss3/loss3")
    network.add(log_softmax)

    network.reset()
    return network

In [9]:
def get_inception_data(folder, file_type="image", data_type="train", normalize=255.0):
    """
    Load one split of a dataset, using the notebook's global spark context ``sc``.

    :param folder: root directory of the dataset
    :param file_type: "image" to read image files with ``read_local``,
        "seq" to read sequence files with ``read_seq_file``
    :param data_type: name of the sub-directory of ``folder`` to read (e.g. "train")
    :param normalize: normalization value forwarded to the reader
    :return: the value returned by the selected reader
    :raises ValueError: if ``file_type`` is neither "seq" nor "image"
    """
    # `join` is imported explicitly from os.path; the bare name `os` is only in
    # scope when one of the wildcard imports happens to leak it
    data_path = join(folder, data_type)
    if file_type == "seq":
        # use imagenet.read_seq_file(sc, data_path, normalize) when reading the original imagenet data
        return read_seq_file(sc, data_path, normalize)
    if file_type == "image":
        return read_local(sc, data_path, normalize)
    # previously an unknown file_type silently returned None
    raise ValueError("unsupported file_type {!r}; expected 'seq' or 'image'".format(file_type))

## Creating the Bottleneck model


In [10]:
# initializing BigDL engine
init_engine()

In [11]:
# paths for datasets, saving checkpoints
from os import path

# root of the data directory, relative to the notebook's working directory
DATA_ROOT = "../../../data"
# raw input images (read by preprocess_images)
DATA_PATH = DATA_ROOT + "/vegnonveg-samples"
# cropped/resized images (written by preprocess_images, read by load_images)
PROCESSED_PATH = DATA_ROOT + "/vegnonveg-processed"
# NOTE(review): this directory lives inside DATA_PATH and is not referenced by any other visible cell
checkpoint_path = path.join(DATA_PATH, "checkpoints")

# side length, in pixels, of the square images produced by preprocess_images
IMAGE_SIZE = 224

In [None]:
# number of classes handed to the model builder
# NOTE(review): Inception_v1_Bottleneck does not use this argument, so the value has no effect here
classNum = 5

# Instantiate the bottleneck model (Inception v1 without the classifier layers)
# inception_model = Inception_v1(classNum)  #-- main inception-v1 model
inception_model = Inception_v1_Bottleneck(classNum)

## Download Pre-trained Model 

Download the pre-trained 'Inception v1 caffe model' from the link : https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet

In [13]:
import urllib

# urlretrieve lives in urllib on Python 2 and in urllib.request on Python 3;
# urllib.URLopener only exists on Python 2
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# path, names of the downloaded pre-trained caffe models
caffe_prototxt = 'bvlc_googlenet.prototxt'
caffe_model = 'bvlc_googlenet.caffemodel'

# download the weights only when they are not already on disk;
# the prototxt file is expected to be present already
if not path.exists(caffe_model):
    urlretrieve("http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", caffe_model)

## Import weights from Caffe Model

In [14]:
# Load the pre-trained Caffe weights into the BigDL bottleneck model built above.
# NOTE(review): match_all=False presumably allows layers that exist on only one side
# (e.g. the Caffe classifier layers, which the bottleneck model lacks) to be skipped — confirm against the BigDL load_caffe docs
model = Model.load_caffe(inception_model, caffe_prototxt, caffe_model, match_all=False, bigdl_type="float")

# if we want to export the whole caffe model including definition, this can be used.
#model = Model.load_caffe_model(inception_model, caffe_prototxt, caffe_model, match_all=True)

## Pre-process Images

0. Load an image
0. Crop if not-square to a square shape
0. Resize to 224 x 224

In [None]:
def crop_image(img):
    """
    Crop an image to its largest centred square.

    :param img: object exposing ``width``, ``height`` and ``crop(box)`` (e.g. a PIL image)
    :return: the result of ``img.crop`` for the centred square box
        ``(left, top, left + side, top + side)``
    """
    side = min(img.width, img.height)
    # floor division keeps the box coordinates integral on Python 3 as well;
    # plain `/` would produce floats there
    left = (img.width - side) // 2
    top = (img.height - side) // 2
    return img.crop((left, top, left + side, top + side))

# transform_input = Transformer([ChannelNormalizer(0.485, 0.456, 0.406, 0.229, 0.224, 0.225),
#                                TransposeToTensor(False)])

def preprocess_images(image_dir, out_dir):
    """
    Centre-crop, resize and re-save every image of a directory as JPEG.

    Each regular file in ``image_dir`` is opened, converted to RGB, cropped to
    a centred square with ``crop_image``, resized to IMAGE_SIZE x IMAGE_SIZE
    and written to ``out_dir`` under its original file name with '.jpg'
    appended. Entries that are not regular files (sub-directories such as a
    checkpoints folder) are skipped.

    :param image_dir: directory containing the raw images
    :param out_dir: directory that receives the processed images
    """
    # `listdir` is imported explicitly; the bare name `os` is only in scope
    # when one of the wildcard imports happens to leak it
    for file_name in sorted(listdir(image_dir)):
        image_path = path.join(image_dir, file_name)
        if not path.isfile(image_path):
            continue
        # JPEG cannot store alpha or palette modes, and load_images converts
        # to RGB anyway, so normalise the mode up front
        input_img = Image.open(image_path).convert('RGB')
        cropped = crop_image(input_img)
        resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
        out_path = path.join(out_dir, file_name) + '.jpg'
        resized.save(out_path)

In [None]:
preprocess_images(DATA_PATH, PROCESSED_PATH)

## Calculate Bottlenecks for Images

In [15]:
def load_images(image_dir):
    """
    Load every image of a directory into a single array.

    :param image_dir: directory of images that are already IMAGE_SIZE x IMAGE_SIZE
    :return: tuple ``(file_names, images)`` where ``file_names`` is the sorted
        directory listing and ``images`` is an array of shape
        ``(len(file_names), IMAGE_SIZE, IMAGE_SIZE, 3)`` (numpy's default float
        dtype) holding the RGB pixel values as decoded, in the same order
    :raises AssertionError: if an image does not have the expected height and width
    """
    # `listdir` is imported explicitly; the bare name `os` is only in scope
    # when one of the wildcard imports happens to leak it
    file_names = sorted(listdir(image_dir))
    images = np.zeros((len(file_names), IMAGE_SIZE, IMAGE_SIZE, 3))
    for index, file_name in enumerate(file_names):
        file_path = path.join(image_dir, file_name)
        img_np = np.array(Image.open(file_path).convert('RGB'))
        # check both spatial dimensions against the configured size instead of
        # a hard-coded height of 224
        assert img_np.shape[:2] == (IMAGE_SIZE, IMAGE_SIZE), file_path
        images[index] = img_np
    return file_names, images

In [19]:
file_names, imgs = load_images(PROCESSED_PATH)
len(file_names), imgs.shape

(2611, (2611, 224, 224, 3))

In [20]:
# Per-image transform applied before images are wrapped into Samples.
# NOTE(review): presumably TransposeToTensor moves the channel axis to the front — confirm in the local transformer module
transform_input = Transformer([TransposeToTensor(False)])

In [18]:
# img = img_batch[0]
# print(img.shape)
# img_t = transform_input(img)
# print(img_t.shape)
# # rdd_images = sc.parallelize([Sample.from_ndarray(img, np.array(0))])

In [20]:
# Run the bottleneck model over all images in batches and gather the outputs on the driver.
pred_list = []
# number of images sent to Spark per predict call
batch_size = 128
for start in range(0, imgs.shape[0], batch_size):
    # slice of at most batch_size images (the last batch may be shorter)
    img_batch = imgs[start : start + batch_size]
    rdd_img = sc.parallelize(img_batch)
    # each image is transformed and wrapped in a Sample; np.array(0) is passed as its label
    # NOTE(review): the label looks like a placeholder since only predictions are used — confirm
    rdd_sample = rdd_img.map(lambda img: Sample.from_ndarray(transform_input(img), np.array(0)))    
    preds = model.predict(rdd_sample)
    # bring this batch's predictions back to the driver and append them in order
    p = preds.collect()
    pred_list.extend(p)

In [28]:
len(pred_list)

2611

In [23]:
import pandas as pd

In [24]:
# label table for the sample images
labels = pd.read_csv(DATA_ROOT + '/vegnonveg-samples_labels.csv')
# NOTE(review): the rows are later paired with the bottleneck values purely by position,
# so this sort must produce the same order as the sorted file listing in load_images — verify
# (the check that compares file names with obs_uid is commented out)
labels = labels.sort_values(by='obs_uid')

In [32]:
# for a, b in zip(file_names, labels['obs_uid'].tolist()):
#     assert a == b + ".jpg", "{} = {}".format(a, b)

In [46]:
# Bottleneck values and labels are paired by position, so their counts must at least agree
label_names = labels['item_name'].tolist()
assert len(pred_list) == len(label_names), \
    "{} bottleneck vectors but {} labels".format(len(pred_list), len(label_names))

data = {
    'bottleneck_values': pred_list,
    'labels': label_names
}

In [47]:
import pickle

# the context manager flushes and closes the file as soon as the dump is done
with open(DATA_ROOT + "/bottlenecks_with_labels.pkl", 'wb') as bottleneck_file:
    pickle.dump(data, bottleneck_file)

## Train and Test NN classifier

In [34]:
import pickle
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import SGDClassifier, LogisticRegression

In [35]:
# NOTE: unpickling can execute arbitrary code; only load files written by this notebook
with open(DATA_ROOT + "/bottlenecks_with_labels.pkl", 'rb') as bottleneck_file:
    data = pickle.load(bottleneck_file)

In [36]:
# hold out 20% of the samples for testing; the fixed random_state makes the split repeatable
x_train, x_test, train_labels, test_labels = train_test_split(data['bottleneck_values'], data['labels'], test_size=0.2, random_state=101)
# sanity check: sizes of the four parts
len(x_train), len(train_labels), len(x_test), len(test_labels)

(2088, 2088, 523, 523)

In [17]:
# one hidden layer of 256 units; the fixed random_state makes weight initialisation
# and shuffling repeatable, so the cross-validation scores can be reproduced
clf = MLPClassifier(hidden_layer_sizes=(256,), random_state=101)

In [18]:
%%time 

cross_val_score(clf, x_train, train_labels, cv=4, scoring='accuracy')

CPU times: user 59.2 s, sys: 6.51 s, total: 1min 5s
Wall time: 33.4 s


array([ 0.56415094,  0.56190476,  0.6088632 ,  0.57198444])

## Other classifiers

In [41]:
%%time 

cross_val_score(LogisticRegression(), x_train, train_labels, cv=4, scoring='accuracy')

CPU times: user 31 s, sys: 153 ms, total: 31.2 s
Wall time: 30.7 s


array([ 0.5754717 ,  0.56380952,  0.56069364,  0.54669261])