In [6]:
from keras.utils.data_utils import get_file
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
import sys
import PIL.Image as pil_image
import glob
import json
from collections import defaultdict
import scipy.io
import numpy as np
import os
import random
%matplotlib inline
import matplotlib.pyplot as plt
from myutil import *
import keras
from keras.utils import np_utils
from keras.models import load_model,Model
from keras.applications.imagenet_utils import preprocess_input
from keras.applications import VGG16, ResNet50
from keras.layers import Dense, GlobalAveragePooling2D, Input, Flatten
from keras import layers
from keras import backend as K
from keras import optimizers
from keras.callbacks import ModelCheckpoint, TensorBoard

Using TensorFlow backend.


In [7]:
def original(img):
    """
    return the original pic
    """
    tmp = np.zeros_like(img)
    for i in range(3):
        tmp[:,:,i] = img[:,:,i] - img[:,:,i].min()
    return tmp.astype(np.uint8)

def lookup(d, key, i):
    """
    looking up key value i in dictionary d 
    """
    if key == 'make+model':
        for item in d:
            if item['pp_brand_id']+' '+item['pp_genre_id'] == i:
                return item['chinese']
    else:
        for item in d:
            if item[key] == i:
                return item['chinese']
            
def crop_img(img, box):
    """
    box: left, top, right, bottom
    """
    w, h = img.size
    box = [box[0]*w, box[1]*h,box[2]*w,box[3]*h]
    return img.crop(box)
            
def load_img(path, grayscale=False, target_size=None, box=None):
    """Loads an image into PIL format.
    # Arguments
        path: Path to image file
        grayscale: Boolean, whether to load the image as grayscale.
        target_size: Either `None` (default to original size)
            or tuple of ints `(img_height, img_width)`.
        box: The crop rectangle, as a (left, upper, right, lower)-tuple.
    # Returns
        A PIL Image instance.
    # Raises
        ImportError: if PIL is not available.
    """
    if pil_image is None:
        raise ImportError('Could not import PIL.Image. '
                          'The use of `array_to_img` requires PIL.')
    img = pil_image.open(path)
    if grayscale:
        if img.mode != 'L':
            img = img.convert('L')
    else:
        if img.mode != 'RGB':
            img = img.convert('RGB')
    if box:
        img = crop_img(img, box)
        
    if target_size:
        hw_tuple = (target_size[1], target_size[0])
        #resize(w, h)
        if img.size != hw_tuple:
            img = img.resize(hw_tuple)
    return img
            
def load_data(image_paths, labels, num_of_class=10, target_size=(227, 227), box=None):
    """
    Given list of paths, resize and bounding box load images as one numpy array of shape
        (num_images, crop_size, crop_size, channel)
        box:[top, left, bottom, right]
    :return X: image array
     return y: one hot encoded labels
    """
    if box:
        X = np.zeros((len(image_paths), crop_size[0],crop_size[1], 3))
        ## google output box :## 0: top 1: left 2 lower 3 right
        for i,path in enumerate(image_paths):
            new_box = (box[i][1],box[i][0],box[i][3], box[i][2])
            X[i, :] = img_to_array(load_img(path, target_size=target_size, box=new_box))
        y = np_utils.to_categorical(labels, num_of_class)
        return X, y
    else:
        X = np.zeros((len(image_paths), crop_size[0],crop_size[1], 3))
        for i,path in enumerate(image_paths):
            X[i, :] = img_to_array(load_img(path, target_size=target_size))
        y = np_utils.to_categorical(labels, num_of_class)
        return X, y
    
def load_data_flip(image_paths, labels, num_of_class=10, target_size=(227, 227), box=None):
    """
    Given list of paths, resize and bounding box load images as one numpy array of shape
        (num_images, crop_size, crop_size, channel)
        box:[top, left, bottom, right]
    :return X: image array
     return y: one hot encoded labels
    """
    if box.any():
        X = np.zeros((len(image_paths), crop_size[0],crop_size[1], 3))
        ## google output box :## 0: top 1: left 2 lower 3 right
        for i,path in enumerate(image_paths):
            new_box = (box[i][1],box[i][0],box[i][3], box[i][2])
            if bool(random.getrandbits(1)):
                X[i, :] = img_to_array(load_img(path, target_size=target_size, box=new_box))
            else: 
                X[i, :] = img_to_array(load_img(path, target_size=target_size, box=new_box))[:,::-1,:]
        y = np_utils.to_categorical(labels, num_of_class)
        return X, y
    else:
        X = np.zeros((len(image_paths), crop_size[0],crop_size[1], 3))
        for i,path in enumerate(image_paths):
            if bool(random.getrandbits(1)):
                X[i, :] = img_to_array(load_img(path, target_size=target_size))
            else: 
                X[i, :] = img_to_array(load_img(path, target_size=target_size))[:,::-1,:]
        y = np_utils.to_categorical(labels, num_of_class)
        return X, y
    
def judge_box(left, right, left_t, right_t):
    if left < left_t and right > right_t:
        return True

def load_box(path,x_threshold=0.51,x_threshold2=0.51):
    if x_threshold > x_threshold2:
        print 'threshold error'
    with open(path) as f:
        bb_list = np.load(f).item()
    result = defaultdict(list)
    ## delete left > 0.5 right border < 0.5
    ## 0: top 1: left 2 lower 3 right
    for k in bb_list.keys():
        if len(bb_list[k]) == 1:
            result[k] = bb_list[k]
            continue
        for bb_box in bb_list[k]:
            if judge_box(left=bb_box[1], right=bb_box[3], left_t=x_threshold, right_t=x_threshold2):
                result[k].append(bb_box)
    return result


In [8]:
def generate_arrays(batch_size, target_size = (448,448)):
    sample_number = len(train_paths)
    box = np.array([box_list['/'.join(item.split('/')[-3:])][0] for item in train_paths])
    while True:
        if shuffle == True :
            idx1 = list(range(len(train_labels[0])))
            random.shuffle(idx1)
            
        for i in range(0,sample_number,batch_size):
            if i + batch_size > sample_number:
                idx = idx1[i:]
            else:
                idx = idx1[i:i+batch_size]
            
            X_train, y_train = load_data_flip(train_paths[idx], train_labels[0][idx], num_of_makes, target_size, box[idx])
            y_train2 = np_utils.to_categorical(train_labels[1][idx], num_of_models)
            #y_train3 = np_utils.to_categorical(train_labels[2][idx], num_of_types)
            X_train = preprocess_input(X_train)
            ## no car pic
            i_tmp = i % 30489
            X_negative, y_negative = load_data(train_negative[i_tmp:i_tmp+2], negative_label[0][i_tmp:i_tmp+2], num_of_makes, target_size)
            y_negative2 = np_utils.to_categorical(negative_label[0][i_tmp:i_tmp+2], num_of_models)
            #y_negative3 = np_utils.to_categorical(negative_label[0][i_tmp:i_tmp+2], num_of_types)
            #yield (X_train, {'make':y_train, 'model':y_train2})
            if len(X_train) == 0:
                print i, X_train
            yield (np.concatenate((X_train, X_negative), axis=0),
                   {'make':np.concatenate((y_train, y_negative), axis=0), 
                    'model':np.concatenate((y_train2, y_negative2), axis=0)
                    }
                  )

In [3]:
# setting
#(img_height, img_width)
crop_size = (448, 448)
data_dir = '/root/playground/START/car_class/yiche_image/'
shuffle = True
alpha = 0.5
model_name = ''



# key:genre(int) value:1~10 find in type list
genre_type_map = np.load(os.path.join(data_dir,'map/yiche_car_type.npy')).item()


## key: box_list('20169/3223/bl110088.jpg', value:box
box_list =load_box(os.path.join(data_dir,'map/yiche_box.npy'))

## car_info
car_info = np.load(os.path.join(data_dir,'map/yiche_car_info_npy.npy'))

train_paths = []
train_labels = defaultdict(list)
train_makes = []
train_models = []
train_types = []

val_paths = []
val_labels = defaultdict(list)
val_makes = []
val_models = []
val_types = []

pos = car_info[0]['imgs'].keys()

# indentify make/model/car_id.jpg
brands = os.listdir(data_dir+'yiche_processed')
for brand in brands:
    genres = os.listdir(os.path.join(data_dir,'yiche_processed', brand))
    for genre in genres:
        for po in pos:
            pics = glob.glob(os.path.join(data_dir,'yiche_processed', brand, genre,po+'*.jpg'))
            if len(pics)<3:
                split = len(pics)
            else:
                split = len(pics)*4/5
            # train data
            for pic in pics[:split]:
                # two pic in a row
                train_paths += [pic]
                # label|
                train_models += [brand + ' ' +genre]
                train_makes += [brand]
                train_types += [genre_type_map[int(genre)]]
            
            #validation data
            for pic in pics[split:]:
                # two pic in a row
                val_paths += [pic]
                # label|
                val_models += [brand + ' ' +genre]
                val_makes += [brand]
                val_types += [genre_type_map[int(genre)]]

assert(len(train_makes) == len(train_paths))

#indexing building
l2i_makes = sorted(list(set(train_makes + ['-1'])))
l2i_models = sorted(list(set(train_models + ['-1'])))

# for multiple label
train_labels[0] = np.array([l2i_makes.index(item) for item in train_makes])
train_labels[1] = np.array([l2i_models.index(item) for item in train_models])
train_labels[2] = np.array(train_types)

val_labels[0] = np.array([l2i_makes.index(item) for item in val_makes])
val_labels[1] = np.array([l2i_models.index(item) for item in val_models])
val_labels[2] = np.array(val_types)
train_paths = np.array(train_paths)

# data info
num_of_models = len(l2i_models)
num_of_makes = len(l2i_makes)

#saving mapping info
#np.save(os.path.join(data_dir,'map/l2i_make'),l2i_makes)
#np.save(os.path.join(data_dir,'map/l2i_models'),l2i_models)


print 'train brand num:',len(l2i_makes),'genre num:', len(l2i_models)
print  'img num:',len(train_makes), 'validaiont img num:', len(val_makes)


train brand num: 230 genre num: 1581
img num: 41865 validaiont img num: 11508


In [4]:
## Caltech 256 except 252 car side
train_negative = []
cal_path = glob.glob('/root/playground/START/car_class/256_ObjectCategories/*')
for item in cal_path:
    train_negative += glob.glob(item + '/*.jpg')
    
random.shuffle(train_negative)
train_negative = train_negative
negative_label = [[0]*len(train_negative), [0]*len(train_negative)]
print len(train_labels[0]), len(train_negative)

41865 30491


## Create Model

In [None]:
K.clear_session()
config = tf.ConfigProto(device_count={'CPU' : 1, 'GPU' : 0})
sess = tf.Session(config=config)
set_session(sess)

output_dim = [230, 1581]
img = tf.placeholder(tf.float32, shape=(None, 448, 448, 3))
label_make = tf.placeholder(tf.float32, shape=(None, output_dim[0]))
label_model = tf.placeholder(tf.float32, shape=(None, output_dim[1]))

base_model = VGG16(input_tensor=img, include_top=False)

# last conv layer of vgg
conv5_3 = base_model.layers[-2].output

# bilinear pool
phi_I = tf.reshape(tf.einsum('ijkm,ijkn->imn', conv5_3, conv5_3), [-1, 512*512])
phi_I = tf.divide(phi_I,784.0)
y_ssqrt = tf.multiply(tf.sign(phi_I),tf.sqrt(tf.abs(phi_I)+1e-12))
z_l2 = tf.nn.l2_normalize(y_ssqrt, dim=1)

# softmax
fc1 = Dense(output_dim[0], activation='softmax', name='make')
fc2 = Dense(output_dim[1], activation='softmax', name='model')
pred_make = fc1(z_l2)
pred_model = fc2(z_l2)

# create multitask softmax layer
loss_make = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label_make, logits=pred_make))
loss_model = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label_model, logits=pred_model))
loss_joint = 0.5*loss_make + 0.5*loss_model
optimizer = tf.train.MomentumOptimizer(learning_rate=0.9, momentum=0.9).minimize(loss_joint)

# evaluation metric
correct_prediction_make = tf.equal(tf.argmax(pred_make, 1), tf.argmax(label_make, 1))
accuracy_make = tf.reduce_mean(tf.cast(correct_prediction_make, tf.float32))

correct_prediction_model = tf.equal(tf.argmax(pred_model, 1), tf.argmax(label_model, 1))
accuracy_model = tf.reduce_mean(tf.cast(correct_prediction_model, tf.float32))

# initialization
init = tf.global_variables_initializer()
sess.run(init)

# laod vgg weight
WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                                    WEIGHTS_PATH_NO_TOP,
                                    cache_subdir='models')
print weights_path
#base_model.load_weights(weights_path)
print 'load model complete'

/root/.keras/models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [19]:
batch_size = 5
sout = sys.stdout
print('Starting training')
lr = 1.0
break_training_epoch = 1
generator = generate_arrays(batch_size, crop_size)
for epoch in range(break_training_epoch):
    avg_cost = 0.
    total_batch = int(len(train_paths)/batch_size)
    for b in range(total_batch):
        batch_xs, batch_ys = generator.next()
        batch_y1 = batch_ys['make']
        batch_y2 = batch_ys['model']
        res = sess.run([loss_make, loss_model,loss_joint, accuracy_make, accuracy_model,optimizer], 
                       feed_dict={img: batch_xs, label_make: batch_y1,label_model:batch_y2 })
        sout.write("%d/%d\t loss_make:%5f loss_model:%5f loss_joint:%5f %.4f\t%.4f"%(b,total_batch,res[0],res[1],res[2], res[3],res[4])) 
        sout.flush()
        sout.write('\r')
    sout.write('\n')

Starting training
3/8373	 loss_make:5.438079 loss_model:7.365817 loss_joint:6.401948 0.2857	0.0000

KeyboardInterrupt: 

In [9]:
base_model.layers[2].get_weights()

[array([[[[  1.66219279e-01,   1.42701820e-01,  -4.02113283e-03, ...,
             6.18828237e-02,  -1.74057148e-02,  -3.00644431e-02],
          [  9.46945231e-03,   3.87477316e-03,   5.08365929e-02, ...,
            -2.77981739e-02,   1.71373668e-03,   6.82722731e-03],
          [  6.32681847e-02,   2.12877709e-02,  -1.63465310e-02, ...,
             8.80054955e-04,   6.68104272e-03,  -1.41139806e-03],
          ..., 
          [  3.47490981e-03,   8.47019628e-02,  -4.07223180e-02, ...,
            -1.13523193e-02,  -7.48998486e-03,   3.19077494e-03],
          [  5.97234145e-02,   4.97663505e-02,  -3.23118735e-03, ...,
             1.43114366e-02,   3.03175431e-02,  -4.23925705e-02],
          [  1.33459672e-01,   4.95484173e-02,  -1.78808011e-02, ...,
             2.25385167e-02,   3.02020740e-02,  -2.17075031e-02]],
 
         [[  2.12007999e-01,   2.10127644e-02,  -1.47626130e-02, ...,
             2.29580477e-02,   1.23102348e-02,  -3.08422819e-02],
          [ -2.62175221e-03, 

In [20]:
base_model.layers[2].get_weights()

[array([[[[  1.66219637e-01,   1.42701536e-01,  -4.02133074e-03, ...,
             6.18830323e-02,  -1.74050219e-02,  -3.00646629e-02],
          [  9.46845207e-03,   3.87373753e-03,   5.08370474e-02, ...,
            -2.77981255e-02,   1.71578722e-03,   6.82684872e-03],
          [  6.32681325e-02,   2.12877449e-02,  -1.63462851e-02, ...,
             8.80071952e-04,   6.68212073e-03,  -1.41169527e-03],
          ..., 
          [  3.47735453e-03,   8.47014561e-02,  -4.07227464e-02, ...,
            -1.13517242e-02,  -7.49272574e-03,   3.19109811e-03],
          [  5.97230680e-02,   4.97662686e-02,  -3.23145301e-03, ...,
             1.43113760e-02,   3.03176586e-02,  -4.23925333e-02],
          [  1.33459508e-01,   4.95483428e-02,  -1.78809240e-02, ...,
             2.25383770e-02,   3.02023385e-02,  -2.17075516e-02]],
 
         [[  2.12008223e-01,   2.10124291e-02,  -1.47627257e-02, ...,
             2.29585003e-02,   1.23113580e-02,  -3.08425371e-02],
          [ -2.62276735e-03, 

In [12]:
fc1.get_weights()

[array([[-0.00413576, -0.0006756 , -0.0037106 , ..., -0.00403977,
         -0.00420736,  0.00389873],
        [-0.00413694, -0.00323165,  0.00388925, ...,  0.00040348,
         -0.00098363, -0.00389307],
        [ 0.00017147, -0.0022986 , -0.00253703, ..., -0.00378009,
         -0.00244464,  0.00041722],
        ..., 
        [ 0.00413261, -0.00360248,  0.00223137, ..., -0.00176643,
         -0.00192812,  0.00222079],
        [-0.00187596,  0.00193346,  0.00126447, ...,  0.00450095,
         -0.00288231,  0.0002259 ],
        [-0.00257495,  0.00263139,  0.00189016, ...,  0.00191831,
          0.00459433,  0.00021668]], dtype=float32),
 array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [21]:
fc1.get_weights()

[array([[-0.0040932 , -0.00067632, -0.00371132, ..., -0.00404049,
         -0.00420808,  0.003898  ],
        [-0.00413374, -0.00323171,  0.00388919, ...,  0.00040342,
         -0.00098368, -0.00389312],
        [ 0.00018196, -0.00229867, -0.0025371 , ..., -0.00378016,
         -0.00244471,  0.00041715],
        ..., 
        [ 0.00413293, -0.00360249,  0.00223136, ..., -0.00176644,
         -0.00192813,  0.00222078],
        [-0.00187535,  0.00193345,  0.00126445, ...,  0.00450093,
         -0.00288233,  0.00022588],
        [-0.00257116,  0.00263122,  0.00188998, ...,  0.00191814,
          0.00459415,  0.00021651]], dtype=float32),
 array([ 0.0072673 , -0.00011194, -0.00011224, -0.0001119 ,  0.0008502 ,
         0.00016745,  0.00064668,  0.000168  , -0.00011243, -0.00011149,
         0.00041688,  0.00041787, -0.00011199, -0.00011156, -0.00011221,
        -0.00011188, -0.00011194, -0.00011165, -0.00011198, -0.00011171,
         0.00103112,  0.00217623, -0.00011184, -0.00011191, -0.00

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline
import matplotlib.pyplot as plt

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /data/mnist/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /data/mnist/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /data/mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /data/mnist/t10k-labels-idx1-ubyte.gz


In [None]:
MNIST = input_data.read_data_sets("/data/mnist", one_hot=True)

In [4]:
train_data = MNIST.train.images.reshape(-1,28,28)

In [5]:
train_data[0]

(55000, 28, 28)