# Imports

In [1]:
import os, sys, pickle, math
import numpy as np
from glob import glob
from shutil import copyfile
from PIL import ImageFile


from sklearn.cross_validation import KFold
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Lambda
from keras.layers.normalization import BatchNormalization
from keras.layers import Activation, Input
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical
from keras.utils.data_utils import get_file
from keras.preprocessing import image
from keras.models import Model
from keras.applications.vgg16 import preprocess_input
from sklearn.metrics import log_loss
from keras import backend as K

Using TensorFlow backend.


# Paths

In [2]:
# Project root: the kernel's working directory at the time this cell runs.
# Every data/model path defined below is derived from it.
cur_dir=os.getcwd()

In [3]:
# Dataset root. Use the commented-out line instead to point at the sample subset.
data_dir=cur_dir+'/data/'
#data_dir=cur_dir+'/data/sample/'
# Test images always come from the full dataset, independent of data_dir.
test_dir=cur_dir+'/data/test/'
# Local weight files are looked up in a 'models' folder next to the project directory.
local_model_path=os.path.dirname(cur_dir)+'/models/'

In [4]:
# Sub-folders of the dataset root: training images, held-out validation images,
# and the folder used for saved arrays / weights.
train_dir, valid_dir, save_dir = (data_dir + sub for sub in ('train/', 'valid/', 'save/'))

# One training folder per class.
type1_dir, type2_dir, type3_dir = (train_dir + cls for cls in ('Type_1/', 'Type_2/', 'Type_3/'))

In [26]:
# The sample set mirrors the layout of the full dataset under data/sample/.
sample_data_dir = data_dir + 'sample/'
sample_train_dir, sample_valid_dir, sample_save_dir = (
    sample_data_dir + sub for sub in ('train/', 'valid/', 'save/'))

# One sample-training folder per class.
sample_type1_dir, sample_type2_dir, sample_type3_dir = (
    sample_train_dir + cls for cls in ('Type_1/', 'Type_2/', 'Type_3/'))

# Directories

In [6]:
%cd $data_dir
%mkdir -p valid/Type_1
%mkdir -p valid/Type_2
%mkdir -p valid/Type_3
%mkdir results
%mkdir save
%mkdir -p sample/train/Type_1
%mkdir -p sample/train/Type_2
%mkdir -p sample/train/Type_3
%mkdir -p sample/test
%mkdir -p sample/valid/Type_1
%mkdir -p sample/valid/Type_2
%mkdir -p sample/valid/Type_3
%mkdir -p sample/results
%mkdir -p sample/save
%mkdir -p test/unknown

/tmp/working/Intel/data


# Configuration

In [50]:
# Tunable settings shared by the cells below.
conf = {
    'sample_size': 0.33,          # fraction of each training class copied into the sample set
    'val_split': 0.2,             # fraction of each class moved from train/ to valid/
    'batch_size': 64,             # batch size for feature extraction steps and for fit()
    'nb_epoch': 1,                # not referenced by the cells visible in this notebook
    'patience': 3,                # not referenced by the cells visible in this notebook
    'dropout': 0.7,               # dropout rate of the dense head
    'image_size': (224, 224, 3),  # model input shape; first two entries are the resize target
}

# Setup Directories

In [8]:
%cd $type1_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], valid_dir+ 'Type_1/' + shuf[i])

/tmp/working/Intel/data/train/Type_1


In [9]:
%cd $type2_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], valid_dir + 'Type_2/' + shuf[i])

/tmp/working/Intel/data/train/Type_2


In [10]:
%cd $type3_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], valid_dir + 'Type_3/' + shuf[i])

/tmp/working/Intel/data/train/Type_3


In [11]:
%cd $type1_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['sample_size'])): copyfile(shuf[i], sample_type1_dir + shuf[i])

/tmp/working/Intel/data/train/Type_1


In [12]:
%cd $type2_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['sample_size'])): copyfile(shuf[i], sample_type2_dir + shuf[i])

/tmp/working/Intel/data/train/Type_2


In [13]:
%cd $type3_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['sample_size'])): copyfile(shuf[i], sample_type3_dir + shuf[i])

/tmp/working/Intel/data/train/Type_3


In [14]:
%cd $sample_type1_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], sample_valid_dir+ 'Type_1/' + shuf[i])

/tmp/working/Intel/data/sample/train/Type_1


In [15]:
%cd $sample_type2_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], sample_valid_dir+ 'Type_2/' + shuf[i])

/tmp/working/Intel/data/sample/train/Type_2


In [16]:
%cd $sample_type3_dir
g = glob('*.jpg')
shuf=np.random.permutation(g)
for i in range(int(len(g)*conf['val_split'])): os.rename(shuf[i], sample_valid_dir+ 'Type_3/' + shuf[i])

/tmp/working/Intel/data/sample/train/Type_3


In [21]:
%cd $test_dir
%mv *.jpg unknown/

/tmp/working/Intel/data/test
mv: cannot stat ‘*.jpg’: No such file or directory


In [7]:
# Switch the working directory back to the project root (cur_dir) after the %cd calls above.
%cd $cur_dir

/tmp/working/Intel


# Utils

In [6]:
def vgg_preprocess(x):
    """Apply VGG input normalisation to channels-last image data.

    Subtracts the per-channel mean and then reverses the order of the
    channels (the mean values are listed in R, G, B order, so the result is
    in B, G, R order).

    x: array or backend tensor whose LAST axis holds the 3 colour channels,
       e.g. (batch, height, width, 3) as used by the models in this notebook.
    Returns data of the same shape as x.
    """
    # Shape (1, 1, 3) broadcasts against the trailing (height, width, channel) axes.
    vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((1,1,3))
    x = x - vgg_mean
    # Reverse the channel axis. With channels-last data `x[:, ::-1]` would
    # reverse axis 1 (image rows, i.e. flip the picture upside down) and leave
    # the channels untouched, so index the last axis explicitly.
    return x[..., ::-1]

def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=64, class_mode='categorical',
                target_size=(224,224)):
    """Return a directory iterator over the images below `dirname`.

    Thin wrapper around `gen.flow_from_directory`; all keyword arguments are
    forwarded unchanged. Note that the default `gen` is created once, when
    this function is defined, and shared by every call that omits it.
    """
    flow_options = dict(target_size=target_size, class_mode=class_mode,
                        shuffle=shuffle, batch_size=batch_size)
    return gen.flow_from_directory(dirname, **flow_options)

def get_images(train_dir, claass):
    """List the entries of one class folder as '<class>/<entry>' paths.

    train_dir: directory prefix that is concatenated directly with `claass`
               (so it is expected to end with a path separator).
    claass:    name of the class sub-directory.
    Returns a list of paths relative to `train_dir`, in os.listdir order.
    """
    relative_paths = []
    for entry in os.listdir(train_dir + claass):
        relative_paths.append(claass + '/' + entry)
    return relative_paths

def read_image(src, img_size):
    """Read the image file `src` with OpenCV and resize it.

    src:      path of the image file.
    img_size: sequence whose first two entries are (height, width) of the
              result; load_train stores the result in an array slot of shape
              (img_size[0], img_size[1], img_size[2]).
    Returns the resized image array from cv2.resize (cubic interpolation).
    Raises IOError when OpenCV cannot read the file.
    """
    # cv2 is not among the notebook's top-level imports, so import it where it
    # is used; without this the function fails with NameError.
    import cv2

    im = cv2.imread(src)
    if im is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # fail here with the offending path rather than inside cv2.resize.
        raise IOError('Could not read image: {}'.format(src))
    # cv2.resize expects dsize as (width, height), i.e. the reverse of the
    # (rows, cols) array layout. Identical to before for square sizes.
    height, width = img_size[0], img_size[1]
    im = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return im

def load_train(train_dir, img_size):
    """Load every image below `train_dir` into memory together with its label.

    train_dir: directory prefix containing one sub-directory per class
               (entries starting with '.' are ignored).
    img_size:  (height, width, channels) of the arrays to produce.
    Returns (X_all, y_all): a uint8 array of shape (n_images, *img_size) and
    an integer array with the class index of each image.
    """
    # os.listdir returns entries in arbitrary order, which would make the
    # class -> index mapping differ between machines/runs. Sorting makes it
    # deterministic (alphabetical by class folder name).
    claasses = sorted(claass for claass in os.listdir(train_dir) if claass[0] != '.')
    files = []
    y_all = []

    for i, claass in enumerate(claasses):
        claass_files = get_images(train_dir, claass)
        files.extend(claass_files)

        # One label per file of this class.
        y_all.extend([i] * len(claass_files))
        print("{0} photos of {1}".format(len(claass_files), claass))
    y_all = np.array(y_all)

    # Pre-allocate the image tensor; every slot is overwritten below.
    X_all = np.ndarray((len(files), img_size[0], img_size[1], img_size[2]), dtype=np.uint8)
    for i, im in enumerate(files):
        X_all[i] = read_image(train_dir+im, img_size)
        if i%1000 == 0: print('Processed {} of {}'.format(i, len(files)))
    print(X_all.shape, y_all.shape)
    return X_all, y_all

# Load Data

In [9]:
# Generator that applies Keras' VGG16 preprocess_input to every image.
# NOTE(review): the get_batches calls visible in this notebook do not pass
# `gen`, so they use get_batches' default (un-preprocessed) generator.
gen=image.ImageDataGenerator(preprocessing_function=preprocess_input)

In [7]:
# shuffle=False is essential here: the labels are built from `.classes`
# (file order) while the features come from predict_generator (iteration
# order). With shuffling enabled the two orders differ and every feature row
# would be paired with some other image's label.
# batch_size is taken from conf so it matches the step counts computed from
# conf['batch_size'] further down.
# No preprocessing generator is passed: the VGG model built below already
# normalises its input in its first (Lambda) layer.
batches_train=get_batches(train_dir, shuffle=False, batch_size=conf['batch_size'], target_size=conf['image_size'][0:2])
batches_valid=get_batches(valid_dir, shuffle=False, batch_size=conf['batch_size'], target_size=conf['image_size'][0:2])

Found 1185 images belonging to 3 classes.
Found 296 images belonging to 3 classes.


In [8]:
# One-hot encode the integer class index of every file in each iterator.
train_labels, valid_labels = (
    to_categorical(batches.classes) for batches in (batches_train, batches_valid))

In [31]:
# Sanity check: one one-hot row per training image, one column per class.
train_labels.shape

(1185, 3)

In [9]:
def conv_block(model, num_layers, num_filters):
    """Append one VGG convolution block to `model` and return it.

    The block is `num_layers` repetitions of (1-pixel zero padding, 3x3 ReLU
    convolution with `num_filters` filters) followed by a single 2x2 max-pool
    with stride 2. `model` is modified in place.
    """
    for _ in range(num_layers):
        padding = ZeroPadding2D((1, 1))
        model.add(padding)
        convolution = Convolution2D(num_filters, (3, 3), activation='relu')
        model.add(convolution)
    pooling = MaxPooling2D((2, 2), strides=(2, 2))
    model.add(pooling)
    return model
    
def FCB_block(model, units=4096, dropout=False, bn=False):
    """Append a fully-connected block to `model` and return it.

    Layer order: Dense(units) -> optional BatchNormalization (when `bn`) ->
    ReLU -> optional Dropout(0.5) (when `dropout`). `model` is modified in
    place.
    """
    block_layers = [Dense(units)]
    if bn:
        block_layers.append(BatchNormalization())
    block_layers.append(Activation('relu'))
    if dropout:
        block_layers.append(Dropout(0.5))

    for layer in block_layers:
        model.add(layer)
    return model

def create_full_vgg16(input_shape):
    """Build the complete VGG16 network (with the 1000-way classifier) and load its weights.

    input_shape: shape of one input image, passed to the preprocessing Lambda layer.
    Weights are read from `local_model_path` when the file exists there,
    otherwise they are fetched with keras' get_file.
    """
    model = Sequential()
    # Input normalisation is part of the model itself.
    model.add(Lambda(vgg_preprocess, input_shape=input_shape, output_shape=input_shape))

    # Five convolution blocks: (number of conv layers, number of filters).
    for depth, filters in ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512)):
        model = conv_block(model, depth, filters)

    # Classifier: two dense blocks with dropout, then the 1000-way softmax.
    model.add(Flatten())
    for _ in range(2):
        model = FCB_block(model, dropout=True)
    model.add(Dense(1000, activation='softmax'))

    fname = 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'
    down_path = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
    local_path = local_model_path + fname
    if not os.path.exists(local_path):
        # No local copy: download (or reuse keras' cached copy).
        weights_path = get_file(fname, down_path, cache_subdir='models')
    else:
        weights_path = local_path
    model.load_weights(weights_path)
    return model

def create_conv_vgg16(input_shape):
    """Build only the convolutional part of VGG16 and load the 'notop' weights.

    input_shape: shape of one input image, attached to the first padding layer.
    Unlike create_full_vgg16 this model contains no preprocessing Lambda layer.
    Weights are read from `local_model_path` when the file exists there,
    otherwise they are fetched with keras' get_file.
    """
    model = Sequential()
    # The first padding + convolution pair is added by hand because the first
    # layer has to carry input_shape; conv_block then adds the block's second
    # convolution and its pooling layer.
    model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    for depth, filters in ((1, 64), (2, 128), (3, 256), (3, 512), (3, 512)):
        model = conv_block(model, depth, filters)

    fname = 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    down_path = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    local_path = local_model_path + fname
    if not os.path.exists(local_path):
        # No local copy: download (or reuse keras' cached copy).
        weights_path = get_file(fname, down_path, cache_subdir='models')
    else:
        weights_path = local_path
    model.load_weights(weights_path)
    return model

In [40]:
# NOTE(review): leftover interactive help lookup; it has no effect on the
# analysis and can be dropped from the final notebook.
?Dense

In [10]:
def finetune(model, num_class, level=2):
    """Prepare `model` for fine-tuning on `num_class` classes and return it.

    Every layer located before the `level`-th last Convolution2D/Dense layer
    is frozen, the final layer is removed, and a new softmax layer with
    `num_class` outputs is appended. `model` is modified in place.
    """
    layers = model.layers
    weighted_positions = []
    for position, layer in enumerate(layers):
        if any(type(layer) is kind for kind in (Convolution2D, Dense)):
            weighted_positions.append(position)

    # Index of the first layer that stays trainable.
    first_trainable = weighted_positions[-level]
    for frozen_layer in layers[:first_trainable]:
        frozen_layer.trainable = False

    # Swap the classifier output for one sized to the new task.
    model.pop()
    model.add(Dense(num_class, activation='softmax'))
    return model

def get_base_model(model, level=2):
    """Strip the top of `model` and return it.

    Removes the `level`-th last Convolution2D/Dense layer and every layer
    after it, leaving the lower part of the network as a feature extractor.
    `model` is modified in place.
    """
    layers = model.layers
    weighted_positions = [
        position for position, layer in enumerate(layers)
        if type(layer) is Convolution2D or type(layer) is Dense
    ]
    layer_count = len(layers)
    if layer_count == 0:
        # Nothing to remove from an empty model.
        return model

    # Everything from this index to the end is popped, one layer at a time.
    cut_position = weighted_positions[-level]
    for _ in range(layer_count - cut_position):
        model.pop()
    return model

In [11]:
# Build the full VGG16 (including its 1000-way classifier) and load its weights.
vgg16_full=create_full_vgg16(conf['image_size'])

In [12]:
# Size the replacement softmax from the data (one output per class folder)
# instead of a hard-coded count that did not match the 3 classes found.
# NOTE: finetune() modifies and returns the model it is given, so vgg16_ft1
# is the same object as vgg16_full.
vgg16_ft1=finetune(vgg16_full, len(batches_train.class_indices))

In [13]:
# Drop the third-from-last Conv/Dense layer and everything after it, keeping
# the lower layers as a feature extractor.
# NOTE: get_base_model() pops layers from the model it is given and returns
# that same object, so vgg16_ft1 is truncated as well.
vgg16_base1=get_base_model(vgg16_ft1, level=3)

In [14]:
# Print the layers that remain in the truncated model.
vgg16_base1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None, 224, 224, 3)       0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 226, 226, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 224, 224, 64)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 226, 226, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 64)      0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 114, 114, 64)      0         
__________

In [2]:
# Tell Pillow to load image files that are truncated instead of raising an
# error. This has to run before the generators below start reading images.
ImageFile.LOAD_TRUNCATED_IMAGES = True

Object `predict_generator` not found.


In [38]:
# Run every training / validation image through the truncated VGG16 to get
# its feature vector. The number of steps is derived from each iterator's own
# batch size, so all images are covered exactly once per pass even if the
# iterators were created with a batch size different from conf['batch_size'].
train_base=vgg16_base1.predict_generator(batches_train, math.ceil(batches_train.n/batches_train.batch_size))
valid_base=vgg16_base1.predict_generator(batches_valid, math.ceil(batches_valid.n/batches_valid.batch_size))

Exception in thread Thread-5:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 606, in data_generator_task
    generator_output = next(self._generator)
  File "/usr/local/lib/python3.5/dist-packages/keras/preprocessing/image.py", line 727, in __next__
    return self.next(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/preprocessing/image.py", line 960, in next
    target_size=self.target_size)
  File "/usr/local/lib/python3.5/dist-packages/keras/preprocessing/image.py", line 330, in load_img
    img = img.resize(wh_tuple)
  File "/usr/local/lib/python3.5/dist-packages/PIL/Image.py", line 1630, in resize
    self.load()
  File "/usr/local/lib/python3.5/dist-packages/PIL/ImageFile.py", line 231, in load
    "(%d 

ValueError: Cannot feed value of shape () for Tensor 'lambda_2_input:0', which has shape '(?, 224, 224, 3)'

In [None]:
# Quick look at the dimensions of the extracted training features.
train_base.shape

In [90]:
# Cache the extracted features on disk. np.save appends '.npy', so these are
# written as X1_train_conv_224.npy / X1_valid_conv_224.npy -- the file names
# the np.load cell reads back (the previous 'X_...' names did not match them).
np.save(save_dir+'X1_train_conv_224', train_base)
np.save(save_dir+'X1_valid_conv_224', valid_base)

In [15]:
# Load previously extracted features from save_dir, so the feature-extraction
# cells do not have to be re-run in a new session.
train_base=np.load(save_dir+'X1_train_conv_224.npy')
valid_base=np.load(save_dir+'X1_valid_conv_224.npy')

In [16]:
# Sanity check: one row per training image; the second dimension must equal
# the input size of the dense head defined below.
train_base.shape

(1185, 25088)

In [42]:
# Dense classifier head trained on the cached features: two
# Dense -> BatchNorm -> ReLU -> Dropout stages followed by a 3-way softmax.
vgg16_top1 = Sequential([
    Dense(4096, input_shape=(25088, )),
    BatchNormalization(),
    Activation('relu'),
    Dropout(conf['dropout']),
    Dense(4096),
    BatchNormalization(),
    Activation('relu'),
    Dropout(conf['dropout']),
    Dense(3, activation='softmax'),
])

In [47]:
# Nesterov-momentum SGD used to train the dense head.
sgd = SGD(
    lr=1e-4,
    momentum=0.9,
    nesterov=True,
    decay=1e-6)

In [48]:
# Multi-class classification: cross-entropy on one-hot labels, tracking accuracy.
vgg16_top1.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [49]:
# Train the head on the cached features and validate on the held-out features.
vgg16_top1.fit(
    train_base, train_labels,
    validation_data=(valid_base, valid_labels),
    batch_size=conf['batch_size'],
    epochs=5)

Train on 1185 samples, validate on 296 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f69000b3cc0>

In [38]:
# Persist the trained head's weights to save_dir as m1.h5.
vgg16_top1.save_weights(save_dir+'m1.h5')