Check GPU

In [1]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

Imports

In [2]:
import sys, os, csv
from urllib import request, error
from PIL import Image
from io import BytesIO
import boto3
import random

In [3]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import io
from io import BytesIO

import time
import tempfile
import pickle
from random import shuffle
import multiprocessing
from multiprocessing import Process, Manager

In [4]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import applications
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.utils import Sequence

from skimage.transform import resize

Constants

In [5]:
# set constants
model_dir = '../models'

batch_size = 32

height = 100
width = 100
color_mode = 'rgb'
depth = 3 if color_mode == 'rgb' else 1

n_layers_to_tune = 0

should_prune = False
should_subset = True
subset_length = 10000
should_debug = False

n_cpus = multiprocessing.cpu_count()
n_workers = n_cpus - 1 # None defaults to n_cpus
print('There are', n_cpus, 'cpu cores available')

There are 16 cpu cores available


Setup

In [24]:
def debug(*string):
    if should_debug:
        print(string)

In [7]:
s3 = boto3.resource('s3')
bucket_name = 'landmark-data-12345'
bucket = s3.Bucket(bucket_name)

In [8]:
class_dist = {}
classes = []
filekeys = []

if os.path.isfile('pickles/filekeys'):
    print('loading data from files')
    
    file = open('pickles/filekeys', 'rb')
    filekeys = pickle.load(file)
    file.close()
    
    file = open('pickles/classes', 'rb')
    classes = pickle.load(file)
    file.close()
    
    file = open('pickles/class_dist', 'rb')
    class_dist = pickle.load(file)
    file.close()
    
else:
    print('data pickles dont exist, generating')
    objects = bucket.objects.filter(Prefix="data/train/")
    for o in objects:
        if o.key.endswith('.jpg'):
            filekeys.append(o.key)
            cl = o.key.split('/')[2]
            if cl not in classes:
                class_dist[cl] = 1
                classes.append(cl)
            else:
                class_dist[cl] += 1

    shuffle(filekeys)
    file = open('pickles/filekeys', 'wb')
    pickle.dump(filekeys, file)
    file.close()

    file = open('pickles/classes', 'wb')
    pickle.dump(classes, file)
    file.close()

    file = open('pickles/class_dist', 'wb')
    pickle.dump(class_dist, file)
    file.close()
    
print('data loaded')

loading data from files
data loaded


In [9]:
if should_subset:
    filekeys = filekeys[:subset_length]
num_filekeys = len(filekeys)
num_classes = len(classes)
print('there are', num_filekeys, 'images across', num_classes, 'classes')

there are 10000 images across 14940 classes


Data viz

In [10]:
if should_prune:
    min_training_examples = 100
    # min_training_examples = 0

    if min_training_examples > 0:
        pruned_classes = [ key for key in class_dist.keys() if class_dist[key] > min_training_examples ]
        num_classes_pruned = len(pruned_classes)

        filekeys_pruned = [ key for key in filekeys if key.split('/')[2] in pruned_classes ]
        num_filekeys_pruned = len(filekeys_pruned)
    else:
        pruned_classes = class_dist.keys()
        num_classes_pruned = len(pruned_classes)

        filekeys_pruned = filekeys
        num_filekeys_pruned = len(filekeys_pruned)

    file = open('pickles/pruned_classes', 'wb')
    pickle.dump(pruned_classes, file)
    file.close()

    print(len(pruned_classes),'classes with more than',min_training_examples,'examples')
    print('after pruning, there are', num_filekeys_pruned, 'images across', num_classes_pruned, 'classes')

In [11]:
# class_dist_keys = np.array(list(class_dist.keys()))
# class_dist_values = np.array(list(class_dist.values()))
# sorted_class_dist_values = sorted(class_dist_values)
# print(class_dist_values)
# print(sorted_class_dist_values)
# print([key for key in class_dist.keys() if class_dist[key] == 49495])

# plt.bar(range(len(sorted_class_dist_values)), sorted_class_dist_values)
# plt.ylim(top=4000)
# plt.show()

Caching

In [12]:
# def generate_batch(idx, batch_size=batch_size):
#     images = []
#     labels = []
#     while len(images) < batch_size:
#         filekey = filekeys[idx]
#         label = filekey.split('/')[2]

#         try:
#             img = load_s3_file(filekey)
#             processed_img = process_img(img)
#             images.append(processed_img)

#             onehot_label_idx = classes.index(label)
#             onehot_label = np.zeros(num_classes)
#             onehot_label[onehot_label_idx] = 1
#             labels.append(onehot_label)
#         except:
#             pass

#         idx = (idx + 1) % len(filekeys)
            
#     return ( np.array(images), np.array(labels) )

# def generate_batch_for_cache(L, idx, batch_size=batch_size):
#     while len(L) > 100:
#         time.sleep(0.5)

#     result = generate_batch(idx, batch_size)
#     debug('adding batch starting at index', idx)
#     L.append(result)

Helper funcs

In [13]:
def load_s3_file_indirect(filekey):
    debug('LS3: loading')
    object = bucket.Object(filekey)
    debug('LS3: a')
    tmp = tempfile.NamedTemporaryFile()
    debug('LS3: b')
    with open(tmp.name, 'wb') as f:
        debug('LS3: c')
        object.download_fileobj(f)
        debug('LS3: d')
        img = mpimg.imread(tmp.name)
        debug('LS3: done loading')
        return img
    
# def load_s3_file_direct(filekey):
#     debug('LS3: loading')
#     object = bucket.Object(filekey)
#     debug('LS3: a')
#     file_stream = io.StringIO()
#     debug('LS3: b')
#     object.download_fileobj(file_stream)
#     debug('LS3: c')
#     img = mpimg.imread(file_stream)
#     debug('LS3: done loading')
#     return img

def load_s3_file(filekey):
    return load_s3_file_indirect(filekey)

In [26]:
def process_img(img):
    debug('PI: processing')
    processed_img = img / 255.0
    debug('PI: a')
#     processed_img = resize(img, (height, width, depth), mode='reflect', anti_aliasing=True)
    processed_img = processed_img[:height,:width,:depth]
    print(processed_img.shape)
    debug('PI: done processing')
    return processed_img

In [15]:
class BatchSequence(Sequence):

    def __init__(self, filekeys=filekeys, batch_size=batch_size):
        debug('BS: initializing')
        self.filekeys = filekeys
        self.batch_size = batch_size
        debug('BS: done initializing')

    def __len__(self):
        debug('BS: sending length')
        return int(np.ceil(len(self.filekeys) / float(self.batch_size)))

    def __getitem__(self, idx):
        debug('BS: getting batch')
        images = []
        labels = []
        global_idx = idx*self.batch_size
        
        while len(images) < self.batch_size:
            filekey = filekeys[global_idx]
            label = filekey.split('/')[2]
            
            try:
                img = load_s3_file(filekey)
                processed_img = process_img(img)
                images.append(processed_img)

                onehot_label_idx = classes.index(label)
                onehot_label = np.zeros(num_classes)
                onehot_label[onehot_label_idx] = 1
                labels.append(onehot_label)
            except:
                pass

            global_idx = (global_idx + 1) % len(filekeys)
            
        debug('BS: providing batch', idx, 'to queue')
        return ( np.array(images), np.array(labels) )


In [16]:
# def get_batch(batch_size=batch_size):
#     idx = 0
#     while True:
#         images = []
#         labels = []
#         while len(images) < batch_size:
#             filekey = filekeys[idx]
#             label = filekey.split('/')[2]
            
#             try:
#                 img = load_s3_file(filekey)
#                 processed_img = process_img(img)
#                 images.append(processed_img)

#                 onehot_label_idx = classes.index(label)
#                 onehot_label = np.zeros(num_classes)
#                 onehot_label[onehot_label_idx] = 1
#                 labels.append(onehot_label)
#             except:
#                 pass

#             idx = (idx + 1) % len(filekeys)
            
#         result = ( np.array(images), np.array(labels) )
#         yield result

# def get_batch(batch_size=batch_size):
#     while True:
#         while len(batch_cache) == 0:
#             time.sleep(0.5)
#         yield batch_cache.pop(0)

Model

In [17]:
# create model
pre_model = applications.VGG19(weights="imagenet", 
                           include_top=False, 
                           input_shape=(width, height, depth))

model = Sequential()
for idx, layer in enumerate(pre_model._layers):
    if idx < len(pre_model._layers) - n_layers_to_tune:
        layer.trainable = False
    model.add(layer)
    
model.add(Flatten(input_shape=pre_model.output_shape[1:]))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

In [18]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 25, 25, 256)       295168    
__________

In [19]:
# compile model
model.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy', 'categorical_accuracy'])

Train

In [20]:
# tell model to save after each epoch
class SaveEachEpoch(Callback):
    def on_epoch_end(self, epoch, logs={}):
        debug('SEE: saving model for epoch', epoch)
        filename1 = '/landmark_model_' + str(epoch) + '.h5'
        filename2 = '/landmark_model_weights_' + str(epoch) + '.h5'
        source1 = model_dir+filename1
        dest1 = 'models'+filename1
        source2 = model_dir+filename2
        dest2 = 'models'+filename2

        try:
            self.model.save(source1)
            bucket.upload_file(source1, dest1)

            self.model.save_weights(source2)
            bucket.upload_file(source2, dest2)

            # todo: clean up/delete model files
        except:
            debug('SEE: error saving model')
            return
        
        debug('SEE: done saving model')


In [21]:
# # start caching batches
# idx_list = range(0, num_filekeys, batch_size)
# pool = multiprocessing.Pool(processes=n_processes)  # Num of CPUs
# with Manager() as manager:
#     batch_cache = manager.list()  # <-- can be shared between processes.
#     for idx in idx_list:
#         pool.apply_async(func = generate_batch_for_cache, args = (batch_cache, idx))

In [28]:
# train model
model.fit_generator(BatchSequence(), 
                    epochs=5, 
                    verbose=1,
                    callbacks=[SaveEachEpoch()],
                    use_multiprocessing=True,
                    max_queue_size=50,
                    workers=n_workers,
                    shuffle=True
                   )

Epoch 1/5
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100,

StopIteration: could not broadcast input array from shape (100,100,3) into shape (100)

(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(96, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 1

(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 

(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)
(100, 100, 3)


In [None]:
# # close pool after training
# pool.close()
# pool.join()