Check GPU

In [1]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

Imports

In [2]:
import sys, os, csv
from urllib import request, error
from PIL import Image
from io import BytesIO
import boto3
import random

In [3]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import io
from io import BytesIO

import time
import tempfile
import pickle
from random import shuffle
import multiprocessing
from multiprocessing import Process, Manager

In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import applications
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.utils import Sequence

from skimage.transform import resize

In [5]:
print('Using tensorflow', tf.__version__)
print('Using keras', keras.__version__)

Using tensorflow 1.10.1
Using keras 2.1.6-tf


Constants

In [6]:
# set constants
model_dir = '../models'

batch_size = 32

height = 100
width = 100
color_mode = 'rgb'
depth = 3 if color_mode == 'rgb' else 1

n_layers_to_tune = 0

should_prune = False
should_subset = True
subset_length = 10000
should_debug = False

n_cpus = multiprocessing.cpu_count()
n_workers = n_cpus - 1 # None defaults to n_cpus
print('There are', n_cpus, 'cpu cores available')

There are 4 cpu cores available


Setup

In [7]:
def debug(*string):
    if should_debug:
        print(string)

In [8]:
s3 = boto3.resource('s3')
bucket_name = 'landmark-data-12345'
bucket = s3.Bucket(bucket_name)

In [9]:
class_dist = {}
classes = []
filekeys = []

if os.path.isfile('pickles/filekeys'):
    print('loading data from files')
    
    file = open('pickles/filekeys', 'rb')
    filekeys = pickle.load(file)
    file.close()
    
    file = open('pickles/classes', 'rb')
    classes = pickle.load(file)
    file.close()
    
    file = open('pickles/class_dist', 'rb')
    class_dist = pickle.load(file)
    file.close()
    
else:
    print('data pickles dont exist, generating')
    objects = bucket.objects.filter(Prefix="data/train/")
    for o in objects:
        if o.key.endswith('.jpg'):
            filekeys.append(o.key)
            cl = o.key.split('/')[2]
            if cl not in classes:
                class_dist[cl] = 1
                classes.append(cl)
            else:
                class_dist[cl] += 1

    shuffle(filekeys)
    file = open('pickles/filekeys', 'wb')
    pickle.dump(filekeys, file)
    file.close()

    file = open('pickles/classes', 'wb')
    pickle.dump(classes, file)
    file.close()

    file = open('pickles/class_dist', 'wb')
    pickle.dump(class_dist, file)
    file.close()
    
print('data loaded')

loading data from files
data loaded


In [10]:
if should_subset:
    filekeys = filekeys[:subset_length]
num_filekeys = len(filekeys)
num_classes = len(classes)
print('there are', num_filekeys, 'images across', num_classes, 'classes')

there are 10000 images across 14940 classes


Helper funcs

In [11]:
def load_s3_file_indirect(filekey):
    debug('LS3: loading')
    object = bucket.Object(filekey)
    debug('LS3: a')
    tmp = tempfile.NamedTemporaryFile()
    debug('LS3: b')
    with open(tmp.name, 'wb') as f:
        debug('LS3: c')
        object.download_fileobj(f)
        debug('LS3: d')
        img = mpimg.imread(tmp.name)
        debug('LS3: done loading')
        return img
    
def load_s3_file_direct(filekey):
    debug('LS3: loading')
    object = bucket.Object(filekey)
    debug('LS3: a')
    file_stream = io.StringIO()
    debug('LS3: b')
    object.download_fileobj(file_stream)
    debug('LS3: c')
    img = mpimg.imread(file_stream)
    debug('LS3: done loading')
    return img

def load_s3_file(filekey):
    return load_s3_file_indirect(filekey)

In [19]:
# def get_batch():
#     idx = 0
#     while True:
#         images = []
#         labels = []
#         while len(images) < batch_size:
#             filekey = filekeys[idx]
#             label = filekey.split('/')[2]
            
#             try:
#                 img = load_s3_file(filekey)
#                 images.append(img)
#                 labels.append(label)
#             except:
#                 pass

#             idx = (idx + 1) % len(filekeys)
            
#         result = ( np.array(images), np.array(labels) )
#         yield result

def get_batch():
    idx = 0
    while True:
        images = []
        labels = []
        while len(images) < 1:
            filekey = filekeys[idx]
            label = filekey.split('/')[2]
            
            try:
                img = load_s3_file(filekey)
                images.append(img)
                labels.append(label)
            except:
                pass

            idx = (idx + 1) % len(filekeys)
            
        result = ( np.array(images), np.array(labels) )
        yield result

In [23]:
def tfdata_generator():
    '''Construct a data generator using tf.Dataset'''

    def preprocess_fn(image, label):
        '''A transformation function to preprocess raw data
        into trainable input. '''
        x = tf.image.resize_image_with_pad(tf.cast(image, tf.float32), height, width)
        y = tf.one_hot(tf.cast(label, tf.int32), num_classes)
        return x, label

    dataset = tf.data.Dataset().batch(batch_size).from_generator(get_batch,
                output_types=(tf.float32, tf.float32), 
#                 output_shapes=((width, height, depth), (num_classes,))
                )

    # Transform and batch data at the same time
    dataset = dataset.apply(tf.contrib.data.map_and_batch(
        preprocess_fn, batch_size,
        num_parallel_batches=n_workers))
    dataset = dataset.repeat()
    dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

    return dataset

train_dataset = tfdata_generator()

Model

In [14]:
# create model
pre_model = applications.VGG19(weights="imagenet", 
                           include_top=False, 
                           input_shape=(width, height, depth))

model = Sequential()
for idx, layer in enumerate(pre_model._layers):
    if idx < len(pre_model._layers) - n_layers_to_tune:
        layer.trainable = False
    model.add(layer)
    
model.add(Flatten(input_shape=pre_model.output_shape[1:]))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

In [15]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 25, 25, 256)       295168    
__________

In [16]:
# compile model
model.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy', 'categorical_accuracy'])

Train

In [17]:
# tell model to save after each epoch
class SaveEachEpoch(Callback):
    def on_epoch_end(self, epoch, logs={}):
        debug('SEE: saving model for epoch', epoch)
        filename1 = '/landmark_model_' + str(epoch) + '.h5'
        filename2 = '/landmark_model_weights_' + str(epoch) + '.h5'
        source1 = model_dir+filename1
        dest1 = 'models'+filename1
        source2 = model_dir+filename2
        dest2 = 'models'+filename2

        try:
            self.model.save(source1)
            bucket.upload_file(source1, dest1)

            self.model.save_weights(source2)
            bucket.upload_file(source2, dest2)

            # todo: clean up/delete model files
        except:
            debug('SEE: error saving model')
            return
        
        debug('SEE: done saving model')

In [24]:
# train model
model.fit(train_dataset.make_one_shot_iterator(),
            epochs=5, 
            steps_per_epoch=len(filekeys)//batch_size,
            verbose=1,
            callbacks=[SaveEachEpoch()]
            )

Epoch 1/5
  0/312 [..............................] - ETA: 0sEpoch 2/5
  0/312 [..............................] - ETA: 0sEpoch 3/5
  0/312 [..............................] - ETA: 0sEpoch 4/5
  0/312 [..............................] - ETA: 0sEpoch 5/5
  0/312 [..............................] - ETA: 0s

<tensorflow.python.keras.callbacks.History at 0x7fad54091588>