In [2]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras import applications
from keras.layers import AveragePooling2D, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger, TensorBoard, LambdaCallback
from keras.applications.resnet50 import ResNet50, preprocess_input 
from keras.layers import Conv2D, Convolution2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, Activation
from keras.optimizers import SGD
from keras import backend as K
import numpy as np
import pandas as pd
from keras import layers
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.models import load_model
import random
import cv2
from PIL import Image
import os
from keras.utils import to_categorical
from collections import defaultdict
from multiprocessing import Pool
from functools import partial
import multiprocessing
import jpeg4py as jpeg
from io import BytesIO

In [3]:
#parameters
classes = 10
batch_size=20
image_size = 512
train_total = 3500
validation_total = 875

In [4]:
model_resnet = ResNet50(include_top=False, weights = 'imagenet',input_shape=(image_size,image_size,3))

In [6]:
x = model_resnet.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
x = Dense(classes, activation='softmax')(x)

In [7]:
for layer in model_resnet.layers:
    layer.trainable = True

In [9]:
model = Model(inputs=model_resnet.input, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 256, 256, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 256, 256, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 256, 256, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [10]:
train_datagen = ImageDataGenerator(horizontal_flip=True)
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)                  

In [11]:
train_generator = train_datagen.flow_from_directory(
                    'train',
                    target_size=(image_size,image_size),
                    batch_size=batch_size,
                    class_mode='categorical',
                    shuffle=True
                    )
validation_generator = validation_datagen.flow_from_directory(
                        'validation',
                        target_size=(image_size,image_size),
                        batch_size=batch_size,
                        class_mode='categorical',
                        shuffle=False)

Found 790600 images belonging to 10 classes.
Found 875 images belonging to 10 classes.


In [12]:
def random_crop(image, crop_size):
    h,w,d = image.shape
    rand_num_h = random.randint(0,h-crop_size)
    rand_num_w = random.randint(0,w-crop_size)
    image_crop = image[rand_num_h:rand_num_h+crop_size,rand_num_w:rand_num_w+crop_size,:]
    return image_crop

In [13]:
def multi_process_train(x,y,imggen):

    current  = multiprocessing.current_process()
    
    image = jpeg.JPEG('train/' + x).decode()
    image = image/1.
    image = imggen.random_transform(image)
    image = preprocess_input(image)
    return image,y

In [14]:
def multi_process_valid(x,y):

    current  = multiprocessing.current_process()
    
    image = jpeg.JPEG('validation/' + x).decode()
    image = image/1.

    image = preprocess_input(image)
    return image,y

In [15]:
def train_gen(train_files, train_classes, batch_size, target_size, imggen):
    min_batch_size = batch_size //10
    train_data = list(zip(train_files, train_classes))
    train_data_dict = defaultdict(list)
    for i in range(len(train_data)):
        train_data_dict[train_data[i][0].split('/')[0]].append(train_data[i])
    
    while(True):
        for i in train_data_dict.keys():
            random.shuffle(train_data_dict[i])
        for start in range(0, len(train_data), min_batch_size):
            #image_crop_list = []
            image_classes_list = []
            image_file_list = []
            for j in train_data_dict.keys():
                end = min(start + min_batch_size, len(train_data_dict[j])) 
    
                for i in range(start,end):
                    image_classes_list.append(train_data_dict[j][i][1])
                    image_file_list.append(train_data_dict[j][i][0])
                    
                
            p = Pool(6)
            image_crop_list = p.starmap(partial(multi_process_train,imggen=imggen),zip(image_file_list,image_classes_list))
            p.close()
            p.join()
            
            x_batch_list = [i[0] for i in image_crop_list]
            y_batch_list = [i[1] for i in image_crop_list]
            x_batch = np.array(x_batch_list, np.float32)
            y_batch = np.array(y_batch_list)         
            
            yield (x_batch, y_batch)

In [16]:
train_crop_generator = train_gen(train_generator.filenames,to_categorical(train_generator.classes),batch_size,image_size,train_datagen) 

In [16]:
def valid_gen(valid_files, valid_classes, batch_size, target_size, imggen):

    valid_data = list(zip(valid_files, valid_classes))
    
    while(True):
        
        for start in range(0, len(valid_data), batch_size):
            #image_crop_list = []
            image_classes_list = []
            end = min(start + batch_size, len(valid_data)) 
    
            x_batch = [valid_data[i][0] for i in range(start,end)]
            image_classes_list = [valid_data[i][1] for i in range(start,end)]
            
            p = Pool(1)
            image_crop_list = p.starmap(multi_process_valid,zip(x_batch,image_classes_list))
            p.close()
            p.join()
            
            x_batch_list = [i[0] for i in image_crop_list]
            y_batch_list = [i[1] for i in image_crop_list]
            x_batch = np.array(x_batch_list, np.float32)
            y_batch = np.array(y_batch_list)                  
            
            yield (x_batch, y_batch)

In [17]:
valid_aug_generator = valid_gen(validation_generator.filenames,to_categorical(validation_generator.classes),batch_size,image_size,validation_datagen) 

In [18]:
model.compile(loss='categorical_crossentropy',
             optimizer=SGD(lr=1e-5),
             metrics=['accuracy'])

In [19]:
from pushbullet import Pushbullet
pb = Pushbullet('o.KiDDDXPuzV4qKbXh4Lywbgw1tK2oFfq1')

In [20]:
pushbullet_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: pb.push_note("epoch: "+str(epoch),"train_loss: "+str(logs['loss'])+"    val_loss"+str(logs['val_loss'])))

In [21]:
callbacks = [ModelCheckpoint(filepath='resnet_ext_data_full_moto_x_multi.hdf5', verbose=1, save_best_only=True, save_weights_only=True),
ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1),
#EarlyStopping(monitor='val_loss', patience=0, verbose=1),
CSVLogger('./3-metrics_ext_data_4.csv'),
TensorBoard(log_dir='logs_ext_data_moto_x', write_graph=True),
pushbullet_callback]

In [23]:
model.fit_generator(
                    train_crop_generator,
                    steps_per_epoch = int(np.ceil(train_total/batch_size)),
                    epochs=100,
                    validation_data=valid_aug_generator,
                    validation_steps= int(np.ceil(validation_total/batch_size)),
                    verbose=1,
                    callbacks=callbacks)

ResourceExhaustedError: OOM when allocating tensor of shape [1,1,512,2048] and type float
	 [[Node: training/SGD/zeros_180 = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [1,1,512,2048] values: [[[0 0 0]]]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'training/SGD/zeros_180', defined at:
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2827, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-25e4f269a8e8>", line 8, in <module>
    callbacks=callbacks)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/engine/training.py", line 2026, in fit_generator
    self._make_train_function()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/engine/training.py", line 970, in _make_train_function
    loss=self.total_loss)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/optimizers.py", line 171, in get_updates
    moments = [K.zeros(shape) for shape in shapes]
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/optimizers.py", line 171, in <listcomp>
    moments = [K.zeros(shape) for shape in shapes]
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py", line 689, in zeros
    v = tf.zeros(shape=shape, dtype=tf_dtype, name=name)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 1439, in zeros
    output = constant(zero, shape=shape, dtype=dtype, name=name)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 214, in constant
    name=name).outputs[0]
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [1,1,512,2048] and type float
	 [[Node: training/SGD/zeros_180 = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [1,1,512,2048] values: [[[0 0 0]]]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


In [22]:
model.load_weights('resnet_before_aug_LB_86.hdf5')


In [83]:
model.evaluate_generator(valid_aug_generator,int(np.ceil(validation_total/batch_size)))

[0.28084943084578429, 0.92571428069046569]

In [1]:
#validation predictions for analysis

In [111]:
predictions_valid = model.predict_generator(validation_generator,int(np.ceil(validation_total/batch_size)))
predictions_valid = np.argmax(predictions_valid, axis=1)

In [115]:
validation_generator.class_indices

{'HTC-1-M7': 0,
 'LG-Nexus-5x': 1,
 'Motorola-Droid-Maxx': 2,
 'Motorola-Nexus-6': 3,
 'Motorola-X': 4,
 'Samsung-Galaxy-Note3': 5,
 'Samsung-Galaxy-S4': 6,
 'Sony-NEX-7': 7,
 'iPhone-4s': 8,
 'iPhone-6': 9}

In [112]:
true_positive = np.zeros(10)
false_positive = np.zeros(10)
true_negative = np.zeros(10)
false_negative = np.zeros(10)
for i in range(len(predictions_valid)):
    if(predictions_valid[i]==validation_generator.classes[i]):
        true_positive[predictions_valid[i]] = true_positive[predictions_valid[i]] + 1
    else:
        false_positive[predictions_valid[i]] = false_positive[predictions_valid[i]] + 1
        false_negative[validation_generator.classes[i]] = false_negative[validation_generator.classes[i]] +1

In [113]:
print('true_positive')
print(true_positive)
print('false_positive')
print(false_positive)
print('false_negative')
print(false_negative)
print('Total')
print(true_positive+false_negative)


true_positive
[  1.   4.   0.  42.   7.  12.   5.  12.  11.  10.]
false_positive
[   9.    4.    1.  351.    9.   22.    7.    6.   13.   24.]
false_negative
[ 52.  58.  61.   7.  49.  46.  50.  26.  46.  51.]
Total
[ 53.  62.  61.  49.  56.  58.  55.  38.  57.  61.]


In [2]:
#test predictions

In [52]:
filenames_list = []
images_list = []
for i in os.listdir('test/temp/'):
    img = Image.open('test/temp/'+i)
    filenames_list.append(i)
    images_list.append(np.array(img, np.float32))
    

In [53]:
images_list = [preprocess_input(x) for x in images_list]

In [54]:
new_array_arr = np.array(images_list)


In [55]:
new_array_arr.shape

(13200, 224, 224, 3)

In [56]:
test_preds = model.predict(new_array_arr)

In [60]:
class_ids = {train_generator.class_indices[x]: x for x in train_generator.class_indices}

In [30]:
for index,i in enumerate(result_filenames):
    if(i.split('_')[-1]=='manip.tif'):
        predicted_classes[index]=''

In [65]:
submission = pd.DataFrame({'fname':result_filenames,'camera':predicted_classes})
submission.to_csv('submission_resnet_v2_ext_data_moto_x.csv', encoding="utf8", index=False)

In [66]:
from IPython.display import FileLink
FileLink('submission_resnet_v2_ext_data_moto_x.csv')