In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras import applications
from keras.layers import AveragePooling2D, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger, TensorBoard, LambdaCallback
from keras.applications.resnet50 import ResNet50, preprocess_input 
from keras.layers import Conv2D, Convolution2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, Activation
from keras.optimizers import Adam
from keras import backend as K
import numpy as np
import pandas as pd
from keras import layers
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.models import load_model
import random
import cv2
from PIL import Image
import os
from keras.utils import to_categorical
from collections import defaultdict
from PIL import Image
import skimage
import skimage.io
import jpeg4py as jpeg
from io import BytesIO
import imageio
from multiprocessing import Pool
from functools import partial
import multiprocessing

Using TensorFlow backend.


In [2]:
# Run configuration shared by the cells below.
image_size = 224        # side length used for input_shape and target_size
classes = 10            # number of units in the final softmax layer
batch_size = 100        # samples requested per generator batch
train_total, validation_total = 3500, 875   # sample counts used to derive step counts

In [3]:
# ImageNet-initialised ResNet50 backbone, built without its classification top.
model_resnet = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)

In [4]:
# Print the layer-by-layer summary of the headless ResNet50 backbone.
model_resnet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 112, 112, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [5]:
# Classification head, listed in the order it is stacked on the backbone output:
# global average pooling, two Dense(relu) stages (each wrapped by Dropout and
# followed by BatchNormalization), then the softmax over `classes`.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(classes, activation='softmax'),
]

# Thread the tensor through the head; `x` ends up as the model output tensor.
x = model_resnet.output
for head_layer in head_layers:
    x = head_layer(x)

In [6]:
# Explicitly mark every backbone layer as trainable (nothing is frozen).
for layer in model_resnet.layers:
    layer.trainable = True

In [7]:
#for layer in model_incepres.layers[-5:]:
#    layer.trainable = True

In [8]:
# Full network: the ResNet50 input through to the head output tensor `x`.
model = Model(inputs=model_resnet.input, outputs=x)
# Print the layer-by-layer summary of the assembled model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 112, 112, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [9]:
# Training augmentation config: horizontal flips only. It is handed to train_gen,
# whose workers call its random_transform on each image.
train_datagen = ImageDataGenerator(horizontal_flip=True)
# Validation config: applies the ResNet50 preprocess_input to each image it yields.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
                    

In [10]:
# Settings common to both directory iterators.
_flow_kwargs = dict(
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training files are shuffled; validation files keep directory order so that
# predictions can be compared position-by-position with `.classes`.
train_generator = train_datagen.flow_from_directory('train', shuffle=True, **_flow_kwargs)
validation_generator = validation_datagen.flow_from_directory('validation', shuffle=False, **_flow_kwargs)

Found 790600 images belonging to 10 classes.
Found 875 images belonging to 10 classes.


In [11]:
def image_aug_valid(image, index):
    """Apply one of the eight fixed augmentations to a validation image.

    The transformations are the same as those of ``image_aug``, so this
    function delegates to it instead of carrying a second copy of the
    branches that could drift out of sync.

    Args:
        image: Decoded image array (assumed H x W x 3 uint8 -- TODO confirm).
        index: Augmentation selector in 0..7; see ``image_aug`` for the mapping.

    Returns:
        The augmented image array produced by ``image_aug``.
    """
    # NOTE(review): Pillow documents this switch as
    # ``PIL.ImageFile.LOAD_TRUNCATED_IMAGES``; setting it on ``PIL.Image`` and
    # ``matplotlib.image`` presumably has no effect. Both assignments are kept
    # so this function's side effects stay exactly as they were.
    Image.LOAD_TRUNCATED_IMAGES = True
    mpimg.LOAD_TRUNCATED_IMAGES = True
    return image_aug(image, index)

In [12]:
def image_aug(image, index):
    """Apply one of eight fixed augmentations, selected by ``index``.

    Mapping:
        0, 1       -- gamma adjustment with exponent 0.8 / 1.2
        2, 3, 4, 5 -- bicubic rescale by 0.5 / 0.8 / 1.5 / 2.0
        6, 7       -- JPEG re-encode at quality 70 / 90, then decode

    Args:
        image: Decoded image array (assumed H x W x 3 uint8 -- TODO confirm).
        index: Augmentation selector in 0..7.

    Returns:
        The augmented image array.

    Raises:
        ValueError: If ``index`` is not one of 0..7. Previously this surfaced
            as an UnboundLocalError on the return statement.
    """
    gamma_by_index = {0: 0.8, 1: 1.2}
    scale_by_index = {2: 0.5, 3: 0.8, 4: 1.5, 5: 2.0}
    quality_by_index = {6: 70, 7: 90}

    if index in gamma_by_index:
        # Scale to [0, 1], apply the gamma curve, scale back and truncate to uint8.
        return np.uint8(cv2.pow(image / 255., gamma_by_index[index]) * 255.)

    if index in scale_by_index:
        factor = scale_by_index[index]
        # dsize=(0, 0) lets OpenCV derive the output size from fx / fy.
        return cv2.resize(image, (0, 0), fx=factor, fy=factor,
                          interpolation=cv2.INTER_CUBIC)

    if index in quality_by_index:
        # Round-trip through an in-memory JPEG to add compression artefacts.
        with BytesIO() as encoded:
            Image.fromarray(image).save(encoded, "JPEG",
                                        quality=quality_by_index[index])
            payload = np.frombuffer(encoded.getvalue(), dtype=np.uint8)
        return jpeg.JPEG(payload).decode()

    raise ValueError("index must be an integer in 0..7, got %r" % (index,))

In [13]:
def random_crop(image, crop_size):
    """Cut a random ``crop_size`` x ``crop_size`` window out of ``image``.

    Args:
        image: Array whose first two axes are height and width. Any trailing
            axes (e.g. colour channels) are kept whole, so 2-D grayscale
            images work as well as H x W x C ones.
        crop_size: Side length of the square window, in pixels.

    Returns:
        The slice ``image[top:top+crop_size, left:left+crop_size]``.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` along height
            or width.
    """
    height, width = image.shape[:2]
    if height < crop_size or width < crop_size:
        raise ValueError(
            "cannot take a %dx%d crop from a %dx%d image"
            % (crop_size, crop_size, height, width))

    # Height offset is drawn before width offset, keeping the RNG call order stable.
    top = random.randint(0, height - crop_size)
    left = random.randint(0, width - crop_size)
    return image[top:top + crop_size, left:left + crop_size]

In [14]:
def multi_process_train(x, y, imggen, crop_size=224, root='train'):
    """Load, augment and preprocess one training image (pool worker task).

    Args:
        x: Image path relative to ``root``.
        y: Label for the image; returned unchanged.
        imggen: Object exposing ``random_transform(image)``.
        crop_size: Side length of the random square crop (default 224).
        root: Directory that ``x`` is relative to (default ``'train'``).

    Returns:
        Tuple ``(image, y)`` where ``image`` is the preprocessed crop.
    """
    image = jpeg.JPEG(os.path.join(root, x)).decode()

    # One of the eight fixed augmentations, picked uniformly at random.
    image = image_aug(image, random.randint(0, 7))
    image = random_crop(image, crop_size)
    image = imggen.random_transform(image)

    # Dividing by 1. promotes the array to floating point before preprocess_input.
    image = preprocess_input(image / 1.)
    return image, y

In [15]:
def multi_process_valid(x, y, crop_size=224, root='validation'):
    """Load, augment and preprocess one validation image.

    Args:
        x: Image path relative to ``root``.
        y: Label for the image; returned unchanged.
        crop_size: Side length of the random square crop (default 224).
        root: Directory that ``x`` is relative to (default ``'validation'``).

    Returns:
        Tuple ``(image, y)`` where ``image`` is the preprocessed crop.
    """
    image = jpeg.JPEG(os.path.join(root, x)).decode()

    # One of the eight fixed augmentations, picked uniformly at random.
    aug_image = image_aug_valid(image, random.randint(0, 7))
    image = random_crop(aug_image, crop_size)

    # Dividing by 1. promotes the array to floating point before preprocess_input.
    image = preprocess_input(image / 1.)
    return image, y

In [26]:
def _reseed_worker():
    """Pool initializer: reseed the Python and NumPy RNGs inside a worker.

    NOTE(review): forked workers presumably start from a copy of the parent's
    NumPy RNG state, which would make every worker draw identical random
    transforms; reseeding from OS entropy avoids that.
    """
    random.seed()
    np.random.seed()


def train_gen(train_files, train_classes, batch_size, target_size, imggen):
    """Yield class-balanced, augmented training batches forever.

    Files are grouped by class, taken as the text before the first ``'/'`` in
    each filename. Every batch holds up to ``batch_size // n_classes`` samples
    from each class; images are loaded by ``multi_process_train`` in a pool of
    six worker processes that is created once and reused for all batches.

    One pass ends when the largest class is exhausted. Smaller classes simply
    stop contributing, so late batches may be smaller, but never empty.

    Args:
        train_files: Sequence of file paths of the form ``<class>/<name>``.
        train_classes: Per-file labels, aligned with ``train_files``.
        batch_size: Target number of samples per batch.
        target_size: Unused; kept for interface compatibility.
        imggen: Object exposing ``random_transform``; forwarded to the workers.

    Yields:
        Tuple ``(x_batch, y_batch)``: a float32 array of images and the array
        of their labels.

    Raises:
        ValueError: If ``train_files`` is empty (the loop would otherwise spin
            forever without yielding).
    """
    by_class = defaultdict(list)
    for sample in zip(train_files, train_classes):
        by_class[sample[0].split('/')[0]].append(sample)
    if not by_class:
        raise ValueError("train_gen needs at least one training file")

    # Per-class share of a batch; at least one so the range step is never zero.
    per_class = max(1, batch_size // len(by_class))
    longest_class = max(len(samples) for samples in by_class.values())
    load_sample = partial(multi_process_train, imggen=imggen)

    # One pool for the generator's lifetime; the with-block tears it down when
    # the generator is closed or a worker raises.
    with Pool(6, initializer=_reseed_worker) as pool:
        while True:
            for samples in by_class.values():
                random.shuffle(samples)

            for start in range(0, longest_class, per_class):
                # Slicing past the end of a short class yields [], so exhausted
                # classes drop out without special-casing.
                chunk = [sample
                         for samples in by_class.values()
                         for sample in samples[start:start + per_class]]

                loaded = pool.starmap(load_sample, chunk)

                x_batch = np.array([image for image, _ in loaded], np.float32)
                y_batch = np.array([label for _, label in loaded])

                yield (x_batch, y_batch)

In [27]:
# Training batch generator built from the directory iterator's file list, with
# its integer class ids converted to one-hot rows.
train_crop_generator = train_gen(train_generator.filenames,to_categorical(train_generator.classes),batch_size,image_size,train_datagen) 

In [28]:
def valid_gen(valid_files, valid_classes, batch_size, target_size, imggen):
    """Yield augmented validation batches forever, in file order.

    Args:
        valid_files: Sequence of validation file paths.
        valid_classes: Per-file labels, aligned with ``valid_files``.
        batch_size: Number of samples per batch (the last batch of a pass may
            be smaller).
        target_size: Unused; kept for interface compatibility.
        imggen: Unused; kept for interface compatibility.

    Yields:
        Tuple ``(x_batch, y_batch)``: a float32 array of images and the array
        of their labels.

    Raises:
        ValueError: If ``valid_files`` is empty (the loop would otherwise spin
            forever without yielding).
    """
    valid_data = list(zip(valid_files, valid_classes))
    if not valid_data:
        raise ValueError("valid_gen needs at least one validation file")

    while True:
        for start in range(0, len(valid_data), batch_size):
            # multi_process_valid takes the filename and label as two separate
            # arguments, so each (filename, label) pair is unpacked here.
            loaded = [multi_process_valid(fname, label)
                      for fname, label in valid_data[start:start + batch_size]]

            x_batch = np.array([image for image, _ in loaded], np.float32)
            y_batch = np.array([label for _, label in loaded])

            yield (x_batch, y_batch)

In [29]:
# Validation batch generator built from the directory iterator's file list, with
# its integer class ids converted to one-hot rows.
valid_aug_generator = valid_gen(validation_generator.filenames,to_categorical(validation_generator.classes),batch_size,image_size,validation_datagen) 

In [30]:
# Compile for multi-class classification: categorical cross-entropy loss, Adam
# with a learning rate of 1e-5, accuracy reported as the metric.
model.compile(loss='categorical_crossentropy',
             optimizer=Adam(lr=1e-5),
             metrics=['accuracy'])

In [31]:
from pushbullet import Pushbullet

# The access token is read from the PUSHBULLET_API_KEY environment variable so
# the credential is never stored in the notebook; a missing variable fails fast
# with a KeyError.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked.
pb = Pushbullet(os.environ['PUSHBULLET_API_KEY'])

In [32]:
def _push_epoch_summary(epoch, logs):
    """Send the epoch's training and validation loss as a Pushbullet note.

    Args:
        epoch: Epoch index supplied by the callback.
        logs: Metrics dict supplied by the callback; missing keys are reported
            as ``None`` rather than raising.
    """
    logs = logs or {}
    body = ("train_loss: " + str(logs.get('loss'))
            + "    val_loss: " + str(logs.get('val_loss')))
    try:
        pb.push_note("epoch: " + str(epoch), body)
    except Exception as error:
        # The notification is best-effort: report the failure but never abort
        # a training run because a note could not be delivered.
        print("Pushbullet notification failed:", error)


pushbullet_callback = LambdaCallback(on_epoch_end=_push_epoch_summary)

In [33]:
# Keep only the best weights seen so far (weights only, not the full model).
checkpoint_cb = ModelCheckpoint(
    filepath='resnet_image_aug_full_temp.hdf5',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)
# Halve the learning rate after 3 epochs without val_loss improvement.
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
#EarlyStopping(monitor='val_loss', patience=0, verbose=1),
csv_logger_cb = CSVLogger('./3-metrics_image_aug_2.csv')
tensorboard_cb = TensorBoard(log_dir='logs_image_aug', write_graph=True)

callbacks = [
    checkpoint_cb,
    reduce_lr_cb,
    csv_logger_cb,
    tensorboard_cb,
    pushbullet_callback,
]

In [None]:
# Step counts: 20 (train) and 8 (validation) times the number of batches
# needed to cover the configured sample totals once.
train_steps = 20 * int(np.ceil(train_total / batch_size))
valid_steps = 8 * int(np.ceil(validation_total / batch_size))

history = model.fit_generator(
    train_crop_generator,
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=valid_aug_generator,
    validation_steps=valid_steps,
    verbose=1,
    callbacks=callbacks,
)

Epoch 1/100

Process ForkPoolWorker-1523:
Process ForkPoolWorker-1519:
Process ForkPoolWorker-1521:
Process ForkPoolWorker-1520:
Traceback (most recent call last):
Process ForkPoolWorker-1522:
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/janardhan/anaconda3/envs/dl/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/janardhan/anaconda3/env

In [34]:
# Restore trained weights into the model.
# NOTE(review): ModelCheckpoint above writes 'resnet_image_aug_full_temp.hdf5',
# while this loads 'resnet_image_aug_full.hdf5' -- presumably the file was
# renamed by hand; confirm it exists before running top to bottom.
model.load_weights('resnet_image_aug_full.hdf5')

In [107]:
# Evaluate on the augmented validation generator, using the same step count as
# validation_steps during training; returns [loss, accuracy].
model.evaluate_generator(valid_aug_generator,8*int(np.ceil(validation_total/batch_size)))

[0.55251469021396982, 0.8331428549119404]

In [19]:
# validation predictions for analysis

In [48]:
# Rewind the iterator so that prediction row i corresponds to
# validation_generator.classes[i], even if it was partly consumed earlier.
validation_generator.reset()

# Class probabilities for one pass over the validation set ...
probabilities_valid = model.predict_generator(
    validation_generator, int(np.ceil(validation_total/batch_size)))
# ... reduced to the index of the most probable class per image.
predictions_valid = np.argmax(probabilities_valid, axis=1)

In [49]:
# Show the class-name -> integer-id mapping used by the validation iterator.
validation_generator.class_indices

{'HTC-1-M7': 0,
 'LG-Nexus-5x': 1,
 'Motorola-Droid-Maxx': 2,
 'Motorola-Nexus-6': 3,
 'Motorola-X': 4,
 'Samsung-Galaxy-Note3': 5,
 'Samsung-Galaxy-S4': 6,
 'Sony-NEX-7': 7,
 'iPhone-4s': 8,
 'iPhone-6': 9}

In [50]:
# Per-class confusion counts, computed in one shot instead of a Python loop.
predicted_labels = np.asarray(predictions_valid)
actual_labels = np.asarray(validation_generator.classes)[:len(predicted_labels)]
is_hit = predicted_labels == actual_labels

# Counts are kept as float arrays, as before, so they print the same way.
true_positive = np.bincount(actual_labels[is_hit], minlength=classes).astype(float)
# A miss is a false positive for the predicted class ...
false_positive = np.bincount(predicted_labels[~is_hit], minlength=classes).astype(float)
# ... and a false negative for the actual class.
false_negative = np.bincount(actual_labels[~is_hit], minlength=classes).astype(float)
# Everything else: samples that neither belong to nor were assigned to the
# class. (Previously this array was allocated but never filled.)
true_negative = len(predicted_labels) - true_positive - false_positive - false_negative

In [51]:
# Print each per-class count vector under its heading; 'Total' is the number
# of samples per actual class (hits plus misses).
count_report = [
    ('true_positive', true_positive),
    ('false_positive', false_positive),
    ('false_negative', false_negative),
    ('Total', true_positive + false_negative),
]
for heading, counts in count_report:
    print(heading)
    print(counts)


true_positive
[ 361.  326.  243.  267.  299.  276.  375.  280.  317.  340.]
false_positive
[ 47.  28.  36.  71.  21.  30.  36.  30.  22.  59.]
false_negative
[ 31.  70.  21.  41.  45.  28.  21.  64.  35.  24.]
Total
[ 392.  396.  264.  308.  344.  304.  396.  344.  352.  364.]


In [12]:
#validation accuracy 83%-image augmentation

In [163]:
# Load every file in the test folder as a float32 array, keeping the file name
# at the same position so predictions can be mapped back to files later.
test_dir = 'test/temp/'
filenames_list = []
images_list = []
for fname in os.listdir(test_dir):
    # The with-block closes the underlying file as soon as the pixels are copied.
    with Image.open(os.path.join(test_dir, fname)) as img:
        images_list.append(np.array(img, np.float32))
    filenames_list.append(fname)
    

In [164]:
# Apply the ResNet50 preprocess_input to every loaded test image.
# NOTE(review): this rebinds images_list, so re-running the cell on its own
# would preprocess the images a second time.
images_list = [preprocess_input(x) for x in images_list]

In [165]:
# Stack the per-image arrays into a single array for model.predict.
new_array_arr = np.array(images_list)


In [166]:
# Sanity check: display the shape of the stacked test array.
new_array_arr.shape

(13200, 224, 224, 3)

In [167]:
# Class probabilities for every test image, using the weights currently loaded.
test_preds = model.predict(new_array_arr)

In [168]:
# Peek at the probability vector predicted for the first test image.
test_preds[0]

array([ 0.00807561,  0.68546778,  0.03120292,  0.0124573 ,  0.02165228,
        0.20855525,  0.00310105,  0.01519923,  0.00783777,  0.00645074], dtype=float32)

In [169]:
# Map each file name, minus its first two characters, to the positions it
# occupies in filenames_list (and therefore in the prediction arrays).
# NOTE(review): the two dropped characters are presumably a per-crop prefix -- confirm.
f = defaultdict(list)
for position, crop_name in enumerate(filenames_list):
    f[crop_name[2:]].append(position)

In [170]:
# Display the name -> row-positions mapping built above.
f

defaultdict(list,
            {'img_43c9cb0_manip.tif': [3107, 4110, 6171, 8882, 10133],
             'img_c1362f8_manip.tif': [343, 925, 5802, 8280, 10279],
             'img_89cf403_manip.tif': [3282, 3929, 7154, 8147, 10538],
             'img_953b8fd_manip.tif': [1605, 1822, 3268, 6053, 8209],
             'img_3d7e180_manip.tif': [3670, 6600, 7831, 9293, 12037],
             'img_f6b9240_unalt.tif': [3035, 3207, 9577, 10561, 11372],
             'img_dcb50fd_unalt.tif': [302, 1838, 5903, 8136, 10930],
             'img_91d5d77_unalt.tif': [2070, 2908, 3115, 7358, 8117],
             'img_ae8994c_manip.tif': [2377, 4279, 5738, 9814, 10816],
             'img_5b2f4fd_unalt.tif': [367, 2171, 7858, 8371, 12462],
             'img_8026ed4_unalt.tif': [5971, 6432, 6665, 8793, 10110],
             'img_0f07519_manip.tif': [1341, 2202, 3509, 10062, 12267],
             'img_7368be7_manip.tif': [2236, 3475, 7850, 8201, 13184],
             'img_1f15e74_unalt.tif': [697, 2928, 8558, 9477, 1

In [171]:
# Invert the iterator's name -> id mapping so predicted ids can be turned back into class names.
class_ids = {class_id: class_name
             for class_name, class_id in train_generator.class_indices.items()}

In [175]:
# Replace the model's weights with a second checkpoint; predictions made after
# this line come from these weights.
model.load_weights('resnet_before_aug_LB_86.hdf5')

In [176]:
# Class probabilities for every test image from the second checkpoint.
test_preds_unalt = model.predict(new_array_arr)

In [181]:
# Element-wise average of the two checkpoints' probabilities.
# NOTE(review): no later cell in this notebook appears to use test_preds_avg.
test_preds_avg = (test_preds+test_preds_unalt)/2

In [186]:
# One prediction per file: average the probability rows of all its entries and
# take the most probable class. Files ending in 'manip.tif' use the first
# checkpoint's predictions, all others use the second checkpoint's.
result_filenames = []
predicted_classes = []
for name, rows in f.items():
    is_manip = name.split('_')[-1] == 'manip.tif'
    source_preds = test_preds if is_manip else test_preds_unalt
    mean_probs = np.mean(source_preds[rows], axis=0)
    result_filenames.append(name)
    predicted_classes.append(class_ids[np.argmax(mean_probs)])

In [187]:
# Display the predicted class name for each entry of result_filenames.
predicted_classes

['iPhone-4s',
 'Motorola-Nexus-6',
 'Samsung-Galaxy-S4',
 'Samsung-Galaxy-S4',
 'HTC-1-M7',
 'HTC-1-M7',
 'iPhone-4s',
 'Sony-NEX-7',
 'Samsung-Galaxy-Note3',
 'Motorola-Droid-Maxx',
 'HTC-1-M7',
 'Samsung-Galaxy-S4',
 'Sony-NEX-7',
 'iPhone-6',
 'Motorola-X',
 'Motorola-Droid-Maxx',
 'HTC-1-M7',
 'iPhone-6',
 'Motorola-Droid-Maxx',
 'Motorola-Nexus-6',
 'iPhone-6',
 'HTC-1-M7',
 'Samsung-Galaxy-Note3',
 'HTC-1-M7',
 'Motorola-Droid-Maxx',
 'Sony-NEX-7',
 'Samsung-Galaxy-S4',
 'Motorola-Droid-Maxx',
 'iPhone-4s',
 'LG-Nexus-5x',
 'Motorola-Nexus-6',
 'LG-Nexus-5x',
 'Motorola-X',
 'Samsung-Galaxy-S4',
 'HTC-1-M7',
 'Motorola-Nexus-6',
 'iPhone-4s',
 'Motorola-Nexus-6',
 'Motorola-Droid-Maxx',
 'Samsung-Galaxy-S4',
 'Sony-NEX-7',
 'Sony-NEX-7',
 'Motorola-X',
 'Samsung-Galaxy-Note3',
 'Samsung-Galaxy-Note3',
 'Motorola-Droid-Maxx',
 'Motorola-X',
 'iPhone-6',
 'Motorola-Nexus-6',
 'Samsung-Galaxy-S4',
 'LG-Nexus-5x',
 'Sony-NEX-7',
 'iPhone-6',
 'Motorola-Droid-Maxx',
 'Motorola-X',
 'i

In [56]:
#class_ids = {train_generator.class_indices[x]: x for x in train_generator.class_indices}
#predicted_classes = [class_ids[x] for x in np.argmax(test_preds, axis=1)]

In [30]:
# Blank out the prediction of every file whose name ends in 'manip.tif'.
# NOTE(review): this cell's execution count is older than its neighbours', so
# it looks like a leftover experiment; running the notebook top to bottom would
# empty all 'manip' predictions right before the submission is written -- confirm intent.
manip_positions = [position
                   for position, name in enumerate(result_filenames)
                   if name.split('_')[-1] == 'manip.tif']
for position in manip_positions:
    predicted_classes[position] = ''

In [188]:
# Build the submission table and write it out as CSV. The column order is
# stated explicitly: with a plain dict it depends on the pandas / Python
# version (sorted keys on older versions, insertion order on newer ones).
submission = pd.DataFrame(
    {'fname': result_filenames, 'camera': predicted_classes},
    columns=['fname', 'camera'],
)
submission.to_csv('submission_resnet_with_aug_trained_max.csv', encoding="utf8", index=False)

In [189]:
# Render a link to the generated submission file in the notebook output.
from IPython.display import FileLink
FileLink('submission_resnet_with_aug_trained_max.csv')