In [1]:
# -*- coding: utf-8 -*-
import os
import pandas as pd
import numpy as np
from PIL import Image
import gc, math
import pickle

from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.models import model_from_json
from keras.models import Model
from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from keras import backend as K
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import log_loss, accuracy_score, confusion_matrix

from cnnmodels import vgg_std16_model, preprocess_input, create_rect5, load_img, train_generator, test_generator
from cnnmodels import identity_block, testcv_generator, conv_block, resnet50_model, trainb_generator

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [8]:
# Notebook-wide configuration.
ROWS, COLS = 224, 224                # height / width every crop is resized to
img_rows, img_cols = ROWS, COLS      # alias of the input resolution
channel = 3
BATCHSIZE = 32
SEAL_CLASSES = ['adult_males', 'subadult_males','adult_females','juveniles','pups', 'others']
num_class = len(SEAL_CLASSES)
# Rows drawn per group for one training batch (integer division: 32 // 6 == 5).
nb_perClass = BATCHSIZE // num_class
TRAIN_DIR = '../darknet/seals/JPEGImagesBlk'
TEST_DIR = '../darknet/seals/JPEGImagesTest'

In [3]:
# Augmentation used for training crops: flips along both image axes.
train_datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)

# Load the table of candidate boxes (image name, score, box corners, `seal`
# group and one-hot class columns) and preview its first rows.
# Pickle files can execute code when loaded; only read files you produced.
rfcnCV = pd.read_pickle('../coords/rfcnCVclass.pkl')
rfcnCV.head(3)

Unnamed: 0,img,proba,x0,y0,x1,y1,seal,adult_males,subadult_males,adult_females,juveniles,pups,other
0,228_54,0.894,364.95,295.1,464.95,395.1,0,0,0,0,0,0,1
1,228_54,0.893,228.2,376.3,328.2,476.3,0,0,0,0,0,0,1
2,228_54,0.72,407.75,409.95,507.75,509.95,0,0,0,0,0,0,1


In [4]:
# Build the two cross-validation masks from the numeric image id, i.e. the
# part of `img` before the first '_': folds[0] marks rows from odd-numbered
# images, folds[1] marks rows from even-numbered images.
image_ids = rfcnCV.img.str.split('_').str[0].astype(int)
is_odd_image = image_ids % 2 != 0
is_even_image = image_ids % 2 == 0
folds = [is_odd_image, is_even_image]
rfcnCV.shape

(146060, 13)

In [5]:
def train_generator(datagen, df):
    """Yield group-balanced, augmented training batches forever.

    Note: this definition shadows the `train_generator` imported from
    `cnnmodels` at the top of the notebook.

    Each batch draws `nb_perClass` rows from every `seal` group of `df`,
    crops the box from the matching JPEG under TRAIN_DIR with `load_img`,
    applies `datagen.random_transform` and `preprocess_input`, and stacks
    the results.

    Parameters
    ----------
    datagen : object exposing `random_transform(x)` (the notebook passes an
        ImageDataGenerator).
    df : DataFrame read by position - presumably laid out as
        (img, proba, x0, y0, x1, y1, seal, <one column per class>), as shown
        by `rfcnCV.head()`; verify if the pickle schema changes.

    Yields
    ------
    (batch_x, batch_y) where batch_x has shape (n, 3, ROWS, COLS) and
    batch_y has shape (n, len(SEAL_CLASSES)); n is the number of rows that
    were actually sampled (number of groups * nb_perClass).
    """
    def sample_group(group):
        # Fall back to sampling with replacement only when a group holds
        # fewer than nb_perClass rows; replace=False would raise there.
        picked = np.random.choice(group.index, size=nb_perClass,
                                  replace=len(group) < nb_perClass)
        return group.loc[picked, :]

    while 1:
        batch_df = df.groupby(['seal'], as_index=True).apply(sample_group)
        # Size the arrays from the rows actually drawn. Allocating BATCHSIZE
        # rows left trailing all-zero images with all-zero labels whenever
        # groups * nb_perClass < BATCHSIZE, and overflowed when it was larger.
        n_rows = batch_df.shape[0]
        batch_x = np.zeros((n_rows, ROWS, COLS, 3), dtype=K.floatx())
        batch_y = np.zeros((n_rows, len(SEAL_CLASSES)), dtype=K.floatx())
        for i, (_, row) in enumerate(batch_df.iterrows()):
            values = row.tolist()
            image_file = os.path.join(TRAIN_DIR, values[0])
            bbox = values[2:6]
            cropped = load_img(image_file + '.jpg', bbox, target_size=(ROWS, COLS))
            x = np.asarray(cropped, dtype=K.floatx())
            # NOTE(review): x is channels-last here; confirm the generator's
            # dim ordering matches, otherwise the flips act on the wrong axes.
            x = datagen.random_transform(x)
            x = preprocess_input(x)
            batch_x[i] = x
            batch_y[i] = values[7:]  # every column after `seal` is a class label
        # Move channels to axis 1 before handing the batch to the model.
        yield (batch_x.transpose(0, 3, 1, 2), batch_y)

def testcv_generator(df, datagen = None, batch_size = BATCHSIZE):
    """Yield image batches for the rows of `df`, in order, cycling forever.

    Note: this definition shadows the `testcv_generator` imported from
    `cnnmodels` at the top of the notebook.

    Parameters
    ----------
    df : DataFrame read by position - column 0 is the image name (without
        '.jpg') and columns 2:6 are the box passed to `load_img`.
    datagen : optional object exposing `random_transform(x)`; when given it
        is applied to every crop before `preprocess_input`.
    batch_size : maximum number of rows per batch; the last batch of a pass
        holds the remainder.

    Yields
    ------
    Array of shape (rows_in_batch, 3, ROWS, COLS). An empty `df` yields
    empty batches indefinitely.
    """
    n = df.shape[0]
    batch_index = 0
    while 1:
        current_index = batch_index * batch_size
        current_batch_size = min(batch_size, n - current_index)
        # Wrap as soon as this batch reaches the end of the frame. The
        # previous check only wrapped on a short batch, so a frame whose
        # length is an exact multiple of batch_size produced a zero-row
        # batch after the last full one.
        if current_index + current_batch_size >= n:
            batch_index = 0
        else:
            batch_index += 1
        batch_df = df.iloc[current_index:current_index + current_batch_size]
        batch_x = np.zeros((batch_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
        for i, (_, row) in enumerate(batch_df.iterrows()):
            values = row.tolist()
            image_file = os.path.join(TRAIN_DIR, values[0] + '.jpg')
            bbox = values[2:6]
            cropped = load_img(image_file, bbox, target_size=(ROWS, COLS))
            x = np.asarray(cropped, dtype=K.floatx())
            if datagen is not None:
                x = datagen.random_transform(x)
            x = preprocess_input(x)
            batch_x[i] = x
        # Coarse progress indicator (prints the index of the next batch).
        if batch_index % 50 == 0:
            print(batch_index)
        # Move channels to axis 1 before handing the batch to the model.
        yield(batch_x.transpose(0, 3, 1, 2))


In [None]:
def _recompile(model):
    """Compile `model` again with its current optimizer, loss and metrics.

    Keras caches the training function it builds from the trainable weights,
    so changes to `layer.trainable` only take effect after another compile.
    Assumes the model is already compiled (the loop below fits the object
    returned by `resnet50_model` directly) - verify against cnnmodels.
    """
    model.compile(optimizer=model.optimizer,
                  loss=model.loss,
                  metrics=getattr(model, 'metrics', None))


for fold in range(2):
    print("Fold: " + str(fold+1))
    # Rows flagged by folds[fold] are held out; everything else is trained on.
    train_df = rfcnCV[~folds[fold]]
    test_df = rfcnCV[folds[fold]]
    # Validation uses the first 20000 held-out rows.
    valid_df = test_df.iloc[:20000]

    # validation_data (valid_x,valid_y)
    print("Part ... A")
    valid_x = np.zeros((valid_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
    valid_y = np.zeros((valid_df.shape[0], len(SEAL_CLASSES)), dtype=K.floatx())
    for i, (index, row) in enumerate(valid_df.iterrows()):
        # Progress is keyed on the frame's index label, not the position.
        if index % 300 == 0:
            print(index)
        values = row.tolist()
        image_file = os.path.join(TRAIN_DIR, values[0])
        bbox = values[2:6]
        cropped = load_img(image_file + '.jpg', bbox, target_size=(ROWS, COLS))
        x = np.asarray(cropped, dtype=K.floatx())
        x = preprocess_input(x)
        valid_x[i] = x
        valid_y[i] = values[7:]
    # Move channels to axis 1, matching what the generators yield.
    valid_x = valid_x.transpose(0, 3, 1, 2)

    # Load our model
    print("Part ... B")
    nb_epoch = 2
    samples_per_epoch = 74000
    model = resnet50_model(ROWS, COLS, channel, num_class)
    # Stage 1: train only the last three layers.
    for layer in model.layers:
        layer.trainable = False
    for layer in model.layers[-3:]:
        layer.trainable = True
    _recompile(model)

    # Start Fine-tuning
    print("Part ... C")
    model.fit_generator(train_generator(train_datagen, train_df),
              nb_epoch=nb_epoch,
              samples_per_epoch=samples_per_epoch, #50000,
              verbose=1,
              validation_data=(valid_x, valid_y),
              )

    # Stage 2: unfreeze everything from layer 38 onwards and lower the
    # learning rate. The rate is written into the optimizer's backend
    # variable (plain attribute assignment would only rebind the attribute to
    # a float), and the model is recompiled so the new trainable set is used.
    for layer in model.layers[38:]:
        layer.trainable = True
    K.set_value(model.optimizer.lr, 1e-5)
    _recompile(model)
    nb_epoch = 6
    print("Part ... D")
    model.fit_generator(train_generator(train_datagen, df=train_df),
              nb_epoch=nb_epoch,
              samples_per_epoch=samples_per_epoch,
              verbose=1,
              validation_data=(valid_x, valid_y),
              )

    # Test preds save
    print("Part ... E")
    test_preds = model.predict_generator(testcv_generator(test_df), val_samples=test_df.shape[0])
    # NOTE(review): test_df already carries the label columns shown by
    # rfcnCV.head(), so every name shared with SEAL_CLASSES appears twice
    # after this concat (label and prediction) - confirm that downstream
    # readers of the pickle/CSV expect that.
    df = pd.concat([test_df.reset_index(drop=True), pd.DataFrame(test_preds,  columns=SEAL_CLASSES)], axis=1)
    df.to_pickle('../coords/resnet50CVclassPreds0405_fold' + str(fold+1) + '.pkl')
    df[['img']+SEAL_CLASSES].to_csv('../coords/resnet50CVclassPreds0405_fold' + str(fold+1) + '.csv', index=False)

    # Clean up: drop the per-fold objects before the next fold is built.
    del model, train_df, test_df, valid_df, valid_x, valid_y, test_preds, df
    gc.collect()
    gc.collect()

Fold: 2
Part ... A
0
300
600
900
1200
1500
1800
2100
2400
