In [1]:
import pandas as pd
import numpy as np
import os.path

from keras.preprocessing.image import Iterator
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras.models import load_model

import threading
# One lock object, handed to both BSONIterator instances created later in this
# notebook; those two iterators also receive the same open file handle.
# NOTE(review): presumably the lock serializes reads from that shared handle
# across worker threads — confirm in BsonIterator.
lock = threading.Lock()

from keras.callbacks import ModelCheckpoint

from BsonIterator import BSONIterator

Using TensorFlow backend.


In [2]:
METADATA_DIR = 'metadata'
DATA_DIR = '../../dataset'


def _load_metadata(csv_name):
    """Read one CSV from METADATA_DIR, using its first column as the index."""
    return pd.read_csv(os.path.join(METADATA_DIR, csv_name), index_col=0)


# Metadata tables: the offsets table plus one image table per split.
train_offsets_df = _load_metadata("train_offsets.csv")
train_images_df = _load_metadata("RandomSplit_Train_0.5_0.1.csv")
val_images_df = _load_metadata("RandomSplit_Val_0.5_0.1.csv")

# Sizes that the model and training cells read later on.
num_classes = 5270
batch_size = 32
num_train_images = len(train_images_df)
num_val_images = len(val_images_df)

# A single binary handle on train.bson is opened here and passed to both
# iterators; it stays open because the iterators keep using it.
train_bson_path = os.path.join(DATA_DIR, 'train.bson')
assert os.path.exists(train_bson_path)
train_bson_file = open(train_bson_path, "rb")

# Both iterators are asked for the same target size.
image_size = (224, 224)

# Training iterator; ImageDataGenerator() is created with no augmentation options.
train_datagen = ImageDataGenerator()
train_gen = BSONIterator(train_bson_file, train_images_df, train_offsets_df,
                         num_classes, train_datagen, lock,
                         target_size=image_size,
                         batch_size=batch_size,
                         shuffle=True)

# Validation iterator: same BSON handle and offsets table, validation image table.
val_datagen = ImageDataGenerator()
val_gen = BSONIterator(train_bson_file, val_images_df, train_offsets_df,
                       num_classes, val_datagen, lock,
                       target_size=image_size,
                       batch_size=batch_size,
                       shuffle=True)

  mask |= (ar1 == a)


Found 5570369 images belonging to 5270 classes.
Found 614245 images belonging to 5270 classes.


In [3]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.optimizers import Adam

from keras.callbacks import ModelCheckpoint

# Directory that receives the per-epoch checkpoint files.
MODEL_DIR = 'saved_models'

# Create the directory before training starts: the checkpoint callback only
# writes files, so a missing directory would otherwise surface as a failed save
# at the end of the first (very long) epoch. The isdir guard keeps the cell
# re-runnable and works on Python 2 as well as Python 3.
if not os.path.isdir(MODEL_DIR):
    os.makedirs(MODEL_DIR)

# Checkpointer: saves after every epoch; the file name embeds the epoch number
# and the training / validation accuracy reported for that epoch.
checkpointer = ModelCheckpoint(
    filepath=os.path.join(
        MODEL_DIR,
        'Resnet50.{epoch:02d}-{acc:.2f}-{val_acc:.2f}_RandomSplit_Train_0.5_0.1.h5'),
    verbose=1)

# Base network: ResNet50 with ImageNet weights and without its classifier top.
base_model = ResNet50(weights='imagenet', include_top=False)

# New head: global average pooling, two 1024-unit ReLU layers, then a softmax
# over num_classes outputs.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)


# The model that gets trained: base network input -> new softmax head.
# NOTE(review): the name is spelled 'Resenet50'; left unchanged in case saved
# artifacts or logs already rely on it.
model = Model(inputs=base_model.input, outputs=predictions, name= 'Resenet50')


In [16]:
# Layers at index 0..174 are frozen; every layer from index 175 on stays
# trainable. NOTE(review): 175 presumably marks the end of the ResNet50 base —
# confirm against the layer listing.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 175

In [17]:
# Compile only after the trainable flags have been set, so the frozen layers
# are respected by the training function.
from keras.optimizers import Adam

# Adam with a learning rate of 1e-4.
opt = Adam(lr=1e-4)
model.compile(loss="categorical_crossentropy",
              optimizer=opt,
              metrics=["accuracy"])


In [14]:
from keras.callbacks import TensorBoard
# TensorBoard callback constructed with the library's default arguments.
tb = TensorBoard()

In [18]:
num_epochs = 2


# Train on the BSON generators. Steps use floor division, so a final partial
# batch of each split is not counted in an epoch.
# Both callbacks are registered: `checkpointer` saves the model every epoch and
# `tb` (created in an earlier cell) writes TensorBoard logs — previously `tb`
# was built but never passed in, so it had no effect.
# The returned History object is kept so per-epoch metrics remain available
# after this long-running cell finishes.
history = model.fit_generator(train_gen,
                              steps_per_epoch = num_train_images // batch_size,
                              epochs = num_epochs,
                              validation_data = val_gen,
                              validation_steps = num_val_images // batch_size,
                              workers = 4,
                              callbacks=[checkpointer, tb])


Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f5e70ff14d0>

In [6]:
# Print each layer's index next to its name, one layer per line.
for position in range(len(model.layers)):
    print (position, model.layers[position].name)

(0, 'input_1')
(1, 'conv1')
(2, 'bn_conv1')
(3, 'activation_1')
(4, 'max_pooling2d_1')
(5, 'res2a_branch2a')
(6, 'bn2a_branch2a')
(7, 'activation_2')
(8, 'res2a_branch2b')
(9, 'bn2a_branch2b')
(10, 'activation_3')
(11, 'res2a_branch2c')
(12, 'res2a_branch1')
(13, 'bn2a_branch2c')
(14, 'bn2a_branch1')
(15, 'add_1')
(16, 'activation_4')
(17, 'res2b_branch2a')
(18, 'bn2b_branch2a')
(19, 'activation_5')
(20, 'res2b_branch2b')
(21, 'bn2b_branch2b')
(22, 'activation_6')
(23, 'res2b_branch2c')
(24, 'bn2b_branch2c')
(25, 'add_2')
(26, 'activation_7')
(27, 'res2c_branch2a')
(28, 'bn2c_branch2a')
(29, 'activation_8')
(30, 'res2c_branch2b')
(31, 'bn2c_branch2b')
(32, 'activation_9')
(33, 'res2c_branch2c')
(34, 'bn2c_branch2c')
(35, 'add_3')
(36, 'activation_10')
(37, 'res3a_branch2a')
(38, 'bn3a_branch2a')
(39, 'activation_11')
(40, 'res3a_branch2b')
(41, 'bn3a_branch2b')
(42, 'activation_12')
(43, 'res3a_branch2c')
(44, 'res3a_branch1')
(45, 'bn3a_branch2c')
(46, 'bn3a_branch1')
(47, 'add_4')
(4