In [1]:
from pathlib import Path
from tqdm import tqdm_notebook, tqdm, trange
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import to_categorical


In [2]:
train_df = pd.read_csv('datasets/train_sets.csv')
val_df = pd.read_csv('datasets/val_sets.csv')

In [3]:
train_df

Unnamed: 0,Image,Id,whale_id_index
0,66f164af.jpg,new_whale,0
1,81bd5469.jpg,w_143b201,20
2,193ae7a0.jpg,w_5297ab3,95
3,feb15de4.jpg,w_8d46cef,167
4,7fe190ce.jpg,w_43b50e5,76
...,...,...,...
2649,53c207cf.jpg,w_94cd45e,176
2650,8cc05900.jpg,w_7554f44,140
2651,fa8ed4af.jpg,w_43be268,77
2652,59ecd346.jpg,w_2a04ceb,43


In [4]:
def fit_image(img, expected_shape=(478, 968)):
    HEIGHT, WIDTH = expected_shape
    height, width = img.shape[:2]

    if WIDTH / width < HEIGHT / height:
        ratio = WIDTH / width
    else:
        ratio = HEIGHT / height

    blank_img = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

    img = cv2.resize(img, None, fx=ratio, fy=ratio)
    height, width = img.shape[:2]
#     blank_img[HEIGHT // 2 - height // 2: HEIGHT // 2 + height // 2,
#              WIDTH // 2 - width // 2: WIDTH // 2 + width // 2] = img
    blank_img[(HEIGHT - height) // 2: (HEIGHT + height) // 2,
             (WIDTH - width) // 2: (WIDTH + width) // 2] = img
    img = blank_img
    return img

class DataGenerator:
    def __init__(self, df, dataset_dir, batch_size=32):
        self.df = df
        self.dataset_dir = dataset_dir
        self.number_class = self.df['whale_id_index'].max() + 1
        self.batch_size = batch_size
    
    @property
    def number_step(self):
        return len(self.df) // self.batch_size
    
    def create_generator(self, shuffle=False):
        df = self.df
        if shuffle:
            df = df.sample(frac=1, replace=False)
        while True:
            for i in range(0, len(df), self.batch_size):
                batch_df = df.iloc[i:i+self.batch_size]
                
                xs = []
                ys = []
                for _, whale in batch_df.iterrows():
                    img = cv2.imread(str(self.dataset_dir / whale['Image']))
                    img = fit_image(img)
                    img = img[:, :, ::-1]
                    img = img / 255.
                    
                    _class = to_categorical(whale['whale_id_index'], self.number_class)
                    
                    xs.append(img)
                    ys.append(_class)
                    
                xs = np.array(xs)
                ys = np.array(ys)
                yield xs, ys
                

In [5]:
input_dir = Path('datasets/train/train')

train_datagen = DataGenerator(train_df, input_dir)
train_gen = train_datagen.create_generator(shuffle=True)

val_datagen = DataGenerator(val_df, input_dir)
val_gen = val_datagen.create_generator(shuffle=True)

In [6]:
backbone = DenseNet121(include_top=False,
                       weights='imagenet',
                       input_shape=(968, 478, 3)
                      )
model = Sequential([
    backbone,
    GlobalAveragePooling2D(),
    Dense(train_datagen.number_class, activation='softmax')
])

In [7]:
backbone.trainable = False

In [8]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Model)          (None, 30, 15, 1024)      7037504   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 313)               320825    
Total params: 7,358,329
Trainable params: 320,825
Non-trainable params: 7,037,504
_________________________________________________________________


In [9]:
optimizer = Adam(1e-3)
model.compile(optimizer,
              loss='categorical_crossentropy',
              metrics=['acc'],
             )

In [10]:
model_dir = Path('benchmark/baseline')
model_dir.mkdir(exist_ok=True)

model.fit(train_gen,
          steps_per_epoch=train_datagen.number_step,
          validation_data=val_gen,
          validation_steps=val_datagen.number_step,
          epochs=50,
          callbacks=[
              TensorBoard(str(model_dir)),
              ModelCheckpoint(str(model_dir / 'freeze-ep-{epoch:02d}-val_acc{val_acc:.4f}.hdf5'),
                        monitor='val_acc',
                        verbose=1,
                        save_best_only=True,
                        mode='max')
          ]
          
)

Epoch 1/50
Epoch 00001: val_acc improved from -inf to 0.24219, saving model to benchmark/baseline/freeze-ep-01-val_acc0.2422.hdf5
Epoch 2/50
Epoch 00002: val_acc did not improve from 0.24219
Epoch 3/50
Epoch 00003: val_acc did not improve from 0.24219
Epoch 4/50
Epoch 00004: val_acc improved from 0.24219 to 0.24531, saving model to benchmark/baseline/freeze-ep-04-val_acc0.2453.hdf5
Epoch 5/50
Epoch 00005: val_acc did not improve from 0.24531
Epoch 6/50
Epoch 00006: val_acc did not improve from 0.24531
Epoch 7/50
Epoch 00007: val_acc improved from 0.24531 to 0.25000, saving model to benchmark/baseline/freeze-ep-07-val_acc0.2500.hdf5
Epoch 8/50
Epoch 00008: val_acc improved from 0.25000 to 0.25625, saving model to benchmark/baseline/freeze-ep-08-val_acc0.2562.hdf5
Epoch 9/50
Epoch 00009: val_acc improved from 0.25625 to 0.26250, saving model to benchmark/baseline/freeze-ep-09-val_acc0.2625.hdf5
Epoch 10/50
Epoch 00010: val_acc improved from 0.26250 to 0.26719, saving model to benchmark/b

Epoch 27/50
Epoch 00027: val_acc improved from 0.40469 to 0.40938, saving model to benchmark/baseline/freeze-ep-27-val_acc0.4094.hdf5
Epoch 28/50
Epoch 00028: val_acc improved from 0.40938 to 0.41406, saving model to benchmark/baseline/freeze-ep-28-val_acc0.4141.hdf5
Epoch 29/50
Epoch 00029: val_acc improved from 0.41406 to 0.42031, saving model to benchmark/baseline/freeze-ep-29-val_acc0.4203.hdf5
Epoch 30/50
Epoch 00030: val_acc improved from 0.42031 to 0.42500, saving model to benchmark/baseline/freeze-ep-30-val_acc0.4250.hdf5
Epoch 31/50
Epoch 00031: val_acc did not improve from 0.42500
Epoch 32/50
Epoch 00032: val_acc did not improve from 0.42500
Epoch 33/50
Epoch 00033: val_acc did not improve from 0.42500
Epoch 34/50
Epoch 00034: val_acc did not improve from 0.42500
Epoch 35/50
Epoch 00035: val_acc did not improve from 0.42500
Epoch 36/50
Epoch 00036: val_acc did not improve from 0.42500
Epoch 37/50
Epoch 00037: val_acc did not improve from 0.42500
Epoch 38/50
Epoch 00038: val_a

<tensorflow.python.keras.callbacks.History at 0x7f9a2c4b1ef0>