In [13]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from pathlib import Path
from tqdm import tqdm_notebook, tqdm, trange
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import to_categorical
from DataPrep import preprocess_image, augment_pipeline

In [2]:
# Load the pre-computed train / validation split tables (one row per image).
train_df, val_df = (
    pd.read_csv(csv_path)
    for csv_path in ('datasets/train_sets.csv', 'datasets/val_sets.csv')
)

In [3]:
# Preview the training split; the rendered table elides the middle rows.
train_df

Unnamed: 0,Image,Id,whale_id_index
0,66f164af.jpg,new_whale,0
1,81bd5469.jpg,w_143b201,20
2,193ae7a0.jpg,w_5297ab3,95
3,feb15de4.jpg,w_8d46cef,167
4,7fe190ce.jpg,w_43b50e5,76
...,...,...,...
2649,53c207cf.jpg,w_94cd45e,176
2650,8cc05900.jpg,w_7554f44,140
2651,fa8ed4af.jpg,w_43be268,77
2652,59ecd346.jpg,w_2a04ceb,43


In [14]:

class DataGenerator:
    """Endless generator of ``(images, one-hot labels)`` batches described by a DataFrame.

    The frame must provide an ``Image`` column (file name inside
    ``dataset_dir``) and a ``whale_id_index`` column (integer class id).

    Args:
        df: table listing the samples to serve.
        dataset_dir: directory holding the image files. It is combined with
            the file name through the ``/`` operator, so a ``pathlib.Path``
            is expected.
        batch_size: number of samples per batch; the final batch of a pass
            is smaller when ``len(df)`` is not a multiple of it.
        number_class: width of the one-hot labels. Defaults to the largest
            ``whale_id_index`` in ``df`` plus one. Pass it explicitly when the
            frame may not contain the highest class id (e.g. a validation
            split) so the labels still match the model output.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """

    def __init__(self, df, dataset_dir, batch_size=32, number_class=None):
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.df = df
        self.dataset_dir = dataset_dir
        if number_class is None:
            number_class = self.df['whale_id_index'].max() + 1
        self.number_class = number_class
        self.batch_size = batch_size

    @property
    def number_step(self):
        """Number of batches produced by one full pass over ``df``.

        Rounded up so that it equals the number of batches the generator
        yields per pass (the trailing partial batch included). With a floored
        value the generator and the epoch counter drift apart, and validation
        would be scored on a different window of the data every epoch.
        """
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    def create_generator(self, shuffle=False, augment=False):
        """Yield ``(xs, ys)`` batches forever.

        Args:
            shuffle: draw a new random row order at the start of every pass.
            augment: run each image through ``augment_pipeline`` before
                ``preprocess_image``.

        Yields:
            A tuple ``(xs, ys)`` of numpy arrays: the stacked preprocessed
            images and the stacked one-hot labels of one batch.

        Raises:
            ValueError: if the frame has no rows (nothing could ever be yielded).
            FileNotFoundError: if an image cannot be read from disk.
        """
        base_df = self.df
        if len(base_df) == 0:
            # Without this guard the loop below would spin forever without yielding.
            raise ValueError("cannot generate batches from an empty DataFrame")

        while True:
            # Reshuffle on every pass so consecutive epochs see different batches.
            epoch_df = base_df.sample(frac=1, replace=False) if shuffle else base_df

            for start in range(0, len(epoch_df), self.batch_size):
                batch_df = epoch_df.iloc[start:start + self.batch_size]

                xs = []
                ys = []
                for file_name, class_index in zip(batch_df['Image'],
                                                  batch_df['whale_id_index']):
                    img_path = self.dataset_dir / file_name
                    # NOTE(review): cv2.imread returns channels in BGR order;
                    # presumably preprocess_image accounts for that - confirm.
                    img = cv2.imread(str(img_path))
                    if img is None:
                        # cv2.imread signals a missing/unreadable file with None
                        # instead of raising; fail here with the offending path.
                        raise FileNotFoundError(f"could not read image: {img_path}")
                    if augment:
                        img = augment_pipeline.augment(image=img)
                    img = preprocess_image(img)

                    xs.append(img)
                    ys.append(to_categorical(class_index, self.number_class))

                yield np.array(xs), np.array(ys)
                

In [15]:
# Both splits read their image files from the same directory.
input_dir = Path('datasets/train/train')

# Training batches: shuffled row order, augmentation disabled.
train_datagen = DataGenerator(train_df, input_dir)
train_gen = train_datagen.create_generator(shuffle=True, augment=False)

# Validation batches: fixed row order, no augmentation.
val_datagen = DataGenerator(val_df, input_dir)
# The classifier head is sized from train_datagen.number_class, so the
# validation one-hot labels must use that same width even when val_df does
# not contain the highest class id.
val_datagen.number_class = train_datagen.number_class
val_gen = val_datagen.create_generator(shuffle=False)

In [16]:
# ImageNet-pretrained DenseNet121 without its classification top, used as the
# feature extractor for (478, 968, 3) inputs.
backbone = DenseNet121(
    weights='imagenet',
    include_top=False,
    input_shape=(478, 968, 3),
)

# Classification head: average-pool the backbone feature map, then one softmax
# unit per class id known to the training generator.
model = Sequential()
model.add(backbone)
model.add(GlobalAveragePooling2D())
model.add(Dense(train_datagen.number_class, activation='softmax'))

In [7]:
# Freeze the DenseNet121 backbone so only the layers stacked on top of it are trained.
backbone.trainable = False

In [8]:
# Print the layer-by-layer summary with trainable vs. non-trainable parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Model)          (None, 15, 30, 1024)      7037504   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 313)               320825    
Total params: 7,358,329
Trainable params: 320,825
Non-trainable params: 7,037,504
_________________________________________________________________


In [9]:
# Adam with a learning rate of 1e-3.
optimizer = Adam(learning_rate=1e-3)

# The metric is registered as 'acc', so its validation counterpart is reported
# as 'val_acc'.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [12]:
# Output directory for TensorBoard logs and model checkpoints.
model_dir = Path('benchmark/baseline')
# parents=True also creates 'benchmark/' when it does not exist yet; without it
# mkdir raises FileNotFoundError on a fresh checkout.
model_dir.mkdir(parents=True, exist_ok=True)

# Checkpoint file name template; Keras fills in the epoch number and val_acc.
checkpoint_path = model_dir / 'freeze-ep-{epoch:02d}-val_acc{val_acc:.4f}.hdf5'

# Keep the returned History object so the training curves can be inspected
# later instead of only showing its repr as the cell output.
history = model.fit(
    train_gen,
    steps_per_epoch=train_datagen.number_step,
    validation_data=val_gen,
    validation_steps=val_datagen.number_step,
    epochs=75,
    callbacks=[
        TensorBoard(str(model_dir)),
        # Write a checkpoint only when validation accuracy reaches a new maximum.
        ModelCheckpoint(str(checkpoint_path),
                        monitor='val_acc',
                        verbose=1,
                        save_best_only=True,
                        mode='max'),
    ],
)

Epoch 1/75
Epoch 00001: val_acc improved from -inf to 0.24688, saving model to benchmark/baseline/freeze-ep-01-val_acc0.2469.hdf5
Epoch 2/75
Epoch 00002: val_acc did not improve from 0.24688
Epoch 3/75
Epoch 00003: val_acc did not improve from 0.24688
Epoch 4/75
Epoch 00004: val_acc did not improve from 0.24688
Epoch 5/75
Epoch 00005: val_acc improved from 0.24688 to 0.25469, saving model to benchmark/baseline/freeze-ep-05-val_acc0.2547.hdf5
Epoch 6/75
Epoch 00006: val_acc improved from 0.25469 to 0.26250, saving model to benchmark/baseline/freeze-ep-06-val_acc0.2625.hdf5
Epoch 7/75
Epoch 00007: val_acc improved from 0.26250 to 0.26562, saving model to benchmark/baseline/freeze-ep-07-val_acc0.2656.hdf5
Epoch 8/75
Epoch 00008: val_acc improved from 0.26562 to 0.27500, saving model to benchmark/baseline/freeze-ep-08-val_acc0.2750.hdf5
Epoch 9/75
Epoch 00009: val_acc improved from 0.27500 to 0.28281, saving model to benchmark/baseline/freeze-ep-09-val_acc0.2828.hdf5
Epoch 10/75
Epoch 0001

Epoch 27/75
Epoch 00027: val_acc improved from 0.39375 to 0.40625, saving model to benchmark/baseline/freeze-ep-27-val_acc0.4062.hdf5
Epoch 28/75
Epoch 00028: val_acc improved from 0.40625 to 0.41563, saving model to benchmark/baseline/freeze-ep-28-val_acc0.4156.hdf5
Epoch 29/75
Epoch 00029: val_acc improved from 0.41563 to 0.41875, saving model to benchmark/baseline/freeze-ep-29-val_acc0.4187.hdf5
Epoch 30/75
Epoch 00030: val_acc improved from 0.41875 to 0.42031, saving model to benchmark/baseline/freeze-ep-30-val_acc0.4203.hdf5
Epoch 31/75
Epoch 00031: val_acc improved from 0.42031 to 0.42969, saving model to benchmark/baseline/freeze-ep-31-val_acc0.4297.hdf5
Epoch 32/75
Epoch 00032: val_acc improved from 0.42969 to 0.43281, saving model to benchmark/baseline/freeze-ep-32-val_acc0.4328.hdf5
Epoch 33/75
Epoch 00033: val_acc improved from 0.43281 to 0.43594, saving model to benchmark/baseline/freeze-ep-33-val_acc0.4359.hdf5
Epoch 34/75
Epoch 00034: val_acc improved from 0.43594 to 0.44

Epoch 53/75
Epoch 00053: val_acc improved from 0.48750 to 0.48906, saving model to benchmark/baseline/freeze-ep-53-val_acc0.4891.hdf5
Epoch 54/75
Epoch 00054: val_acc improved from 0.48906 to 0.49219, saving model to benchmark/baseline/freeze-ep-54-val_acc0.4922.hdf5
Epoch 55/75
Epoch 00055: val_acc improved from 0.49219 to 0.49375, saving model to benchmark/baseline/freeze-ep-55-val_acc0.4938.hdf5
Epoch 56/75
Epoch 00056: val_acc did not improve from 0.49375
Epoch 57/75
Epoch 00057: val_acc did not improve from 0.49375
Epoch 58/75
Epoch 00058: val_acc did not improve from 0.49375
Epoch 59/75
Epoch 00059: val_acc did not improve from 0.49375
Epoch 60/75
Epoch 00060: val_acc did not improve from 0.49375
Epoch 61/75
Epoch 00061: val_acc did not improve from 0.49375
Epoch 62/75
Epoch 00062: val_acc did not improve from 0.49375
Epoch 63/75
Epoch 00063: val_acc did not improve from 0.49375
Epoch 64/75
Epoch 00064: val_acc did not improve from 0.49375
Epoch 65/75
Epoch 00065: val_acc did not

<tensorflow.python.keras.callbacks.History at 0x7fb328302b00>