In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [2]:
from tensorflow.python.client import device_lib
dev=device_lib.list_local_devices()
dev[len(dev)-1]

name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15956161332
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3923132551920636511
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"

In [0]:
#global consts
IMG_SIZE=128
BATCH_SIZE=32
DROP_OUT=0.2
FOLD=0
TRAIN_DIR = './train/'
TRAIN_IMG_DIR=TRAIN_DIR+str(IMG_SIZE)
EPOCHS = 10
STATS = (0.0692, 0.2051)
NET_NAME='mvgg16_[2_1_1]_mixup'
INPUT_DIR ='/content/drive/My Drive/kaggle/bengali/input/'
OUTPUT_DIR = '/content/drive/My Drive/kaggle/bengali/output/'
OUTPUT_SUBDIR = ''
DATASET_224='224x224-bengali.zip'
DATASET_128='grapheme-imgs-128x128.zip'
TRAIN_WITH_FOLD = 'train_with_fold.csv'
PRETRAINED_WEIGHTS = 'm_mvgg16_[2_1_1]_mixup_eps10_offset20_sz128_bs32_do0.2_Lookahead_IS_fold0.h5'

if IMG_SIZE == 128: DATASET=DATASET_128
if IMG_SIZE == 224: DATASET=DATASET_224


#cosine anneling consts
LR_MAX = 1e-3
LR_MIN = 1e-6
T_MAX = 40
EP_OFFSET=20

In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import cv2
import math
import os
import errno
from tqdm.auto import tqdm
import gc

import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import LeakyReLU
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau,LambdaCallback
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects
from sklearn.utils import class_weight
from sklearn.metrics import  recall_score, confusion_matrix

Using TensorFlow backend.


In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
try:
  os.mkdir(TRAIN_DIR)
except OSError as e:
    if e.errno == errno.EEXIST:
        print(TRAIN_DIR+' already exists')
    else:
        raise
try:
  os.mkdir(TRAIN_IMG_DIR)
except OSError as e:
    if e.errno == errno.EEXIST:
        print(TRAIN_IMG_DIR+' already exists')
    else:
        raise

os.system('cp '+ '"'+INPUT_DIR+DATASET+'" ' + TRAIN_DIR)
os.system('cp '+ '"'+INPUT_DIR+TRAIN_WITH_FOLD+'" ' + TRAIN_DIR)
os.system('cp '+ '"'+OUTPUT_DIR+OUTPUT_SUBDIR+PRETRAINED_WEIGHTS+'" '+TRAIN_DIR)

./train/ already exists
./train/128 already exists


0

In [0]:
os.system('unzip -q '+TRAIN_DIR+DATASET+ ' -d '+ TRAIN_IMG_DIR)

0

In [0]:
!ls train

 128
 grapheme-imgs-128x128.zip
'm_mvgg16_[2_1_1]_mixup_eps10_offset20_sz128_bs32_do0.2_Lookahead_IS_fold0.h5'
 train_with_fold.csv


In [0]:
dataset_np = pd.read_csv(TRAIN_DIR+TRAIN_WITH_FOLD).to_numpy()
dataset_np[:,0]+='.png'
dataset_m = dataset_np.shape[0]


valid_m = np.where(dataset_np[:,6]==FOLD)[0].shape[0]
train_m = dataset_m-valid_m

fold_train_inds = np.where(dataset_np[:,6] != FOLD)
fold_valid_inds = np.where(dataset_np[:,6] == FOLD)

train_np = dataset_np[fold_train_inds]
valid_np = dataset_np[fold_valid_inds]

train_df = pd.DataFrame(train_np)
valid_df = pd.DataFrame(valid_np)

train_df.rename(columns={0:'filename',1:'root_class',2:'vowel_class',3:'cons_class', 4:'grapheme'}, inplace=True)
valid_df.rename(columns={0:'filename',1:'root_class',2:'vowel_class',3:'cons_class', 4:'grapheme'}, inplace=True)

train_df.drop(columns=[5,6], inplace=True)
valid_df.drop(columns=[5,6], inplace=True)

In [0]:
cons_unique=np.unique(train_np[:,3])
cons_y_train = train_np[:,3]
vowel_unique=np.unique(train_np[:,2])
vowel_y_train = train_np[:,2]
root_unique =np.unique(train_np[:,1])
root_y_train = train_np[:,1]

cons_class_weights = class_weight.compute_class_weight('balanced',
                                                 cons_unique,
                                                 cons_y_train)

vowel_class_weights = class_weight.compute_class_weight('balanced',
                                                 vowel_unique,
                                                 vowel_y_train)

root_class_weights = class_weight.compute_class_weight('balanced',
                                                 root_unique,
                                                 root_y_train)
cons_cw_dict=dict(enumerate(cons_class_weights))
vowel_cw_dict=dict(enumerate(vowel_class_weights))
root_cw_dict=dict(enumerate(root_class_weights))

model_cw={}
model_cw['root']=root_cw_dict
model_cw['vowel']=vowel_cw_dict
model_cw['consonant']=cons_cw_dict

In [0]:
hard_roots=np.array([ 20,  32,  54,  60,  61,  62,  63,  67,  84,  85,  86, 104, 116,135, 140, 144, 145, 152, 154, 162])

In [0]:
for k in hard_roots:
  model_cw['root'][k] *=4

In [0]:
class Mish(Activation):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
        reference: https://github.com/digantamisra98/Mish
    '''

    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'


def mish(inputs):
    return inputs * tf.math.tanh(tf.math.softplus(inputs))

get_custom_objects().update({'Mish': Mish(mish)})

In [0]:
backbone_net = VGG16(include_top=False, weights=None, input_shape=(IMG_SIZE, IMG_SIZE, 1))

In [0]:
def build_head(x_in, n, name=None):
  x = layers.GlobalAveragePooling2D()(x_in)
  x = Activation('Mish', name='mish_act1_'+name) (x)
  x = layers.BatchNormalization()(x)
  x = layers.Dropout(DROP_OUT)(x)
  x = layers.Dense(512)(x)
  x = Activation('Mish', name='mish_act2_'+name) (x)
  x = layers.BatchNormalization()(x)
  x = layers.Dropout(DROP_OUT)(x)
  x = layers.Dense(n, name=name, activation='softmax')(x)
  return x

In [0]:
def build_model(backbone):
    x_in = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 1))
    #x =  layers.Concatenate()([x_in, x_in, x_in])
    x = backbone(x_in)
    out_root = build_head(x, 168,'root')
    out_vowel = build_head(x, 11,'vowel')
    out_consonant = build_head(x,7,'consonant')
    
    model = tf.keras.Model(inputs=x_in, outputs=[out_root, out_vowel, out_consonant])
    
    return model

In [0]:
model = build_model(backbone_net)

In [0]:
for (n, layer) in enumerate(model.layers[1].layers):
  if 'activation' in layer.get_config() and layer.get_config()['activation'] == 'relu':
    print('replacing #{}: {}, {}'.format(n, layer, layer.activation))
    layer.activation = Mish(mish)
    print('-> {}'.format(layer.activation))

replacing #1: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f4522a81e80>, <function relu at 0x7f4567b8c1e0>
-> <__main__.Mish object at 0x7f451ed72fd0>
replacing #2: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f45236232e8>, <function relu at 0x7f4567b8c1e0>
-> <__main__.Mish object at 0x7f451ed7d320>
replacing #4: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f452280a240>, <function relu at 0x7f4567b8c1e0>
-> <__main__.Mish object at 0x7f451ed7d518>
replacing #5: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f451ef91630>, <function relu at 0x7f4567b8c1e0>
-> <__main__.Mish object at 0x7f451ef6e400>
replacing #7: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f451ef9be80>, <function relu at 0x7f4567b8c1e0>
-> <__main__.Mish object at 0x7f451ed72e10>
replacing #8: <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f451ef9bf28>, <function relu at 0x7f4567b8c1e0>
-> <__main__

In [0]:
import tempfile

def apply_modifications(model, custom_objects=None):
    """Applies modifications to the model layers to create a new Graph. For example, simply changing
    `model.layers[idx].activation = new activation` does not change the graph. The entire graph needs to be updated
    with modified inbound and outbound tensors because of change in layer building function.
    Args:
        model: The `keras.models.Model` instance.
    Returns:
        The modified model with changes applied. Does not mutate the original `model`.
        reference: https://github.com/raghakot/keras-vis
    """
    
    # The strategy is to save the modified model and load it back. This is done because setting the activation
    # in a Keras layer doesnt actually change the graph. We have to iterate the entire graph and change the
    # layer inbound and outbound nodes with modified tensors. This is doubly complicated in Keras 2.x since
    # multiple inbound and outbound nodes are allowed with the Graph API.
    model_path = os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names()) + '.h5')
    try:
        model.save(model_path)
        return load_model(model_path, custom_objects=custom_objects)
    finally:
        os.remove(model_path)

In [0]:
model = apply_modifications(model, custom_objects={'mish':Mish(mish)})



In [0]:
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package='Addons')
class Lookahead(tf.keras.optimizers.Optimizer):
    """This class allows to extend optimizers with the lookahead mechanism.
    The mechanism is proposed by Michael R. Zhang et.al in the paper
    [Lookahead Optimizer: k steps forward, 1 step back]
    (https://arxiv.org/abs/1907.08610v1). The optimizer iteratively updates two
    sets of weights: the search directions for weights are chosen by the inner
    optimizer, while the "slow weights" are updated each `k` steps based on the
    directions of the "fast weights" and the two sets of weights are
    synchronized. This method improves the learning stability and lowers the
    variance of its inner optimizer.
    Example of usage:
    ```python
    opt = tf.keras.optimizers.SGD(learning_rate)
    opt = tfa.optimizers.Lookahead(opt)
    ```
    """

    def __init__(self,
                 optimizer,
                 sync_period=6,
                 slow_step_size=0.5,
                 name="Lookahead",
                 **kwargs):
        r"""Wrap optimizer with the lookahead mechanism.
        Args:
            optimizer: The original optimizer that will be used to compute
                and apply the gradients.
            sync_period: An integer. The synchronization period of lookahead.
                Enable lookahead mechanism by setting it with a positive value.
            slow_step_size: A floating point value.
                The ratio for updating the slow weights.
            name: Optional name for the operations created when applying
                gradients. Defaults to "Lookahead".
            **kwargs: keyword arguments. Allowed to be {`clipnorm`,
                `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients
                by norm; `clipvalue` is clip gradients by value, `decay` is
                included for backward compatibility to allow time inverse
                decay of learning rate. `lr` is included for backward
                compatibility, recommended to use `learning_rate` instead.
        """
        super(Lookahead, self).__init__(name, **kwargs)

        if isinstance(optimizer, str):
            optimizer = tf.keras.optimizers.get(optimizer)
        if not isinstance(optimizer, tf.keras.optimizers.Optimizer):
            raise TypeError(
                "optimizer is not an object of tf.keras.optimizers.Optimizer")

        self._optimizer = optimizer
        self._set_hyper('sync_period', sync_period)
        self._set_hyper('slow_step_size', slow_step_size)
        self._initialized = False

    def _create_slots(self, var_list):
        self._optimizer._create_slots(var_list=var_list)  # pylint: disable=protected-access
        for var in var_list:
            self.add_slot(var, 'slow')

    def _create_hypers(self):
        self._optimizer._create_hypers()  # pylint: disable=protected-access

    def _prepare(self, var_list):
        return self._optimizer._prepare(var_list=var_list)  # pylint: disable=protected-access

    def apply_gradients(self, grads_and_vars, name=None):
        self._optimizer._iterations = self.iterations  # pylint: disable=protected-access
        return super(Lookahead, self).apply_gradients(grads_and_vars, name)

    def _init_op(self, var):
        slow_var = self.get_slot(var, 'slow')
        return slow_var.assign(
            tf.where(
                tf.equal(self.iterations,
                         tf.constant(0, dtype=self.iterations.dtype)),
                var,
                slow_var,
            ),
            use_locking=self._use_locking)

    def _look_ahead_op(self, var):
        var_dtype = var.dtype.base_dtype
        slow_var = self.get_slot(var, 'slow')
        local_step = tf.cast(self.iterations + 1, tf.dtypes.int64)
        sync_period = self._get_hyper('sync_period', tf.dtypes.int64)
        slow_step_size = self._get_hyper('slow_step_size', var_dtype)
        step_back = slow_var + slow_step_size * (var - slow_var)
        sync_cond = tf.equal(
            tf.math.floordiv(local_step, sync_period) * sync_period,
            local_step)
        with tf.control_dependencies([step_back]):
            slow_update = slow_var.assign(
                tf.where(
                    sync_cond,
                    step_back,
                    slow_var,
                ),
                use_locking=self._use_locking)
            var_update = var.assign(
                tf.where(
                    sync_cond,
                    step_back,
                    var,
                ),
                use_locking=self._use_locking)
        return tf.group(slow_update, var_update)

    @property
    def weights(self):
        return self._weights + self._optimizer.weights

    def _resource_apply_dense(self, grad, var):
        init_op = self._init_op(var)
        with tf.control_dependencies([init_op]):
            train_op = self._optimizer._resource_apply_dense(grad, var)  # pylint: disable=protected-access
            with tf.control_dependencies([train_op]):
                look_ahead_op = self._look_ahead_op(var)
        return tf.group(init_op, train_op, look_ahead_op)

    def _resource_apply_sparse(self, grad, var, indices):
        init_op = self._init_op(var)
        with tf.control_dependencies([init_op]):
            train_op = self._optimizer._resource_apply_sparse(  # pylint: disable=protected-access
                grad, var, indices)
            with tf.control_dependencies([train_op]):
                look_ahead_op = self._look_ahead_op(var)
        return tf.group(init_op, train_op, look_ahead_op)

    def get_config(self):
        config = {
            'optimizer': tf.keras.optimizers.serialize(self._optimizer),
            'sync_period': self._serialize_hyperparameter('sync_period'),
            'slow_step_size': self._serialize_hyperparameter('slow_step_size'),
        }
        base_config = super(Lookahead, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @property
    def learning_rate(self):
        return self._optimizer._get_hyper('learning_rate')

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        self._optimizer._set_hyper('learning_rate', learning_rate)

    @property
    def lr(self):
        return self.learning_rate

    @lr.setter
    def lr(self, lr):
        self.learning_rate = lr

    @classmethod
    def from_config(cls, config, custom_objects=None):
        optimizer = tf.keras.optimizers.deserialize(
            config.pop('optimizer'),
            custom_objects=custom_objects,
        )
        return cls(optimizer, **config)

In [0]:
loss_dict={'root': 'categorical_crossentropy',
           'vowel':    'categorical_crossentropy',
           'consonant':'categorical_crossentropy'}
           
metrics_dict={ 'root':     [tf.keras.metrics.Recall(name='recall')],
               'vowel':    [tf.keras.metrics.Recall(name='recall')],
              'consonant': [tf.keras.metrics.Recall(name='recall')]}

opt = Lookahead(tf.keras.optimizers.Adam(LR_MAX))

model.compile(optimizer=opt, loss=loss_dict, loss_weights=[2,1,1]) 
                  #metrics=metrics_dict)

In [0]:
train_datagen = ImageDataGenerator(rotation_range = 10,
                                   shear_range = 0.1)

valid_datagen = ImageDataGenerator() 

In [0]:
columns=["root_class","vowel_class", "cons_class"]

train_generator1 = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=TRAIN_IMG_DIR,
        x_col="filename",
        y_col=columns,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="multi_output",
        color_mode="grayscale")

train_generator2 = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=TRAIN_IMG_DIR,
        x_col="filename",
        y_col=columns,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="multi_output",
        color_mode="grayscale")


valid_generator = valid_datagen.flow_from_dataframe(
        dataframe=valid_df,
        directory=TRAIN_IMG_DIR,
        x_col="filename",
        y_col=columns,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="multi_output",
        color_mode="grayscale")


Found 160672 validated image filenames.
Found 160672 validated image filenames.
Found 40168 validated image filenames.


In [0]:
def split_into_3_outputs(y_batch):
    
    y_root =tf.keras.utils.to_categorical(y_batch[0],168)
    y_vowel=tf.keras.utils.to_categorical(y_batch[1],11)
    y_cons =tf.keras.utils.to_categorical(y_batch[2],7)
    
    return y_root,y_vowel,y_cons

In [0]:
def aux_data_gen(generator):
    while True:
        batch = next(generator)
        batch_x = (batch[0].astype(np.float32)/255.0 - STATS[0])/STATS[1]
        yield batch_x, split_into_3_outputs(batch[1])

In [0]:
def mixup_data_gen(generator1, generator2, alpha=0.4):
    while True:
      x1,y1 = next(generator1)
      x2,y2 = next(generator2)
      bs = x1.shape[0]
      l = np.random.beta(alpha, alpha, bs)

      y1_root =  y1[0]
      y1_vowel = y1[1]
      y1_cons =  y1[2]

      y2_root =  y2[0]
      y2_vowel = y2[1]
      y2_cons =  y2[2]

      x_l = l.reshape(bs, 1, 1, 1)
      y_l = l.reshape(bs, 1)

      x = x1 * x_l + x2 * (1 - x_l)

      y_root =  y1_root  *  y_l + y2_root  * (1 - y_l)
      y_vowel = y1_vowel *  y_l + y2_vowel * (1 - y_l)
      y_cons =  y1_cons  *  y_l + y2_cons  * (1 - y_l)

      yield x,[y_root,y_vowel,y_cons]

In [0]:
class CosineAnnealingScheduler(Callback):
    """Cosine annealing scheduler.
       reference: https://github.com/4uiiurz1/keras-cosine-annealing
    """
    
    def __init__(self, T_max, eta_max, eta_min=0, verbose=0, epoch_offset=0):
        super(CosineAnnealingScheduler, self).__init__()
        self.T_max = T_max
        self.eta_max = eta_max
        self.eta_min = eta_min
        self.verbose = verbose
        self.epoch_offset = epoch_offset

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')
        lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * (epoch + self.epoch_offset) / self.T_max)) / 2
        K.set_value(self.model.optimizer.lr, lr)
        if self.verbose > 0:
            print('\nEpoch %05d: CosineAnnealingScheduler setting learning '
                  'rate to %s.' % (epoch + 1, lr))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

In [0]:
TRAIN_DIR+PRETRAINED_WEIGHTS

'./train/m_mvgg16_[2_1_1]_mixup_eps10_offset20_sz128_bs32_do0.2_Lookahead_IS_fold0.h5'

In [0]:
model.load_weights(TRAIN_DIR+PRETRAINED_WEIGHTS)

In [0]:
mixup_datagen=mixup_data_gen(aux_data_gen(train_generator1),aux_data_gen(train_generator2))
regular_datagen=aux_data_gen(train_generator1)

In [0]:
def test_batch_generator(frame, train_dir,batch_size=64):    
    
    num_imgs = len(frame)
    stats = (0.0692, 0.2051)
    
    for batch_start in range(0, num_imgs,batch_size):   
            cur_batch_size = min(num_imgs,batch_start+batch_size)-batch_start

            idx = np.arange(batch_start,batch_start+cur_batch_size)
            names_batch = frame.iloc[idx,0].values
            imgs_batch = np.zeros((cur_batch_size,128,128,1))
            
            for j in range(cur_batch_size):
                img = cv2.imread(train_dir+'/'+names_batch[j])
                img = (img.astype(np.float32)/255.0 - stats[0])/stats[1]
                imgs_batch[j,:,:,0] = img[:,:,0]

            yield imgs_batch

In [0]:
def compute_cm(frame,root_predicts,vowel_predicts,consonant_predicts):
  p_root=np.array([np.argmax(x) for x in root_predicts[:]]).reshape(-1)
  p_vowel = np.array([np.argmax(x) for x in vowel_predicts[:]]).reshape(-1)
  p_consonant = np.array([np.argmax(x) for x in consonant_predicts[:]]).reshape(-1)
  l = len(p_root)
  t_root=np.array(frame.iloc[:l,1].values, dtype=int)
  t_vowel=np.array(frame.iloc[:l,2].values, dtype=int)
  t_consonant=np.array(frame.iloc[:l,3].values, dtype=int)
  
  vowel_cm = confusion_matrix(t_vowel, p_vowel)
  vowel_recalls = np.diag(vowel_cm / np.sum(vowel_cm, axis = 1))

  cons_cm = confusion_matrix(t_consonant,p_consonant)
  cons_recalls = np.diag(cons_cm / np.sum(cons_cm, axis = 1))

  root_cm = confusion_matrix(t_root, p_root)
  root_recalls = np.diag(root_cm / np.sum(root_cm, axis = 1))

  return root_recalls,vowel_recalls,cons_recalls

In [0]:
def get_p_dicts(model,generator):
    root_predicts,vowel_predicts, consonant_predicts = [],[],[]
    for batch_x in tqdm(generator):
        batch_predict = model.predict(batch_x)
        for j in range(batch_predict[0].shape[0]):
            root_predicts += [batch_predict[0][j]]
            vowel_predicts += [batch_predict[1][j]]
            consonant_predicts += [batch_predict[2][j]]
    return root_predicts,vowel_predicts,consonant_predicts

In [0]:
ca_shed = CosineAnnealingScheduler(T_max=T_MAX, eta_max=LR_MAX, eta_min=LR_MIN,verbose=1, epoch_offset=EP_OFFSET)

suffix = NET_NAME+\
                '_eps'+str(EPOCHS)+\
                '_offset'+str(EP_OFFSET)+\
                '_sz'+str(IMG_SIZE)+\
                '_bs'+str(BATCH_SIZE)+\
                '_do'+str(DROP_OUT)+\
                '_'+model.optimizer.get_config()['name']+\
                '_IS_fold'+str(FOLD)

m_cp = ModelCheckpoint(OUTPUT_DIR+'w_chk_'+suffix+'.h5',
                       monitor='val_loss',
                       verbose=1,
                       save_weights_only=True, 
                       save_best_only=True,
                       mode='min')

history=model.fit_generator(mixup_datagen,
                    validation_data=aux_data_gen(valid_generator),
                    epochs=EPOCHS,
                    steps_per_epoch=train_m//BATCH_SIZE+1,
                    validation_steps = valid_m//BATCH_SIZE+1,
                    callbacks=[ca_shed,m_cp],
                    verbose=2)

valid_gen = test_batch_generator(valid_df,TRAIN_IMG_DIR, batch_size=512)
val_root_preds,val_vowel_preds,val_consonant_preds = get_p_dicts(model,valid_gen)
val_root_recalls,val_vowel_recalls,val_cons_recalls=compute_cm(valid_df,val_root_preds,val_vowel_preds,val_consonant_preds)
val_root_recall,val_vowel_recall, val_cons_recall = np.mean(val_root_recalls),np.mean(val_vowel_recalls),np.mean(val_cons_recalls)
val_recall = 0.5*val_root_recall+0.25*(val_vowel_recall+val_cons_recall)
gc.collect()
print(f'val_root_recall: {val_root_recall} val_vowel_recall: {val_vowel_recall} val_cons_recall: {val_cons_recall} val_recall: {val_recall}')

#history.history['weighted_recall']= 0.5*np.array(history.history['root_recall'])+0.25*np.array(history.history['consonant_recall'])+0.25*np.array(history.history['vowel_recall'])
#history.history['val_weighted_recall']= 0.5*np.array(history.history['val_root_recall'])+0.25*np.array(history.history['val_consonant_recall'])+0.25*np.array(history.history['val_vowel_recall'])


model.save_weights(OUTPUT_DIR+'w_'+suffix+'.h5')
model.save(OUTPUT_DIR+'m_'+suffix+'.h5')
pd.DataFrame(history.history).to_csv(OUTPUT_DIR+'h_'+suffix+'.csv', index=False)

Instructions for updating:
Please use Model.fit, which supports generators.
Train for 5022 steps, validate for 1256 steps

Epoch 00001: CosineAnnealingScheduler setting learning rate to 0.0005005000000000001.
Epoch 1/10
Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Epoch 00001: val_loss improved from inf to 0.46725, saving model to /content/drive/My Drive/kaggle/bengali/output/w_chk_mvgg16_[2_1_1]_mixup_eps10_offset20_sz128_bs32_do0.2_Lookahead_IS_fold0.h5
5022/5022 - 709s - loss: 2.5169 - root_loss: 0.8795 - vowel_loss: 0.4576 - consonant_loss: 0.3002 - val_loss: 0.4672 - val_root_loss: 0.1756 - val_vowel_loss: 0.0553 - val_consonant_loss: 0.0608

Epoch 00002: CosineAnnealingScheduler setting learning rate to 0.0004613096816839416.
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.46725
5022/5022 - 692s - loss: 2.4783 - root_loss: 0.8637 - vowel_loss: 0.4543 - consonant_loss: 0.2966 - val_loss: 0.4763 - val_root_loss: 0.1782 - val_vowel_loss:

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


val_root_recall: 0.948510100587673 val_vowel_recall: 0.9832464709171589 val_cons_recall: 0.9743545324008543 val_recall: 0.9636553011233399


In [0]:
model.optimizer.get_config()

{'name': 'Lookahead',
 'optimizer': {'class_name': 'Adam',
  'config': {'amsgrad': False,
   'beta_1': 0.9,
   'beta_2': 0.999,
   'decay': 0.0,
   'epsilon': 1e-07,
   'learning_rate': 0.0001761007,
   'name': 'Adam'}},
 'slow_step_size': 0.5,
 'sync_period': 6}