In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [2]:
from tensorflow.python.client import device_lib
dev=device_lib.list_local_devices()
dev[len(dev)-1]

name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15956161332
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11733604899297000045
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"

In [0]:
#global consts
IMG_SIZE=128
BATCH_SIZE=32
DROP_OUT=0.5
FOLD=0
TRAIN_DIR = './train/'
TRAIN_IMG_DIR=TRAIN_DIR+str(IMG_SIZE)
EPOCHS = 10
STATS = (0.0692, 0.2051)
NET_NAME='md121_cutout_pl'
INPUT_DIR ='/content/drive/My Drive/kaggle/bengali/input/'
OUTPUT_DIR = '/content/drive/My Drive/kaggle/bengali/output/'
OUTPUT_SUBDIR = ''
DATASET_224='224x224-bengali.zip'
DATASET_128='grapheme-imgs-128x128.zip'
TRAIN_WITH_FOLD = 'train_with_fold.csv'
PRETRAINED = True
PRETRAINED_WEIGHTS = 'w_md121_cutout_pl_eps10_offset135_sz128_bs32_do0.5_Adam_IS_fold0.h5'
LOG_FILE = OUTPUT_DIR+'log_'+NET_NAME+'.csv'
PL_THRESHOLD = 0.95

if IMG_SIZE == 128: DATASET=DATASET_128
if IMG_SIZE == 224: DATASET=DATASET_224


#cosine anneling consts
LR_MAX = 0.01
LR_MIN = 0.003
T_MAX = 60
EP_OFFSET=155

In [4]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import cv2
import math
import os
import errno
from tqdm.auto import tqdm
import gc
import time
import datetime

import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import LeakyReLU
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau,LambdaCallback
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import ModelCheckpoint,CSVLogger
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects
from sklearn.utils import class_weight
from sklearn.metrics import  recall_score, confusion_matrix

Using TensorFlow backend.


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
try:
  os.mkdir(TRAIN_DIR)
except OSError as e:
    if e.errno == errno.EEXIST:
        print(TRAIN_DIR+' already exists')
    else:
        raise
try:
  os.mkdir(TRAIN_IMG_DIR)
except OSError as e:
    if e.errno == errno.EEXIST:
        print(TRAIN_IMG_DIR+' already exists')
    else:
        raise

os.system('cp '+ '"'+INPUT_DIR+DATASET+'" ' + TRAIN_DIR)
os.system('cp '+ '"'+INPUT_DIR+TRAIN_WITH_FOLD+'" ' + TRAIN_DIR)
if PRETRAINED: os.system('cp '+ '"'+OUTPUT_DIR+OUTPUT_SUBDIR+PRETRAINED_WEIGHTS+'" '+TRAIN_DIR)

In [7]:
os.system('unzip -q '+TRAIN_DIR+DATASET+ ' -d '+ TRAIN_IMG_DIR)

0

In [8]:
!ls train

128
grapheme-imgs-128x128.zip
train_with_fold.csv
w_md121_cutout_pl_eps10_offset135_sz128_bs32_do0.5_Adam_IS_fold0.h5


In [0]:
dataset_np = pd.read_csv(TRAIN_DIR+TRAIN_WITH_FOLD).to_numpy()
dataset_np[:,0]+='.png'
dataset_m = dataset_np.shape[0]


valid_m = np.where(dataset_np[:,6]==FOLD)[0].shape[0]
train_m = dataset_m-valid_m

fold_train_inds = np.where(dataset_np[:,6] != FOLD)
fold_valid_inds = np.where(dataset_np[:,6] == FOLD)

train_np = dataset_np[fold_train_inds]
valid_np = dataset_np[fold_valid_inds]

train_df = pd.DataFrame(train_np)
valid_df = pd.DataFrame(valid_np)

train_df.rename(columns={0:'filename',1:'root_class',2:'vowel_class',3:'cons_class', 4:'grapheme'}, inplace=True)
valid_df.rename(columns={0:'filename',1:'root_class',2:'vowel_class',3:'cons_class', 4:'grapheme'}, inplace=True)

train_df.drop(columns=[5,6], inplace=True)
valid_df.drop(columns=[5,6], inplace=True)

In [0]:
cons_unique=np.unique(train_np[:,3])
cons_y_train = train_np[:,3]
vowel_unique=np.unique(train_np[:,2])
vowel_y_train = train_np[:,2]
root_unique =np.unique(train_np[:,1])
root_y_train = train_np[:,1]

cons_class_weights = class_weight.compute_class_weight('balanced',
                                                 cons_unique,
                                                 cons_y_train)

vowel_class_weights = class_weight.compute_class_weight('balanced',
                                                 vowel_unique,
                                                 vowel_y_train)

root_class_weights = class_weight.compute_class_weight('balanced',
                                                 root_unique,
                                                 root_y_train)
cons_cw_dict=dict(enumerate(cons_class_weights))
vowel_cw_dict=dict(enumerate(vowel_class_weights))
root_cw_dict=dict(enumerate(root_class_weights))

model_cw={}
model_cw['root']=root_cw_dict
model_cw['vowel']=vowel_cw_dict
model_cw['consonant']=cons_cw_dict

In [0]:
hard_roots=np.array([ 20,  32,  54,  60,  61,  62,  63,  67,  84,  85,  86, 104, 116,135, 140, 144, 145, 152, 154, 162])

In [0]:
class Mish(Activation):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
        reference: https://github.com/digantamisra98/Mish
    '''

    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'


def mish(inputs):
    return inputs * tf.math.tanh(tf.math.softplus(inputs))

get_custom_objects().update({'Mish': Mish(mish)})

In [0]:
def build_backbone(x_in, img_size=128):
  backbone_net = DenseNet121(include_top=False, weights=None, input_shape=(img_size, img_size, 1)) 
  x = backbone_net (x_in)
  x_avg = layers.GlobalAveragePooling2D()(x)
  x_max = layers.GlobalMaxPooling2D()(x)
  x = layers.Concatenate()([x_max, x_avg])
  x = Activation('Mish', name='mish_backbone') (x)

  return x

In [0]:
def build_head(x_in, n, name=None, drop_out=0.5):
  x = layers.BatchNormalization()(x_in)
  x = layers.Dropout(drop_out)(x)
  x = layers.Dense(512)(x)
  x = Activation('Mish', name='mish_act2_'+name) (x)
  x = layers.BatchNormalization()(x)
  x = layers.Dropout(drop_out)(x)
  x = layers.Dense(n, name=name, activation='softmax')(x)
  return x

In [0]:
def build_md121_v2_model(img_size=128, drop_out=0.5):
    x_in = layers.Input(shape=(img_size, img_size, 1))
    x = build_backbone(x_in, img_size)
    out_root = build_head(x, 168,'root',drop_out=drop_out)
    out_vowel = build_head(x, 11,'vowel',drop_out=drop_out)
    out_consonant = build_head(x,7,'consonant',drop_out=drop_out)
    
    model = tf.keras.Model(inputs=x_in, outputs=[out_root, out_vowel, out_consonant])
    
    return model

In [0]:
model = build_md121_v2_model(img_size=IMG_SIZE,drop_out=DROP_OUT)

In [0]:
#from tensorflow.keras.utils import plot_model
#plot_model(model, to_file='model.png',show_shapes=True)

In [0]:
#model.summary()

In [0]:
for (n, layer) in enumerate(model.layers[1].layers):
  if 'activation' in layer.get_config() and layer.get_config()['activation'] == 'relu':
    #print('replacing #{}: {}, {}'.format(n, layer, layer.activation))
    layer.activation = Mish(mish)
    #print('-> {}'.format(layer.activation))

In [0]:
import tempfile

def apply_modifications(model, custom_objects=None):
    """Applies modifications to the model layers to create a new Graph. For example, simply changing
    `model.layers[idx].activation = new activation` does not change the graph. The entire graph needs to be updated
    with modified inbound and outbound tensors because of change in layer building function.
    Args:
        model: The `keras.models.Model` instance.
    Returns:
        The modified model with changes applied. Does not mutate the original `model`.
        reference: https://github.com/raghakot/keras-vis
    """
    
    # The strategy is to save the modified model and load it back. This is done because setting the activation
    # in a Keras layer doesnt actually change the graph. We have to iterate the entire graph and change the
    # layer inbound and outbound nodes with modified tensors. This is doubly complicated in Keras 2.x since
    # multiple inbound and outbound nodes are allowed with the Graph API.
    model_path = os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names()) + '.h5')
    try:
        model.save(model_path)
        return load_model(model_path, custom_objects=custom_objects)
    finally:
        os.remove(model_path)

In [21]:
model = apply_modifications(model, custom_objects={'mish':Mish(mish)})



In [0]:
loss_dict={'root': 'categorical_crossentropy',
           'vowel':    'categorical_crossentropy',
           'consonant':'categorical_crossentropy'}
           
metrics_dict={ 'root':     [tf.keras.metrics.Recall(name='recall')],
               'vowel':    [tf.keras.metrics.Recall(name='recall')],
              'consonant': [tf.keras.metrics.Recall(name='recall')]}

#opt = Lookahead(tf.keras.optimizers.SGD(learning_rate=LR_MAX, momentum=0.9,nesterov=True))

opt=tf.keras.optimizers.Adam(learning_rate=0.00003)

model.compile(optimizer=opt, loss=loss_dict, loss_weights=[2.0,1.0,1.0])

In [0]:
class CosineAnnealingScheduler(Callback):
    """Cosine annealing scheduler.
       reference: https://github.com/4uiiurz1/keras-cosine-annealing
    """
    
    def __init__(self, T_max, eta_max, eta_min=0, verbose=0, epoch_offset=0):
        super(CosineAnnealingScheduler, self).__init__()
        self.T_max = T_max
        self.eta_max = eta_max
        self.eta_min = eta_min
        self.verbose = verbose
        self.epoch_offset = epoch_offset

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')
        lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * (epoch + self.epoch_offset) / self.T_max)) / 2
        K.set_value(self.model.optimizer.lr, lr)
        if self.verbose > 0:
            print('\nEpoch %05d: CosineAnnealingScheduler setting learning '
                  'rate to %s.' % (epoch + 1, lr))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

In [0]:
#reference https://github.com/yu4u/cutout-random-erasing/blob/master/random_eraser.py
def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    def eraser(input_img):
        img_h, img_w, img_c = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            c = np.random.uniform(v_l, v_h, (h, w, img_c))
        else:
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w, :] = c

        return input_img

    return eraser

In [0]:
def split_into_3_outputs(y_batch):
    
    y_root =tf.keras.utils.to_categorical(y_batch[0],168)
    y_vowel=tf.keras.utils.to_categorical(y_batch[1],11)
    y_cons =tf.keras.utils.to_categorical(y_batch[2],7)
    
    return y_root,y_vowel,y_cons

In [0]:
def aux_data_gen(generator):
    while True:
        batch = next(generator)
        batch_x = (batch[0].astype(np.float32)/255.0 - STATS[0])/STATS[1]
        yield batch_x, split_into_3_outputs(batch[1])

In [0]:
def mixup_data_gen(generator1, generator2, alpha=0.4):
    while True:
      x1,y1 = next(generator1)
      x2,y2 = next(generator2)
      bs = x1.shape[0]
      l = np.random.beta(alpha, alpha, bs)

      y1_root =  y1[0]
      y1_vowel = y1[1]
      y1_cons =  y1[2]

      y2_root =  y2[0]
      y2_vowel = y2[1]
      y2_cons =  y2[2]

      x_l = l.reshape(bs, 1, 1, 1)
      y_l = l.reshape(bs, 1)

      x = x1 * x_l + x2 * (1 - x_l)

      y_root =  y1_root  *  y_l + y2_root  * (1 - y_l)
      y_vowel = y1_vowel *  y_l + y2_vowel * (1 - y_l)
      y_cons =  y1_cons  *  y_l + y2_cons  * (1 - y_l)

      yield x,[y_root,y_vowel,y_cons]

In [0]:
def test_batch_generator(frame, train_dir,batch_size=64):    
    
    num_imgs = len(frame)
    stats = (0.0692, 0.2051)
    
    for batch_start in range(0, num_imgs,batch_size):   
            cur_batch_size = min(num_imgs,batch_start+batch_size)-batch_start

            idx = np.arange(batch_start,batch_start+cur_batch_size)
            names_batch = frame.iloc[idx,0].values
            imgs_batch = np.zeros((cur_batch_size,128,128,1))
            
            for j in range(cur_batch_size):
                img = cv2.imread(train_dir+'/'+names_batch[j],0)
                img = (img.astype(np.float32)/255.0 - stats[0])/stats[1]
                imgs_batch[j,:,:,0] = img

            yield imgs_batch

In [0]:
def compute_cm(frame,root_predicts,vowel_predicts,consonant_predicts):
  p_root=np.array([np.argmax(x) for x in root_predicts[:]]).reshape(-1)
  p_vowel = np.array([np.argmax(x) for x in vowel_predicts[:]]).reshape(-1)
  p_consonant = np.array([np.argmax(x) for x in consonant_predicts[:]]).reshape(-1)
  l = len(p_root)
  t_root=np.array(frame.iloc[:l,1].values, dtype=int)
  t_vowel=np.array(frame.iloc[:l,2].values, dtype=int)
  t_consonant=np.array(frame.iloc[:l,3].values, dtype=int)
  
  vowel_cm = confusion_matrix(t_vowel, p_vowel)
  vowel_recalls = np.diag(vowel_cm / np.sum(vowel_cm, axis = 1))

  cons_cm = confusion_matrix(t_consonant,p_consonant)
  cons_recalls = np.diag(cons_cm / np.sum(cons_cm, axis = 1))

  root_cm = confusion_matrix(t_root, p_root)
  root_recalls = np.diag(root_cm / np.sum(root_cm, axis = 1))

  return root_recalls,vowel_recalls,cons_recalls

In [0]:
def compute_recall(frame,root_predicts,vowel_predicts,consonant_predicts):
    
    p_root=np.array([np.argmax(x) for x in root_predicts[:]]).reshape(-1)
    p_vowel = np.array([np.argmax(x) for x in vowel_predicts[:]]).reshape(-1)
    p_consonant = np.array([np.argmax(x) for x in consonant_predicts[:]]).reshape(-1)
    l = len(p_root)
    t_root=np.array(frame.iloc[:l,1].values, dtype=int)
    t_vowel=np.array(frame.iloc[:l,2].values, dtype=int)
    t_consonant=np.array(frame.iloc[:l,3].values, dtype=int)
    root_recall = recall_score(t_root, p_root, average='macro')
    vowel_recall = recall_score(t_vowel, p_vowel, average='macro')
    cons_recall = recall_score(t_consonant,p_consonant,average='macro')

    return root_recall,vowel_recall, cons_recall

In [0]:
def get_p_dicts(model,generator):
    root_predicts,vowel_predicts, consonant_predicts = [],[],[]
    for batch_x in tqdm(generator):
        batch_predict = model.predict(batch_x)
        for j in range(batch_predict[0].shape[0]):
            root_predicts += [batch_predict[0][j]]
            vowel_predicts += [batch_predict[1][j]]
            consonant_predicts += [batch_predict[2][j]]
    return root_predicts,vowel_predicts,consonant_predicts

In [0]:
def get_p_dicts_fromnp(model,v_np, batch_size=128):
  root_predicts,vowel_predicts, consonant_predicts = [],[],[]
  num_imgs = v_np.shape[0]
  for batch_start in tqdm(range(0, num_imgs,batch_size)):
    cur_batch_size = min(num_imgs,batch_start+batch_size)-batch_start
    idx = np.arange(batch_start,batch_start+cur_batch_size)
    predict = model.predict(v_np[idx])
    for j in range(cur_batch_size):
      root_predicts += [predict[0][j]]
      vowel_predicts += [predict[1][j]]
      consonant_predicts += [predict[2][j]]
  return root_predicts,vowel_predicts,consonant_predicts

In [33]:
TRAIN_DIR+PRETRAINED_WEIGHTS

'./train/w_md121_cutout_pl_eps10_offset135_sz128_bs32_do0.5_Adam_IS_fold0.h5'

In [0]:
if PRETRAINED: model.load_weights(TRAIN_DIR+PRETRAINED_WEIGHTS)

In [35]:
#pseudo labeling

pl_generator = test_batch_generator(valid_df,TRAIN_IMG_DIR,batch_size=512)

pl_root_predicts,pl_vowel_predicts, pl_consonant_predicts = [],[],[]
for pl_batch_x in tqdm(pl_generator):
        pl_batch_predict = model.predict(pl_batch_x)
        for j in range(pl_batch_predict[0].shape[0]):
            pl_root_predicts += [pl_batch_predict[0][j]]
            pl_vowel_predicts += [pl_batch_predict[1][j]]
            pl_consonant_predicts += [pl_batch_predict[2][j]]

pl_root=np.array([[np.argmax(x),np.max(x)] for x in pl_root_predicts[:]]).reshape(-1,2)
pl_vowel = np.array([[np.argmax(x),np.max(x)] for x in pl_vowel_predicts[:]]).reshape(-1,2)
pl_consonant = np.array([[np.argmax(x),np.max(x)] for x in pl_consonant_predicts[:]]).reshape(-1,2)

pl_graphem = np.concatenate((pl_root,pl_vowel,pl_consonant), axis=1)

pl_indices = []
for j in range(pl_graphem.shape[0]):
  if pl_graphem[j,1]>PL_THRESHOLD and pl_graphem[j,3]>PL_THRESHOLD and pl_graphem[j,5]>PL_THRESHOLD:
    pl_indices.append(j)

pl_train_df = valid_df.iloc[pl_indices]

pl_train_m = len(pl_train_df)+len(train_df)

pl_ratio = len(pl_train_df)/pl_train_m

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [36]:
pl_ratio

0.18354404650595552

In [0]:
train_datagen = ImageDataGenerator(rotation_range = 20,
                                   shear_range = 0.2,
                                   zoom_range = 0.1,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   preprocessing_function=get_random_eraser(s_l=0.05,s_h=0.1,r_1=0.8,r_2=1.2,v_l=0,v_h=0))

pl_train_datagen = ImageDataGenerator(rotation_range = 20,
                                   shear_range = 0.2,
                                   zoom_range = 0.1,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   preprocessing_function=get_random_eraser(p=1.0,s_l=0.05,s_h=0.1,r_1=0.8,r_2=1.2,v_l=0,v_h=0))

In [38]:
columns=["root_class","vowel_class", "cons_class"]
pl_train_generator = pl_train_datagen.flow_from_dataframe(
        dataframe=pl_train_df,
        directory=TRAIN_IMG_DIR,
        x_col="filename",
        y_col=columns,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="multi_output",
        color_mode="grayscale")

train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=TRAIN_IMG_DIR,
        x_col="filename",
        y_col=columns,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="multi_output",
        color_mode="grayscale")

Found 36120 validated image filenames.
Found 160672 validated image filenames.


In [0]:
def pl_aux_data_gen(main_generator, pl_generator):
  while True:
    p = np.random.rand()
    if p < 1.0 - pl_ratio:
      batch_x,y = next(main_generator)
    else:
      batch_x,y= next(pl_generator)

    yield batch_x,y

In [0]:
#mixup_datagen=mixup_data_gen(aux_data_gen(train_generator1),aux_data_gen(train_generator2))
pl_cutout_datagen=pl_aux_data_gen(aux_data_gen(train_generator),aux_data_gen(pl_train_generator))

In [0]:
class KaggleValidation(Callback):
    
    def __init__(self, valid_df,train_steps, vals_per_epoch=1,batch_size = 32, logfile=None, initial_epoch=0):
        super().__init__()
        self.valid_df = valid_df
        self.batch_size = batch_size
        self.current_epoch=initial_epoch
        self.num_batches = train_steps
        self.val_steps = train_steps//vals_per_epoch
        self.logfile = logfile
        self.best_kr=0.9700
        self.best_v_kr = 0.986
        self.best_c_kr = 0.982
        self.best_r_kr = 0.959
        self.best_comb_kr = 0.9700
       
    def do_savelog(self):
      log_df = pd.DataFrame()
      log_df['epoch']=self.iters
      log_df['lr']=self.lr_values
      log_df['loss']=self.losses
      log_df['root_loss']=self.root_losses
      log_df['vowel_loss']=self.vowel_losses
      log_df['consonant_loss']=self.cons_losses
      log_df['kaggle']=self.val_kaggle_recalls
      log_df['root']=self.val_root_recalls
      log_df['vowel']=self.val_vowel_recalls
      log_df['cons'] =self.val_consonant_recalls
      log_df['time']=self.time_stamp
      log_df.to_csv(self.logfile, index=False, float_format='%.6f')
      
    def do_validation(self,num_iter=0, logs={}):
        valid_gen = test_batch_generator(self.valid_df,TRAIN_IMG_DIR, batch_size=self.batch_size)
        val_root_preds,val_vowel_preds,val_consonant_preds = get_p_dicts(self.model,valid_gen)
        val_root_recall,val_vowel_recall, val_cons_recall = compute_recall(self.valid_df,
                                                                           val_root_preds,
                                                                           val_vowel_preds,
                                                                           val_consonant_preds)
        val_kaggle_recall = 0.5*val_root_recall+0.25*(val_vowel_recall+val_cons_recall)
        loc_time = math.floor(time.time()-self.time_start)
        print(f'[{str(datetime.timedelta(seconds=loc_time))}] - kaggle:{val_kaggle_recall:.4f} - root:{val_root_recall:.4f} -\
 vowel:{val_vowel_recall:.4f} - cons: {val_cons_recall:.4f}')
        self.val_root_recalls.append(val_root_recall)
        self.val_vowel_recalls.append(val_vowel_recall)
        self.val_consonant_recalls.append(val_cons_recall)
        self.val_kaggle_recalls.append(val_kaggle_recall)
        self.time_stamp.append(str(datetime.timedelta(seconds=loc_time)))
        self.iters.append(num_iter)
        rl = logs.get('root_loss')
        vl = logs.get('vowel_loss')
        cl = logs.get('consonant_loss')
        self.losses.append(2*rl+vl+cl)
        self.root_losses.append(rl)
        self.vowel_losses.append(vl)
        self.cons_losses.append(cl)
        self.lr_values.append(K.get_value(self.model.optimizer.lr))

        if self.logfile != None: self.do_savelog()

        if val_kaggle_recall>self.best_kr:
          self.best_kr = val_kaggle_recall
          print(f'saving weights with kr {self.best_kr}...')
          self.model.save_weights(OUTPUT_DIR+f'w_kr_{self.best_kr:.4f}_'+suffix+'.h5')
        
        preffix = 'w_best_'
        save_weights = False

        if val_root_recall>self.best_r_kr:
          self.best_r_kr = val_root_recall
          preffix = preffix + f'[r {self.best_r_kr:.5f}]'
          save_weights=True
        
        if val_vowel_recall>self.best_v_kr:
          self.best_v_kr = val_vowel_recall
          preffix = preffix + f'[v {self.best_v_kr:.5f}]'
          save_weights=True
        
        if val_cons_recall>self.best_c_kr:
          self.best_c_kr = val_cons_recall
          preffix = preffix + f'[c {self.best_c_kr:.5f}]'
          save_weights=True
        
        if save_weights==True:
          self.best_comb_kr = 0.5*self.best_r_kr+0.25*(self.best_v_kr+self.best_c_kr)
          print(f'saving weights {preffix} best combined kaggle recall: {self.best_comb_kr}...')
          self.model.save_weights(OUTPUT_DIR+preffix+'_'+suffix+'.h5')

    def on_train_begin(self, logs={}):
        self.val_root_recalls = []
        self.val_vowel_recalls =[]
        self.val_consonant_recalls = []
        self.val_kaggle_recalls = []
        self.time_stamp = []
        self.iters = []
        self.time_start = time.time()
        self.losses = []
        self.root_losses = []
        self.vowel_losses = []
        self.cons_losses = []
        self.lr_values = []
        
    def on_batch_end(self,batch,logs={}):
      if batch%self.val_steps == self.val_steps-1: 
        print(f'\nbatch {batch}: validation...')
        num_iter = self.current_epoch + batch/self.num_batches
        self.do_validation(num_iter=num_iter,logs=logs)
        gc.collect()
    
    def on_epoch_end(self, epoch, logs={}):
      self.current_epoch +=1

In [45]:
ca_shed = CosineAnnealingScheduler(T_max=T_MAX, eta_max=LR_MAX, eta_min=LR_MIN,verbose=1, epoch_offset=EP_OFFSET)

suffix = NET_NAME+\
                '_eps'+str(EPOCHS)+\
                '_offset'+str(EP_OFFSET)+\
                '_sz'+str(IMG_SIZE)+\
                '_bs'+str(BATCH_SIZE)+\
                '_do'+str(DROP_OUT)+\
                '_'+model.optimizer.get_config()['name']+\
                '_IS_fold'+str(FOLD)


kv = KaggleValidation(valid_df,
                      batch_size=512,
                      vals_per_epoch=2,
                      train_steps=pl_train_m//BATCH_SIZE+1,
                      logfile=OUTPUT_DIR+'log_kr_'+suffix+'.csv',
                      initial_epoch=EP_OFFSET)

history=model.fit(pl_cutout_datagen,
                  epochs=EPOCHS,
                  steps_per_epoch=pl_train_m//BATCH_SIZE+1,
                  callbacks=[kv],
                  #class_weight=model_cw,
                  verbose=2)


model.save_weights(OUTPUT_DIR+'w_'+suffix+'.h5')
#model.save(OUTPUT_DIR+'m_'+suffix+'.h5')
#pd.DataFrame(history.history).to_csv(OUTPUT_DIR+'h_'+suffix+'.csv', index=False)

Train for 6150 steps
Epoch 1/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:07:32] - kaggle:0.9717 - root:0.9596 - vowel:0.9856 - cons: 0.9821
saving weights with kr 0.9717388034436625...
saving weights w_best_[r 0.95960][c 0.98212] best combined kaggle recall: 0.971829810873579...

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:14:47] - kaggle:0.9717 - root:0.9597 - vowel:0.9853 - cons: 0.9820
saving weights w_best_[r 0.95968] best combined kaggle recall: 0.971871702355064...
6150/6150 - 889s - loss: 0.3038 - root_loss: 0.1132 - vowel_loss: 0.0372 - consonant_loss: 0.0400
Epoch 2/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:22:02] - kaggle:0.9718 - root:0.9599 - vowel:0.9856 - cons: 0.9819
saving weights with kr 0.9718495876853606...
saving weights w_best_[r 0.95995] best combined kaggle recall: 0.9720047861278455...

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:29:18] - kaggle:0.9717 - root:0.9595 - vowel:0.9858 - cons: 0.9822
saving weights w_best_[c 0.98219] best combined kaggle recall: 0.972021426692505...
6150/6150 - 870s - loss: 0.3026 - root_loss: 0.1121 - vowel_loss: 0.0395 - consonant_loss: 0.0409
Epoch 3/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:36:33] - kaggle:0.9718 - root:0.9598 - vowel:0.9853 - cons: 0.9823
saving weights w_best_[c 0.98232] best combined kaggle recall: 0.9720535836592288...

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:43:48] - kaggle:0.9718 - root:0.9600 - vowel:0.9856 - cons: 0.9816
saving weights w_best_[r 0.96000] best combined kaggle recall: 0.9720796248627666...
6150/6150 - 870s - loss: 0.2983 - root_loss: 0.1117 - vowel_loss: 0.0383 - consonant_loss: 0.0371
Epoch 4/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:51:06] - kaggle:0.9718 - root:0.9598 - vowel:0.9859 - cons: 0.9817

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[0:58:25] - kaggle:0.9719 - root:0.9599 - vowel:0.9854 - cons: 0.9825
saving weights with kr 0.9719467634180334...
saving weights w_best_[c 0.98248] best combined kaggle recall: 0.9721217748883092...
6150/6150 - 877s - loss: 0.3083 - root_loss: 0.1161 - vowel_loss: 0.0397 - consonant_loss: 0.0407
Epoch 5/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:05:47] - kaggle:0.9713 - root:0.9592 - vowel:0.9860 - cons: 0.9809

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:13:10] - kaggle:0.9718 - root:0.9598 - vowel:0.9854 - cons: 0.9822
6150/6150 - 884s - loss: 0.3006 - root_loss: 0.1109 - vowel_loss: 0.0386 - consonant_loss: 0.0383
Epoch 6/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:20:33] - kaggle:0.9716 - root:0.9592 - vowel:0.9860 - cons: 0.9821

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:27:56] - kaggle:0.9718 - root:0.9596 - vowel:0.9859 - cons: 0.9822
6150/6150 - 886s - loss: 0.3027 - root_loss: 0.1154 - vowel_loss: 0.0392 - consonant_loss: 0.0392
Epoch 7/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:35:14] - kaggle:0.9715 - root:0.9593 - vowel:0.9853 - cons: 0.9822

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:42:33] - kaggle:0.9718 - root:0.9595 - vowel:0.9861 - cons: 0.9822
saving weights w_best_[v 0.98606] best combined kaggle recall: 0.9721372625526012...
6150/6150 - 878s - loss: 0.3002 - root_loss: 0.1103 - vowel_loss: 0.0385 - consonant_loss: 0.0385
Epoch 8/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:49:51] - kaggle:0.9714 - root:0.9591 - vowel:0.9858 - cons: 0.9817

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[1:57:09] - kaggle:0.9717 - root:0.9594 - vowel:0.9858 - cons: 0.9822
6150/6150 - 875s - loss: 0.2965 - root_loss: 0.1083 - vowel_loss: 0.0372 - consonant_loss: 0.0393
Epoch 9/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[2:04:28] - kaggle:0.9719 - root:0.9602 - vowel:0.9854 - cons: 0.9820
saving weights w_best_[r 0.96016] best combined kaggle recall: 0.9722146200289237...

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[2:11:51] - kaggle:0.9717 - root:0.9599 - vowel:0.9855 - cons: 0.9813
6150/6150 - 882s - loss: 0.3010 - root_loss: 0.1120 - vowel_loss: 0.0393 - consonant_loss: 0.0382
Epoch 10/10

batch 3074: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[2:19:15] - kaggle:0.9718 - root:0.9598 - vowel:0.9857 - cons: 0.9821

batch 6149: validation...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


[2:26:39] - kaggle:0.9715 - root:0.9595 - vowel:0.9848 - cons: 0.9820
6150/6150 - 889s - loss: 0.2971 - root_loss: 0.1101 - vowel_loss: 0.0374 - consonant_loss: 0.0385


In [0]:
model.optimizer.get_config()

{'amsgrad': False,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'decay': 0.0,
 'epsilon': 1e-07,
 'learning_rate': 0.0003,
 'name': 'Adam'}

In [0]:
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package='Addons')
class Lookahead(tf.keras.optimizers.Optimizer):
    """This class allows to extend optimizers with the lookahead mechanism.
    The mechanism is proposed by Michael R. Zhang et.al in the paper
    [Lookahead Optimizer: k steps forward, 1 step back]
    (https://arxiv.org/abs/1907.08610v1). The optimizer iteratively updates two
    sets of weights: the search directions for weights are chosen by the inner
    optimizer, while the "slow weights" are updated each `k` steps based on the
    directions of the "fast weights" and the two sets of weights are
    synchronized. This method improves the learning stability and lowers the
    variance of its inner optimizer.
    Example of usage:
    ```python
    opt = tf.keras.optimizers.SGD(learning_rate)
    opt = tfa.optimizers.Lookahead(opt)
    ```
    """

    def __init__(self,
                 optimizer,
                 sync_period=6,
                 slow_step_size=0.5,
                 name="Lookahead",
                 **kwargs):
        r"""Wrap optimizer with the lookahead mechanism.
        Args:
            optimizer: The original optimizer that will be used to compute
                and apply the gradients.
            sync_period: An integer. The synchronization period of lookahead.
                Enable lookahead mechanism by setting it with a positive value.
            slow_step_size: A floating point value.
                The ratio for updating the slow weights.
            name: Optional name for the operations created when applying
                gradients. Defaults to "Lookahead".
            **kwargs: keyword arguments. Allowed to be {`clipnorm`,
                `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients
                by norm; `clipvalue` is clip gradients by value, `decay` is
                included for backward compatibility to allow time inverse
                decay of learning rate. `lr` is included for backward
                compatibility, recommended to use `learning_rate` instead.
        """
        super(Lookahead, self).__init__(name, **kwargs)

        if isinstance(optimizer, str):
            optimizer = tf.keras.optimizers.get(optimizer)
        if not isinstance(optimizer, tf.keras.optimizers.Optimizer):
            raise TypeError(
                "optimizer is not an object of tf.keras.optimizers.Optimizer")

        self._optimizer = optimizer
        self._set_hyper('sync_period', sync_period)
        self._set_hyper('slow_step_size', slow_step_size)
        self._initialized = False

    def _create_slots(self, var_list):
        self._optimizer._create_slots(var_list=var_list)  # pylint: disable=protected-access
        for var in var_list:
            self.add_slot(var, 'slow')

    def _create_hypers(self):
        self._optimizer._create_hypers()  # pylint: disable=protected-access

    def _prepare(self, var_list):
        return self._optimizer._prepare(var_list=var_list)  # pylint: disable=protected-access

    def apply_gradients(self, grads_and_vars, name=None):
        self._optimizer._iterations = self.iterations  # pylint: disable=protected-access
        return super(Lookahead, self).apply_gradients(grads_and_vars, name)

    def _init_op(self, var):
        slow_var = self.get_slot(var, 'slow')
        return slow_var.assign(
            tf.where(
                tf.equal(self.iterations,
                         tf.constant(0, dtype=self.iterations.dtype)),
                var,
                slow_var,
            ),
            use_locking=self._use_locking)

    def _look_ahead_op(self, var):
        var_dtype = var.dtype.base_dtype
        slow_var = self.get_slot(var, 'slow')
        local_step = tf.cast(self.iterations + 1, tf.dtypes.int64)
        sync_period = self._get_hyper('sync_period', tf.dtypes.int64)
        slow_step_size = self._get_hyper('slow_step_size', var_dtype)
        step_back = slow_var + slow_step_size * (var - slow_var)
        sync_cond = tf.equal(
            tf.math.floordiv(local_step, sync_period) * sync_period,
            local_step)
        with tf.control_dependencies([step_back]):
            slow_update = slow_var.assign(
                tf.where(
                    sync_cond,
                    step_back,
                    slow_var,
                ),
                use_locking=self._use_locking)
            var_update = var.assign(
                tf.where(
                    sync_cond,
                    step_back,
                    var,
                ),
                use_locking=self._use_locking)
        return tf.group(slow_update, var_update)

    @property
    def weights(self):
        return self._weights + self._optimizer.weights

    def _resource_apply_dense(self, grad, var):
        init_op = self._init_op(var)
        with tf.control_dependencies([init_op]):
            train_op = self._optimizer._resource_apply_dense(grad, var)  # pylint: disable=protected-access
            with tf.control_dependencies([train_op]):
                look_ahead_op = self._look_ahead_op(var)
        return tf.group(init_op, train_op, look_ahead_op)

    def _resource_apply_sparse(self, grad, var, indices):
        init_op = self._init_op(var)
        with tf.control_dependencies([init_op]):
            train_op = self._optimizer._resource_apply_sparse(  # pylint: disable=protected-access
                grad, var, indices)
            with tf.control_dependencies([train_op]):
                look_ahead_op = self._look_ahead_op(var)
        return tf.group(init_op, train_op, look_ahead_op)

    def get_config(self):
        config = {
            'optimizer': tf.keras.optimizers.serialize(self._optimizer),
            'sync_period': self._serialize_hyperparameter('sync_period'),
            'slow_step_size': self._serialize_hyperparameter('slow_step_size'),
        }
        base_config = super(Lookahead, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    @property
    def learning_rate(self):
        return self._optimizer._get_hyper('learning_rate')

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        self._optimizer._set_hyper('learning_rate', learning_rate)

    @property
    def lr(self):
        return self.learning_rate

    @lr.setter
    def lr(self, lr):
        self.learning_rate = lr

    @classmethod
    def from_config(cls, config, custom_objects=None):
        optimizer = tf.keras.optimizers.deserialize(
            config.pop('optimizer'),
            custom_objects=custom_objects,
        )
        return cls(optimizer, **config)