In [1]:
import random, os, imagesize
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np, pandas as pd, tensorflow as tf, efficientnet.tfkeras as efn, tensorflow.keras.backend as K, matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from vit_keras import vit, utils, visualize, layers

In [2]:
class CFG:
    model_name = 'vit_b16'
    seed = 5
    folds = 5
    verbose = 1
    
    # FOLDS TO TRAIN
    selected_folds = [0, 1, 2, 3, 4]

    # IMAGE SIZE
    img_size = [512, 512]

    # BATCH SIZE AND EPOCHS
    batch_size  = 8
    epochs      = 5

    # LOSS
    loss      = 'BCE'
    optimizer = 'Adam'

    # LEARNING RATE SCHEDULER
    scheduler   = 'exp' # Cosine

    tab_cols    = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                   'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
    target_col  = ['Pawpularity']

In [3]:
def seeding(SEED):
    np.random.seed(SEED)
    random.seed(SEED)
    os.environ['PYTHONHASHSEED'] = str(SEED)
    os.environ['TF_CUDNN_DETERMINISTIC'] = str(SEED)
    tf.random.set_seed(SEED)
    print('seeding done!!!')
seeding(CFG.seed)

seeding done!!!


In [4]:
def get_imgsize(row):
    width, height = imagesize.get(row['image_path'])
    row['width']  = width
    row['height'] = height
    return row

In [5]:
GCS_PATH = 'data'
df = pd.read_csv('data/train.csv')
df['image_path'] = GCS_PATH + '/train/' + df.Id + '.jpg'
tqdm.pandas(desc='train')
df = df.progress_apply(get_imgsize, axis=1)
display(df.head(2))

train:   0%|          | 0/9912 [00:00<?, ?it/s]

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,image_path,width,height
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63,data/train/0007de18844b0dbbb5e1f607da0606e0.jpg,405,720
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42,data/train/0009c66b9439883ba2750fb825e1d7db.jpg,1032,774


In [6]:
test_df  = pd.read_csv('data/test.csv')
test_df['image_path'] = GCS_PATH + '/test/' + test_df.Id + '.jpg'
tqdm.pandas(desc='test')
test_df = test_df.progress_apply(get_imgsize, axis=1)

display(test_df.head(2))

test:   0%|          | 0/8 [00:00<?, ?it/s]

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,image_path,width,height
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,data/test/4128bae22183829d2b5fea10effdb0c3.jpg,128,128
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,data/test/43a2262d7738e3d420d453815151079e.jpg,128,128


In [7]:
from pandas_profiling import ProfileReport
train_profile = ProfileReport(df, title="Train Data")
test_profile  = ProfileReport(test_df, title="Test Data")

In [8]:
display(train_profile)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [9]:
display(test_profile)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [10]:
num_bins = int(np.floor(1 + np.log2(len(df))))
df["bins"] = pd.cut(df[CFG.target_col].values.reshape(-1), bins=num_bins, labels=False)

skf = StratifiedKFold(n_splits=CFG.folds, shuffle=True, random_state=5)
for fold, (train_idx, val_idx) in enumerate(skf.split(df, df["bins"])):
    df.loc[val_idx, 'fold'] = fold
display(df.groupby(['fold', "bins"]).size())

fold  bins
0.0   0        66
      1        84
      2       221
      3       406
      4       376
             ... 
4.0   9        55
      10       40
      11       28
      12       21
      13       72
Length: 70, dtype: int64

In [11]:
def build_decoder(with_labels=True, target_size=CFG.img_size, ext='jpg'):
    def decode(path):
        file_bytes = tf.io.read_file(path)
        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        elif ext in ['jpg', 'jpeg']:
            img = tf.image.decode_jpeg(file_bytes, channels=3)
        else:
            raise ValueError("Image extension not supported")

        img = tf.image.resize(img, target_size)
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.reshape(img, [*target_size, 3])

        return img
    
    def decode_with_labels(path, label):
        return decode(path), tf.cast(label, tf.float32)/100.0
    
    return decode_with_labels if with_labels else decode


def build_dataset(paths, labels=None, batch_size=CFG.batch_size, cache=True,
                  decode_fn=None, repeat=True, shuffle=1024, 
                  cache_dir="", drop_remainder=False):
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)
    
    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)    
    
    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)
    
    ds = tf.data.Dataset.from_tensor_slices(slices)
    ds = ds.map(decode_fn, num_parallel_calls=AUTO)
    ds = ds.cache(cache_dir) if cache else ds
    ds = ds.repeat() if repeat else ds
    if shuffle: 
        ds = ds.shuffle(shuffle, seed=CFG.seed)
        opt = tf.data.Options()
        opt.experimental_deterministic = False
        ds = ds.with_options(opt)
    ds = ds.batch(batch_size, drop_remainder=drop_remainder)
    ds = ds.prefetch(AUTO)
    return ds

In [12]:
def display_batch(batch, size=2):
    imgs, tars = batch
    plt.figure(figsize=(size*5, 5))
    for img_idx in range(size):
        plt.subplot(1, size, img_idx+1)
        plt.title(f'{CFG.target_col[0]}: {tars[img_idx].numpy()[0]}', fontsize=15)
        plt.imshow(imgs[img_idx,:, :, :])
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout()
    plt.show() 

In [13]:
fold = 0
fold_df = df.query('fold==@fold')[:1000]
paths  = fold_df.image_path.tolist()
labels = fold_df[CFG.target_col].values
ds = build_dataset(paths, labels, cache=False, batch_size=CFG.batch_size,
                   repeat=True, shuffle=True)
ds = ds.unbatch().batch(CFG.batch_size)
batch = next(iter(ds))
display_batch(batch, 5)

In [14]:
name2effnet = {
    'efficientnet_b0': efn.EfficientNetB0,
    'efficientnet_b1': efn.EfficientNetB1,
    'efficientnet_b2': efn.EfficientNetB2,
    'efficientnet_b3': efn.EfficientNetB3,
    'efficientnet_b4': efn.EfficientNetB4,
    'efficientnet_b5': efn.EfficientNetB5,
    'efficientnet_b6': efn.EfficientNetB6,
    'efficientnet_b7': efn.EfficientNetB7,
}

def build_model(model_name=CFG.model_name, DIM=CFG.img_size[0], compile_model=True, include_top=False):       

    base = getattr(vit, model_name)(image_size=(DIM, DIM),
                   include_top=False, 
                   pretrained_top=False,
                   pretrained=True, 
                   weights='imagenet21k+imagenet2012')
    inp = base.inputs
    out = base.output
    out = tf.keras.layers.Dense(64,activation='selu')(out)
    out = tf.keras.layers.Dense(1, activation='sigmoid')(out)
    model = tf.keras.Model(inputs=inp, outputs=out)
    if compile_model:
        #optimizer
        opt = tf.keras.optimizers.Adam(learning_rate=0.001)
        #loss
        loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.01)
        #metric
        rmse = tf.keras.metrics.RootMeanSquaredError()
        model.compile(optimizer=opt,
                      loss=loss,
                      metrics=[rmse])
    return model

In [15]:
tmp = build_model(CFG.model_name, DIM=CFG.img_size[0], compile_model=True)

In [16]:
def get_lr_callback(batch_size=8, plot=False):
    lr_start   = 0.000005
    lr_max     = 0.00000125 * batch_size
    lr_min     = 0.000001
    lr_ramp_ep = 5
    lr_sus_ep  = 0
    lr_decay   = 0.8
   
    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
            
        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max
            
        elif CFG.scheduler=='exp':
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min
            
        elif CFG.scheduler=='cosine':
            decay_total_epochs = CFG.epochs - lr_ramp_ep - lr_sus_ep + 3
            decay_epoch_index = epoch - lr_ramp_ep - lr_sus_ep
            phase = math.pi * decay_epoch_index / decay_total_epochs
            cosine_decay = 0.5 * (1 + math.cos(phase))
            lr = (lr_max - lr_min) * cosine_decay + lr_min
        return lr
    if plot:
        plt.figure(figsize=(10,5))
        plt.plot(np.arange(CFG.epochs), [lrfn(epoch) for epoch in np.arange(CFG.epochs)], marker='o')
        plt.xlabel('epoch'); plt.ylabel('learnig rate')
        plt.title('Learning Rate Scheduler')
        plt.show()

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
    return lr_callback

_=get_lr_callback(CFG.batch_size, plot=True )

In [None]:
oof_pred = []; oof_tar = []; oof_val = []; oof_ids = []; oof_folds = []
preds = np.zeros((test_df.shape[0],1))

for fold in np.arange(CFG.folds):
    if fold not in CFG.selected_folds:
        continue
    
    # TRAIN AND VALID DATAFRAME
    train_df = df.query("fold!=@fold")
    valid_df = df.query("fold==@fold")
    
    # CREATE TRAIN AND VALIDATION SUBSETS
    train_paths = train_df.image_path.values; train_labels = train_df[CFG.target_col].values.astype(np.float32)
    valid_paths = valid_df.image_path.values; valid_labels = valid_df[CFG.target_col].values.astype(np.float32)
    test_paths  = test_df.image_path.values
    
    # SHUFFLE IMAGE AND LABELS
    index = np.arange(len(train_paths))
    np.random.shuffle(index)
    train_paths  = train_paths[index]
    train_labels = train_labels[index]
    
    print('#'*25); print('#### FOLD',fold)
    print('#### IMAGE_SIZE: (%i, %i) | MODEL_NAME: %s | BATCH_SIZE: %i'%
          (CFG.img_size[0],CFG.img_size[1],CFG.model_name,CFG.batch_size))
    train_images = len(train_paths)
    val_images   = len(valid_paths)
    
    print('#### NUM_TRAIN %i | NUM_VALID: %i'%(train_images, val_images))
    
    # BUILD MODEL
    K.clear_session()
    model = build_model(CFG.model_name, DIM=CFG.img_size[0], compile_model=True)

    # DATASET
    train_ds = build_dataset(train_paths, train_labels, cache=False, batch_size=CFG.batch_size,
                   repeat=False, shuffle=True)
    val_ds   = build_dataset(valid_paths, valid_labels, cache=False, batch_size=CFG.batch_size,
                   repeat=False, shuffle=False)
    
    print('#'*25)   
    # SAVE BEST MODEL EACH FOLD
    sv = tf.keras.callbacks.ModelCheckpoint(
        'fold-%i.h5'%fold, monitor='val_rmse', verbose=CFG.verbose, save_best_only=True,
        save_weights_only=False, mode='min', save_freq='epoch')
    callbacks = [sv,get_lr_callback(CFG.batch_size)]

    # TRAIN
    print('Training...')
    history = model.fit(
        train_ds, 
        epochs=CFG.epochs, 
        callbacks = callbacks, 
        steps_per_epoch=len(train_paths)/CFG.batch_size,
        validation_data=val_ds, 
        verbose=CFG.verbose
    )
    
    # Loading best model for inference
    print('Loading best model...')
    model.load_weights('fold-%i.h5'%fold)
    

#########################
#### FOLD 0
#### IMAGE_SIZE: (512, 512) | MODEL_NAME: vit_b16 | BATCH_SIZE: 8
#### NUM_TRAIN 7929 | NUM_VALID: 1983
#########################
Training...
Epoch 1/5
 86/991 [=>............................] - ETA: 12:32:54 - loss: 0.8153 - root_mean_squared_error: 0.3015