# Need help debugging the error below 🙏🙏
# HELP!!

In [None]:
!pip install -q efficientnet
import efficientnet.tfkeras as efn

In [None]:
import os, glob, cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from kaggle_datasets import KaggleDatasets

# ML tools 
import tensorflow as tf
from keras import backend as K
from keras import layers
from keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
# ---- Experiment configuration ----
SEED = 22                # random_state for the train/validation shard split
IMAGE_SIZE = [256, 256]  # size images are resized to in decode_tfrecord
BATCH_SIZE = 16          # per-replica batch size; multiplied by the replica count later
lr = 1e-4                # learning rate handed to the Adam optimizer
n_epochs = 5             # number of epochs passed to model.fit

# Dimensions used by the recurrent head in build_model
ENCODER_DIM = 512  # width the backbone features are reshaped to
DECODER_DIM = 256  # number of LSTM units

In [None]:
# Resolve the Google Cloud Storage path of the attached Kaggle dataset;
# the TFRecord glob patterns further down are built from this prefix.
GCS_PATH = KaggleDatasets().get_gcs_path('setibl-256x256-tfrec-dataset')
GCS_PATH  # bare expression: shows the resolved path as the notebook cell output

In [None]:
# Glob patterns for the labelled (train*) and unlabelled (test*) shards.
train_pattern = GCS_PATH + '/train*.tfrec'
test_pattern = GCS_PATH + '/test*.tfrec'

# Sort so the shard order (and therefore the seeded split) is reproducible.
train_tfrec = np.sort(np.array(tf.io.gfile.glob(train_pattern)))
TEST_tfrec = np.sort(np.array(tf.io.gfile.glob(test_pattern)))

# Hold out 20% of the training shards (whole files, not single examples)
# for validation.
TRAIN_tfrec, VALID_tfrec = train_test_split(
    train_tfrec,
    test_size=0.2,
    random_state=SEED,
)
len(TRAIN_tfrec), len(VALID_tfrec)

In [None]:
# Detect hardware, set appropriate distribution strategy (GPU/TPU)
def auto_select_accelerator():
    """Return a TPU strategy when TPU setup succeeds, else the default strategy.

    The TPU resolver, cluster connection, TPU initialisation and strategy
    construction are all attempted together; a ``ValueError`` from any of
    them makes the function fall back to ``tf.distribute.get_strategy()``.
    The replica count of the chosen strategy is printed before returning.

    Returns:
        The selected ``tf.distribute`` strategy object.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU setup failed: use whatever strategy TensorFlow has by default.
        selected = tf.distribute.get_strategy()

    print(f"Running on {selected.num_replicas_in_sync} replicas")
    return selected

# Flag kept from the original notebook; nothing in the visible code reads it.
TPU = True

# Choose the distribution strategy and record how many replicas it drives.
strategy = auto_select_accelerator()
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

# Scale the per-replica batch size to the global batch size.
# NOTE: this rescales in place, so re-running this cell without re-running
# the config cell multiplies BATCH_SIZE by REPLICAS again.
BATCH_SIZE *= REPLICAS
print(f'BATCH_SIZE: {BATCH_SIZE}')

In [None]:
def augment(img):
    """Apply a random horizontal flip followed by colour jitter to one image.

    Unlike the ``augment`` nested inside ``build_augmenter``, this variant
    does not apply a vertical flip.

    Args:
        img: image tensor accepted by the ``tf.image.random_*`` ops.

    Returns:
        The augmented image tensor.
    """
    flipped = tf.image.random_flip_left_right(img)
    saturated = tf.image.random_saturation(flipped, 0.8, 1.2)
    brightened = tf.image.random_brightness(saturated, 0.1)
    return tf.image.random_contrast(brightened, 0.8, 1.2)
    

def decode_tfrecord(record_bytes):
    """Parse one serialized example into an ``(image, target)`` pair.

    Args:
        record_bytes: scalar string tensor holding a serialized example with
            the features ``image`` (PNG bytes), ``image_id`` (string) and
            ``target`` (int64).

    Returns:
        A tuple ``(image, target)`` where ``image`` is a float32 tensor of
        static shape ``(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)`` (pixel values are
        not rescaled) and ``target`` is a float32 scalar.
    """
    feature = tf.io.parse_single_example(record_bytes, {
        'image': tf.io.FixedLenFeature([], tf.string),
        'image_id': tf.io.FixedLenFeature([], tf.string),
        'target': tf.io.FixedLenFeature([], tf.int64),
    })

    # Force 3 channels: the model input is (H, W, 3) and the saturation
    # augmentation needs RGB. Without `channels` the channel dimension stays
    # unknown at graph-construction time.
    image = tf.io.decode_png(feature['image'], channels=3)
    image = tf.cast(image, tf.float32)
    image = tf.image.resize(image, (IMAGE_SIZE[0], IMAGE_SIZE[1]))
    # Pin the full static shape explicitly (required on TPU).
    image = tf.reshape(image, [IMAGE_SIZE[0], IMAGE_SIZE[1], 3])

    # Float labels match the sigmoid output / binary cross-entropy and stay
    # usable by losses that do float arithmetic on y_true.
    target = tf.cast(feature['target'], tf.float32)
    return image, target

def build_augmenter(with_labels=True):
    """Build the per-example augmentation function for ``Dataset.map``.

    Args:
        with_labels: when True (default) the returned function takes
            ``(img, label)`` and returns ``(augmented_img, label)``; when
            False it takes and returns only the image.

    Returns:
        A callable applying random horizontal/vertical flips and random
        saturation, brightness and contrast changes to the image.
    """
    def augment(img):
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_saturation(img, 0.8, 1.2)
        # NOTE(review): decode_tfrecord yields unscaled float pixels, so a
        # brightness delta of 0.1 looks negligible - confirm intended scale.
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        return img

    def augment_with_labels(img, label):
        # The label passes through untouched; only the image is augmented.
        return augment(img), label

    # Previously `with_labels` was ignored and the labelled variant was
    # always returned.
    return augment_with_labels if with_labels else augment

In [None]:
def get_train_dataset(bs=BATCH_SIZE):
    """Build the repeating, augmented training pipeline from ``TRAIN_tfrec``.

    Args:
        bs: global batch size. The default is the value ``BATCH_SIZE`` had
            when this function was defined.

    Returns:
        A ``tf.data.Dataset`` of ``(images, targets)`` batches that repeats
        indefinitely, so ``steps_per_epoch`` must be given to ``fit``.
    """
    AUTO = tf.data.experimental.AUTOTUNE

    # Let records come back out of order so parallel reads are not throttled.
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False

    # NOTE(review): there is no shuffle step; the only reordering comes from
    # the non-deterministic parallel reads - confirm this is intended.
    dataset = tf.data.TFRecordDataset(TRAIN_tfrec, num_parallel_reads=AUTO)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(decode_tfrecord, num_parallel_calls=AUTO)
    dataset = dataset.map(build_augmenter(), num_parallel_calls=AUTO)
    dataset = dataset.repeat()
    # Honour the `bs` argument (it used to be ignored in favour of the global);
    # drop_remainder keeps every batch the same static size.
    dataset = dataset.batch(bs, drop_remainder=True).prefetch(AUTO)

    return dataset

def get_val_dataset(bs=BATCH_SIZE):
    """Build the validation pipeline from ``VALID_tfrec`` (no augmentation).

    Args:
        bs: global batch size. The default is the value ``BATCH_SIZE`` had
            when this function was defined.

    Returns:
        A ``tf.data.Dataset`` of ``(images, targets)`` batches; it is not
        repeated, and a trailing partial batch is dropped.
    """
    AUTO = tf.data.experimental.AUTOTUNE

    dataset = tf.data.TFRecordDataset(VALID_tfrec, num_parallel_reads=AUTO)
    dataset = dataset.map(decode_tfrecord, num_parallel_calls=AUTO)
    # Honour the `bs` argument (it used to be ignored in favour of the global);
    # drop_remainder keeps every batch the same static size.
    dataset = dataset.batch(bs, drop_remainder=True)
    dataset = dataset.prefetch(AUTO)

    return dataset

In [None]:
def build_model():
    """Build the EfficientNetB3 + LSTM binary classifier.

    The ImageNet-pretrained backbone (without its classification top) is
    applied to an ``(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)`` input. Its feature map
    is reshaped into rows of ``ENCODER_DIM`` values and transposed, so the
    LSTM sees ``ENCODER_DIM`` time steps. A dropout layer and a single
    sigmoid unit produce the prediction.

    Every layer comes from ``tf.keras`` so the graph is not a mix of
    standalone-``keras`` layers with the ``tensorflow.keras`` ``Model`` and the
    ``efficientnet.tfkeras`` backbone.

    Returns:
        An uncompiled ``tensorflow.keras`` ``Model``.
    """
    tfl = tf.keras.layers

    backbone = efn.EfficientNetB3(weights='imagenet', include_top=False)
    inp = tfl.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    x = backbone(inp)
    # The flattened backbone output must be divisible by ENCODER_DIM for
    # this reshape to succeed.
    x = tfl.Reshape([-1, ENCODER_DIM], name='Reshapev1')(x)
    x = tfl.Permute([2, 1], name='Permutev1')(x)
    x = tfl.LSTM(DECODER_DIM, dropout=0.1, recurrent_dropout=0.1)(x)
    x = tfl.Dropout(0.3)(x)
    x = tfl.Dense(1, activation='sigmoid')(x)
    return Model(inp, x)

def weighted_binary_crossentropy(y_true, y_pred, weight=2.1):
    """Binary cross-entropy with the positive-class term scaled by ``weight``.

    Args:
        y_true: ground-truth labels; cast to the dtype of ``y_pred`` so
            integer labels are accepted.
        y_pred: predicted probabilities.
        weight: multiplier applied to the positive (``y_true``) term.

    Returns:
        The loss averaged over the last axis.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Only predictions are clipped (to keep the logs finite). The labels are
    # left exact: clipping them perturbed the targets and failed on integer
    # label tensors.
    eps = K.epsilon()
    y_pred = K.clip(y_pred, eps, 1 - eps)

    positive_term = y_true * K.log(y_pred) * weight
    negative_term = (1 - y_true) * K.log(1 - y_pred)
    logloss = -(positive_term + negative_term)
    return K.mean(logloss, axis=-1)

In [None]:
with strategy.scope():
    # Build Datasets
    train_dataset = get_train_dataset()
    valid_dataset = get_val_dataset()

    # Building & Compiling Model
    model = build_model()
    model.compile(
        # Use the tf.keras optimizer (the model is a tensorflow.keras Model)
        # and the `learning_rate` keyword instead of the deprecated `lr`.
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        # Single sigmoid output, so no multi-label AUC is needed. The
        # explicit name keeps the history keys at "auc" / "val_auc" even
        # when this cell is re-run.
        metrics=[tf.keras.metrics.AUC(name='auc')],
    )

model.summary()

In [None]:
# Pull a single batch from the training pipeline to inspect tensor shapes.
sample_batches = iter(train_dataset)
imgs, lbls = next(sample_batches)
print(f'imgs.shape: {imgs.shape}, lbls.shape: {lbls.shape}')

In [None]:
# cell ref.- https://www.kaggle.com/usharengaraju/seti-eda-baseline-tensorflow-and-tpu
import re

def count_data_items(filenames):
    """Sum the example counts encoded in TFRecord shard file names.

    Each file name (the last path component) is expected to contain
    ``-<count>.``, for example ``train00-2071.tfrec``. Only the base name is
    inspected, so digits in directory or bucket names are never mistaken
    for a count.

    Args:
        filenames: iterable of shard paths.

    Returns:
        The total count as a NumPy integer (``0`` for an empty input).

    Raises:
        ValueError: if a file name does not contain a ``-<count>.`` part.
    """
    count_pattern = re.compile(r"-([0-9]+)\.")  # compiled once, not per file

    counts = []
    for filename in filenames:
        match = count_pattern.search(os.path.basename(filename))
        if match is None:
            raise ValueError(
                f"cannot read an item count from file name {filename!r}")
        counts.append(int(match.group(1)))

    # An explicit integer dtype keeps the result an integer for empty input.
    return np.sum(counts, dtype=np.int64)

# Dataset sizes, read from the counts embedded in the shard file names.
NUM_TRAINING_IMAGES = count_data_items(TRAIN_tfrec)
NUM_VALIDATION_IMAGES = count_data_items(VALID_tfrec)
NUM_TEST_IMAGES = count_data_items(TEST_tfrec)

# Whole batches only: a trailing partial batch is not counted as a step.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
VALID_STEPS = NUM_VALIDATION_IMAGES // BATCH_SIZE

summary_template = 'Dataset | Training images: {} | Validation images: {} | Unlabeled test images: {}  | STEPS_PER_EPOCH: {}'
print(summary_template.format(
    NUM_TRAINING_IMAGES,
    NUM_VALIDATION_IMAGES,
    NUM_TEST_IMAGES,
    STEPS_PER_EPOCH,
))

In [None]:
# File the best checkpoint is written to.
name = 'SETI_ED_model.h5'

# Cut the learning rate to a tenth when val_loss stalls for 2 epochs.
rlr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    cooldown=1,
    min_delta=1e-4,
    min_lr=1e-6,
    verbose=1,
)

# Keep only the checkpoint with the lowest val_loss seen so far.
ckp = ModelCheckpoint(
    name,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
# NOTE(review): with n_epochs = 5 this patience likely never triggers - confirm.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=1e-4,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train; the training dataset repeats forever, so the epoch length is set
# explicitly through steps_per_epoch.
history = model.fit(
    train_dataset,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=n_epochs,
    validation_data=valid_dataset,
    validation_steps=VALID_STEPS,
    callbacks=[rlr, es, ckp],
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(12, 6))
plt.xlabel("Epochs")
plt.ylabel("Loss")
for history_key, curve_label, curve_marker in (
    ("loss", "Training Loss", 'o'),
    ("val_loss", "Validation Loss", '+'),
):
    plt.plot(history.history[history_key], label=curve_label, marker=curve_marker)
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Training vs. validation AUC per epoch.
plt.figure(figsize=(12, 6))
plt.xlabel("Epochs")
plt.ylabel("AUC")
for history_key, curve_label, curve_marker in (
    ("auc", "Training AUC", 'o'),
    ("val_auc", "Validation AUC", '+'),
):
    plt.plot(history.history[history_key], label=curve_label, marker=curve_marker)
plt.grid(True)
plt.legend()
plt.show()