# Hello Friends 👋

Treating this as a **multiclass classification** task, I'm trying out end-to-end classification (*SparseCategoricalCrossentropy* loss), reading the images directly from the Kaggle dataset's cloud storage bucket.


This notebook is for:
* getting started faster
* beginners who want to try out TPU training

Special thanks to:
* https://www.kaggle.com/ks2019/happywhale-arcface-baseline-tpu
* https://www.kaggle.com/docs/tpu
* https://www.kaggle.com/product-feedback/129828

In [None]:
import numpy as np
import pandas as pd
import os, sys, cv2
from kaggle_datasets import KaggleDatasets
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
# NOTE(review): the original note here read "data.csv excluded id_freq>150";
# presumably it describes the training data behind classes.npy — confirm.
# The sample submission supplies the list of test image file names.
df= pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv')
print(df.shape)
# Rebuild the label encoder from the saved class array so predicted class
# indices can be mapped back to labels with inverse_transform().
Encoder=LabelEncoder()
# allow_pickle=True unpickles Python objects from the file; only load class
# files from a source you trust.
Encoder.classes_ = np.load('../input/happywhale-classification-using-tpu-training/classes.npy', allow_pickle=True)
df.head()

In [None]:
# Blank out the placeholder predictions. Bracket assignment is used because
# attribute-style assignment only updates a column that already exists (it
# would otherwise just set a plain attribute on the DataFrame object).
df['predictions'] = ''

In [None]:
# Number of distinct labels known to the encoder.
n_classes= len(Encoder.classes_)
# Side length (pixels) used as the resize target for the test decoder.
img_size = 480
# Also used as the default `shuffle` argument of Build_dataset, where it is
# passed to Dataset.shuffle() as the buffer size.
seed= 2001
# Per-replica batch size; multiplied by the replica count once the
# distribution strategy has been selected.
batch_size=25
n_classes

## TPU Input Pipeline
Useful links
* https://www.tensorflow.org/guide/tpu
* https://www.tensorflow.org/guide/data_performance

In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when available.

    Tries to resolve, connect to and initialise a TPU cluster. If any of
    those steps raises ``ValueError``, the strategy returned by
    ``tf.distribute.get_strategy()`` is used instead.

    Returns:
        The selected distribution strategy. Its replica count is printed.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # ``tf.distribute.experimental.TPUStrategy`` is deprecated; prefer the
        # stable class and only fall back on TensorFlow versions that lack it.
        strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy

In [None]:
def readImg(with_labels=True, target_size=(512, 512)):
    """Build a decoder that turns a JPEG file path into a resized float image.

    Args:
        with_labels: When true, the returned callable takes ``(path, label)``
            and passes the label through unchanged; otherwise it takes only
            ``path``.
        target_size: Size handed to ``tf.image.resize``.

    Returns:
        The decoding callable.
    """
    def _load(path):
        raw = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw, channels=3)
        # Convert to float32 and divide by 255 before resizing.
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    def _load_labelled(path, label):
        return _load(path), label

    if with_labels:
        return _load_labelled
    return _load

def build_augmenter(with_labels=True):
    """Build a random-augmentation callable for decoded images.

    Args:
        with_labels: When true, the returned callable takes ``(img, label)``
            and passes the label through unchanged; otherwise it takes only
            the image.

    Returns:
        The augmentation callable.
    """
    def _jitter(image):
        image = tf.image.random_flip_left_right(image)
        # Vertical flip (tf.image.random_flip_up_down) is disabled here.
        image = tf.image.random_saturation(image, 0.8, 1.2)
        image = tf.image.random_brightness(image, 0.1)
        return tf.image.random_contrast(image, 0.8, 1.2)

    def _jitter_labelled(image, label):
        return _jitter(image), label

    if with_labels:
        return _jitter_labelled
    return _jitter

def Build_dataset(paths, labels= None, batch= batch_size,
                  decode_fn=None, augment_fn=None,
                  augment= False, repeat= True, shuffle= seed):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Args:
        paths: Image file paths to slice into the dataset.
        labels: Optional labels aligned with ``paths``. When given, dataset
            elements are ``(path, label)`` pairs.
        batch: Batch size. The default is the value ``batch_size`` had when
            this function was defined; later reassignments of the global are
            not picked up, so pass it explicitly if it has changed.
        decode_fn: Callable mapping a dataset element to a decoded image
            (and label). Defaults to ``readImg`` at ``img_size`` x
            ``img_size``, matching whether labels were supplied.
        augment_fn: Callable applied after decoding when ``augment`` is true.
            Defaults to ``build_augmenter``, matching whether labels were
            supplied.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle *buffer size* handed to ``Dataset.shuffle``; a falsy
            value disables shuffling. The default reuses the ``seed``
            constant as the buffer size — it does not seed the shuffle.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    AUTO = tf.data.experimental.AUTOTUNE
    has_labels = labels is not None

    # Dataset.map(None) fails, so fall back to the notebook's own builders
    # when the caller did not supply the callables.
    if decode_fn is None:
        decode_fn = readImg(with_labels=has_labels,
                            target_size=(img_size, img_size))
    if augment and augment_fn is None:
        augment_fn = build_augmenter(with_labels=has_labels)

    slices = (paths, labels) if has_labels else paths
    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()
    if shuffle:
        dset = dset.shuffle(shuffle)

    return dset.batch(batch).prefetch(AUTO)

In [None]:
# Name of the Kaggle dataset whose bucket path is looked up below.
DATASET_NAME = "happy-whale-and-dolphin"
strategy = auto_select_accelerator()
# Scale the per-replica batch size to a global batch size.
# NOTE(review): this rebinds batch_size in place, so re-running the cell
# multiplies it again. Build_dataset's `batch` default was bound when the
# function was defined and does not see this new value.
batch_size = strategy.num_replicas_in_sync * batch_size
print('batch size', batch_size)

In [None]:
# Look up the cloud-bucket path of the dataset so files can be read from it
# directly instead of from the local ../input mount.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(DATASET_NAME)
# One full path per row of the submission frame, built from its 'image' column.
test_paths = GCS_DS_PATH + "/test_images/" + df['image']
GCS_DS_PATH

In [None]:
# Inference pipeline: images only, in file order, a single pass, no augmentation.
test_decoder = readImg(with_labels=False, target_size=(img_size, img_size))
# Pass the global batch size explicitly: Build_dataset's default `batch` was
# bound at definition time, before batch_size was scaled by the replica count.
dtest = Build_dataset(paths=test_paths, decode_fn=test_decoder, labels=None,
                      batch=batch_size, augment=False, repeat=False,
                      shuffle=False)

## Model Predictions

In [None]:
# Load the saved Keras model inside the strategy scope so it is created
# under the selected distribution strategy.
with strategy.scope():
    model= tf.keras.models.load_model('../input/fork-of-happywhale-classification-using-tpu-tra/efficientnetv2m_v0.h5')
model.summary()

In [None]:
# Run inference over the whole test dataset.
# presumably pred has one row per image and one column per class — process()
# below indexes it that way; TODO confirm against the model's output layer.
pred= model.predict(dtest, verbose=1)

In [None]:
def process(pred, k=4, encoder=None):
    """Turn per-class scores into space-separated submission strings.

    For every row of ``pred`` the ``k`` highest-scoring class indices are
    decoded to labels (best first) and ``'new_individual'`` is appended as
    the final guess.

    Args:
        pred: 2-D array-like of scores, indexed ``[row, class]``.
        k: Number of decoded classes to keep per row. ``k <= 0`` yields only
            ``'new_individual'``.
        encoder: Object with an ``inverse_transform(indices)`` method that
            returns string labels. Defaults to the notebook's global
            ``Encoder``.

    Returns:
        A list with one prediction string per row of ``pred``.
    """
    if encoder is None:
        encoder = Encoder

    scores = np.asarray(pred)
    if k <= 0:
        # ``[:, -0:]`` would select every column, so handle this explicitly.
        return ['new_individual'] * len(scores)

    # argsort is ascending: keep the last k columns, then reverse each row
    # so the best-scoring class comes first.
    top_k = np.argsort(scores)[:, -k:][:, ::-1]
    return [
        ' '.join([*encoder.inverse_transform(row), 'new_individual'])
        for row in top_k
    ]

In [None]:
# Build one submission string per test image (top-4 guesses plus
# 'new_individual') and store it with bracket assignment, which does not
# depend on the column already existing.
p = process(pred, k=4)
df['predictions'] = p

In [None]:
# Write the filled-in submission to the working directory, without the index column.
df.to_csv('sample_submission.csv', index=False)
df.head()