<h1 style="font-family:verdana;"> <center>Human Protein Atlas - Single Cell Classification🎖 </center> </h1>

<h3><center style="color:#159364; font-family:cursive;">Training Notebook 🦾</center></h3>




In [None]:
# Install keras-applications straight from its GitHub repository (-q keeps pip quiet).
# The package is imported as `ka` in the next cell and supplies the EfficientNetB3 backbone.
!pip install git+https://github.com/keras-team/keras-applications.git -q

In [None]:
import keras_applications as ka
def set_to_tf(ka):
    """Point a ``keras_applications``-style module at the ``tensorflow.keras`` submodules.

    Overwrites ``ka._KERAS_BACKEND``, ``ka._KERAS_LAYERS``, ``ka._KERAS_MODELS``
    and ``ka._KERAS_UTILS`` with the ``backend``, ``layers``, ``models`` and
    ``utils`` objects imported from ``tensorflow.keras``.

    Args:
        ka: The module (or any attribute-assignable object) to patch in place.

    Returns:
        None.
    """
    # Imported lazily so TensorFlow is only required when the patch is applied.
    from tensorflow.keras import backend, layers, models, utils

    replacements = {
        "_KERAS_BACKEND": backend,
        "_KERAS_LAYERS": layers,
        "_KERAS_MODELS": models,
        "_KERAS_UTILS": utils,
    }
    for attribute_name, tf_keras_object in replacements.items():
        setattr(ka, attribute_name, tf_keras_object)
    
    
set_to_tf(ka)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# ML tools 
import tensorflow as tf
from kaggle_datasets import KaggleDatasets
from keras.models import Sequential
from keras import layers
from keras.optimizers import Adam
from tensorflow.keras import Model
# import tensorflow.keras.applications.efficientnet as efn
from tensorflow.keras.applications import Xception, ResNet50V2
import os, cv2
from keras import optimizers
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
# Load the training metadata table. Later cells read its 'ID' and 'Label' columns
# and treat every remaining column as a prediction target.
df = pd.read_csv('../input/hpa-csv-file/hpa_csv.csv')
display(df.head(5))  # rich preview of the first five rows
print(df.shape)  # (number of rows, number of columns)

In [None]:
def load(file, size=(600, 600)):
    """Read an image from disk with OpenCV and resize it.

    Args:
        file: Path of the image file, passed to ``cv2.imread``.
        size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(600, 600)``, the size that was previously hard-coded.

    Returns:
        The resized image exactly as returned by ``cv2.resize``. The channel
        order is whatever ``cv2.imread`` produced (BGR per the OpenCV docs);
        no colour conversion is done here.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None``, which is how
            OpenCV reports a missing or unreadable file.
    """
    img = cv2.imread(file)
    if img is None:
        # Without this check the failure surfaces later as an opaque cv2.resize error.
        raise FileNotFoundError(f"Could not read image: {file}")
    return cv2.resize(img, size)

def show(df, sub_dir= 'train/'):
    """Plot up to the first 12 images listed in ``df`` on a 2x6 grid.

    Args:
        df: DataFrame with an ``'ID'`` column (image file stem). If a
            ``'Label'`` column is present its value becomes the panel title;
            frames without it are plotted untitled.
        sub_dir: Sub-directory (with trailing slash) under
            ``../input/hpaimage512-data/TarName/`` that holds the ``.jpg``
            files. Its capitalised name, minus the trailing character, is used
            as the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    n_rows, n_cols = 2, 6
    preview = df.iloc[0:n_rows * n_cols, :]
    f, ax = plt.subplots(n_rows, n_cols, figsize=(18, 4))
    # Position panels by running count, not by index label, so frames whose
    # index is not 0..11 (filtered, shuffled, re-indexed) still plot correctly.
    for position, (_, row) in enumerate(preview.iterrows()):
        panel = ax[position // n_cols][position % n_cols]
        file = '../input/hpaimage512-data/TarName/' + sub_dir + row['ID'] + '.jpg'
        # OpenCV loads BGR while matplotlib expects RGB; convert so the red and
        # blue channels are not swapped on screen.
        img = cv2.cvtColor(load(file), cv2.COLOR_BGR2RGB)
        panel.imshow(img, aspect='auto')
        # Only label panels when the frame actually carries labels, instead of
        # swallowing every exception with a bare except.
        if 'Label' in row.index:
            panel.set_title(row['Label'])
        panel.set_xticks([]); panel.set_yticks([])
    plt.suptitle(sub_dir[:-1].capitalize(), size= 20)
    plt.show()
        

In [None]:
# Preview the first 12 training images; each panel is titled with the row's 'Label'.
show(df)

In [None]:
# Same preview for the test images, using the IDs listed in the sample submission file.
show(pd.read_csv('../input/hpa-single-cell-image-classification/sample_submission.csv'), sub_dir='test/')

In [None]:
# Every column except the identifier ('ID') and the raw 'Label' column is a target.
target_cols = df.drop(['ID','Label'], axis=1).columns.to_list()

In [None]:
############# CONFIG ##############

n_classes = len(target_cols)  # width of the model's sigmoid output layer
img_size = 300  # images are resized to img_size x img_size before entering the model
n_epochs = 30  # upper bound passed to model.fit; early stopping may end training sooner
lr= 0.001  # initial learning rate of the Adam optimizer
seed= 35  # random_state of the train/validation split
batch_size=14  # per-replica batch size; a later cell multiplies it by the replica count

In [None]:
def auto_select_accelerator():
    """Return a TPU distribution strategy when a TPU is reachable, else the default one.

    Tries to resolve, connect to and initialise a TPU cluster. If the resolver
    raises ``ValueError``, falls back to ``tf.distribute.get_strategy()``.
    Prints the TPU master address (TPU path only) and the number of replicas
    in sync.

    Returns:
        The selected ``tf.distribute`` strategy object.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # Prefer the stable class; the experimental alias is deprecated and is
        # kept only as a fallback for TensorFlow builds that lack the stable one.
        tpu_strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if tpu_strategy_cls is None:
            tpu_strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = tpu_strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy

In [None]:
'''
Reference
https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training

'''

def build_decoder(with_labels=True, target_size=(img_size, img_size), ext='jpg'):
    """Build the function that turns an image path into a resized float tensor.

    Args:
        with_labels: If truthy, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes and returns a
            single value.
        target_size: Size passed to ``tf.image.resize``. The default is
            captured from the global ``img_size`` when this function is defined.
        ext: Image format: ``'png'``, ``'jpg'`` or ``'jpeg'``. Matching is
            case-insensitive and tolerates a leading dot.

    Returns:
        A callable suitable for ``tf.data.Dataset.map``.

    Raises:
        ValueError: If ``ext`` is not a supported format. This is raised here,
            when the decoder is built, rather than later when the dataset
            first traces the decode function.
    """
    fmt = ext.lower().lstrip('.')
    if fmt == 'png':
        decode_image = tf.image.decode_png
    elif fmt in ('jpg', 'jpeg'):
        decode_image = tf.image.decode_jpeg
    else:
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)  # raw bytes of the file at `path`
        img = decode_image(file_bytes, channels=3)  # force three channels
        img = tf.cast(img, tf.float32) / 255.0  # cast to float32 and divide by 255
        img = tf.image.resize(img, target_size)  # resize to the target size
        return img

    def decode_with_labels(path, label):
        # The label passes through untouched; only the path is decoded.
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Build the random augmentation function used by the training dataset.

    The augmentation applies, in this order: random horizontal flip, random
    vertical flip, random saturation (0.8 to 1.2), random brightness (max
    delta 0.1) and random contrast (0.9 to 1.2).

    Args:
        with_labels: If truthy, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)``; otherwise it takes and
            returns the image only.

    Returns:
        A callable suitable for ``tf.data.Dataset.map``.
    """
    def augment(img):
        # Geometric transforms first, then the colour transforms.
        flipped = tf.image.random_flip_up_down(tf.image.random_flip_left_right(img))
        saturated = tf.image.random_saturation(flipped, 0.8, 1.2)
        brightened = tf.image.random_brightness(saturated, 0.1)
        return tf.image.random_contrast(brightened, 0.9, 1.2)

    def augment_with_labels(img, label):
        # Labels are unaffected by any of the transforms.
        return augment(img), label

    if with_labels:
        return augment_with_labels
    return augment

def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024, 
                  cache_dir=""):
    """Assemble a batched ``tf.data`` pipeline from image paths (and optional labels).

    Stages, in order: slice -> decode -> cache -> augment -> repeat ->
    shuffle -> batch -> prefetch. Each optional stage is skipped when its
    flag is falsy.

    Args:
        paths: Image paths to slice into dataset elements.
        labels: Optional labels aligned with ``paths``. When ``None`` the
            dataset yields images only.
        bsize: Batch size.
        cache: Whether to cache decoded elements.
        decode_fn: Path decoder; defaults to ``build_decoder(labels is not None)``.
        augment_fn: Augmenter; defaults to ``build_augmenter(labels is not None)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Location passed to ``Dataset.cache``; ``""`` is passed
            through unchanged and no directory is created for it.

    Returns:
        The assembled ``tf.data.Dataset``.
    """
    # Create the cache directory whenever caching is actually applied. The
    # cache stage below tests plain truthiness, so this guard must match it
    # (it used to require `cache is True`, skipping truthy values such as 1).
    if cache and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)

    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    # Cache sits before augmentation, so augmentations are applied after the cache on every pass.
    dset = dset.cache(cache_dir) if cache else dset
    dset = dset.map(augment_fn, num_parallel_calls=AUTO) if augment else dset
    dset = dset.repeat() if repeat else dset
    dset = dset.shuffle(shuffle) if shuffle else dset
    dset = dset.batch(bsize).prefetch(AUTO)  # prefetch overlaps preprocessing with model execution
    return dset


In [None]:
# Name handed to KaggleDatasets().get_gcs_path(). It matches the
# `hpaimage512-data` input folder that the preview cell reads images from.
COMPETITION_NAME = "hpaimage512-data"
# NOTE(review): presumably a gs:// bucket URL that TPU workers can read — confirm.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)
GCS_DS_PATH

In [None]:
# Pick the distribution strategy, then scale the batch size by the replica count.
# NOTE(review): batch_size is reassigned from its own previous value, so running
# this cell twice multiplies twice — re-run the CONFIG cell before re-running this one.
strategy = auto_select_accelerator()
batch_size = strategy.num_replicas_in_sync * batch_size
print('batch size', batch_size)

In [None]:
# Remote path of every training image, built from the 'ID' column.
paths = GCS_DS_PATH + "/TarName/train/" + df['ID'] + '.jpg'

# Get the multi-labels. Select them by name through target_cols (the same list
# n_classes is derived from) rather than by column position, so the label
# matrix always matches the model's output width even if columns are reordered.
labels = df[target_cols].values
labels.shape

In [None]:
# Train/validation split: hold out 12% of the rows for validation.
# random_state=seed makes the split repeatable across runs.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(paths, labels, test_size=0.12, random_state=seed)
print(train_paths.shape, valid_paths.shape)
# Per-column label sums for each split (last expression, so the cell displays it).
train_labels.sum(axis=0), valid_labels.sum(axis=0)

In [None]:
# One decoder shared by both datasets: returns (image, label) pairs resized to img_size.
decoder = build_decoder(with_labels=True, target_size=(img_size, img_size))

# Training set: relies on build_dataset's defaults (cache, augment, repeat, shuffle).
dtrain = build_dataset(
    train_paths, train_labels, bsize=batch_size, decode_fn=decoder
)

# Validation set: a single ordered pass with no augmentation; caching stays on by default.
dvalid = build_dataset(
    valid_paths, valid_labels, bsize=batch_size, 
    repeat=False, shuffle=False, augment=False, decode_fn=decoder
)

In [None]:
def build_model():
    """Build the multi-label classifier: EfficientNetB3 backbone plus a sigmoid head.

    Reads the globals ``img_size`` (input is ``img_size x img_size x 3``) and
    ``n_classes`` (number of sigmoid outputs). The backbone comes from the
    ``ka`` module with ImageNet weights and no classification top.

    All layers are taken from ``tf.keras`` so they come from the same Keras
    implementation as the backbone and the ``Model`` wrapper, instead of
    mixing in the standalone ``keras`` package.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an image batch to
        ``n_classes`` sigmoid outputs.
    """
    keras_layers = tf.keras.layers
    input_shape = (img_size, img_size, 3)

    inp = keras_layers.Input(shape=input_shape)
    base = ka.efficientnet.EfficientNetB3(input_shape=input_shape, weights='imagenet',
                                          include_top=False)

    x = base(inp)
    # Light dropout on the feature maps, then pool them to one vector per image.
    x = keras_layers.GlobalAveragePooling2D()(keras_layers.Dropout(0.1)(x))
    x = keras_layers.Dropout(0.3)(x)
    # One independent sigmoid per class.
    x = keras_layers.Dense(n_classes, activation='sigmoid')(x)

    return tf.keras.Model(inp, x)
    

In [None]:
# Create and compile the model inside the strategy scope.
with strategy.scope():
    model= build_model()
    loss= tf.keras.losses.BinaryCrossentropy(label_smoothing=0.0)
    model.compile(
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss=loss,
        # Pin the metric name so the history keys stay 'auc' / 'val_auc' even when
        # this cell is re-run (an unnamed metric can be auto-suffixed, e.g. 'auc_1').
        metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')],
    )

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Render the model graph, annotating each layer with its input/output shapes.
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
# All three callbacks watch the validation loss (lower is better).

# Multiply the learning rate by 0.1 after 2 epochs without improvement, never below 1e-6.
rlr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 2, verbose = 1, 
                                min_delta = 1e-4, min_lr = 1e-6, mode = 'min', cooldown=1)
        
# Write the model to 'hpa_effb3.h5' only when the validation loss improves.
ckp = ModelCheckpoint('hpa_effb3.h5',monitor = 'val_loss',
                      verbose = 1, save_best_only = True, mode = 'min')
        
# Stop after 5 epochs without improvement and restore the best weights seen so far.
es = EarlyStopping(monitor = 'val_loss', min_delta = 1e-4, patience = 5, mode = 'min', 
                    restore_best_weights = True, verbose = 1)

In [None]:
# Number of full batches per pass over the training split. dtrain is built with
# repeat enabled, so model.fit needs this to know where an epoch ends.
steps_per_epoch = train_paths.shape[0] // batch_size
steps_per_epoch

In [None]:
# Train for up to n_epochs, validating on dvalid after every epoch. The callbacks
# handle learning-rate reduction, early stopping and checkpointing.
# `history` is read by the plotting cells that follow.
history = model.fit(dtrain,                      
                    validation_data=dvalid,                                       
                    epochs=n_epochs,
                    callbacks=[rlr,es,ckp],
                    steps_per_epoch=steps_per_epoch,
                    verbose=1)

In [None]:
# Loss curves: one point per completed epoch for the training and validation sets.
loss_fig, loss_ax = plt.subplots(figsize=(12, 6))
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
for history_key, curve_label, curve_marker in (
    ("loss", "Training Loss", "o"),
    ("val_loss", "Validation Loss", "+"),
):
    loss_ax.plot(history.history[history_key], label=curve_label, marker=curve_marker)
loss_ax.grid(True)
loss_ax.legend()
plt.show()

In [None]:
# AUC curves for the training and validation sets.
# Keras may suffix an unnamed metric ('auc_1', 'auc_2', ...) when the model is
# re-compiled in the same session, so look the key up instead of hard-coding
# "auc". Falls back to "auc" if no such key exists.
auc_key = next((key for key in history.history if key.startswith("auc")), "auc")

plt.figure(figsize = (12, 6))
plt.xlabel("Epochs")
plt.ylabel("AUC")
plt.plot( history.history[auc_key], label = "Training AUC" , marker='o')
plt.plot( history.history["val_" + auc_key], label = "Validation AUC", marker='+')
plt.grid(True)
plt.legend()
plt.show()



## 🌄 Thanks for Reading

![](https://i.gifer.com/7ImI.gif)



<div class="alert alert-block alert-info" style="font-size:20px; font-family:verdana;">
 <span style="color:orange;">Please upvote if you found this notebook useful 🤞</span>
</div>



<hr><hr><hr>

<hr>