Thanks to https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training

# Setup

In [None]:
!pip install efficientnet -q

In [None]:
import os

# Yes, efficientnet is actually a PyPI package that you can install and use. 

# Here are 2 links leading to documentation for how to use this package.
# https://github.com/qubvel/efficientnet
# https://pypi.org/project/efficientnet/#examples
import efficientnet.tfkeras as efn

import numpy as np
import pandas as pd

# kaggle_datasets is a built-in package and can be used to retrieve datasets.
# However, it is optional. You can manually navigate to the left side,
# click "Add data" and then retrieve the data via the directory:
# "/kaggle/input/dataset_name".
from kaggle_datasets import KaggleDatasets

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras

# GroupKFold is a K-fold variant that keeps all samples sharing the same
# group label in the same fold, so a group never appears in both the
# training and the validation split.
# Refer to Scikit-learn's User's Guide:
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_cv_indices.html#sphx-glr-auto-examples-model-selection-plot-cv-indices-py.
from sklearn.model_selection import GroupKFold

#### Utility Functions for Building Dataset

In [None]:
# Setting your Kaggle notebook's accelerator to TPUv#-# is not enough on its own
# to actually train on the TPU: you additionally need to let TensorFlow know
# that you are using TPUs (connect to the TPU cluster and initialize it),
# which is what this function does.
# Refer to this video: https://www.youtube.com/watch?v=1pdwRQ1DQfY&t=121s.
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, using a TPU when the setup succeeds.

    Resolves a TPU cluster, connects to it, initializes the TPU system and
    wraps it in a TPU strategy. If any of these steps raises ``ValueError``,
    the current default strategy from ``tf.distribute.get_strategy()`` is
    used instead.

    Returns:
        The selected strategy. The number of replicas it keeps in sync is
        printed before returning.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)

        # ``tf.distribute.experimental.TPUStrategy`` is a deprecated alias.
        # Prefer the stable ``tf.distribute.TPUStrategy`` and only fall back
        # to the experimental name on TensorFlow versions that lack it.
        strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = strategy_cls(tpu)

        print("Running on TPU:", tpu.master())  # Master address used for the session.
    except ValueError:
        # TPU setup failed; use whatever strategy is currently active.
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy


# This wrapper function returns a function (its signature varies
# depending on whether you specify labels or not) that reads an image file
# and decodes it from PNG or JPEG into 3 color channels (RGB).
# It then casts the image to tf.float32, scales it by dividing by 255, and
# resizes it to your target size (a tuple of height and width, in that order).
def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Build a function that loads an image file as a resized float32 tensor.

    Args:
        with_labels: When true, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes only ``path``
            and returns the image.
        target_size: Size passed to ``tf.image.resize``.
        ext: Image format: ``'png'``, ``'jpg'`` or ``'jpeg'``. Matching is
            case-insensitive and a leading dot is ignored.

    Returns:
        The decode function described above.

    Raises:
        ValueError: If ``ext`` is not a supported format. This is raised here,
            when the decoder is built, rather than later while the returned
            function is being traced inside ``Dataset.map``.
    """
    fmt = str(ext).lower().lstrip('.')
    if fmt not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")
    is_png = fmt == 'png'

    def decode(path):
        file_bytes = tf.io.read_file(path)

        # Always decode to 3 channels so every image has the same depth.
        if is_png:
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Cast to float and divide by 255 before resizing.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        # The label is passed through untouched.
        return decode(path), label

    return decode_with_labels if with_labels else decode


# This wrapper function returns a function (its signature varies
# depending on whether you specify labels or not) that will
# randomly flip your image horizontally (left/right) and vertically (up/down).
def build_augmenter(with_labels=True):
    """Build a function that applies random flips to an image.

    Args:
        with_labels: When true, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)``; otherwise it takes and
            returns only the image.

    Returns:
        The augmentation function described above.
    """
    def augment(img):
        # Random left/right flip first, then random up/down flip.
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    def augment_with_labels(img, label):
        # Only the image is augmented; the label is returned unchanged.
        return augment(img), label

    if with_labels:
        return augment_with_labels
    return augment

# I included the 2 calls below (commented out) to let you follow what is going on
# when reading build_dataset.

# train_dataset = build_dataset(
#        train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
#    )

# IMSIZE[IMS] is 600
# decoder = build_decoder(with_labels=True, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='png')


# Build the Tf.data.Dataset obj from a list of paths, labels,
# a batch size, cache, shuffle, repeat, and the decode
# and augment functions.
def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Build a batched, prefetched ``tf.data.Dataset`` of decoded images.

    Args:
        paths: Image file paths; sliced element-wise into the dataset.
        labels: Optional labels aligned with ``paths``. When given, dataset
            elements are ``(image, label)`` pairs, otherwise just images.
        bsize: Batch size.
        cache: Whether to cache the decoded (pre-augmentation) elements.
        decode_fn: Function mapping a raw element to a decoded one. Defaults
            to ``build_decoder(labels is not None)``.
        augment_fn: Function applied after caching when ``augment`` is true.
            Defaults to ``build_augmenter(labels is not None)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Passed to ``Dataset.cache``. When non-empty (and caching
            is enabled) the directory is created first.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    # Use the same truthiness test here as for the ``.cache()`` call below, so
    # the directory is created whenever caching into it is about to happen.
    if cache and cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # If you specify no decode function, build a default one.
    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)

    # If you specify no augment function, build a default one.
    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)

    # Let TensorFlow tune parallelism and prefetch depth at runtime.
    AUTO = tf.data.experimental.AUTOTUNE

    slices = paths if labels is None else (paths, labels)

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    # Cache before augmenting so the random flips are re-drawn on every pass.
    dset = dset.cache(cache_dir) if cache else dset
    dset = dset.map(augment_fn, num_parallel_calls=AUTO) if augment else dset

    # Shuffle BEFORE repeating: every sample of one pass is emitted before the
    # next pass starts. Repeating first would let the shuffle buffer mix
    # samples across epoch boundaries, so some samples could be seen twice in
    # an epoch while others are skipped.
    dset = dset.shuffle(shuffle) if shuffle else dset

    # Repeat because the dataset is consumed for multiple epochs.
    dset = dset.repeat() if repeat else dset

    dset = dset.batch(bsize).prefetch(AUTO)

    return dset

#### Pre-Set Parameters

In [None]:
# Name of the Kaggle dataset that holds the pre-resized PNG images.
COMPETITION_NAME = "siimcovid19-512-img-png-600-study-png"
# Distribution strategy: TPU if available, otherwise the default strategy.
strategy = auto_select_accelerator()

# Global batch size: 16 images per replica times the number of replicas the
# strategy keeps in sync (on a TPU board with 4 chips of 2 cores each that
# is 8 replicas, i.e. 128 images per step).
BATCH_SIZE = strategy.num_replicas_in_sync * 16

# This asks Kaggle for the Google Cloud Storage (GCS) path of the dataset
# named COMPETITION_NAME that is attached to this notebook under
# "/kaggle/input/".
# Reading from the GCS bucket colocates the data with the TPU board
# for optimal TPU performance.
# Remember, this is the path to our dataset (except it's not in "/kaggle/input";
# rather, it's in GCS).
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

# Loading in the Data

In [None]:
# load_dir = f"/kaggle/input/{COMPETITION_NAME}/"  # The alternative to using kaggle_datasets.
# Study-level training labels, one row per study id.
df = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
label_cols = df.columns[1:5]  # Columns 1-4 hold the classes; column 0 is presumably the id.
df.head()  # 6054 unique IDs; this is the study level csv.

# Splitting the Data

In [None]:
# Number of cross-validation folds.
n_splits = 5
gkf  = GroupKFold(n_splits=n_splits)
df['fold'] = -1  # Initialize the column; -1 means "not assigned to a fold yet".
df.head()

In [None]:
# Split the data (6054 rows) into 5 folds and record, for every row, the
# index of the fold in which that row is used for validation.
# The study ids are used as groups, so rows sharing an id always end up
# in the same fold.
for fold, (train_idx, val_idx) in enumerate(gkf.split(df, groups = df.id.tolist())):
    print(len(train_idx), len(val_idx))  # Train / validation sizes of this fold.
    df.loc[val_idx, 'fold'] = fold

In [None]:
# Candidate input resolutions and the index of the one in use. Both are
# loop-invariant, so they are defined once instead of on every fold.
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600)
IMS = 7  # IMSIZE[7] = 600, the last entry; even larger sizes may be worth trying.

# Train one model per fold: fold i is held out for validation and the
# remaining folds are used for training. Each fold writes its best weights to
# model{i}.h5 and its training history to history{i}.csv.
for i in range(n_splits):
    # A fresh model is built on every iteration. Clear Keras' global state
    # first so state left by earlier folds does not accumulate in memory.
    keras.backend.clear_session()

    # Compute the fold mask once and reuse it for paths and labels.
    in_valid_fold = df['fold'] == i
    valid_df = df[in_valid_fold]
    train_df = df[~in_valid_fold]

    valid_paths = GCS_DS_PATH + '/study/' + valid_df['id'] + '.png'
    train_paths = GCS_DS_PATH + '/study/' + train_df['id'] + '.png'

    valid_labels = valid_df[label_cols].values  # shape: (1211/1210, 4).
    train_labels = train_df[label_cols].values  # shape: (4843/4844, 4).

    image_side = IMSIZE[IMS]
    decoder = build_decoder(with_labels=True, target_size=(image_side, image_side), ext='png')

    # Validation data is consumed once per evaluation, in a fixed order and
    # without augmentation.
    valid_dataset = build_dataset(
        valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
        repeat=False, shuffle=False, augment=False
    )

    train_dataset = build_dataset(
        train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
    )

    # One output unit per label column; a 1-D label array means one label.
    # (This replaces a bare ``except:`` that would have hidden any error.)
    n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

    # Variables must be created inside the strategy scope so that they are
    # placed on the strategy's replicas.
    with strategy.scope():
        model = keras.Sequential([
            efn.EfficientNetB7(
                input_shape=(image_side, image_side, 3),
                weights='imagenet',
                include_top=False),
            keras.layers.GlobalAveragePooling2D(),
            keras.layers.Dense(n_labels, activation='softmax')
        ])

        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss='categorical_crossentropy',
            metrics=[keras.metrics.AUC(multi_label=True)])

        model.summary()

    # The training dataset repeats indefinitely, so an epoch has to be
    # defined explicitly as a number of batches.
    steps_per_epoch = train_paths.shape[0] // BATCH_SIZE

    # Keep only the weights with the lowest validation loss for this fold.
    checkpoint = keras.callbacks.ModelCheckpoint(
        f'model{i}.h5', save_best_only=True, monitor='val_loss', mode='min')
    # Lower the learning rate when validation loss stalls for 3 epochs.
    lr_reducer = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", patience=3, min_lr=1e-6, mode='min')

    history = model.fit(
        train_dataset,
        epochs=20,
        verbose=1,
        callbacks=[checkpoint, lr_reducer],
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_dataset)

    # Persist the per-epoch metrics of this fold.
    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(f'history{i}.csv')