# Introduction
**This notebook is based on  [Jesse Mostipak’s Tutorial](https://www.kaggle.com/jessemostipak/getting-started-tpus-cassava-leaf-disease)**  
In this notebook we check the importance of colours in the classification process.

# Set up environment

In [None]:
!pip install -q tensorflow==2.3.2 # Use 2.3.0 for built-in EfficientNet

!pip install -q git+https://github.com/keras-team/keras-tuner@master # Use github head for newly added TPU support
!pip install -q cloud-tpu-client # Needed for sync TPU version
!pip install -U tensorflow-gcs-config==2.3.0 # Needed for using private dataset

In [None]:
import random, re, math, os, json
import numpy as np, pandas as pd, seaborn as sn
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf, tensorflow.keras.backend as K
from tensorflow.keras.utils import plot_model,to_categorical
from tensorflow.keras.models import load_model
from functools import partial
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import albumentations as A
import kerastuner as kt
import IPython
from IPython.display import FileLink
print('Tensorflow version ' + tf.__version__)

In [None]:
# Detect hardware, return appropriate distribution strategy
# Outcome of this cell: a module-level `strategy` object that the rest of the
# notebook uses for `strategy.scope()` and `strategy.num_replicas_in_sync`.
try:
    # Sync TPU version
    # (asks the TPU client to match the installed TensorFlow version,
    # restarting the TPU only 'ifNeeded')
    from cloud_tpu_client import Client
    c = Client()
    c.configure_tpu_version(tf.__version__, restart_type='ifNeeded')
    
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    print('Running on TPU ', tpu.master())
except ValueError:
    # Only ValueError is treated as "no TPU available"; any other exception
    # raised above propagates and stops the cell.
    tpu = None
    

if tpu:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

# The replica count is used below to scale BATCH_SIZE.
print("REPLICAS: ", strategy.num_replicas_in_sync)
print(tf.__version__)

In [None]:
from collections import Counter
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
import itertools
from tensorflow.keras import backend as K
from tensorflow.keras.applications import EfficientNetB5

As we will see below, the classes are not equally represented. Therefore, we need more than just accuracy to determine the validity of our model. Hence, we define the f1 metric.

In [None]:
def recall_m(y_true, y_pred):
    """Per-class recall, reduced over the first (batch) axis.

    The numerator is the sum of ``y_true * y_pred`` along axis 0 and the
    denominator is the sum of ``y_true`` along axis 0; ``K.epsilon()`` is added
    to the denominator so an absent class yields 0 rather than a division by zero.
    NOTE(review): presumably both arguments are one-hot tensors of shape
    (batch, classes), as produced in ``f1_m`` — confirm for other callers.
    """
    hits = K.sum(y_true * y_pred, axis=0)
    actual_positives = K.sum(y_true, axis=0)
    return hits / (actual_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Per-class precision, reduced over the first (batch) axis.

    The numerator is the sum of ``y_true * y_pred`` along axis 0 and the
    denominator is the sum of ``y_pred`` along axis 0; ``K.epsilon()`` is added
    to the denominator so a never-predicted class yields 0 rather than a
    division by zero.
    NOTE(review): presumably both arguments are one-hot tensors of shape
    (batch, classes), as produced in ``f1_m`` — confirm for other callers.
    """
    hits = K.sum(y_true * y_pred, axis=0)
    predicted_positives = K.sum(y_pred, axis=0)
    return hits / (predicted_positives + K.epsilon())

def f1_m(y_true, y_pred):
    """Mean of the per-class F1 scores computed on the current batch.

    The predicted scores are first turned into hard one-hot predictions
    (arg-max over the last axis, depth ``len(CLASSES)``). Per-class precision
    and recall are then combined as ``2 * mean(p * r / (p + r + eps))``.
    """
    n_classes = len(CLASSES)
    hard_pred = tf.one_hot(tf.argmax(y_pred, axis=-1), n_classes)
    p = precision_m(y_true, hard_pred)
    r = recall_m(y_true, hard_pred)
    per_class = (p * r) / (p + r + K.epsilon())
    return 2 * K.mean(per_class)


# Set up variables
We'll set up some of our variables for our notebook here. 

In [None]:
# Let tf.data pick the parallelism / prefetch buffer sizes.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Local competition files (csv, json, raw images).
BASE_DIR = '../input/cassava-leaf-disease-classification/'
# GCS location of the same dataset; the TFRecord files are globbed from here.
GCS_PATH = KaggleDatasets().get_gcs_path('cassava-leaf-disease-classification')
# Global batch size: 16 samples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
#BATCH_SIZE = 512
# Image side length per EfficientNet variant name; the 'B5' entry is used
# below to build IMAGE_SIZE.
IMAGE_EN = {'B0':224,'B1':240,'B2':260,'B3':300,'B4':380,'B5':456,'B6':528,'B7':600}
# Class names; only len(CLASSES) is used (one-hot depth and output layer size).
CLASSES = ['0', '1', '2', '3', '4']
# NOTE(review): EPOCHS is not referenced by the fit calls visible in this
# notebook, which pass literal epoch counts — confirm whether it is still needed.
EPOCHS = 10
# Default probability used by func_BAW for the black-and-white augmentation.
PROBA_CONTRAST=1.
# Outputs (saved models, pickles, submission) are written relative to this directory.
os.chdir(r'/kaggle/working')

## EfficientNet architecture.  
We look at the best places to conduct fine tuning.

In [None]:
base_model = EfficientNetB5(weights='imagenet', include_top=False)
IMAGE_SIZE = [IMAGE_EN['B5'],IMAGE_EN['B5']]

In [None]:
plot_model(base_model)

In [None]:
n_Add = [i for i in range(len(base_model.layers)) if isinstance(base_model.layers[i],tf.keras.layers.Add)]
print(n_Add)

# Load the data

In [None]:
TRAINING_FILENAMES, VALID_FILENAMES = train_test_split(
    tf.io.gfile.glob(GCS_PATH + '/train_tfrecords/ld_train*.tfrec'),
    test_size=0.125, random_state=5)

TEST_FILENAMES = tf.io.gfile.glob(GCS_PATH + '/test_tfrecords/ld_test*.tfrec')

## Data visualization

In [None]:
with open(os.path.join(BASE_DIR, "label_num_to_disease_map.json")) as file:
    map_classes = json.loads(file.read())
    map_classes = {int(k) : v for k, v in map_classes.items()}
    
print(json.dumps(map_classes, indent=4))

In [None]:
input_files = os.listdir(os.path.join(BASE_DIR, "train_images"))
print(f"Number of train images: {len(input_files)}")

In [None]:
df_train = pd.read_csv(os.path.join(BASE_DIR, "train.csv"))
df_train["class_name"] = df_train["label"].map(map_classes)
plt.figure(figsize=(8, 4))
sn.countplot(y="class_name", data=df_train);

Since the classes are not evenly distributed, we use class weights during training so that the model is not dominated by the most frequent class.

In [None]:
# This function returns the labels weights, compounded by a coefficient n.
def c_weights(labels,n=3/4,num_classes=5):
    """Return class weights proportional to ``count ** -n``.

    Parameters
    ----------
    labels : iterable of hashable
        One class label per sample; classes are expected to be the integers
        ``0 .. num_classes - 1``.
    n : float, default 3/4
        Exponent applied to the class counts. ``n=0`` gives uniform weights,
        ``n=1`` gives weights inversely proportional to class frequency.
    num_classes : int, default 5
        Number of classes for which a weight is returned (previously
        hard-coded to 5).

    Returns
    -------
    dict
        ``{class_index: weight}`` for ``class_index`` in ``range(num_classes)``.
        When every class is present the weights average to 1.

    Raises
    ------
    ZeroDivisionError
        If a class in ``range(num_classes)`` has no sample and ``n > 0``.
    """
    counts = Counter(labels)
    # Normalisation constant. It is deliberately not called `A`, which is the
    # alias this notebook gives to the albumentations import.
    scale = len(counts) / np.sum([count ** -n for count in counts.values()])
    return {i: scale * counts[i] ** -n for i in range(num_classes)}

In [None]:
Counter(df_train["label"])

## Shows examples

In [None]:
def visualize_batch(image_ids, labels):
    """Show training images on a 5x4 grid, each titled with its class label.

    Parameters
    ----------
    image_ids : iterable of str
        File names inside ``BASE_DIR/train_images``.
    labels : iterable
        Label displayed in the title of the matching image.
    """
    plt.figure(figsize=(20, 15))
    train_dir = os.path.join(BASE_DIR, "train_images")

    for position, pair in enumerate(zip(image_ids, labels), start=1):
        image_id, label = pair
        plt.subplot(5, 4, position)
        bgr = cv2.imread(os.path.join(train_dir, image_id))
        # The image is converted from BGR to RGB before being handed to matplotlib.
        plt.imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
        plt.title(f"Class: {label}", fontsize=12)
        plt.axis("off")

    plt.show()

In [None]:
# Draw 4 random rows for each of the 5 labels (20 images, matching the 5x4
# grid of visualize_batch). No random_state is passed to `.sample`, so the
# selection differs from run to run.
tmp_index = []
for i in range(5):
    tmp_index += list(df_train.loc[df_train['label']==i].sample(4).index)
    
image_ids = df_train["image_id"].loc[tmp_index].values
labels = df_train["label"].loc[tmp_index].values

visualize_batch(image_ids, labels)

## Decode the data

In [None]:
def count_data_items(filenames):
    """Sum the item counts encoded in TFRecord file names.

    Each file name is expected to contain ``-<digits>.`` (for example
    ``name-1338.tfrec``); the digits of the first such match are read as the
    number of records in that file.

    Returns the NumPy sum of those counts.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    per_file = []
    for name in filenames:
        per_file.append(int(count_pattern.search(name).group(1)))
    return np.sum(per_file)

In [None]:
def decode_image(image):
    """Decode JPEG bytes into a 3-channel float32 tensor resized to IMAGE_SIZE."""
    pixels = tf.image.decode_jpeg(image, channels=3)
    return tf.image.resize(tf.cast(pixels, tf.float32), IMAGE_SIZE)

In [None]:
def read_tfrecord(example, labeled):
    """Parse one serialized TFRecord example.

    Parameters
    ----------
    example : serialized example
        Passed to ``tf.io.parse_single_example``.
    labeled : bool
        If true, the record is read with an int64 ``target`` feature and
        ``(image, label)`` is returned with the label cast to int32.
        Otherwise the record is read with a string ``image_name`` feature and
        ``(image, image_name)`` is returned.
    """
    features = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        features["target"] = tf.io.FixedLenFeature([], tf.int64)
    else:
        features["image_name"] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, features)
    image = decode_image(parsed['image'])
    if not labeled:
        return image, parsed['image_name']
    return image, tf.cast(parsed['target'], tf.int32)

In [None]:
def load_dataset(filenames, labeled=True, ordered=False):
    """Build a parsed (unbatched) dataset from TFRecord files.

    Parameters
    ----------
    filenames : list of str
        TFRecord files, read with ``num_parallel_reads=AUTOTUNE``.
    labeled : bool, default True
        Forwarded to ``read_tfrecord``.
    ordered : bool, default False
        When false, deterministic ordering is disabled on the dataset options
        (trades ordering for speed).
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False
    parse = partial(read_tfrecord, labeled=labeled)
    return (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
        .with_options(options)
        .map(parse, num_parallel_calls=AUTOTUNE)
    )

## Train-Validation class repartition

In [None]:
dataset = load_dataset(TRAINING_FILENAMES, labeled=True, ordered=True)

In [None]:
#dataset = tf.data.TFRecordDataset(TRAINING_FILENAMES[0], num_parallel_reads=AUTOTUNE)
# Collect the label of every training example (images are decoded but ignored).
train_labels = []
for _, labels in dataset.take(-1):  # take(-1) iterates over the whole dataset
    train_labels.append(labels.numpy())

In [None]:
# Collect the label of every validation example, in file order (ordered=True);
# valid_labels is later compared with the model predictions.
dataset = load_dataset(VALID_FILENAMES, labeled=True, ordered=True)
#dataset = tf.data.TFRecordDataset(TRAINING_FILENAMES[0], num_parallel_reads=AUTOTUNE)
valid_labels = []
for images, labels in dataset.take(-1):  # take(-1) iterates over the whole dataset
    valid_labels.append(labels.numpy())

In [None]:
# Number of samples per class (labels 0..4) in the training split.
c_train = Counter(train_labels)
x_train = [0,1,2,3,4]
y_train = [c_train[i] for i in x_train]

# Same counts for the validation split.
c_valid = Counter(valid_labels)
x_valid = [0,1,2,3,4]
y_valid = [c_valid[i] for i in x_valid]

We verify that the train and valid distributions are similar.

In [None]:
plt.barh(x_train,y_train)

In [None]:
plt.barh(x_valid,y_valid)

## Adding in augmentations 

In [None]:
def data_treat(image,label):
    """Cast the image to float32 and one-hot encode the label (depth ``len(CLASSES)``)."""
    encoded = tf.one_hot(label, len(CLASSES))
    return tf.cast(image, tf.float32), encoded

In [None]:
def data_treat_test(image,label):
    """Min-max rescale an image to the range [0, 255].

    The image is cast to float32, shifted so its minimum is 0 and divided by
    its new maximum before being multiplied by 255. The second argument
    (the image id for the test set) is returned untouched.

    A constant image has a maximum of 0 after the shift; ``divide_no_nan`` maps
    that 0/0 case to 0 instead of producing NaNs that would reach the model.
    """
    image = tf.cast(image, tf.float32)
    image = image - tf.math.reduce_min(image)
    image = tf.math.divide_no_nan(image, tf.math.reduce_max(image))
    image = image * 255
    return image,label

In [None]:
def data_augment(image,label):
    """Randomly flip the image left/right, then up/down; the label is unchanged."""
    # Per the original author's note: with dataset.prefetch(AUTO) in the
    # pipeline this work overlaps with the accelerator computing gradients.
    flipped = tf.image.random_flip_up_down(tf.image.random_flip_left_right(image))
    return flipped, label

In [None]:
# This function returns a function with variable probability of applying standardisation.
def func_standard(p=1):
    """Build a dataset map function that min-max rescales images to [0, 255].

    Parameters
    ----------
    p : float, default 1
        Probability of applying the rescaling to a given image; a uniform
        sample in [0, 1) is drawn per call and compared with ``p``.

    Returns
    -------
    callable
        ``data_standard(image, label) -> (image, label)``.
    """
    def data_standard(image,label):
        if tf.random.uniform(shape=(), minval=0, maxval=1)<p:
            image = image-tf.math.reduce_min(image)
            # A constant image has a maximum of 0 here; divide_no_nan returns 0
            # for 0/0 instead of NaN, which would otherwise poison the loss.
            image = tf.math.divide_no_nan(image, tf.math.reduce_max(image))
            image = image*255
        return image,label
    return data_standard

In [None]:
def func_BAW(p=PROBA_CONTRAST):
    """Build a dataset map function that randomly turns images black and white.

    With probability ``p`` the image is converted to grayscale, rounded, and
    expanded back to three identical channels; otherwise it is left as is.
    The default for ``p`` is the value of ``PROBA_CONTRAST`` at definition time.
    """
    def data_BAW(image,label):
        draw = tf.random.uniform(shape=(), minval=0, maxval=1)
        if draw < p:
            image = tf.image.grayscale_to_rgb(
                tf.math.round(tf.image.rgb_to_grayscale(image)))
        return image,label
    return data_BAW

## Define data loading methods
The following functions will be used to load our `training`, `validation`, and `test` datasets, as well as print out the number of images in each dataset.

In [None]:
def get_training_dataset(ordered=False):
    """Return the infinite, shuffled, batched training pipeline.

    Per-example steps, in order: label one-hot / float cast, random flips,
    min-max rescaling (always applied) and black-and-white conversion with
    probability 1/5. The dataset is then repeated, shuffled with a buffer of
    1024, batched by ``BATCH_SIZE`` and prefetched.
    """
    per_example_steps = (
        data_treat,
        data_augment,
        func_standard(p=1),
        func_BAW(p=1./5.),
    )
    dataset = load_dataset(TRAINING_FILENAMES, labeled=True, ordered=ordered)
    for step in per_example_steps:
        dataset = dataset.map(step, num_parallel_calls=AUTOTUNE)
    return dataset.repeat().shuffle(1024).batch(BATCH_SIZE).prefetch(AUTOTUNE)

Black and white transformation is not performed on the validation dataset.

In [None]:
def get_validation_dataset(ordered=False):
    """Return the batched validation pipeline.

    Examples get the label one-hot / float cast and the min-max rescaling;
    no flips and no black-and-white conversion are applied.
    """
    return (
        load_dataset(VALID_FILENAMES, labeled=True, ordered=ordered)
        .map(data_treat, num_parallel_calls=AUTOTUNE)
        .map(func_standard(p=1), num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

In [None]:
def get_test_dataset(ordered=False):
    """Return the batched, unlabeled test pipeline of ``(image, image_name)`` pairs.

    Images are min-max rescaled by ``data_treat_test``.
    """
    return (
        load_dataset(TEST_FILENAMES, labeled=False, ordered=ordered)
        .map(data_treat_test, num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

In [None]:
# Dataset sizes are read from the counts embedded in the TFRecord file names
# (see count_data_items); no records are actually scanned here.
NUM_TRAINING_IMAGES = count_data_items(TRAINING_FILENAMES)
NUM_VALIDATION_IMAGES = count_data_items(VALID_FILENAMES)
NUM_TEST_IMAGES = count_data_items(TEST_FILENAMES)

print('Dataset: {} training images, {} validation images, {} (unlabeled) test images'.format(
    NUM_TRAINING_IMAGES, NUM_VALIDATION_IMAGES, NUM_TEST_IMAGES))

## Learning rate schedule.  

In [None]:
def loop_schedule(l):
    """Build a LearningRateScheduler callback starting at learning rate ``l``.

    At epoch 0 the learning rate is set to ``l``; at every later epoch the
    current learning rate is divided by 1.01.
    """
    def scheduler(epoch,lr):
        return lr/1.01 if epoch>0 else l
    return tf.keras.callbacks.LearningRateScheduler(scheduler)

## Early stopping. 

In [None]:
# Callback that stops training when the validation F1 score (`val_f1_m`,
# maximised because mode='max') has not improved for 4 epochs (patience=4).
# The metric only exists when `fit` is given validation data.
# restore_best_weights=False: the weights of the last epoch are kept, not
# those of the best epoch.
early_stop=tf.keras.callbacks.EarlyStopping(monitor='val_f1_m', min_delta=0, patience=4, verbose=0,
    mode='max', baseline=None, restore_best_weights=False)

## Optimization

In [None]:
# load data
train_dataset = get_training_dataset()
valid_dataset = get_validation_dataset()

In [None]:
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
VALID_STEPS = NUM_VALIDATION_IMAGES // BATCH_SIZE

In [None]:
# We create a function to instanciate our model.
def createModel(dropout_rate=0.5,
                unit_1 = 997,
                unit_2 = len(CLASSES),
                trainable_loops=0):
    """Build an ImageNet-pretrained EfficientNetB5 with a dense classification head.

    Parameters
    ----------
    dropout_rate : float
        Rate of the three Dropout layers of the head.
    unit_1, unit_2 : int
        Sizes of the two hidden ReLU Dense layers.
    trainable_loops : int
        Selects the first trainable layer of the base as
        ``n_Add[-trainable_loops-1]`` (``n_Add`` holds the indices of the
        ``Add`` layers). Layers from that index to the end of the base are
        trainable and all earlier ones are frozen.

    Returns
    -------
    tf.keras.Sequential
        Uncompiled model ending in a softmax over ``len(CLASSES)`` classes.
    """
    backbone = EfficientNetB5(weights='imagenet', include_top=False,input_shape=[*IMAGE_SIZE,3])
    # Index of the first layer left trainable.
    first_trainable = n_Add[-trainable_loops-1]
    for index, layer in enumerate(backbone.layers):
        layer.trainable = index >= first_trainable

    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(unit_1, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(unit_2, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(len(CLASSES), activation='softmax'),
    ]
    return tf.keras.Sequential([backbone, *head])

In [None]:
def random_search(num):
    """Run ``num`` random hyper-parameter trials and tabulate their validation scores.

    For each trial the following are sampled:

    * initial learning rate, log-uniform in [1e-6, 1e-2];
    * dropout rate, log-uniform in [0.01, 0.5];
    * sizes of the two hidden dense layers, log-uniform in [4, 4096], rounded;
    * class-weight exponent, uniform in [0, 1).

    A fresh model is built and trained for up to 15 epochs, then evaluated on
    the validation set. Validation data is passed to ``fit`` so that the
    ``val_f1_m`` metric monitored by ``early_stop`` exists; without it the
    early-stopping callback can never trigger.

    Uses the notebook-level ``train_dataset``, ``valid_dataset``,
    ``STEPS_PER_EPOCH``, ``VALID_STEPS``, ``early_stop``, ``df_train`` and
    ``strategy``. The trainable part of the base model is not varied.

    Parameters
    ----------
    num : int
        Number of trials.

    Returns
    -------
    pandas.DataFrame
        One row per trial with the sampled values and the resulting
        ``loss``, ``categorical_accuracy`` and ``f1_m``.
    """
    def log_uniform(low, high):
        # Sample uniformly in log-space between `low` and `high`.
        return np.exp(random.random()*np.log(high/low)+np.log(low))

    columns = ['Dropout_rate', 'units_1', 'units_2', 'learning_rate',
               'weight_exponant', 'loss', 'categorical_accuracy', 'f1_m']
    rows = []
    for _ in range(num):
        # Sampling order is kept: learning rate, dropout, units, exponent.
        lr = log_uniform(0.000001, 0.01)
        dp = log_uniform(0.01, 0.5)
        units_1 = int(np.round(log_uniform(4, 4096)))
        units_2 = int(np.round(log_uniform(4, 4096)))
        wt = random.random()

        with strategy.scope():
            model = createModel(dropout_rate=dp,unit_1 = units_1,unit_2 = units_2)

            # The optimizer's learning rate is a placeholder: loop_schedule(lr)
            # overrides it at epoch 0.
            model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002),
                    loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ['categorical_accuracy',f1_m])

        model.fit(train_dataset,
                    steps_per_epoch=STEPS_PER_EPOCH,
                    epochs=15,
                    class_weight=c_weights(df_train["label"],n=wt),
                    validation_data=valid_dataset,
                    validation_steps=VALID_STEPS,
                    callbacks=[early_stop,loop_schedule(lr)])

        # evaluate returns [loss, categorical_accuracy, f1_m]
        scores = model.evaluate(valid_dataset,steps=VALID_STEPS)
        rows.append([dp, units_1, units_2, lr, wt] + list(scores))

    return pd.DataFrame(rows, columns=columns)

In [None]:
# for memory reason it may be needed to run this search many times, while restarting the kernel.
results = random_search(2)

In [None]:
# Easy to use download button.
results.to_pickle(r'opti.pkl')
FileLink(r'opti.pkl')

In [None]:
results

In [None]:
plt.semilogx(results['Dropout_rate'],results['f1_m'],'.')
plt.xlabel('Dropout_rate')
plt.ylabel('f1_m')

In [None]:
plt.semilogx(results['units_1'],results['f1_m'],'.')
plt.xlabel('units_1')
plt.ylabel('f1_m')

## Building the final model
In order to ensure that our model is trained on the TPU, we build it using `with strategy.scope()`.    

This model was built using transfer learning, meaning that we have a _pre-trained model_ (EfficientNetB5) as our base model and then the customizable model built using `tf.keras.Sequential`.

Note that we're using `categorical_crossentropy` as our loss function, because our labels are one-hot encoded (see `data_treat`).

In [None]:
with strategy.scope():       
    model = createModel(dropout_rate=0.35,unit_1 = 256,
                unit_2 = 16,trainable_loops=0)
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
        loss='categorical_crossentropy',  
        metrics=['categorical_accuracy',f1_m])

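We check the resulting network to be sure that the added layers are trainable, while most of the EfficientNetB5 base is frozen (with `trainable_loops=0`, only the layers from the last `Add` layer onward remain trainable).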

In [None]:
model.summary()

# Train the model on pictures
After checking the importance of colours, we train our model with B&W pictures. In this case we duplicate the same black and white image 3 times to create a false RGB image. This image is then fed into EfficientNetB5.  
The B&W transformation is added as an image augmentation, as later on we will want to train models on coloured images with some B&W images.

In [None]:
# load data
train_dataset = get_training_dataset()
valid_dataset = get_validation_dataset()

In [None]:
# Whole batches per epoch; the remainder of the integer division is dropped.
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
VALID_STEPS = NUM_VALIDATION_IMAGES // BATCH_SIZE

# loop_schedule(0.00025) sets the learning rate at epoch 0, overriding the
# value given to the optimizer at compile time.
# steps_per_epoch is required because the training dataset repeats forever.
history = model.fit(train_dataset, 
                    steps_per_epoch=STEPS_PER_EPOCH, 
                    epochs=20,
                    class_weight=c_weights(df_train["label"],n=3/4),
                    validation_data=valid_dataset,
                    validation_steps=VALID_STEPS,
                    callbacks=[early_stop,loop_schedule(0.00025)])
# The model was compiled with the custom metric f1_m.
# NOTE(review): reloading this file presumably needs custom_objects={'f1_m': f1_m} — confirm.
model.save('EffNet5_0.h5')

# Visualizing training
We visualize evolution of loss and accuracy over epochs. 

In [None]:
# print out variables available to us
print(history.history.keys())

In [None]:
# create learning curves to evaluate model performance
history_frame = pd.DataFrame(history.history)
history_frame.loc[:, ['f1_m', 'val_f1_m']].plot()
history_frame.loc[:, ['categorical_accuracy', 'val_categorical_accuracy']].plot();

## Result distribution.

In [None]:
train_dataset = get_training_dataset(ordered=True)
valid_dataset = get_validation_dataset(ordered=True)

In [None]:
# this code will convert our test image data to a float32 
def to_float32(image, label):
    """Cast the image to float32 and pass the second element through unchanged."""
    image_f32 = tf.cast(image, tf.float32)
    return image_f32, label

In [None]:
valid_ds = valid_dataset.map(to_float32)
fit_valid_label = model.predict(valid_ds)
fit_valid_label = np.argmax(fit_valid_label,axis=1)

## Confusion matrix.

In [None]:
def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """Print and plot a confusion matrix.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix, true labels on rows and predictions on columns.
    classes : sequence
        Tick labels for both axes.
    normalize : bool, default False
        If true, entry (i, j) is divided by ``sqrt(row_total_i * row_total_j)``,
        the geometric mean of the numbers of true samples of classes i and j.
        The matrix size is taken from ``cm`` itself (it used to be hard-coded
        to 5 classes).
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used by ``imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1)
        # Symmetric normaliser: nm[i, j] = sqrt(row_totals[i] * row_totals[j]).
        nm = np.sqrt(np.outer(row_totals, row_totals))
        cm = cm.astype('float') / nm
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    print(cm)

    # Cells darker than half of the maximum get white text for readability.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Values are displayed rounded to 3 decimals.
        plt.text(j, i, np.round(1000*cm[i, j])/1000,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
cm = confusion_matrix(valid_labels[:len(fit_valid_label)],fit_valid_label)

In [None]:
plot_confusion_matrix(cm, [0,1,2,3,4],normalize=True)

## Fine tuning. 
We will now fine tune the model. 
We will compare fine tuning of the last block, and of the last two blocks.

In [None]:
# 1 trainable block.
with strategy.scope():       
    model = createModel(dropout_rate=0.35,unit_1 = 256,
                unit_2 = 16,trainable_loops=1)
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.00002),
        loss='categorical_crossentropy',  
        metrics=['categorical_accuracy',f1_m])

In [None]:
model.summary()

### Training

In [None]:
train_dataset = get_training_dataset()
valid_dataset = get_validation_dataset()

In [None]:
STEPS_PER_EPOCH = NUM_TRAINING_IMAGES // BATCH_SIZE
VALID_STEPS = NUM_VALIDATION_IMAGES // BATCH_SIZE

history = model.fit(train_dataset, 
                    steps_per_epoch=STEPS_PER_EPOCH, 
                    epochs=30,
                    class_weight=c_weights(df_train["label"],n=3/4),
                    validation_data=valid_dataset,
                    validation_steps=VALID_STEPS,
                    callbacks=[early_stop,loop_schedule(0.00015)])
model.save('EffNet5_1.h5')

## 2 loops

In [None]:
train_dataset = get_training_dataset()
valid_dataset = get_validation_dataset()

In [None]:
# 2 trainable block.
# trainable_loops=2 selects n_Add[-3] as the first trainable layer of the base.
with strategy.scope():       
    model = createModel(dropout_rate=0.35,unit_1 = 256,
                unit_2 = 16,trainable_loops=2)
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.00002),
        loss='categorical_crossentropy',  
        metrics=['categorical_accuracy',f1_m])

In [None]:
model.summary()

In [None]:
history = model.fit(train_dataset, 
                    steps_per_epoch=STEPS_PER_EPOCH, 
                    epochs=15,
                    class_weight=c_weights(df_train["label"],n=3/4),
                    validation_data=valid_dataset,
                    validation_steps=VALID_STEPS,
                    callbacks=[early_stop,loop_schedule(0.0001)]
                   )
model.save('EffNet5_2.h5')

# Making predictions
Now that we've trained our model we can use it to make predictions! 

In [None]:
test_ds = get_test_dataset(ordered=True) 

In [None]:
# Ordered pipeline: the image ids extracted from test_ds for the submission
# must line up with the predictions computed here.
test_ds = get_test_dataset(ordered=True) 
test_ds = test_ds.map(to_float32)

print('Computing predictions...')
# Keep only the images; model.predict does not need the ids.
test_images_ds = test_ds.map(lambda image, idnum: image)
probabilities = model.predict(test_images_ds)
predictions = np.argmax(probabilities, axis=-1)
print(predictions)

# Creating a submission file
Now that we've trained a model and made predictions we're ready to submit to the competition! You can run the following code below to get your submission file.

In [None]:
print('Generating submission.csv file...')
# Second pass over test_ds to recover the image names, gathered in one batch
# of NUM_TEST_IMAGES elements and decoded to unicode strings.
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
test_ids = next(iter(test_ids_ds.batch(NUM_TEST_IMAGES))).numpy().astype('U') # all in one batch
# NOTE(review): the header written here is 'id,label' while train.csv names its
# column `image_id` — confirm the expected header against sample_submission.csv.
np.savetxt('submission.csv', np.rec.fromarrays([test_ids, predictions]), fmt=['%s', '%d'], delimiter=',', header='id,label', comments='')
!head submission.csv

Be aware that because this is a code competition with a hidden test set, internet and TPUs cannot be enabled on your submission notebook. Therefore TPUs will only be available for training models. For a walk-through on how to train on TPUs and run inference/submit on GPUs, see our [TPU Docs](https://www.kaggle.com/docs/tpu#tpu6).