## Installation

In [None]:
!python -m pip install keras_cv datasets transformers tensorboard tensorflow ipywidgets opencv-python tensorflow-datasets
!git-lfs --version

Log in to Hugging Face (only needed the first time)

In [None]:
from huggingface_hub import notebook_login

# Authenticate this notebook with the Hugging Face Hub.
# NOTE(review): presumably this stores the token that a later cell reads back with
# HfFolder.get_token() and passes to PushToHubCallback — confirm.
notebook_login()


Show that the GPU is being used

In [None]:
from tensorflow.config.experimental import list_physical_devices

# Print the GPU devices TensorFlow can see.
print(list_physical_devices(device_type="GPU"))

# Identifier of the pre-trained checkpoint used by the following cells
# (image processor, model, output directory name and Hub repo name).
model_id = "google/vit-base-patch16-224-in21k"

Now create the dataset; this is also where the data augmentation is defined

In [None]:
from transformers import ViTImageProcessor
from datasets import load_dataset
from tensorflow import keras
from tensorflow.keras import layers
from keras_cv.layers import RandAugment
import tensorflow as tf

# RandAugment parameters: number of random operations applied per image and their strength
num_layers = 2
magnitude = 0.15

# Load the image processor that belongs to the pre-trained checkpoint
image_processor = ViTImageProcessor.from_pretrained(model_id)

# Create RandAugment transformation.
# `value_range` must describe the pixel range of the images passed to the layer.
# The layer is applied to the processor's `pixel_values`, i.e. AFTER normalization.
# NOTE(review): assumes the processor rescales to [0, 1] and normalizes with
# mean = std = 0.5, giving values in [-1, 1] — confirm against
# image_processor.image_mean / image_std if model_id changes.
rand_augment = RandAugment(
    value_range=(-1, 1),
    augmentations_per_image=num_layers,
    magnitude=magnitude,
)


def transform(batch):
    """Preprocess a batch for evaluation (no augmentation).

    Args:
        batch: mapping with an "image" entry (iterable of images) and a "label" entry.

    Returns:
        The image processor's output with the batch labels stored under "labels".
    """
    images = list(batch["image"])
    processed = image_processor(images, return_tensors="tf")
    processed["labels"] = batch["label"]
    return processed

def augment(batch):
    """Preprocess a batch for training and apply RandAugment.

    Args:
        batch: mapping with an "image" entry (iterable of images) and a "label" entry.

    Returns:
        The image processor's output with augmented "pixel_values" and the batch
        labels stored under "labels".
    """
    inputs = image_processor([x for x in batch["image"]], return_tensors="tf")
    # The augmentation layer works on channels-last images, so move the channel
    # axis to the end while keeping height and width in order:
    # (N, C, H, W) -> (N, H, W, C). The previous perm=[0,3,2,1] produced
    # (N, W, H, C), i.e. spatially transposed images.
    channels_last = tf.transpose(inputs["pixel_values"], perm=[0, 2, 3, 1])
    augmented = rand_augment(channels_last)
    # Restore the channels-first layout: (N, H, W, C) -> (N, C, H, W)
    inputs["pixel_values"] = tf.transpose(augmented, perm=[0, 3, 1, 2])
    inputs["labels"] = batch["label"]
    return inputs

# Load the image dataset from the current directory
dataset = load_dataset("streetview_images_cropped", data_dir="./")

# Fraction of the images held out for evaluation
test_size=.15
# Fixed seed so the shuffle and the train/test split are identical on every run;
# without it, re-running the notebook moves images between the train and test sets.
split_seed = 42

dataset = dataset["train"].shuffle(seed=split_seed).train_test_split(test_size=test_size, seed=split_seed)

# Transforms are applied lazily on access: augmentation for training only,
# plain preprocessing for evaluation.
dataset['train'] = dataset['train'].with_transform(augment)
dataset['test'] = dataset['test'].with_transform(transform)
processed_dataset = dataset

Specify hyperparameters

In [None]:
from huggingface_hub import HfFolder
import tensorflow as tf

# Label metadata taken from the training split
train_split = processed_dataset['train']
class_labels = train_split.features["label"].names
num_images_train = train_split.num_rows

# Mappings between (string) class ids and class names, passed to the model config
id2label = {str(idx): name for idx, name in enumerate(class_labels)}
label2id = {name: idx for idx, name in id2label.items()}

# Training hyperparameters
num_train_epochs = 10
train_batch_size = 32
eval_batch_size = 32
learning_rate = 6e-5
weight_decay_rate = 0.01
num_warmup_steps = 0

# Local output directory is the checkpoint name without the organisation prefix;
# the Hub repo name is derived from the same string.
output_dir = model_id.split("/")[1]
hub_token = HfFolder.get_token()
hub_model_id = f'dl-au-tamas-jedrek/{output_dir}-street-view'


Get model, specify loss and metrics

In [None]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf

# create optimizer with weight decay
# The learning-rate schedule is defined in optimizer steps (batches), not images:
# steps per epoch = number of full training batches. Using the raw image count
# here would stretch the decay schedule by a factor of `train_batch_size`.
steps_per_epoch = num_images_train // train_batch_size
num_train_steps = steps_per_epoch * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# load pre-trained ViT model with a classification head sized for our labels
model = TFViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(class_labels),
    id2label=id2label,
    label2id=label2id,
)

# define loss (the model outputs logits, labels are integer class ids)
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# define metrics
metrics=[
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.SparseTopKCategoricalAccuracy(3, name="top-3-accuracy"),
]

# compile model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

Transform dataset for training

In [None]:
# Training data is shuffled every epoch.
tf_train_dataset = model.prepare_tf_dataset(processed_dataset['train'], batch_size=train_batch_size, shuffle=True)
# Evaluation data is not shuffled: the order is irrelevant for metrics.
# NOTE(review): shuffle=True presumably also drops the last partial batch by
# default, which would silently exclude some test images — confirm.
tf_eval_dataset = model.prepare_tf_dataset(processed_dataset['test'], batch_size=eval_batch_size, shuffle=False)

Run to display a sample of training images

In [None]:
import matplotlib.pyplot as plt

# Take one batch from the training pipeline and show its first nine images
sample_images, sample_labels = next(iter(tf_train_dataset))
plt.figure(figsize=(10, 10))
for i, image in enumerate(sample_images[:9]):
    ax = plt.subplot(3, 3, i + 1)
    # Images are channels-first; imshow needs (H, W, C). A bare tf.transpose()
    # reverses all axes to (W, H, C) and shows the image mirrored on its diagonal.
    channels_last = tf.transpose(image, perm=[1, 2, 0])
    # Undo the processor's normalization so values are back in the displayable
    # [0, 1] range. NOTE(review): assumes image_mean / image_std are the values
    # that were applied to these pixel_values — confirm.
    restored = channels_last * image_processor.image_std + image_processor.image_mean
    plt.imshow(restored.numpy())
    plt.axis("off")

Set up the training callbacks: TensorBoard logging, early stopping, and pushing to the Hub during training

In [None]:
import os
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping

# Callbacks handed to model.fit(), in this order:
#   1. TensorBoard logs written to <output_dir>/logs
#   2. Early stopping on validation accuracy, with a patience of one epoch
#   3. Upload to the Hugging Face Hub repo `hub_model_id`
callbacks = [
    TensorboardCallback(log_dir=os.path.join(output_dir, "logs")),
    EarlyStopping(monitor="val_accuracy", patience=1),
    PushToHubCallback(
        output_dir,
        hub_model_id=hub_model_id,
        hub_token=hub_token,
    ),
]



Train model

In [None]:
from transformers import logging as transformers_logging
# Raise transformers' log level to INFO for the training run
transformers_logging.set_verbosity_info()

# Fine-tune the model; the returned value is kept in `train_results`
train_results = model.fit(
    tf_train_dataset,
    epochs=num_train_epochs,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    verbose=1,
)