In [1]:
import keras.optimizers
import numpy as np
%matplotlib inline
from transformers import ViTImageProcessor
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import os
import datasets
from transformers import DefaultDataCollator
from transformers import TFViTForImageClassification, create_optimizer
from IPython.display import HTML
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [2]:

model_id = "google/vit-base-patch16-224-in21k"

def create_image_folder_dataset(root_path):
  """creates `Dataset` from image folder structure"""

  # get class names by folders names
  _CLASS_NAMES= os.listdir(root_path)
  # defines `datasets` features`
  features=datasets.Features({
                      "img": datasets.Image(),
                      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                  })
  # temp list holding datapoints for creation
  img_data_files=[]
  label_data_files=[]
  # load images into list for creation
  for img_class in os.listdir(root_path):
    for img in os.listdir(os.path.join(root_path,img_class)):
      path_=os.path.join(root_path,img_class,img)
      img_data_files.append(path_)
      label_data_files.append(img_class)
  # create dataset
  ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
  return ds

In [3]:
train_imgs = create_image_folder_dataset("train")
img_class_labels = train_imgs.features["label"].names

In [4]:
feature_extractor = ViTImageProcessor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(feature_extractor.size["height"], feature_extractor.size["width"]),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.2),
        layers.RandomBrightness(factor=0.2),
        layers.RandomContrast(factor=0.2),
    ],
    name="data_augmentation",
)
# use keras image data augementation processing
def augmentation(examples):
    examples["pixel_values"] =  [data_augmentation(image) for image in examples["img"]]
    process(examples)
    return examples


# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img'], ))
    #examples["pixel_values"] = examples["pixel_values"][0]
    return examples

# we are also renaming our label col to labels to use `.to_tf_dataset` later
train_imgs = train_imgs.rename_column("label", "labels")


In [5]:
test_size=.1
train_val_set = train_imgs.train_test_split(test_size=test_size)

In [6]:
train_val_set["test"] = train_val_set["test"].map(process, batch_size=16, batched=True)
train_val_set["test"].save_to_disk("test.hf")

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/408 [00:00<?, ? examples/s]

In [7]:
#processed_dataset_train = train_imgs.map(process)
ds = train_val_set["train"].map(process, batch_size=16, batched=True)
while True:
    ds = datasets.concatenate_datasets([ds, train_val_set["train"].map(augmentation, batch_size=16, batched=True)])
    ds.save_to_disk("train.hf")

Map:   0%|          | 0/3672 [00:00<?, ? examples/s]

Map:   0%|          | 0/3672 [00:00<?, ? examples/s]



ValueError: Exception encountered when calling layer "resizing" "                 f"(type Resizing).

Attempt to convert a value (<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x765 at 0x15C08B704C0>) with an unsupported type (<class 'PIL.JpegImagePlugin.JpegImageFile'>) to a Tensor.

Call arguments received by layer "resizing" "                 f"(type Resizing):
  • inputs=<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1024x765 at 0x15C08B704C0>

In [None]:
train_val_set = {}
train_val_set["train"] = datasets.load_from_disk("train.hf")
train_val_set["test"] = datasets.load_from_disk("test.hf")

In [None]:
id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}

num_train_epochs = 5
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
output_dir=model_id.split("/")[1]
hub_model_id = f'{model_id.split("/")[1]}-eyes'
fp16=True

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
if fp16:
  tf.keras.mixed_precision.set_global_policy("mixed_float16")

In [None]:


# Data collator that will dynamically pad the inputs received, as well as the labels.
data_collator = DefaultDataCollator(return_tensors="tf")

# converting our train dataset to tf.data.Dataset
tf_train_dataset = train_val_set["train"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)

# converting our test dataset to tf.data.Dataset
tf_eval_dataset = train_val_set["test"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=eval_batch_size,
   collate_fn=data_collator)
processed_dataset = None

In [None]:
tf_train_dataset

In [None]:


# create optimizer wight weigh decay
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# load pre-trained ViT model
model = TFViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(img_class_labels),
    id2label=id2label,
    label2id=label2id,
)

# define loss
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# define metrics
metrics=[
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
]
callbacks=[tf.keras.callbacks.EarlyStopping(patience=0)]
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=metrics,)

In [None]:
history = model.fit(
    tf_train_dataset.prefetch(1),
    validation_data=tf_eval_dataset.prefetch(1),
    callbacks=callbacks,
    epochs=100,
)

In [None]:
model.fit(
    tf_train_dataset.prefetch(3),
    validation_data=tf_eval_dataset.prefetch(3),
    callbacks=callbacks,
    epochs=100,
)

In [None]:
test_imgs = create_image_folder_dataset("test")

In [None]:
test_imgs = test_imgs.rename_column("label", "labels")
processed_dataset_test = test_imgs.map(process, batched=True, batch_size=32)

In [None]:
tf_test_dataset = processed_dataset_test.to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=False,
   batch_size=eval_batch_size,
   collate_fn=data_collator)

In [None]:
y_pred = model.predict(tf_test_dataset.prefetch(2))

In [None]:
y_pred_norm = []
for vector in y_pred["logits"]:
    y_pred_norm.append(np.argmax(vector))

In [None]:

def evaluate(y_t, y_p):
    cm = confusion_matrix(y_t, y_p)
    ax = sn.heatmap(cm, annot=True, fmt='g') # font size
    ax.set(xlabel="Predicted Number", ylabel="True Number")
    plt.show()

    prfs = pd.DataFrame()
    precision, recall, fscore, support = precision_recall_fscore_support(y_t, y_p)
    prfs["Digit"] = range(0, 5)
    prfs["Precision"] = precision
    prfs["Recall"] = recall
    prfs["F1-score"] = fscore
    prfs["Occurances"] = support
    print("The accuracy of the model is " + str(round(100 * accuracy_score(y_t, y_p),2)) + "%.")
    return prfs.to_html(index=False)

In [None]:
HTML(evaluate(list(processed_dataset_test["labels"]), y_pred_norm))

In [None]:
model.save_weights("augmented.hdf5")