# Installation of Packages

In [47]:
# !pip install -q tensorflow_datasets
# !pip install datasets
# !pip install keras_cv
# !pip install keras_hub
!pip install -q evaluate

  pid, fd = os.forkpty()


# Download Dataset from external source

# Importing packages

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import  metrics
from tensorflow.keras import losses
from tensorflow.keras import utils
from tensorflow.keras import callbacks
from tensorflow.keras import ops
from tensorflow.keras import regularizers

from transformers import AutoImageProcessor, AutoProcessor 
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# from tensorflow.keras.datasets import mnist
# import tensorflow_datasets as tfds
from datasets import load_dataset
# from keras.applications import EfficientNetB0
from transformers import DefaultDataCollator
# import keras_cv
# import keras_hub
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Custom Functions

# Reading Data

In [30]:
food = load_dataset("food101", split="train[:5000]")

# Data Inspection / Visualization

In [31]:
food

Dataset({
    features: ['image', 'label'],
    num_rows: 5000
})

In [32]:
food = food.train_test_split(test_size=0.2)

In [33]:
food

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 4000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 1000
    })
})

In [34]:
food["train"][0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x384>,
 'label': 53}

In [35]:
categories = food["train"].features["label"].names
print(categories)

['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare', 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito', 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake', 'ceviche', 'cheesecake', 'cheese_plate', 'chicken_curry', 'chicken_quesadilla', 'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich', 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs', 'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel', 'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries', 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice', 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad', 'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus', 'ice_cream', 'lasagna', 'lobster_bisque', 'lobster_roll_sandwich', 'macaroni_and_cheese', 'macarons', 'miso_sou

# Data Preprocessing

In [36]:
id2label = {k:v for k,v in enumerate(categories)}
label2id = {v:k for k,v in enumerate(categories)}

In [37]:
print(id2label)

{0: 'apple_pie', 1: 'baby_back_ribs', 2: 'baklava', 3: 'beef_carpaccio', 4: 'beef_tartare', 5: 'beet_salad', 6: 'beignets', 7: 'bibimbap', 8: 'bread_pudding', 9: 'breakfast_burrito', 10: 'bruschetta', 11: 'caesar_salad', 12: 'cannoli', 13: 'caprese_salad', 14: 'carrot_cake', 15: 'ceviche', 16: 'cheesecake', 17: 'cheese_plate', 18: 'chicken_curry', 19: 'chicken_quesadilla', 20: 'chicken_wings', 21: 'chocolate_cake', 22: 'chocolate_mousse', 23: 'churros', 24: 'clam_chowder', 25: 'club_sandwich', 26: 'crab_cakes', 27: 'creme_brulee', 28: 'croque_madame', 29: 'cup_cakes', 30: 'deviled_eggs', 31: 'donuts', 32: 'dumplings', 33: 'edamame', 34: 'eggs_benedict', 35: 'escargots', 36: 'falafel', 37: 'filet_mignon', 38: 'fish_and_chips', 39: 'foie_gras', 40: 'french_fries', 41: 'french_onion_soup', 42: 'french_toast', 43: 'fried_calamari', 44: 'fried_rice', 45: 'frozen_yogurt', 46: 'garlic_bread', 47: 'gnocchi', 48: 'greek_salad', 49: 'grilled_cheese_sandwich', 50: 'grilled_salmon', 51: 'guacamole

In [39]:
checkpoint = "google/vit-base-patch16-224-in21k"

image_processor = AutoImageProcessor.from_pretrained(checkpoint, use_fast=True)

In [40]:
image_processor

ViTImageProcessorFast {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "ViTImageProcessorFast",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

In [41]:
small_train_dataset = food["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = food["test"].shuffle(seed=42).select(range(300))

In [43]:
small_train_dataset, small_eval_dataset

(Dataset({
     features: ['image', 'label'],
     num_rows: 1000
 }),
 Dataset({
     features: ['image', 'label'],
     num_rows: 300
 }))

In [19]:
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
augmentation = Compose([RandomResizedCrop(size), ToTensor(), normalize])


In [20]:
def transforms(examples):
    examples["pixel_values"] = [augmentation(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [47]:
def setTransform(example):
    return image_processor(examples["image"], return_tensor='pt')

In [52]:
small_train_dataset = small_train_dataset.with_transform(transforms)
small_eval_dataset = small_eval_dataset.with_transform(transforms)

In [21]:
food = food.with_transform(transforms)

# Data Postprocessing

# Model

In [54]:
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=len(categories),
    id2label=id2label,
    label2id=label2id,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Compile

In [55]:
training_args = TrainingArguments(
    output_dir="my_awesome_food_model",
    remove_unused_columns=False,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy", 
)

# Fit

In [57]:
data_collator = DefaultDataCollator()

In [66]:
import evaluate

accuracy = evaluate.load("accuracy")

In [58]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [59]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset, 
    compute_metrics=compute_metrics,
)

In [60]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:


Abort: 

# Evaluation

# Inference

In [None]:
ds = load_dataset("food101", split="validation[:10]")
image = ds["image"][0]

In [None]:
from transformers import pipeline

classifier = pipeline("image-classification", model="my_awesome_food_model")
classifier(image)

In [None]:
from transformers import AutoImageProcessor
import torch

image_processor = AutoImageProcessor.from_pretrained("my_awesome_food_model")
inputs = image_processor(image, return_tensors="pt")



In [None]:
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained("my_awesome_food_model")
with torch.no_grad():
    logits = model(**inputs).logits

In [None]:
predicted_label = logits.argmax(-1).item()
model.config.id2label[predicted_label]