In [None]:
# Installing Dependencies
#!pip3 install --upgrade pip
#!pip3 install datasets
#!pip3 install evaluate
#!pip3 install gradio
#!pip3 install matplotlib
#!pip3 install torch
#!pip3 install torchvision
#!pip3 install torchaudio
#!pip3 install transformers
#!pip3 install wget

In [None]:
# Importing Dependencies
import evaluate
import matplotlib.pyplot as plt
import numpy as np
import os
import tarfile
import torch
import torchvision
import wget

from datasets import load_dataset

from torch.utils.data import DataLoader

from torchvision.transforms import Compose
from torchvision.transforms import Normalize
from torchvision.transforms import RandomHorizontalFlip
from torchvision.transforms import RandomResizedCrop
from torchvision.transforms import ToTensor
from torchvision.transforms import Resize
from torchvision.transforms import CenterCrop

from transformers import AutoModelForImageClassification
from transformers import AutoFeatureExtractor
from transformers import AutoTokenizer
from transformers import pipeline
from transformers import Trainer
from transformers import TrainingArguments

In [None]:
# Constants
# Remote archive holding the flower photos (plain string: there is nothing to interpolate)
DATASET_API_URL = 'https://github.com/jonfernandes/flowers-dataset/raw/main/flower_photos.tgz'

# Root directory for cached datasets and models. Defaults to the original
# machine-specific volume, but can be overridden without editing the notebook by
# setting the FLOWERS_DATA_DIR environment variable before the kernel starts.
DATA_DIR = os.environ.get('FLOWERS_DATA_DIR', '/Volumes/Data 1')
HUGGINGFACE_FILE_NAME = 'huggingface'

# Cache location for the downloaded dataset
DATASET_FILE_NAME = 'flower_images_dataset'
DATASET_FILE_PATH = os.path.join(DATA_DIR, HUGGINGFACE_FILE_NAME, DATASET_FILE_NAME)

# ImageNet Model with 1000 pre-trained classes of images
MODEL_ID = 'google/vit-base-patch16-224'
MODEL_FILE_PATH = os.path.join(DATA_DIR, HUGGINGFACE_FILE_NAME)

# Placeholder only; the hardware-selection cell assigns 'cuda', 'mps' or 'cpu'
DEVICE = ''

BATCH_SIZE = 32
METRIC_NAME = 'accuracy'
# NOTE(review): 'path16' looks like a typo of 'patch16'. Left unchanged because this
# string names the training output directory and the saved fine-tuned model.
MODEL_NAME = 'vit-base-path16-224-finetuned-flower'
FINETUNED_MODEL_FILE_PATH = os.path.join(DATA_DIR, HUGGINGFACE_FILE_NAME, MODEL_NAME)

In [None]:
# Training Acceleration Hardware Selection
#
# Preference order:
#   1. CUDA GPU (PC)
#   2. Metal Performance Shaders (MPS) GPU (Mac)
#   3. CPU fallback
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    # MPS availability is only queried when CUDA is not available
    DEVICE = 'mps' if torch.backends.mps.is_available() else 'cpu'

print(DEVICE)

In [None]:
# Data Retrieval

In [None]:
# Loading In Dataset
# Builds the dataset with the generic 'imagefolder' loader from the remote archive at
# DATASET_API_URL, using DATASET_FILE_PATH as the cache directory.
dataset = load_dataset('imagefolder', data_files = DATASET_API_URL, cache_dir = DATASET_FILE_PATH)

In [None]:
# Overview of the loaded splits
dataset

In [None]:
# Preview the first five training images.
# Relies on the notebook-provided `display`, which is not imported in this file.
for i in range(5):
    display(dataset['train'][i]['image'])

In [None]:
# Summary of the 'train' split
dataset['train']

In [None]:
# Column schema of the 'train' split; the 'label' feature is read in the next cell
dataset['train'].features

In [None]:
# Class names of the 'label' feature. Their list position is used as the class ID
# when id2label / label2id are built further down.
labels = dataset['train'].features['label'].names

In [None]:
labels

In [None]:
# Splitting The Dataset
#
# Two successive splits, each holding out 10% (test_size = 0.1) with a fixed seed:
#   1. dataset['train'] -> train + validation
#   2. the reduced dataset['train'] -> train + test
# Each step writes its result back into `dataset`, so these cells must run in order,
# exactly once. Re-running them without reloading `dataset` splits the already
# reduced 'train' split again.

In [None]:
# First split: hold out 10% of the training data
dataset_train_validation = dataset['train'].train_test_split(test_size = 0.1, seed = 1, shuffle = True)

In [None]:
dataset_train_validation

In [None]:
# Rename the held-out part from 'test' to 'validation'
dataset_train_validation['validation'] = dataset_train_validation.pop('test')

In [None]:
dataset_train_validation

In [None]:
# Write the new 'train' and 'validation' splits back into the main dataset
dataset.update(dataset_train_validation)

In [None]:
dataset

In [None]:
# Second split: hold out 10% of the remaining training data as the test set
dataset_train_test = dataset['train'].train_test_split(test_size = 0.1, seed = 1, shuffle = True)

In [None]:
dataset_train_test

In [None]:
# Write the further-reduced 'train' split and the new 'test' split back into the main dataset
dataset.update(dataset_train_test)

In [None]:
dataset

In [None]:
# Utilizing a pre-trained model without fine-tuning

In [None]:
# Load the stock MODEL_ID checkpoint (cache directory: MODEL_FILE_PATH) and move it
# onto the device chosen in the hardware-selection cell
model = AutoModelForImageClassification.from_pretrained(pretrained_model_name_or_path = MODEL_ID, 
                                                        cache_dir = MODEL_FILE_PATH).to(DEVICE)

In [None]:
# Put the model into evaluation mode. The original `model.eval` (no parentheses) only
# referenced the bound method and never called it. `eval()` returns the model itself,
# so the cell still displays the architecture.
model.eval()

In [None]:
# Pre-processing configuration that belongs to the same checkpoint as the model
feature_extractor = AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path = MODEL_ID, cache_dir = MODEL_FILE_PATH)

In [None]:
feature_extractor

In [None]:
# Pick one training image to run through the un-tuned model
training_image_id = 3
test_image = dataset['train'][training_image_id]['image']

In [None]:
test_image

In [None]:
# Pre-process the image and move the result to the model's device.
# NOTE: despite the variable name, return_tensors = 'pt' requests PyTorch tensors,
# not NumPy arrays. The name is kept so other cells that refer to it keep working.
image_numpy_representation = feature_extractor(images = test_image, return_tensors = 'pt').to(DEVICE)

# Inference only: disable autograd so no gradient graph is built or kept alive
with torch.no_grad():
    output = model(**image_numpy_representation)

In [None]:
# Modeled logits of the 1000 potential classes the input image could be
output

In [None]:
# Shape of the logits tensor
output.logits.shape

In [None]:
# Highest output class
torch.argmax(output.logits, dim = 1)

In [None]:
# Predicted label for the input image
prediction = torch.argmax(output.logits, dim = 1).item()

In [None]:
# ID 738 is mapped to the label of flowerpot
prediction

In [None]:
# Mapping between the IDs and labels
model.config

In [None]:
# Human-readable name of the predicted class
model.config.id2label[prediction]

In [None]:
# Check whether each flower class name appears verbatim among the pre-trained model's labels
for label in labels:
    print(f"'{label}' label is present within the pre-trained model: {label in model.config.label2id}")

In [None]:
# Defining A Model
# Lookup tables derived from the dataset's class names:
# list position -> name, and name -> list position
id2label = dict(enumerate(labels))
label2id = {name: index for index, name in enumerate(labels)}

In [None]:
# Show both lookup tables
print(f'ID:Label Mapping - {id2label}')
print(f'Label:ID Mapping - {label2id}')

In [None]:
# Defining the pre-trained model to suit the current use case, giving it the new number of labels,
# mapping of these labels to their appropriate IDs and vice versa, and to ignore the fact that it
# had previously been trained to distinguish between 1000 classes of objects
# (ignore_mismatched_sizes = True).
# NOTE: this rebinds `model`, and unlike the earlier load it is not moved to DEVICE here.
model = AutoModelForImageClassification.from_pretrained(pretrained_model_name_or_path = MODEL_ID, 
                                                        cache_dir = MODEL_FILE_PATH,
                                                        num_labels = len(labels),
                                                        id2label = id2label,
                                                        label2id = label2id,
                                                        ignore_mismatched_sizes = True)

In [None]:
# Pre-processing images

In [None]:
# Normalizing the images to avoid issues with extremes and outliers,
# using the mean and standard deviation stored on the feature extractor
normalize = Normalize(mean = feature_extractor.image_mean, std = feature_extractor.image_std)

In [None]:
# Expected image dimensions
feature_extractor.size

In [None]:
# The size values as a plain list; the first entry is what the transforms below use
list(feature_extractor.size.values())

In [None]:
# Type of the second size entry
type(list(feature_extractor.size.values())[1])

In [None]:
# Data Augmentation

# Target size: the first entry of the feature extractor's size mapping
_input_edge = list(feature_extractor.size.values())[0]

# Training pipeline: take a random crop resized to the target size, mirror it
# horizontally at random, convert to a tensor, then normalize. The randomness means
# the same source image yields a different input each time it is read.
training_transformation = Compose([
    RandomResizedCrop(_input_edge),
    RandomHorizontalFlip(),
    ToTensor(),
    normalize,
])

# Validation pipeline: no randomness. Resize, take the center crop at the target
# size, convert to a tensor, then normalize.
validation_transformation = Compose([
    Resize(_input_edge),
    CenterCrop(_input_edge),
    ToTensor(),
    normalize,
])

In [None]:
def transform_training_images(images):
    """Add a "pixel_values" entry built with the training transformation.

    Args:
        images: Batch mapping whose "image" entry is an iterable of images
            exposing ``convert``.

    Returns:
        The same mapping object, modified in place: "pixel_values" holds one
        transformed result per image, each converted to RGB first.
    """
    augmented = []
    for picture in images["image"]:
        augmented.append(training_transformation(picture.convert("RGB")))

    images["pixel_values"] = augmented
    return images

In [None]:
def transform_validation_images(images):
    """Add a "pixel_values" entry built with the validation transformation.

    Args:
        images: Batch mapping whose "image" entry is an iterable of images
            exposing ``convert``.

    Returns:
        The same mapping object, modified in place: "pixel_values" holds one
        transformed result per image, each converted to RGB first.
    """
    prepared = []
    for picture in images["image"]:
        prepared.append(validation_transformation(picture.convert("RGB")))

    images["pixel_values"] = prepared
    return images

In [None]:
# Start from a copy of the whole dataset carrying the training transform, then assign
# each split explicitly: random augmentation for 'train', the validation pipeline
# for 'validation' and 'test'.
transformed_dataset = dataset.with_transform(transform_training_images)
transformed_dataset['train'] = dataset['train'].with_transform(transform_training_images)
transformed_dataset['validation'] = dataset['validation'].with_transform(transform_validation_images)
transformed_dataset['test'] = dataset['test'].with_transform(transform_validation_images)

In [None]:
# Transformed Images

In [None]:
# Apply the training pipeline directly to the sample image
# (no .convert("RGB") here, unlike the batch helper functions)
transformed_test_image = training_transformation(test_image)

In [None]:
# PyTorch tensor format: (channels, rows, columns)
# Matplotlib tensor format: (rows, columns, channels)
# Rearranging the test image tensor's format is required to avoid an error
# NOTE(review): the tensor has been normalized, so displayed colours presumably
# differ from the source image - confirm if a faithful preview is wanted.
plt.imshow(transformed_test_image.permute(1, 2, 0))

In [None]:
# Same sample image through the validation pipeline (rebinds transformed_test_image)
transformed_test_image = validation_transformation(test_image)

In [None]:
plt.imshow(transformed_test_image.permute(1, 2, 0))

In [None]:
# Formatting images for input
# Read the first four training examples through the transformed dataset
four_test_images = [transformed_dataset['train'][i] for i in range(4)]

In [None]:
four_test_images

In [None]:
four_test_images[0]

In [None]:
# Container type, and type of a single example
print(type(four_test_images))
print(type(four_test_images[0]))

In [None]:
# Shape of each example's transformed tensor
for image in four_test_images:
    print(image['pixel_values'].shape)

In [None]:
# Labels as a plain Python list
four_test_images_labels = [image['label'] for image in four_test_images]

In [None]:
four_test_images_labels

In [None]:
# Labels must be formatted into the PyTorch tensor type
four_test_images_labels = torch.tensor([image['label'] for image in four_test_images])

In [None]:
four_test_images_labels

In [None]:
# Stack the per-image tensors along a new leading batch dimension
four_test_images_pixel_values = torch.stack([image['pixel_values'] for image in four_test_images])

In [None]:
# Correct input shape for PyTorch is (batch_size, number_of_channels, height_in_pixels, width_in_pixels)
four_test_images_pixel_values.shape

In [None]:
# Collating images together for batch processing
def batch_collate_images(images):
    """Merge a list of per-image examples into one batch.

    Args:
        images: Sequence of mappings, each with a 'label' value and a
            'pixel_values' tensor.

    Returns:
        dict with 'pixel_values' (the tensors stacked along a new first
        dimension) and 'labels' (a tensor of the label values), in that order.
    """
    class_ids = []
    for example in images:
        class_ids.append(example['label'])
    label_tensor = torch.tensor(class_ids)

    tensors = []
    for example in images:
        tensors.append(example['pixel_values'])
    pixel_tensor = torch.stack(tensors)

    return {'pixel_values': pixel_tensor, 'labels': label_tensor}

In [None]:
# Options shared by the loaders of all three splits
_shared_loader_options = {'batch_size': 4, 'collate_fn': batch_collate_images}

# Training examples are shuffled
training_dataloader = DataLoader(transformed_dataset['train'], shuffle = True, **_shared_loader_options)

# Validation and test examples keep their dataset order
validation_dataloader = DataLoader(transformed_dataset['validation'], shuffle = False, **_shared_loader_options)
testing_dataloader = DataLoader(transformed_dataset['test'], shuffle = False, **_shared_loader_options)

In [None]:
# Pull a single batch from the training loader as a sanity check
batch = next(iter(training_dataloader))

In [None]:
# Print the shape of every tensor in the batch ('pixel_values' and 'labels')
for key, value in batch.items():
    print(key, value.shape)

In [None]:
# Model Training Arguments

In [None]:
model_training_arguments = TrainingArguments(
    # Output directory for checkpoints (a path relative to the working directory)
    MODEL_NAME,
    # Evaluate and checkpoint once per epoch. With the previous 'steps' strategy the
    # default interval is 500 steps, but the recorded run took 279 steps for 3 epochs
    # (about 465 for 5). Evaluation and checkpointing therefore never triggered, and
    # load_best_model_at_end had no checkpoint to restore.
    # load_best_model_at_end requires the two strategies to match.
    evaluation_strategy = 'epoch',
    save_strategy = 'epoch',
    learning_rate = 5e-5,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    num_train_epochs = 5,
    # Restore the checkpoint with the best METRIC_NAME once training finishes
    load_best_model_at_end = True,
    metric_for_best_model = METRIC_NAME,
    # Keep the raw 'image' column: the on-the-fly transforms read it to build 'pixel_values'
    remove_unused_columns = False,
    #logging_dir = './logs',
    push_to_hub = False
)

In [None]:
# Model Training

In [None]:
# Load the metric named by METRIC_NAME.
# NOTE: this reuses the dataset cache directory (DATASET_FILE_PATH) as the metric cache.
evaluation_metric = evaluate.load(METRIC_NAME, cache_dir = DATASET_FILE_PATH)

In [None]:
def compute_metrics(batch):
    """Score a set of predictions with the loaded evaluation metric.

    Args:
        batch: Object exposing ``label_ids`` (reference class IDs) and
            ``predictions`` (per-class scores, one row per example).

    Returns:
        Whatever ``evaluation_metric.compute`` returns for the reference IDs and
        the highest-scoring class of each row.
    """
    true_ids = batch.label_ids
    # The predicted class is the column with the highest score in each row
    predicted_ids = np.argmax(batch.predictions, axis = 1)
    return evaluation_metric.compute(references = true_ids, predictions = predicted_ids)

In [None]:
# Wire the model, arguments, data splits, collator and metric function into one Trainer
model_trainer = Trainer(
    model = model,
    args = model_training_arguments,
    train_dataset = transformed_dataset['train'],
    eval_dataset = transformed_dataset['validation'],
    # The image feature extractor is passed through the `tokenizer` argument
    tokenizer = feature_extractor,
    data_collator = batch_collate_images,
    compute_metrics = compute_metrics
)

In [None]:
# NOTE(review): the figures recorded in the comments of these cells report 'epoch': 3.0,
# while num_train_epochs is currently 5. They presumably come from an earlier run -
# refresh them after re-training.

# TrainOutput
# global_step = 279
# training_loss = 0.14041148633512546
# metrics = {'train_runtime': 1247.4732, 'train_samples_per_second': 7.147, 'train_steps_per_second': 0.224, 
# 'total_flos': 6.909371568577659e+17, 'train_loss': 0.14041148633512546, 'epoch': 3.0}
model_trainer.train()

In [None]:
# Predictions for the held-out test split
model_trainer.predict(transformed_dataset['test'])

In [None]:
# Model Evaluation

In [None]:
# Evaluation on the training split. This split goes through the random training
# transformation, so the figures can vary between runs.
# eval_loss: 0.051226019859313965
# eval_runtime: 165.6979
# eval_samples_per_second: 17.936
# eval_steps_per_second: 0.561
# epoch: 3.0
model_trainer.evaluate(transformed_dataset['train'])

In [None]:
# Evaluation on the validation split
# eval_loss: 0.0882396474480629
# eval_runtime: 20.3162
# eval_samples_per_second: 18.064
# eval_steps_per_second: 0.591
# epoch: 3.0
model_trainer.evaluate(transformed_dataset['validation'])

In [None]:
# Evaluation on the test split
# NOTE(review): eval_runtime below is identical to the validation figure above -
# possibly a copy-paste; confirm.
# eval_loss: 0.0770476683974266
# eval_runtime: 20.3162
# eval_samples_per_second: 17.326
# eval_steps_per_second: 0.576
# epoch: 3.0
model_trainer.evaluate(transformed_dataset['test'])

In [None]:
# Persist the fine-tuned model to FINETUNED_MODEL_FILE_PATH for the inference cells below
model_trainer.save_model(FINETUNED_MODEL_FILE_PATH)

In [None]:
# Image Classification & Inference

In [None]:
# Utilizing argmax will return only the largest value associated with a class of flower
# Softmax, on the other hand, will return all of the associated probabilities for all classes of flowers
def classify_image(image, device = None):
    """Classify one image with the saved fine-tuned model.

    The model and feature extractor are loaded from FINETUNED_MODEL_FILE_PATH on
    every call, as in the original implementation.

    Args:
        image: Image accepted by the saved feature extractor.
        device: Torch device string to run on. Defaults to the notebook-wide
            DEVICE ('cuda', 'mps' or 'cpu') instead of a hard-coded 'mps', so the
            function also works on machines without Apple MPS.

    Returns:
        dict mapping each class label to its softmax probability as a float.
    """
    if device is None:
        device = DEVICE

    model = AutoModelForImageClassification.from_pretrained(FINETUNED_MODEL_FILE_PATH).to(device)
    model.eval()
    feature_extractor = AutoFeatureExtractor.from_pretrained(FINETUNED_MODEL_FILE_PATH)
    inputs = feature_extractor(image, return_tensors = 'pt').to(device)

    # Inference only: no gradient graph is needed
    with torch.no_grad():
        outputs = model(**inputs)

    # Argmax Approach (single best label):
    #predictions = torch.argmax(outputs.logits, dim = -1).item()
    #return model.config.id2label[predictions]

    # Softmax Approach: probabilities for every class of the single input image
    probabilities = torch.nn.functional.softmax(outputs.logits, dim = -1)[0].cpu().numpy()

    # Use the label names stored with the saved model rather than the notebook-global
    # `labels`, so the result always matches the model that produced the logits.
    id2label = model.config.id2label
    return {id2label[index]: float(probability) for index, probability in enumerate(probabilities)}
    

In [None]:
# Take the last image of the test split as an unseen example
test_image2 = dataset['test'][-1]['image']

In [None]:
test_image2

In [None]:
# Per-class confidences from the fine-tuned model
classify_image(test_image2)

In [None]:
# Where the fine-tuned model was saved
print(FINETUNED_MODEL_FILE_PATH)

In [None]:
# Model inputs and weights must be on the same device
# NOTE: this rebinds the notebook-level `model` and `feature_extractor` to the
# fine-tuned versions, with the model placed on the CPU.
model = AutoModelForImageClassification.from_pretrained(FINETUNED_MODEL_FILE_PATH).to('cpu')
feature_extractor = AutoFeatureExtractor.from_pretrained(FINETUNED_MODEL_FILE_PATH)

In [None]:
# Build an image-classification pipeline around the reloaded model and feature extractor
image_classifier = pipeline('image-classification', model = model, feature_extractor = feature_extractor)

In [None]:
# Classify the same unseen image through the pipeline
image_classifier(test_image2)