In [3]:
import numpy as np
import matplotlib.pyplot as plt
from helper_methods import get_train_val, create_dataset, show_images, load_and_transform_image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import ResNet18_Weights
from torch.utils.data import TensorDataset, DataLoader
from transformers import AutoImageProcessor, ViTForImageClassification
from transformers import ViTImageProcessor




In [None]:
# Step 1: Uninstall existing libraries
#pip uninstall transformers accelerate -y

# Step 2: Install required libraries
#pip install transformers[torch] accelerate

# Restart the runtime manually via the "Runtime" menu or by running:
import os
os.kill(os.getpid(), 9)

: 

In [4]:
df_train, df_val = get_train_val(filepath='C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\datasets\\annotations\\annotations\\trainval.txt', val_size=0.2)
# C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\datasets\\images\\images\\

X_train, Y_train = create_dataset(df_train, base_path='C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\datasets\\images\\images\\')
X_val, Y_val = create_dataset(df_val, base_path='C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\datasets\\images\\images\\')

Y_train = Y_train.long()
Y_val = Y_val.long()

In [5]:
from datasets import Dataset

print(type(df_train))
print(type(df_val))
train_dataset = Dataset.from_pandas(df_train)
val_dataset = Dataset.from_pandas(df_val)
print(type(train_dataset))
print(type(val_dataset))
print(train_dataset)


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'datasets.arrow_dataset.Dataset'>
<class 'datasets.arrow_dataset.Dataset'>
Dataset({
    features: ['image_id', 'class_id', 'species', 'breed', '__index_level_0__'],
    num_rows: 2944
})


In [6]:
model_name_or_path = 'google/vit-base-patch16-224-in21k'
from transformers import ViTForImageClassification
labels = torch.unique(Y_train)
labels = labels.tolist()
model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=torch.max(Y_train) + 1,
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from transformers import ViTImageProcessor
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

def transform(example_batch):
    #print(example_batch) # Normally we want a tensor of 'pixel_values': tensor and 'labels': AS INPUTS
    # Take a list of PIL images and turn them to pixel values
    #inputs = processor([x for x in example_batch['image']], return_tensors='pt')
    images_tensors = []
    #Y = []
    base_path = "C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\datasets\\images\\images\\"
    for im_id in example_batch['image_id']:
        #print("APOEL: ", im_id)
        #image_id, species_id = row[0], row[2]
        image_path = f"{base_path}{im_id}.jpg"  # Adjust format as needed
        image_tensor = load_and_transform_image(image_path)
        images_tensors.append(image_tensor)
    #    Y.append(class_id)
    
    # Stack all tensors to create a single tensor
    inputs = {}
    inputs['pixel_values'] = torch.stack(images_tensors)
    # Don't forget to include the labels!
    inputs['labels'] = example_batch['class_id']
    return inputs
prepared_df_train = train_dataset.with_transform(transform)
prepared_df_train = val_dataset.with_transform(transform)


In [8]:
#collate_fn,compute_metrics,prepared_ds["train"],prepared_ds["validation"],processor
#from transformers import ViTImageProcessor
#model_name_or_path = 'google/vit-base-patch16-224-in21k'
#processor = ViTImageProcessor.from_pretrained(model_name_or_path)
#def transform(example_batch):
#    # Take a list of PIL images and turn them to pixel values
#    inputs = processor([x for x in example_batch['image']], return_tensors='pt')
#    # Don't forget to include the labels!
#    inputs['labels'] = example_batch['labels']
#    return inputs
#prepared_ds = ds.with_transform(transform)

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }
from datasets import load_metric
metric = load_metric("accuracy")
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)


  metric = load_metric("accuracy")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [9]:
import transformers
import accelerate

print(f"Transformers version: {transformers.__version__}")
print(f"Accelerate version: {accelerate.__version__}")

Transformers version: 4.40.2
Accelerate version: 0.30.1


In [16]:

from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="C:\\Users\\alexa\\Documents\\Deep Learning\\Project\\outputs",
  per_device_train_batch_size=4, # 16
  evaluation_strategy="steps",
  num_train_epochs=4,
  fp16=True,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)
  

In [17]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    # <class 'datasets.arrow_dataset.Dataset'> should be given while we give <class 'pandas.core.frame.DataFrame'>
    #train_dataset=prepared_ds["train"], 
    train_dataset=prepared_df_train, #df_train, train_dataset

    #eval_dataset=prepared_ds["validation"],
    eval_dataset=prepared_df_train, #df_val, val_dataset
    tokenizer=processor,
)


In [18]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()
#metrics = trainer.evaluate(prepared_ds['validation'])
metrics = trainer.evaluate(val_dataset) #df_val

trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

  0%|          | 0/184 [00:53<?, ?it/s]
  0%|          | 0/736 [00:00<?, ?it/s]

RuntimeError: GET was unable to find an engine to execute this computation