In [None]:
# Run `nvidia-smi` in a shell so the cell output shows the GPU/driver status of this machine.
!nvidia-smi

: 

In [None]:


import os
import torch

# Set CUDA_VISIBLE_DEVICES to a specific 7g.80gb MIG device
#os.environ["CUDA_VISIBLE_DEVICES"] = "1:0"  # Use MIG 7g.80gb Device 0 under GPU 6

# Report every CUDA device PyTorch can see: index, torch.device handle,
# marketing name and total memory in MB.
if not torch.cuda.is_available():
    print("CUDA is not available")
else:
    num_devices = torch.cuda.device_count()
    print(f"Number of available GPU devices: {num_devices}")

    bytes_per_mb = 1024 ** 2
    for i in range(num_devices):
        device = torch.device(f"cuda:{i}")
        device_name = torch.cuda.get_device_name(i)
        device_properties = torch.cuda.get_device_properties(i)
        # total_memory is reported in bytes; convert to MB for display.
        total_memory = device_properties.total_memory / bytes_per_mb

        print(f"Device {i}: {device}")
        print(f"Device {i}: {device_name}")
        print(f"  Total GPU Memory: {total_memory} MB")

In [None]:
import os
from   datasets import load_dataset
import datasets


dataset_name = "food101"
path_dataset = "/raid/scratch/tuchsanai/food101"

# Only the first 1000 rows of the train split are pulled, to keep the run small.
dataset = load_dataset(dataset_name, split="train[:1000]")

# Write a copy to disk the first time only; an existing directory is left untouched.
# To reuse that copy instead of calling load_dataset, replace the line above with
# `datasets.load_from_disk(path_dataset)`.
if not os.path.exists(path_dataset):
    dataset.save_to_disk(path_dataset)

# Shuffle deterministically, then expose the target column under the name `labels`.
dataset = dataset.shuffle(seed=42).rename_column("label", "labels")

# 80/20 split with a fixed seed; the `test` part is used as the validation set.
train_val_dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset, val_dataset = train_val_dataset["train"], train_val_dataset["test"]

In [None]:
# Display the shuffled dataset (its `label` column has been renamed to `labels`).
dataset

In [None]:
# Display the object returned by `train_test_split` (indexed by `train` and `test`).
train_val_dataset

In [None]:
# Display the training split (`train_val_dataset["train"]`).
train_dataset 

In [None]:
# Display the validation split (`train_val_dataset["test"]`).
val_dataset  

In [None]:
# Class names come from the `labels` feature of the training split.
labels = train_val_dataset["train"].features["labels"].names

# Two lookup tables between class name and class index; the index is stored
# as a string on both sides.
label2id = {name: str(index) for index, name in enumerate(labels)}
id2label = {str(index): name for index, name in enumerate(labels)}

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer
from torchvision import transforms


# Checkpoint shared by the image processor and the classifier.
model_name = "google/vit-base-patch16-224-in21k"

# Image processor that prepares raw images for this checkpoint.
image_processor = AutoImageProcessor.from_pretrained(model_name)

# Classification model sized to the dataset's label set, carrying both
# name <-> id lookup tables.
model = AutoModelForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)



In [None]:
# The function below is itself named `transforms`, which rebinds the name an
# earlier cell imported from torchvision. The augmentation classes are therefore
# imported directly so this cell keeps working when it is re-run.
from torchvision.transforms import ColorJitter, Compose, ToTensor

# Augmentation used by `transforms`: random colour jitter followed by conversion
# of the PIL image to a tensor.
jitter = Compose([ColorJitter(brightness=0.5, hue=0.5), ToTensor()])


def transforms(examples):
    """Add an augmented `pixel_values` entry to a batch of examples.

    Args:
        examples: batch dict with an `image` entry holding objects that
            support `.convert("RGB")` (PIL images are assumed - TODO confirm).

    Returns:
        The same dict, with `pixel_values` set to the jittered version of
        every image (one item per input image, in the same order).
    """
    examples["pixel_values"] = [jitter(image.convert("RGB")) for image in examples["image"]]
    return examples


# Register the function as the on-access transform of `dataset`.
dataset.set_transform(transforms)

In [None]:
# Imported directly (not via the `transforms` module name) because another cell
# of this notebook defines a function called `transforms`.
from torchvision.transforms import ColorJitter

# Colour augmentation applied to training images only. It is kept under a
# private name so it cannot clash with any other `jitter` object.
_train_jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)


def train_transforms(example_batch):
    """Prepare a training batch: augment the images, run the image processor, attach labels.

    Args:
        example_batch: batch dict with an `image` entry and a `labels` entry
            (the column name this notebook gives the targets when it renames
            `label` to `labels`).

    Returns:
        The output of `image_processor` for the augmented images, with a
        `labels` entry copied from the batch.
    """
    images = [_train_jitter(x.convert("RGB")) for x in example_batch["image"]]
    # The processor only receives the images; labels are attached afterwards,
    # as `preprocess_function` does elsewhere in this notebook.
    inputs = image_processor(images)
    inputs["labels"] = example_batch["labels"]
    return inputs


def val_transforms(example_batch):
    """Prepare a validation batch: run the image processor (no augmentation), attach labels.

    Args:
        example_batch: batch dict with an `image` entry and a `labels` entry.

    Returns:
        The output of `image_processor` for the images, with a `labels`
        entry copied from the batch.
    """
    images = [x.convert("RGB") for x in example_batch["image"]]
    inputs = image_processor(images)
    inputs["labels"] = example_batch["labels"]
    return inputs



In [None]:


# Preprocess the dataset
def preprocess_function(examples):
    """Run the image processor over a batch and carry the labels along.

    Args:
        examples: batch dict with an `image` entry (objects supporting
            `.convert("RGB")`; PIL images are assumed - TODO confirm) and
            a `labels` entry.

    Returns:
        The output of `image_processor` (requested as PyTorch tensors) with
        a `labels` entry copied from the batch.
    """
    # Force three channels so every image has the same layout, as the
    # `transforms` helper in this notebook also does.
    images = [image.convert("RGB") for image in examples["image"]]
    inputs = image_processor(images, return_tensors="pt")
    inputs["labels"] = examples["labels"]
    return inputs


# Never ask for more worker processes than this machine has CPUs; the
# original ceiling of 100 workers is kept as the upper bound.
_map_workers = min(100, os.cpu_count() or 1)

processed_dataset = train_val_dataset.map(preprocess_function, batched=True, num_proc=_map_workers)


In [None]:
# Display the result of mapping `preprocess_function` over `train_val_dataset`.
processed_dataset 

In [None]:
# Display the classification model loaded with `from_pretrained`.
model 

In [None]:
# Display the image processor loaded with `from_pretrained`.
image_processor 

In [None]:
import numpy as np
import evaluate
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Accuracy metric object used by `compute_metrics`.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score a (predictions, labels) pair with the accuracy metric.

    Args:
        eval_pred: a two-item iterable: per-class scores and reference labels.

    Returns:
        Whatever `metric.compute` returns for the arg-max class ids versus
        the references.
    """
    scores, references = eval_pred
    # Pick the highest-scoring class along axis 1 for each example.
    predicted_ids = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted_ids, references=references)

# Define the data collator
def collate_fn(examples):
    """Stack a list of examples into one batch of tensors.

    Args:
        examples: sequence of dicts, each with a `pixel_values` entry
            (anything `torch.tensor` accepts) and a `labels` entry.

    Returns:
        Dict with `pixel_values` (the per-example tensors stacked along a
        new leading dimension) and `labels` (a tensor of all labels).
    """
    per_example = tuple(torch.tensor(sample["pixel_values"]) for sample in examples)
    batch = {"pixel_values": torch.stack(per_example)}
    batch["labels"] = torch.tensor([sample["labels"] for sample in examples])
    return batch


# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    # Mixed precision is only requested when a CUDA device is present, so the
    # notebook still runs (slowly) on a CPU-only machine.
    fp16=torch.cuda.is_available(),
    # Keep every dataset column: `collate_fn` picks the fields it needs itself.
    remove_unused_columns=False,
    # NOTE(review): newer transformers releases spell this argument
    # `eval_strategy` - confirm against the installed version.
    evaluation_strategy="epoch",
    # Saving on the same schedule as evaluation is what allows
    # `load_best_model_at_end` below.
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=64,
    gradient_accumulation_steps=1,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    logging_steps=10,
    load_best_model_at_end=True,
    push_to_hub=False,
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["test"],
    tokenizer=image_processor,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)

# Train the model. The return value is kept because the following cell reads
# `train_results.metrics`; without this assignment that cell raises NameError.
train_results = trainer.train()

# Still show the training summary as the cell output.
train_results

In [None]:


# Expects `train_results` to hold the object returned by `trainer.train()`.
train_metrics = train_results.metrics

# Log the training metrics, write them to disk, then save the trainer state.
trainer.log_metrics("train", train_metrics)
trainer.save_metrics("train", train_metrics)
trainer.save_state()