In [1]:
pip install transformers datasets torch accelerate evaluate torchvision

Note: you may need to restart the kernel to use updated packages.


In [1]:
from datasets import load_dataset
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer
import torch
from torchvision import transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import load_dataset, DatasetDict

# Load a small subset directly instead of loading everything.
# NOTE(review): "train[:200]" takes the first 200 rows as stored; if the source is
# ordered by label this subset may be heavily skewed towards one class — confirm.
raw_subset = load_dataset("itsLeen/deepfake_vs_real_image_detection", split="train[:200]")

# Split into train (80%) and test (20%).
# train_test_split shuffles before splitting (a plain "first 80% / last 20%" slice
# gives a biased split whenever rows are ordered) and the fixed seed keeps the split
# reproducible across kernel restarts. It returns a DatasetDict with the keys
# "train" and "test", which is what the rest of the notebook indexes into.
dataset = raw_subset.train_test_split(test_size=0.2, seed=42)

In [3]:
# Image processor shipped with the pretrained ViT checkpoint; its image_mean and
# image_std are reused by the torchvision pipeline defined later.
vit_checkpoint = "google/vit-base-patch16-224"
processor = ViTImageProcessor.from_pretrained(vit_checkpoint)

In [4]:
# Preprocessing pipeline applied to every image, in order:
#   1. resize to 224x224,
#   2. convert to a tensor,
#   3. normalise with the mean/std stored on the checkpoint's image processor.
pipeline_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
]
transform = transforms.Compose(pipeline_steps)

In [5]:
from datasets import Dataset

In [6]:
# Preprocess Function
def preprocess(example):
    """Add a model-ready ``pixel_values`` entry to a single dataset row.

    Args:
        example: One dataset row (dict-like) holding the decoded image under
            the ``"image"`` key. Assumed to be a PIL image, which is what
            ``.convert`` requires — TODO confirm for this dataset.

    Returns:
        The same row, with ``"pixel_values"`` set to the output of ``transform``.
    """
    # Force three RGB channels first: a grayscale, RGBA or palette image would
    # otherwise yield a tensor whose channel count does not match the 3-value
    # mean/std used by Normalize inside `transform`.
    example["pixel_values"] = transform(example["image"].convert("RGB"))
    return example

# Materialise the tensors once and drop the raw images, then expose only the two
# columns used for training as torch tensors.
# NOTE: this cell is not idempotent — after one run the "image" column is gone, so
# re-running it requires re-running the dataset loading cell first.
dataset = dataset.map(preprocess, remove_columns=["image"])
dataset.set_format("torch", columns=["pixel_values", "label"])

In [7]:
from transformers import ViTForImageClassification, ViTConfig

# Reuse the pretrained checkpoint's configuration, but with a 2-label head
# for binary classification.
vit_checkpoint = "google/vit-base-patch16-224"
config = ViTConfig.from_pretrained(vit_checkpoint, num_labels=2)

# The checkpoint's classifier is 1000-way while this config asks for 2 outputs, so
# ignore_mismatched_sizes=True lets the classifier weights be newly initialised
# instead of failing on the shape mismatch (see the load log below this cell).
model = ViTForImageClassification.from_pretrained(
    vit_checkpoint,
    ignore_mismatched_sizes=True,
    config=config,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
#training_args = TrainingArguments(
#    output_dir="./vit-deepfake",  
#    per_device_train_batch_size=4,  
#    per_device_eval_batch_size=4,  
#    gradient_accumulation_steps=4,  
#    num_train_epochs=3,  # Reduced for speed
#    save_strategy="epoch",
#    evaluation_strategy="epoch",
#    fp16=torch.cuda.is_available(),  
#    dataloader_num_workers=4,  # Use multiple CPU threads
#    logging_dir="./logs",
#    logging_steps=50,
#    report_to="none",
#    load_best_model_at_end=True,
#)
from transformers import TrainingArguments

# Training configuration for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./fine_tuned_vit_deepfake",  # checkpoints are written here
    per_device_train_batch_size=4,  # Adjust based on VRAM
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,  # effective train batch per device = 4 * 2 = 8
    # Mixed precision needs a CUDA device; a hard-coded True makes this cell raise
    # on CPU-only machines, so enable it only when a GPU is actually available.
    fp16=torch.cuda.is_available(),
    save_total_limit=2,  # keep at most two checkpoints on disk
    optim="adamw_torch",  # Use standard optimizer (not DeepSpeed)
    deepspeed=None,  # Explicitly disable DeepSpeed
)

In [11]:
# Trainer
# Wires the model, the training configuration and the two dataset splits together.
# The image processor is passed as `processing_class`; the older `tokenizer=`
# keyword is deprecated on Trainer and emits a FutureWarning on this call.
# NOTE: `processing_class` requires a transformers release that already provides
# it (the deprecation warning recorded below this cell indicates the installed
# version does).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class=processor,
)

  trainer = Trainer(


In [12]:
# Run the fine-tuning loop and display the value returned by train().
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'HfTrainerDeepSpeedConfig' object has no attribute 'is_zero3'".
# Execution counts skip In[8] and In[10], so this looks like stale DeepSpeed state
# left in the kernel by a since-deleted cell — presumably resolved by
# Restart Kernel -> Run All; confirm.
train_output = trainer.train()
train_output

AttributeError: 'HfTrainerDeepSpeedConfig' object has no attribute 'is_zero3'

In [None]:
# Write the fine-tuned model and the image processor into the same directory so
# both can be found together later.
save_path = "./fine_tuned_vit_deepfake"
for artifact in (model, processor):
    artifact.save_pretrained(save_path)

print(f"Model saved at: {save_path}")