In [None]:
!pip install torch torchvision transformers datasets accelerate scikit-learn


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import os
import json
import torch
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
from collections import Counter

from transformers import (
    ViTForImageClassification,
    ViTImageProcessor,
    Trainer,
    TrainingArguments,
    EvalPrediction
)

In [None]:
from transformers.trainer_utils import get_last_checkpoint
from google.colab import drive

from torchvision.datasets import ImageFolder
from torchvision import transforms
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import WeightedRandomSampler

In [None]:
# Mount Google Drive so the dataset folders below are reachable from the runtime.
drive.mount('/content/drive')

# Dataset root on Drive and one sub-directory per split.
BASE_PATH = '/content/drive/MyDrive/MultiBanFake/Dataset'
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(BASE_PATH, split_folder)
    for split_folder in ('Train', 'Validation', 'Test')
)


Mounted at /content/drive


In [None]:
# Image processor of the pretrained checkpoint; its image_mean / image_std are
# used by the normalisation step of the transforms defined further down.
processor = ViTImageProcessor.from_pretrained(
    pretrained_model_name_or_path="google/vit-base-patch16-224-in21k"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

In [None]:
# Image pipelines. Both resize to 224x224 and normalise with the processor's
# mean/std; only the training pipeline adds random augmentation in between.
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps that close both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
    ]


train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        # Random augmentation, applied to training images only.
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        *_tensor_and_normalize(),
    ]
)

# Deterministic pipeline for validation and test images.
eval_transform = transforms.Compose(
    [transforms.Resize((224, 224)), *_tensor_and_normalize()]
)

In [None]:
# Load the three splits. Training images get the augmenting pipeline, the
# validation and test images the deterministic one.
train_dataset = ImageFolder(root=TRAIN_DIR, transform=train_transform)
val_dataset   = ImageFolder(root=VAL_DIR, transform=eval_transform)
test_dataset  = ImageFolder(root=TEST_DIR, transform=eval_transform)

# Every ImageFolder builds its own class -> index mapping from its own root.
# If a split had a missing or differently named class folder, its label ids
# would silently disagree with the training ids, so fail fast instead.
assert val_dataset.class_to_idx == train_dataset.class_to_idx, (
    f"Validation classes {val_dataset.classes} differ from training classes {train_dataset.classes}"
)
assert test_dataset.class_to_idx == train_dataset.class_to_idx, (
    f"Test classes {test_dataset.classes} differ from training classes {train_dataset.classes}"
)

# Label maps handed to the model config (index <-> class folder name).
label_names = train_dataset.classes
id2label = dict(enumerate(label_names))
label2id = {label: i for i, label in id2label.items()}

class CustomDataset(torch.utils.data.Dataset):
    """Adapter that re-packages ``(image, label)`` items as keyword dicts.

    Each item of the wrapped dataset is unpacked into exactly two values and
    returned as ``{"pixel_values": image, "labels": label}``.
    """

    def __init__(self, dataset):
        """Keep a reference to the wrapped dataset; nothing is copied."""
        self.dataset = dataset

    def __len__(self):
        """Number of items in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return item ``idx`` of the wrapped dataset as a two-key dict."""
        pixels, target = self.dataset[idx]
        return dict(pixel_values=pixels, labels=target)

# Wrap every split so its items come out as {"pixel_values", "labels"} dicts.
train_ds, val_ds, test_ds = map(
    CustomDataset, (train_dataset, val_dataset, test_dataset)
)


In [None]:
# Balanced class weights, expanded to one weight per training sample, drive a
# sampler that draws len(train) indices with replacement.
# NOTE(review): no later cell visible here passes `sampler` to the Trainer, so
# it appears to have no effect on training as written — confirm.
train_targets = train_dataset.targets
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_targets),
    y=train_targets,
)
# Look up each sample's weight by its label id.
sample_weights = list(map(class_weights.__getitem__, train_targets))
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

# Metrics callback handed to the Trainer.
def compute_metrics(p: EvalPrediction):
    """Compute accuracy and weighted F1 from an evaluation pass.

    Args:
        p: Predictions and label ids. ``p.predictions`` holds the logits, or a
            tuple whose first element is the logits.

    Returns:
        Dict with ``"accuracy"`` and ``"f1"`` (support-weighted F1).

    Side effects:
        Prints the full per-class report as indented JSON.
    """
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(logits, axis=1)
    labels = p.label_ids
    # Naming every class id keeps `target_names` aligned even when a class is
    # absent from this evaluation set (otherwise sklearn raises on the length
    # mismatch). zero_division=0 reports 0 for empty classes without warning.
    report = classification_report(
        labels,
        preds,
        labels=list(range(len(label_names))),
        target_names=label_names,
        output_dict=True,
        zero_division=0,
    )
    print(json.dumps(report, indent=2))
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": report['weighted avg']['f1-score'],
    }

In [None]:
# Pretrained ViT checkpoint with a classification head sized to this dataset's
# classes; the label maps are stored in the model config.
model = ViTForImageClassification.from_pretrained(
    pretrained_model_name_or_path="google/vit-base-patch16-224-in21k",
    label2id=label2id,
    id2label=id2label,
    num_labels=len(id2label),
)
# Turn on gradient checkpointing for the model before training.
model.gradient_checkpointing_enable()

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training configuration.
training_args = TrainingArguments(
    output_dir="./vit_mresults",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # 4 samples x 4 accumulation steps = 16 samples per optimizer step per device.
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-5,
    # Warm up over the first 10% of the schedule. A fixed 500-step warmup does
    # not scale with run length and can swallow most of a short 3-epoch run.
    warmup_ratio=0.1,
    weight_decay=0.01,
    # `do_eval=True` alone does not schedule evaluation during training; an
    # explicit strategy is needed for eval_dataset / compute_metrics to run.
    # (Releases before transformers 4.41 spell this `evaluation_strategy`.)
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    logging_dir="./logs",
    logging_steps=10,
    # Mixed precision needs a CUDA device; fall back to fp32 on a CPU runtime.
    fp16=torch.cuda.is_available(),
    do_train=True,
    do_eval=True,
    label_smoothing_factor=0.1,
)

In [None]:
# Assemble the Trainer. The image processor goes in via `processing_class`, the
# replacement for the deprecated `tokenizer` keyword, so it is still saved next
# to every checkpoint. `data_collator` is left at its default (None).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=processor,
    compute_metrics=compute_metrics,
)


  trainer = Trainer(


In [None]:
# Train, resuming from the newest checkpoint in the output directory if any.
# The directory is read from training_args so it cannot drift from the
# Trainer's setting, and a missing directory is treated as "no checkpoint"
# because get_last_checkpoint lists the folder and fails when it is absent.
checkpoint = (
    get_last_checkpoint(training_args.output_dir)
    if os.path.isdir(training_args.output_dir)
    else None
)
if checkpoint:
    print(f"Resuming from checkpoint: {checkpoint}")
    trainer.train(resume_from_checkpoint=checkpoint)
else:
    print("Starting training from scratch...")
    trainer.train()

Starting training from scratch...


Step,Training Loss
10,0.6926
20,0.6871
30,0.6863
40,0.6905
50,0.693
60,0.6829
70,0.6947
80,0.6912
90,0.7032
100,0.6994


Step,Training Loss
10,0.6926
20,0.6871
30,0.6863
40,0.6905
50,0.693
60,0.6829
70,0.6947
80,0.6912
90,0.7032
100,0.6994


In [None]:
# Save the final model together with its image processor, so the folder can be
# reloaded on its own (weights + config + preprocessor config).
# NOTE(review): /content is local to the runtime — copy the folder to Drive if
# it must outlive the session.
final_model_dir = "/content/vit_final_model"
model.save_pretrained(final_model_dir)
processor.save_pretrained(final_model_dir)

In [None]:
# Evaluate on the test split and persist the results.
preds = trainer.predict(test_ds)
pred_labels = np.argmax(preds.predictions, axis=1)
true_labels = preds.label_ids

# .tolist() turns the numpy integers into plain ints so the printed dict reads
# {1: 602, 0: 355} rather than {np.int64(1): 602, ...}.
pred_counts = dict(Counter(pred_labels.tolist()))
print("\nüîç Predicted Class Distribution:", pred_counts)

# Human-readable text report for the notebook output.
report = classification_report(true_labels, pred_labels, target_names=label_names, digits=4)
print("\nüìä Fake vs Non-Fake Class Report:\n", report)

# The text report above would be written to JSON as one opaque string; build
# the structured dict form for the file so it can be parsed downstream.
report_dict = classification_report(
    true_labels, pred_labels, target_names=label_names, output_dict=True
)
with open("vit_classification_report.json", "w") as f:
    json.dump(report_dict, f, indent=4)

# Class probabilities (softmax over the logits), saved with the true labels.
probs = torch.nn.functional.softmax(torch.tensor(preds.predictions), dim=-1).numpy().tolist()

with open("vit_stack_probs.json", "w") as f:
    json.dump({
        "vit_probs": probs,
        "true_labels": true_labels.tolist()
    }, f)

print("\n‚úÖ Saved model, classification report, and stacking probabilities.")

{
  "Fake": {
    "precision": 0.6676056338028169,
    "recall": 0.4968553459119497,
    "f1-score": 0.5697115384615384,
    "support": 477.0
  },
  "Real": {
    "precision": 0.6013289036544851,
    "recall": 0.7541666666666667,
    "f1-score": 0.6691312384473198,
    "support": 480.0
  },
  "accuracy": 0.625914315569488,
  "macro avg": {
    "precision": 0.634467268728651,
    "recall": 0.6255110062893081,
    "f1-score": 0.6194213884544291,
    "support": 957.0
  },
  "weighted avg": {
    "precision": 0.634363386706475,
    "recall": 0.625914315569488,
    "f1-score": 0.6195772187051904,
    "support": 957.0
  }
}

üîç Predicted Class Distribution: {np.int64(1): 602, np.int64(0): 355}

üìä Fake vs Non-Fake Class Report:
               precision    recall  f1-score   support

        Fake     0.6676    0.4969    0.5697       477
        Real     0.6013    0.7542    0.6691       480

    accuracy                         0.6259       957
   macro avg     0.6345    0.6255    0.6194  