In [13]:
# Creating a Dataset Class for PyTorch (No Augmentation)

import os
from PIL import Image
from torch.utils.data import Dataset
from transformers import ViTImageProcessor

class FraudDataset(Dataset):
    """Image-folder dataset that runs each image through an image processor.

    ``image_dir`` is expected to contain one sub-directory per class. Every file
    inside a class directory whose name ends in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) becomes one sample, labelled ``label_map[<directory name>]``.

    Args:
        image_dir: Root directory holding one sub-directory per class.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``;
            its result must expose ``.items()`` whose values support ``.squeeze(0)``.
        label_map: Mapping from class directory name to label.
        transform: Accepted for interface compatibility; it is not applied.

    Raises:
        KeyError: If a class directory containing at least one image is not a
            key of ``label_map``.
    """

    def __init__(self, image_dir, processor, label_map, transform=None):
        self.image_paths = []
        self.labels = []
        self.processor = processor

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order identical across machines and runs.
        for label_name in sorted(os.listdir(image_dir)):
            class_dir = os.path.join(image_dir, label_name)
            # Stray files next to the class folders (.DS_Store, README, ...)
            # would make the inner os.listdir raise NotADirectoryError.
            if not os.path.isdir(class_dir):
                continue
            for fname in sorted(os.listdir(class_dir)):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                    self.image_paths.append(os.path.join(class_dir, fname))
                    self.labels.append(label_map[label_name])

    def __len__(self):
        """Return the number of image files discovered at construction time."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the processor output plus its label.

        Returns:
            dict: every key produced by the processor, with the leading
            dimension removed via ``squeeze(0)``, plus ``"labels"`` holding
            the label stored for this sample.
        """
        # convert() returns a new, fully loaded image, so the file handle can be
        # released right away instead of lingering until garbage collection.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        label = self.labels[idx]
        processed = self.processor(images=image, return_tensors="pt")
        item = {key: val.squeeze(0) for key, val in processed.items()}
        item["labels"] = label
        return item


In [14]:
# Creating a Dataset Class with Augmentation for Fraud Images

from torchvision import transforms
from PIL import Image
import random

# Augmentation pipeline: random crop/flip/rotation/colour changes, then
# conversion of the image to a tensor as the final step.
fraud_augmentation = transforms.Compose([
    # Random crop covering 80-100% of the image area, resized to 224x224
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    # Mirror left/right with the default probability
    transforms.RandomHorizontalFlip(p=0.5),
    # Rotate by a random angle in [-10, 10] degrees
    transforms.RandomRotation(degrees=10),
    # Mild photometric jitter
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
    ),
    transforms.ToTensor(),
])

class FraudDatasetWithAugmentation(Dataset):
    """Image-folder dataset that can augment samples labelled ``1``.

    ``image_dir`` is expected to contain one sub-directory per class. Every file
    inside a class directory whose name ends in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) becomes one sample, labelled ``label_map[<directory name>]``.

    Args:
        image_dir: Root directory holding one sub-directory per class.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            (plus ``do_rescale=False`` for augmented samples); its result must
            expose ``.items()`` whose values support ``.squeeze(0)``.
        label_map: Mapping from class directory name to label.
        augment_fraud: When true, samples whose label equals ``1`` are passed
            through the module-level ``fraud_augmentation`` pipeline before the
            processor. Meant to be enabled for the training split only.

    Raises:
        KeyError: If a class directory containing at least one image is not a
            key of ``label_map``.
    """

    def __init__(self, image_dir, processor, label_map, augment_fraud=False):
        self.image_paths = []
        self.labels = []
        self.processor = processor
        self.augment_fraud = augment_fraud

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order identical across machines and runs.
        for label_name in sorted(os.listdir(image_dir)):
            class_dir = os.path.join(image_dir, label_name)
            # Stray files next to the class folders (.DS_Store, README, ...)
            # would make the inner os.listdir raise NotADirectoryError.
            if not os.path.isdir(class_dir):
                continue
            for fname in sorted(os.listdir(class_dir)):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                    self.image_paths.append(os.path.join(class_dir, fname))
                    self.labels.append(label_map[label_name])

    def __len__(self):
        """Return the number of image files discovered at construction time."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx``, optionally augment it, and run the processor.

        Returns:
            dict: every key produced by the processor, with the leading
            dimension removed via ``squeeze(0)``, plus ``"labels"`` holding
            the label stored for this sample.
        """
        path = self.image_paths[idx]
        label = self.labels[idx]
        # convert() returns a new, fully loaded image, so the file handle can be
        # released right away instead of lingering until garbage collection.
        with Image.open(path) as img:
            image = img.convert("RGB")

        # Only augment Fraud images during training (the fraud label is
        # hard-coded as 1 here).
        if self.augment_fraud and label == 1:
            image = fraud_augmentation(image)

            # The augmentation pipeline ends in ToTensor, so its output is a
            # tensor rather than a PIL image; do_rescale=False asks the
            # processor not to rescale the pixel values a second time.
            processed = self.processor(images=image, return_tensors="pt", do_rescale=False)
        else:
            processed = self.processor(images=image, return_tensors="pt")

        item = {key: val.squeeze(0) for key, val in processed.items()}
        item["labels"] = label
        return item

In [15]:
# Class Weights for Imbalanced Dataset

from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch
import torch.nn as nn

# "balanced" class weights computed from hard-coded class counts:
# 4000 samples of class 0 followed by 160 samples of class 1.
class_weights = compute_class_weight(
    y=np.repeat([0, 1], [4000, 160]),
    classes=np.arange(2),
    class_weight='balanced',
)

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Cross-entropy loss that scales each class's contribution by its weight.
weights = torch.tensor(class_weights, dtype=torch.float, device=device)
criterion = nn.CrossEntropyLoss(weight=weights)

Using device: cpu


In [16]:
# Loading Data

from torch.utils.data import DataLoader
from transformers import ViTForImageClassification

# Class directory name -> integer label
label_map = {
    "Non-Fraud": 0,
    "Fraud": 1,
}

# Pre-processing that matches the pretrained ViT checkpoint
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")

# One dataset per split (neither uses augmentation)
train_dataset = FraudDataset(image_dir="data/train", processor=processor, label_map=label_map)
val_dataset = FraudDataset(image_dir="data/val", processor=processor, label_map=label_map)

# Options shared by both loaders: single-process loading, no pinned memory.
_loader_options = dict(batch_size=32, num_workers=0, pin_memory=False)

# Shuffle only the training split; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

In [17]:
import torch
from torch import nn
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from transformers import ViTForImageClassification
import time

# Configurations
SEED = 42
num_epochs = 10
batch_size = 32  # NOTE: not read in this cell; the DataLoaders were built with a literal 32
max_grad_norm = 1.0

# Seed the RNGs before the model is created. The classifier head is newly
# (randomly) initialised because its shape differs from the checkpoint, and the
# training DataLoader shuffles, so without a seed every Restart & Run All gives
# different numbers. GPU kernels may still be non-deterministic.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Loading Model: pretrained ViT whose classification head is resized to
# len(label_map) outputs (hence ignore_mismatched_sizes=True).
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=len(label_map),
    id2label={v: k for k, v in label_map.items()},
    label2id=label_map,
    ignore_mismatched_sizes=True
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01, eps=1e-8)
# Class-weighted loss; `weights` comes from the class-weights cell.
criterion = nn.CrossEntropyLoss(weight=weights)

# Learning rate scheduler with warmup: the scheduler is stepped once per batch,
# so the schedule length is batches-per-epoch times epochs.
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=100,
    num_training_steps=total_steps
)

# Training Loop state
train_losses = []
val_losses = []
# NOTE: best_val_auc / patience / patience_counter are initialised here but the
# training loop below does not read or update them.
best_val_auc = 0.0
patience = 3
patience_counter = 0

print(f"Training for {num_epochs} epochs")
print(f"Total training steps: {total_steps}")
print(f"Class weights - Non-fraud: {weights[0]:.3f}, Fraud: {weights[1]:.3f}")

# Fine-tuning loop: for each epoch, run one pass over train_loader, update the
# model after every batch, and append the epoch's mean batch loss to
# train_losses. No validation or early stopping happens inside this loop.
# `epoch` stays defined after the loop and is read by later cells.
for epoch in range(num_epochs):
    epoch_start_time = time.time()  # recorded but not read in this loop

    # ===== TRAINING PHASE =====
    model.train()
    total_loss = 0
    # Predictions/labels are collected for the epoch but not used in this loop.
    train_preds = []
    train_labels = []

    train_pbar = tqdm(
        train_loader, 
        desc=f"Epoch {epoch+1}/{num_epochs} [TRAIN]",
        leave=True,
        ncols=100,
        unit="batch"
    )

    batch_losses = []
    for batch_idx, batch in enumerate(train_pbar):
        batch_start_time = time.time()

        # Everything except "labels" is forwarded to the model as keyword args.
        inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
        labels = batch["labels"].to(device)

        optimizer.zero_grad()
        outputs = model(**inputs)
        # Loss is computed here with the class-weighted criterion, from logits.
        loss = criterion(outputs.logits, labels)
        loss.backward()

        # Clip the global gradient norm before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        # The LR schedule advances once per batch (total_steps counts batches).
        scheduler.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        batch_losses.append(batch_loss)

        with torch.no_grad():
            probs = torch.softmax(outputs.logits, dim=-1)
            preds = torch.argmax(probs, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

        # Wall-clock time spent on this batch (no ETA is computed here)
        batch_time = time.time() - batch_start_time

        # Update progress bar with detailed info
        train_pbar.set_postfix({
            'Loss': f"{batch_loss:.4f}",
            'Avg': f"{np.mean(batch_losses[-10:]):.4f}",  # Running average of last 10
            'LR': f"{scheduler.get_last_lr()[0]:.1e}",
            'Time': f"{batch_time:.2f}s"
        })

    # Mean of the per-batch losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_loss:.4f}")

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using device: cpu
Training for 10 epochs
Total training steps: 1300
Class weights - Non-fraud: 0.520, Fraud: 13.000


Epoch 1/10 [TRAIN]: 100%|█| 130/130 [17:37<00:00,  8.14s/batch, Loss=0.1273, Avg=0.7869, LR=1.9e-05,


Epoch 1/10, Training Loss: 0.6392


Epoch 2/10 [TRAIN]: 100%|█| 130/130 [11:53<00:00,  5.49s/batch, Loss=0.0106, Avg=0.0279, LR=1.7e-05,


Epoch 2/10, Training Loss: 0.2799


Epoch 3/10 [TRAIN]: 100%|█| 130/130 [10:52<00:00,  5.02s/batch, Loss=0.0040, Avg=0.0475, LR=1.5e-05,


Epoch 3/10, Training Loss: 0.0601


Epoch 4/10 [TRAIN]: 100%|█| 130/130 [11:24<00:00,  5.27s/batch, Loss=0.0014, Avg=0.0057, LR=1.3e-05,


Epoch 4/10, Training Loss: 0.0155


Epoch 5/10 [TRAIN]: 100%|█| 130/130 [11:30<00:00,  5.31s/batch, Loss=0.0005, Avg=0.0007, LR=1.1e-05,


Epoch 5/10, Training Loss: 0.0022


Epoch 6/10 [TRAIN]: 100%|█| 130/130 [11:20<00:00,  5.23s/batch, Loss=0.0003, Avg=0.0004, LR=8.7e-06,


Epoch 6/10, Training Loss: 0.0006


Epoch 7/10 [TRAIN]: 100%|█| 130/130 [14:03<00:00,  6.49s/batch, Loss=0.0002, Avg=0.0004, LR=6.5e-06,


Epoch 7/10, Training Loss: 0.0004


Epoch 8/10 [TRAIN]: 100%|█| 130/130 [14:58<00:00,  6.91s/batch, Loss=0.0001, Avg=0.0003, LR=4.3e-06,


Epoch 8/10, Training Loss: 0.0003


Epoch 9/10 [TRAIN]: 100%|█| 130/130 [11:11<00:00,  5.16s/batch, Loss=0.0002, Avg=0.0003, LR=2.2e-06,


Epoch 9/10, Training Loss: 0.0003


Epoch 10/10 [TRAIN]: 100%|█| 130/130 [11:48<00:00,  5.45s/batch, Loss=0.0003, Avg=0.0003, LR=0.0e+00

Epoch 10/10, Training Loss: 0.0003





In [19]:
# Save model checkpoint: parameters only (state_dict), no optimizer state.
torch.save(obj=model.state_dict(), f="vit_base_pre_tuning.pth")
print("Model saved as vit_base_pre_tuning.pth")

Model saved as vit_base_pre_tuning.pth


In [20]:
# Validation Loop
# Runs once, after training has finished; `epoch` is the value left over from
# the training loop and is only used for the progress/print labels.

model.eval()
total_val_loss = 0
val_preds = []
val_labels = []
val_probs = []

val_pbar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation", leave=False)

with torch.no_grad():
    # Iterate the tqdm wrapper itself, not val_loader: otherwise the bar never
    # advances and is never closed (only its postfix would be updated).
    for batch in val_pbar:
        # Everything except "labels" is forwarded to the model as keyword args.
        inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
        labels = batch["labels"].to(device)

        outputs = model(**inputs)
        loss = criterion(outputs.logits, labels)
        total_val_loss += loss.item()

        probs = torch.softmax(outputs.logits, dim=-1)
        preds = torch.argmax(probs, dim=-1)

        val_preds.extend(preds.cpu().numpy())
        val_labels.extend(labels.cpu().numpy())
        val_probs.extend(probs[:, 1].cpu().numpy())  # Probability of the positive class

        # Update progress bar
        val_pbar.set_postfix({
            'val_loss': f"{loss.item():.4f}"
        })

# Mean of the per-batch validation losses.
avg_val_loss = total_val_loss / len(val_loader)
val_losses.append(avg_val_loss)
print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}")

                                                                                 

Epoch 10/10, Validation Loss: 0.0861


In [21]:
# Calculate AUC from the predicted probability of class 1
val_auc = roc_auc_score(val_labels, val_probs)

print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {avg_val_loss:.4f}, Validation AUC: {val_auc:.4f}")

# Pair each class name with its label explicitly. Relying on the insertion
# order of label_map would mislabel the report rows if the dict were ever
# written in a different order (e.g. {"Fraud": 1, "Non-Fraud": 0}).
report_labels = sorted(label_map.values())
report_names = sorted(label_map, key=label_map.get)

print("\nClassification Report:")
print(" ")
print(classification_report(
    val_labels,
    val_preds,
    labels=report_labels,
    target_names=report_names,
))

Epoch 10/10, Validation Loss: 0.0861, Validation AUC: 0.9840

Classification Report:
 
              precision    recall  f1-score   support

   Non-Fraud       0.99      1.00      0.99      1000
       Fraud       0.91      0.80      0.85        40

    accuracy                           0.99      1040
   macro avg       0.95      0.90      0.92      1040
weighted avg       0.99      0.99      0.99      1040



In [22]:
# Model Saving (Approach 1)
# Store the AUC that was actually measured for the saved weights. best_val_auc
# is initialised to 0.0 and is not updated by any cell visible here, so saving
# it would record 0.0. float() keeps the checkpoint free of NumPy scalar
# objects, which restricted (weights-only) loading may reject.

torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'val_auc': float(val_auc)
}, "best_model.pth")