In [1]:
import sys
import os
from dotenv import load_dotenv
load_dotenv()
ROOT_DIR_PATH = os.environ.get('ROOT_PATH')
sys.path.append(os.path.abspath(ROOT_DIR_PATH))  # Adds root directory to sys.path

In [2]:
from utils.data_loader import DatasetLoader
from utils.config_loader import load_config
print('loading config')
# Load config
config = load_config(f"{ROOT_DIR_PATH}/config/vit_config.yaml")

# *************  choosing the DATASET & MODEL *************

dataset_config = config["data"]['CALTECH256']
trainingConfig = config['training_dummy']

# **********************************************************

# data
DATASET = dataset_config["dataset"]
DATA_DIR =f'{ROOT_DIR_PATH}/data/{DATASET}/'
BATCH = dataset_config["batch_size"]
NUM_WORKERS = dataset_config["num_workers"]
IMAGE = dataset_config["img_size"]
NUM_CLASSES = dataset_config["num_classes"]
CHANNELS = dataset_config["channels"]
if DATASET == 'TINYIMAGENET200':
    SUBSET_ENABLED = dataset_config['subset_enabled']
    SUBSET_SIZE = dataset_config['subset_size']

# loading data
print(f'loading dataset : {DATASET}')
loader = DatasetLoader(training_config=trainingConfig,
                        dataset_name=DATASET,
                        data_dir=DATA_DIR,
                        batch_size=BATCH,
                        num_workers=NUM_WORKERS,
                        img_size=IMAGE)
train_loader, val_loader = loader.get_loaders()
print(f"Train batches: {len(train_loader)}, Validation batches: {len(val_loader)}")
print('data sanity check')
for images, labels in train_loader:
    print(f'image shape and labels shape in training data - one batch : {images.shape}, {labels.shape}')
    break

loading config
loading dataset : CALTECH256
Dataset directory /home/wd/Documents/work_stuff/ViT_REPLICATION/data/CALTECH256 already exists, zip downloaded.
Dataset directory /home/wd/Documents/work_stuff/ViT_REPLICATION/data/CALTECH256 already exists, zip downloaded.
training size  : 100
validation size : 6056
Subset contains 10 unique classes
Sample label: 0
Train batches: 1, Validation batches: 48
data sanity check
image shape and labels shape in training data - one batch : torch.Size([100, 3, 224, 224]), torch.Size([100])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from timm.models.vision_transformer import vit_tiny_patch16_224
import os
import gc
from collections import defaultdict

torch.cuda.empty_cache()
torch.cuda.ipc_collect()
gc.collect()

from timm.models.vision_transformer import vit_tiny_patch16_224

BATCH_SIZE = 32
NUM_CLASSES = 10
EPOCHS = 25
LR = 0.001
IMG_SIZE = 224

# ========== Model ==========
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = vit_tiny_patch16_224(pretrained=False, num_classes=NUM_CLASSES).to(DEVICE)

# ========== Loss & Optimizer ==========
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# ========== Train Loop ==========
for epoch in range(EPOCHS):
    model.train()
    total, correct = 0, 0
    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = 100. * correct / total
    print(f"Epoch {epoch+1:02d} | Train Acc: {acc:.2f}%")

    # stop early if we hit 100% train acc
    if acc == 100.0:
        print("Model has successfully overfitted the subset.")
        break


  from .autonotebook import tqdm as notebook_tqdm


Epoch 01 | Train Acc: 5.00%
Epoch 02 | Train Acc: 20.00%
Epoch 03 | Train Acc: 12.00%
Epoch 04 | Train Acc: 21.00%
Epoch 05 | Train Acc: 31.00%
Epoch 06 | Train Acc: 33.00%
Epoch 07 | Train Acc: 34.00%
Epoch 08 | Train Acc: 36.00%
Epoch 09 | Train Acc: 35.00%
Epoch 10 | Train Acc: 35.00%
Epoch 11 | Train Acc: 35.00%
Epoch 12 | Train Acc: 39.00%
Epoch 13 | Train Acc: 46.00%
Epoch 14 | Train Acc: 41.00%
Epoch 15 | Train Acc: 43.00%
Epoch 16 | Train Acc: 46.00%
Epoch 17 | Train Acc: 46.00%
Epoch 18 | Train Acc: 49.00%
Epoch 19 | Train Acc: 48.00%
Epoch 20 | Train Acc: 55.00%
Epoch 21 | Train Acc: 52.00%
Epoch 22 | Train Acc: 55.00%
Epoch 23 | Train Acc: 58.00%
Epoch 24 | Train Acc: 56.00%
Epoch 25 | Train Acc: 59.00%


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from timm.models.vision_transformer import vit_tiny_patch16_224
import os
import gc
from collections import defaultdict

torch.cuda.empty_cache()
torch.cuda.ipc_collect()
gc.collect()

BATCH_SIZE = 32
NUM_CLASSES = 10
EPOCHS = 25
LR = 0.001
IMG_SIZE = 224

# ========== Model ==========
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class TwoLayerCNN(nn.Module):
    def __init__(self, num_classes, img_size):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # (B, 32, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(2),  # (B, 32, 32, 32)
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # (B, 64, 32, 32)
            nn.ReLU(),
            nn.MaxPool2d(2),  # (B, 64, 16, 16)
            nn.Flatten(),  # (B, 64*16*16)
            nn.Linear(64 * (img_size // 4) * (img_size // 4), 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.model(x)
model = TwoLayerCNN(num_classes=NUM_CLASSES, img_size=IMG_SIZE).to(DEVICE)

# ========== Loss & Optimizer ==========
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# ========== Train Loop ==========
for epoch in range(EPOCHS):
    model.train()
    total, correct = 0, 0
    for images, labels in train_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = 100. * correct / total
    print(f"Epoch {epoch+1:02d} | Train Acc: {acc:.2f}%")

    # stop early if we hit 100% train acc
    if acc == 100.0:
        print("Model has successfully overfitted the subset.")
        break


  from .autonotebook import tqdm as notebook_tqdm


Epoch 01 | Train Acc: 8.00%
Epoch 02 | Train Acc: 18.00%
Epoch 03 | Train Acc: 25.00%
Epoch 04 | Train Acc: 32.00%
Epoch 05 | Train Acc: 36.00%
Epoch 06 | Train Acc: 43.00%
Epoch 07 | Train Acc: 50.00%
Epoch 08 | Train Acc: 60.00%
Epoch 09 | Train Acc: 77.00%
Epoch 10 | Train Acc: 80.00%
Epoch 11 | Train Acc: 94.00%
Epoch 12 | Train Acc: 91.00%
Epoch 13 | Train Acc: 94.00%
Epoch 14 | Train Acc: 96.00%
Epoch 15 | Train Acc: 97.00%
Epoch 16 | Train Acc: 99.00%
Epoch 17 | Train Acc: 100.00%
Model has successfully overfitted the subset.


# Mean / STD DEV

In [4]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Path to your Caltech256 training set (change if needed)
data_path = "/home/wd/Documents/work_stuff/ViT_REPLICATION/data/CALTECH256/train"

# Resize and convert to tensor (no normalization yet)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to a fixed size
    transforms.ToTensor()
])

# Load dataset
dataset = datasets.ImageFolder(root=data_path, transform=transform)
loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=2)

# Initialize sums
mean = 0.
std = 0.
nb_samples = 0.

# Accumulate mean and std across batches
for data, _ in tqdm(loader, desc="Computing mean/std"):
    batch_samples = data.size(0)
    data = data.view(batch_samples, data.size(1), -1)  # [B, C, H*W]
    mean += data.mean(2).sum(0)  # Sum of mean across batch
    std += data.std(2).sum(0)    # Sum of std across batch
    nb_samples += batch_samples

# Final average
mean /= nb_samples
std /= nb_samples

print(f"\nDataset Mean: {mean.tolist()}")
print(f"Dataset Std : {std.tolist()}")

Computing mean/std: 100%|██████████| 742/742 [00:32<00:00, 23.05it/s]


Dataset Mean: [0.5531906485557556, 0.5342377424240112, 0.5071621537208557]
Dataset Std : [0.23687367141246796, 0.2358572781085968, 0.2385127693414688]



