# One Epoch

In [33]:
import sys
import os
from dotenv import load_dotenv
load_dotenv()
ROOT_DIR_PATH = os.environ.get('ROOT_PATH')
sys.path.append(os.path.abspath(ROOT_DIR_PATH))  # Adds root directory to sys.path

from utils.config_loader import load_config
from utils.data_loader import DatasetLoader

import torch
from model.vit import VisionTransformerSmall
from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim

In [3]:
# Load config
config = load_config(f"{ROOT_DIR_PATH}/config/vit_config.yaml")
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [10]:
### Data
print('loading training testing data')
# loading cifar100
cifar100_config = config["data"]['CIFAR100']
DATASET = cifar100_config["dataset"]
DATA_DIR = cifar100_config["data_path"]
BATCH = cifar100_config["batch_size"]
NUM_WORKERS = cifar100_config["num_workers"]
IMAGE = cifar100_config["img_size"]

# loading data
print(f'loading dataset : {DATASET}')
loader = DatasetLoader(dataset_name=DATASET,
                        data_dir=DATA_DIR,
                        batch_size=BATCH,
                        num_workers=NUM_WORKERS,
                        img_size=IMAGE)
train_loader, val_loader = loader.get_loaders()
print(f"Count of Train batches: {len(train_loader)}, Count of Validation batches: {len(val_loader)}")
print(f'Count of Train Images in all {len(train_loader)} batches, {len(train_loader.dataset)}')
print(f'Count of Validation Images in all {len(val_loader)} batches, {len(val_loader.dataset)}')
print('data sanity check')
for images, labels in train_loader:
    print('Training Data : Bacth 1')
    print(f'Image Tensor shape and Label Tensor shape : {images.shape}, {labels.shape}')
    print(f'There are {images.shape[0]} images in this Batch and total of {labels.shape[0]} Labels corresponding to those images.')
    break


loading training testing data
loading dataset : CIFAR100
Count of Train batches: 782, Count of Validation batches: 157
Count of Train Images in all 782 batches, 50000
Count of Validation Images in all 157 batches, 10000
data sanity check
Training Data : Bacth 1
Image Tensor shape and Label Tensor shape : torch.Size([64, 3, 32, 32]), torch.Size([64])
There are 64 images in this Batch and total of 64 Labels corresponding to those images.


In [34]:
### Model
modelConfig = config["model"]
vitSmall_config = modelConfig['VIT_SMALL']
MODEL_NAME = vitSmall_config["name"]
NUM_CLASSES = vitSmall_config["num_classes"]

# training config
trainingConfig = config['training']
LEARNING_RATE = trainingConfig['lr']
EPOCHS = trainingConfig['epochs']
WEIGHT_DECAY = trainingConfig['weight_decay']
# # mixup config
# mixupConig = trainingConfig['mixup']
# MIXUP_ALPHA = mixupConig["mixup_alpha"]
# CUTMIX_ALPHA = mixupConig["cutmix_alpha"]
# LABEL_SMOOTHENING = mixupConig["label_smoothing"]
# USE_MIXUP = mixupConig["enabled"]


In [15]:
print(config['model']['VIT_SMALL'])
model = VisionTransformerSmall(config).to(device)

{'name': 'vit_small', 'img_size': 32, 'patch_size': 4, 'in_channels': 3, 'emb_size': 128, 'depth': 4, 'num_heads': 2, 'mlp_ratio': 2.0, 'num_classes': 100, 'dropout': 0.4}


## train

In [16]:
model.train()
running_loss = 0.0
correct = 0
total = 0

In [40]:
progress_bar = tqdm(train_loader, desc="Training", leave=True)

Training:   0%|          | 0/782 [00:00<?, ?it/s]

In [41]:
len(progress_bar)

782

In [31]:
mixup_fn = None

In [None]:
for  inputs, targets in progress_bar:
    print(f'input shape : {inputs.shape}, taget_shape : {targets.shape}')
    print(targets.ndim)


Training:   0%|          | 0/782 [02:49<?, ?it/s]

input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1





In [37]:
inputs.shape

torch.Size([64, 3, 32, 32])

In [42]:
model = VisionTransformerSmall(config).to(device)

train_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
val_criterion = train_criterion  # same
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

for  inputs, targets in progress_bar:
    print(f'input shape : {inputs.shape}, taget_shape : {targets.shape}')
    print(targets.ndim)
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = train_criterion(outputs, targets)
    
    break

Training:   0%|          | 0/782 [00:15<?, ?it/s]

input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1





In [47]:
inputs.shape

torch.Size([64, 3, 32, 32])

In [45]:
outputs.shape

torch.Size([64, 100])

In [51]:
_, predicted = outputs.max(1)
predicted.shape

torch.Size([64])

In [52]:
predicted.eq(targets)

tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False], device='cuda:0')

In [55]:
predicted.eq(targets).sum()

tensor(0, device='cuda:0')

In [56]:
loss

tensor(4.7559, device='cuda:0', grad_fn=<AddBackward0>)

In [61]:
model = VisionTransformerSmall(config).to(device)

train_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
val_criterion = train_criterion  # same
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

for  inputs, targets in progress_bar:
    print(f'input shape : {inputs.shape}, taget_shape : {targets.shape}')
    print(targets.ndim)
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = train_criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    running_loss += loss.item() * inputs.size(0)
    
    _, predicted = outputs.max(1)
    correct += predicted.eq(targets).sum().item()
    total += targets.size(0)
    # Update progress bar with metrics
    if total > 0:
        # these are losses, total image size, corrects - so far since the beginnning.
        avg_loss = running_loss / total
        accuracy = 100. * correct / total
        progress_bar.set_postfix({
            "Loss": f"{avg_loss:.4f}",
            "Acc": f"{accuracy:.2f}%"
        })
    else : raise Exception(f'Expected non-zero batch size, but got 0 targets. Check if the dataset is empty or DataLoader is misconfigured.')


input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape : torch.Size([64, 3, 32, 32]), taget_shape : torch.Size([64])
1
input shape 

In [62]:
avg_loss

4.410562469787598

In [63]:
accuracy

3.496

### fresh 

In [9]:
def train_one_epoch(model, loader, criterion, optimizer, device, mixup_fn=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(loader, desc="Training", leave=True)
    for  inputs, targets in progress_bar:
        #print(f'input shape : {inputs.shape}, taget_shape : {targets.shape}, target dim : {targets.ndim}')
        inputs, targets = inputs.to(device), targets.to(device)
        if mixup_fn is not None:
            inputs, targets = mixup_fn(inputs, targets)

        if targets.ndim == 2:
            targets = targets.type_as(inputs)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

        if targets.ndim == 2:
            # MixUp with soft labels
            _, predicted = outputs.max(1)
            _, true_classes = targets.max(1)  # Take argmax of soft labels as true class
            correct += predicted.eq(true_classes).sum().item()
            total += targets.size(0)
        else :
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            total += targets.size(0)

        # Update progress bar with metrics
        if total > 0:
            avg_loss = running_loss / total
            accuracy = 100. * correct / total
            progress_bar.set_postfix({
                "Loss": f"{avg_loss:.4f}",
                "Acc": f"{accuracy:.2f}%"
            })

        else : raise Exception(f'Expected non-zero batch size, but got 0 targets. Check if the dataset is empty or DataLoader is misconfigured.')

    
    return avg_loss, accuracy

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(loader, desc="Validation", leave=True)
    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # If labels are soft (e.g., using BCEWithLogitsLoss), convert to float
            if labels.ndim == 2:
                labels = labels.type_as(inputs)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            # Compute accuracy
            _, predicted = outputs.max(1)

            if labels.ndim == 2:
                # Soft labels → convert to class index
                _, true_classes = labels.max(1)
                correct += predicted.eq(true_classes).sum().item()

            else:
                # Hard labels
                correct += predicted.eq(labels).sum().item()


            total += labels.size(0)

            # Avoid division by zero on first step
            if total > 0:
                avg_loss = running_loss / total
                accuracy = 100. * correct / total

                progress_bar.set_postfix({
                    "Loss": f"{avg_loss:.4f}",
                    "Acc": f"{accuracy:.2f}%"
                })
                
    return avg_loss, accuracy

In [2]:
import sys
import os
from dotenv import load_dotenv
load_dotenv()
ROOT_DIR_PATH = os.environ.get('ROOT_PATH')
sys.path.append(os.path.abspath(ROOT_DIR_PATH))  # Adds root directory to sys.path

from utils.config_loader import load_config
from utils.data_loader import DatasetLoader

import torch
from model.vit import VisionTransformerSmall
from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim
# Load config
config = load_config(f"{ROOT_DIR_PATH}/config/vit_config.yaml")
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

### Data
print('loading training testing data')
# loading cifar100
cifar100_config = config["data"]['CIFAR100']
DATASET = cifar100_config["dataset"]
DATA_DIR = cifar100_config["data_path"]
BATCH = cifar100_config["batch_size"]
NUM_WORKERS = cifar100_config["num_workers"]
IMAGE = cifar100_config["img_size"]

# loading data
print(f'loading dataset : {DATASET}')
loader = DatasetLoader(dataset_name=DATASET,
                        data_dir=DATA_DIR,
                        batch_size=BATCH,
                        num_workers=NUM_WORKERS,
                        img_size=IMAGE)
train_loader, val_loader = loader.get_loaders()
print(f"Count of Train batches: {len(train_loader)}, Count of Validation batches: {len(val_loader)}")
print(f'Count of Train Images in all {len(train_loader)} batches, {len(train_loader.dataset)}')
print(f'Count of Validation Images in all {len(val_loader)} batches, {len(val_loader.dataset)}')
print('data sanity check')
for images, labels in train_loader:
    print('Training Data : Bacth 1')
    print(f'Image Tensor shape and Label Tensor shape : {images.shape}, {labels.shape}')
    print(f'There are {images.shape[0]} images in this Batch and total of {labels.shape[0]} Labels corresponding to those images.')
    break

### Model
modelConfig = config["model"]
vitSmall_config = modelConfig['VIT_SMALL']
MODEL_NAME = vitSmall_config["name"]
NUM_CLASSES = vitSmall_config["num_classes"]

# training config
trainingConfig = config['training']
LEARNING_RATE = trainingConfig['lr']
EPOCHS = trainingConfig['epochs']
WEIGHT_DECAY = trainingConfig['weight_decay']
# # mixup config
# mixupConig = trainingConfig['mixup']
# MIXUP_ALPHA = mixupConig["mixup_alpha"]
# CUTMIX_ALPHA = mixupConig["cutmix_alpha"]
# LABEL_SMOOTHENING = mixupConig["label_smoothing"]
# USE_MIXUP = mixupConig["enabled"]


Using device: cuda
loading training testing data
loading dataset : CIFAR100
Count of Train batches: 782, Count of Validation batches: 157
Count of Train Images in all 782 batches, 50000
Count of Validation Images in all 157 batches, 10000
data sanity check
Training Data : Bacth 1
Image Tensor shape and Label Tensor shape : torch.Size([64, 3, 32, 32]), torch.Size([64])
There are 64 images in this Batch and total of 64 Labels corresponding to those images.


In [10]:
model = VisionTransformerSmall(config).to(device)

train_criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
val_criterion = train_criterion  # same
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

mixup_fn = None

train_loss, train_acc = train_one_epoch(model, train_loader, train_criterion, optimizer, device, mixup_fn=mixup_fn)
val_loss, val_acc = validate(model, val_loader, val_criterion, device)

Training: 100%|██████████| 782/782 [00:17<00:00, 44.83it/s, Loss=4.4036, Acc=3.70%]
Validation: 100%|██████████| 157/157 [00:01<00:00, 108.79it/s, Loss=4.3701, Acc=3.69%]


# MixUp / CutMix

In [1]:
import sys
import os
from dotenv import load_dotenv
load_dotenv()
ROOT_DIR_PATH = os.environ.get('ROOT_PATH')
sys.path.append(os.path.abspath(ROOT_DIR_PATH))  # Adds root directory to sys.path

from utils.config_loader import load_config
from utils.data_loader import DatasetLoader

import torch
from model.vit import VisionTransformerSmall
from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim
# Load config
config = load_config(f"{ROOT_DIR_PATH}/config/vit_config.yaml")
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

### Data
print('loading training testing data')
# loading cifar100
cifar100_config = config["data"]['CIFAR100']
DATASET = cifar100_config["dataset"]
DATA_DIR = cifar100_config["data_path"]
BATCH = cifar100_config["batch_size"]
NUM_WORKERS = cifar100_config["num_workers"]
IMAGE = cifar100_config["img_size"]

# loading data
print(f'loading dataset : {DATASET}')
loader = DatasetLoader(dataset_name=DATASET,
                        data_dir=DATA_DIR,
                        batch_size=BATCH,
                        num_workers=NUM_WORKERS,
                        img_size=IMAGE)
train_loader, val_loader = loader.get_loaders()
print(f"Count of Train batches: {len(train_loader)}, Count of Validation batches: {len(val_loader)}")
print(f'Count of Train Images in all {len(train_loader)} batches, {len(train_loader.dataset)}')
print(f'Count of Validation Images in all {len(val_loader)} batches, {len(val_loader.dataset)}')
print('data sanity check')
for images, labels in train_loader:
    print('Training Data : Bacth 1')
    print(f'Image Tensor shape and Label Tensor shape : {images.shape}, {labels.shape}')
    print(f'There are {images.shape[0]} images in this Batch and total of {labels.shape[0]} Labels corresponding to those images.')
    break

### Model
modelConfig = config["model"]
vitSmall_config = modelConfig['VIT_SMALL']
MODEL_NAME = vitSmall_config["name"]
NUM_CLASSES = vitSmall_config["num_classes"]

# training config
trainingConfig = config['training']
LEARNING_RATE = trainingConfig['lr']
EPOCHS = trainingConfig['epochs']
WEIGHT_DECAY = trainingConfig['weight_decay']
# # mixup config
# mixupConig = trainingConfig['mixup']
# MIXUP_ALPHA = mixupConig["mixup_alpha"]
# CUTMIX_ALPHA = mixupConig["cutmix_alpha"]
# LABEL_SMOOTHENING = mixupConig["label_smoothing"]
# USE_MIXUP = mixupConig["enabled"]


Using device: cuda
loading training testing data
loading dataset : CIFAR100
Count of Train batches: 782, Count of Validation batches: 157
Count of Train Images in all 782 batches, 50000
Count of Validation Images in all 157 batches, 10000
data sanity check
Training Data : Bacth 1
Image Tensor shape and Label Tensor shape : torch.Size([64, 3, 32, 32]), torch.Size([64])
There are 64 images in this Batch and total of 64 Labels corresponding to those images.


In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device, mixup_fn=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(loader, desc="Training", leave=True)
    for  inputs, targets in progress_bar:
        #print(f'input shape : {inputs.shape}, taget_shape : {targets.shape}, target dim : {targets.ndim}')
        inputs, targets = inputs.to(device), targets.to(device)
        if mixup_fn is not None:
            inputs, targets = mixup_fn(inputs, targets)

        if targets.ndim == 2:
            targets = targets.type_as(inputs)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

        if targets.ndim == 2:
            # MixUp with soft labels
            _, predicted = outputs.max(1)
            _, true_classes = targets.max(1)  # Take argmax of soft labels as true class
            correct += predicted.eq(true_classes).sum().item()
            total += targets.size(0)
        else :
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            total += targets.size(0)

        # Update progress bar with metrics
        if total > 0:
            avg_loss = running_loss / total
            accuracy = 100. * correct / total
            progress_bar.set_postfix({
                "Loss": f"{avg_loss:.4f}",
                "Acc": f"{accuracy:.2f}%"
            })

        else : raise Exception(f'Expected non-zero batch size, but got 0 targets. Check if the dataset is empty or DataLoader is misconfigured.')

    
    return avg_loss, accuracy

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(loader, desc="Validation", leave=True)
    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # If labels are soft (e.g., using BCEWithLogitsLoss), convert to float
            if labels.ndim == 2:
                labels = labels.type_as(inputs)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            # Compute accuracy
            _, predicted = outputs.max(1)

            if labels.ndim == 2:
                # Soft labels → convert to class index
                _, true_classes = labels.max(1)
                correct += predicted.eq(true_classes).sum().item()

            else:
                # Hard labels
                correct += predicted.eq(labels).sum().item()


            total += labels.size(0)

            # Avoid division by zero on first step
            if total > 0:
                avg_loss = running_loss / total
                accuracy = 100. * correct / total

                progress_bar.set_postfix({
                    "Loss": f"{avg_loss:.4f}",
                    "Acc": f"{accuracy:.2f}%"
                })
                
    return avg_loss, accuracy