In [4]:
from pathlib import Path
import torch
import torch.optim as optim
import torch.nn as nn
from src.model import CustomResNet
from torchvision.models import resnet50

In [2]:
torch.cuda.is_available()

True

In [1]:
import time
import functools

def time_it(func):
    @functools.wraps(func)
    def wrapper_time_it(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Function '{func.__name__}' executed in {end_time - start_time:.4f} seconds")
        return result
    return wrapper_time_it

In [4]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

# Dummy dataset
data = torch.randn(10000, 3, 224, 224)  # 10000 images, 3 channels, 224x224 size
labels = torch.randint(0, 2, (10000,))  # Binary labels

dataset = TensorDataset(data, labels)
batch_size = 32

def calc_accuracy(correct, total):
    return 100 * correct / total

def calc_conf_per_class(class_label, preds, labels):
    tp = ((preds == class_label) & (labels == class_label)).sum().item()
    fp = ((preds == class_label) & (labels != class_label)).sum().item()
    fn = ((preds != class_label) & (labels == class_label)).sum().item()
    return tp, fp, fn

def calc_f1(tp, fp, fn):
    precision = tp / (tp + fp + 1e-10)
    recall = tp / (tp + fn + 1e-10)
    return 2 * (precision * recall) / (precision + recall + 1e-10)

@time_it
def train_loop(model, train_loader, criterion, optimizer, num_classes, device, accumulation_steps=4):
    model.train()
    scaler = GradScaler()

    train_running_loss = 0
    train_total_samples = 0
    train_correct = 0

    tp = np.zeros(num_classes)
    fp = np.zeros(num_classes)
    fn = np.zeros(num_classes)

    for batch_idx, (inputs, labels) in enumerate(tqdm(train_loader, total=len(train_loader))):
        inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

        optimizer.zero_grad()

        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()

        if (batch_idx + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            torch.cuda.empty_cache()

        _, predicted = torch.max(outputs, 1)
        train_running_loss += loss.item()
        train_total_samples += labels.size(0)
        train_correct += (predicted == labels).sum().item()

        for class_label in range(num_classes):
            true_pos, false_pos, false_neg = calc_conf_per_class(class_label, predicted, labels)
            tp[class_label] += true_pos
            fp[class_label] += false_pos
            fn[class_label] += false_neg

    loss = train_running_loss / len(train_loader)
    accuracy = calc_accuracy(train_correct, train_total_samples)
    f1 = calc_f1(tp, fp, fn)
    avg_f1 = f1.mean()

    return avg_f1.item(), loss, accuracy

# Initialize model, criterion, optimizer
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Flatten(),
    torch.nn.Linear(16 * 112 * 112, 2)
)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Measure performance without pin_memory
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=False)
print("Training without pin_memory:")
train_loop(model, train_loader, criterion, optimizer, num_classes=2, device=device)

# Measure performance with pin_memory
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
print("Training with pin_memory:")
train_loop(model, train_loader, criterion, optimizer, num_classes=2, device=device)


Training without pin_memory:


100%|██████████| 313/313 [00:03<00:00, 98.64it/s] 


Function 'train_loop' executed in 3.1836 seconds
Training with pin_memory:


100%|██████████| 313/313 [00:02<00:00, 112.48it/s]

Function 'train_loop' executed in 2.7848 seconds





(0.5481895902381606, 0.6857553305336461, 54.82)