### Imports

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from transformers import AutoImageProcessor, SwinForImageClassification
from sklearn.metrics import accuracy_score

In [None]:
import torch
torch.cuda.is_available(), torch.cuda.get_device_name(0)

### Hyperparameters

In [None]:
batch_size = 16
learning_rate = 1e-4
num_epochs = 200
save_interval = 10
num_classes = 4

In [None]:
import wandb

sweep_config = {
    'method': 'random', 
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'parameters': {
        'batch_size': {'values': [16, 32, 64]},
        'learning_rate': {'values': [1e-5, 1e-4, 5e-4]}, 
        'num_epochs': {'value': num_epochs},  
        'num_classes': {'value': num_classes}, 
    }
}

sweep_id = wandb.sweep(sweep_config, project="swinv2-bc-tuning")

### Data Preprocessing

In [None]:
image_processor = AutoImageProcessor.from_pretrained("microsoft/swinv2-base-patch4-window8-256")
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
])

### Load Datasets

In [None]:
train_dataset = datasets.ImageFolder("dataset/train", transform=transform)
val_dataset = datasets.ImageFolder("dataset/val", transform=transform)
test_dataset = datasets.ImageFolder("dataset/test", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Load Pre-trained Model

In [None]:
model = SwinForImageClassification.from_pretrained("microsoft/swinv2-base-patch4-window8-256", num_labels=num_classes, ignore_mismatched_sizes=True)
model.to("cuda" if torch.cuda.is_available() else "cpu")

### Loss and Optimizer

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=sweep_config.learning_rate)

### Training Function

In [2]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits
        print(f"Image batch shape: {images.shape}")
        print(f"Model output shape: {outputs.shape}")
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    avg_loss = total_loss / len(train_loader)
    wandb.log({"train_loss": avg_loss})
    return total_loss / len(train_loader)

### Validation Function

In [None]:
def validate(model, val_loader, criterion):
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    avg_loss = total_loss / len(val_loader)
    wandb.log({"val_loss": avg_loss, "val_accuracy": accuracy})
    return total_loss / len(val_loader), accuracy

### Training and Validation Loop

In [None]:
import os

os.makedirs('models', exist_ok=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer)
    val_loss, val_accuracy = validate(model, val_loader, criterion)
    # Removed validation step
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}")
    wandb.log({'epoch': epoch+1})
    if (epoch + 1) % 10 == 0:
        torch.save(model.state_dict(), f'models-base/model_lr{config.learning_rate}_bs{config.batch_size}.pth')
        print(f"Model saved at {model_path}")


Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: torch.Size([16, 3, 256, 256])
Model output shape: torch.Size([16, 4])
Image batch shape: to

### Evaluate on Test Set

In [1]:
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).logits
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {test_accuracy:.4f}")

NameError: name 'model' is not defined