In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import timm

In [2]:
# Single-channel normalisation statistics; replicated to three channels below
# because the images are expanded to 3-channel grayscale.
# NOTE(review): presumably the FashionMNIST pixel mean/std - confirm.
mean, std = (0.2861,), (0.3530,)

IMAGE_SIZE = 224  # shorter image edge after resizing
BATCH_SIZE = 32
SEED = 42         # drives the training-set shuffle order only

transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(mean * 3, std * 3),
])

train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# A dedicated, seeded generator makes the shuffle order repeatable across
# kernel restarts without touching the global RNG state.
shuffle_generator = torch.Generator().manual_seed(SEED)

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=shuffle_generator,
)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

100%|██████████| 26.4M/26.4M [00:02<00:00, 12.8MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 203kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.78MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 9.72MB/s]


In [3]:
# 1. Load the pre-trained backbone with a classifier sized for the task.
#    Passing num_classes lets timm build the replacement head itself, so the
#    model's own class-count bookkeeping matches the actual head instead of
#    being silently left at the pretrained value by a manual layer swap.
NUM_CLASSES = 10  # 10 classes for Fashion MNIST
model = timm.create_model('convnext_tiny', pretrained=True, num_classes=NUM_CLASSES)

# 2. Inspect the classification head
# Width of the features entering the final linear layer (kept for reference).
n_features = model.head.fc.in_features
assert model.head.fc.out_features == NUM_CLASSES, "classifier head has the wrong output size"

# 3. Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)



model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

In [4]:
# Optimiser: AdamW over every model parameter, with decoupled weight decay.
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=1e-4,
    weight_decay=0.05,
)

# Objective: cross-entropy between the model's logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [7]:
import torch
from tqdm import tqdm

def _run_epoch(model, loader, criterion, device, desc, optimizer=None, leave=True):
    """Make one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both returned values are ``nan`` if the loader yields no
    batches.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    batches_done = 0
    correct = 0
    seen = 0

    progress = tqdm(loader, desc=desc, leave=leave)

    with torch.set_grad_enabled(is_training):
        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            batches_done += 1
            _, predicted = outputs.max(1)
            seen += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Average over the batches processed *so far*; dividing by the
            # total batch count would under-report the loss until the very
            # last batch of the epoch.
            progress.set_postfix(loss=loss_sum / batches_done, acc=100. * correct / seen)

    if batches_done == 0 or seen == 0:
        # Nothing was processed: report "not a number" instead of crashing.
        return float("nan"), float("nan")
    return loss_sum / batches_done, 100. * correct / seen


def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, device, epochs=2):
    """Train ``model`` for ``epochs`` epochs, evaluating on ``test_loader`` after each.

    Args:
        model: module called as ``model(images)`` to produce per-class scores.
        train_loader: iterable of ``(images, labels)`` batches used for training.
        test_loader: iterable of ``(images, labels)`` batches used for evaluation.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimiser stepped once per training batch.
        device: device that every batch is moved to before the forward pass.
        epochs: number of train/evaluate rounds to run.

    Returns:
        A list with one dict per epoch holding ``epoch``, ``train_loss``,
        ``train_acc``, ``test_loss`` and ``test_acc`` (accuracies in percent,
        losses averaged over batches). A summary line is also printed per epoch.
    """
    history = []

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Train]",
            optimizer=optimizer,
            leave=False,
        )
        test_loss, test_acc = _run_epoch(
            model, test_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Test]",
            leave=True,
        )

        print(f"\nSummary Epoch {epoch+1}: Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%\n")

        history.append({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "test_loss": test_loss,
            "test_acc": test_acc,
        })

    return history

# Fine-tune and evaluate with the function's default number of epochs.
train_and_evaluate(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
)

Epoch 1/2 [Test]: 100%|██████████| 313/313 [00:54<00:00,  5.77it/s, acc=92.8, loss=0.201]   



Summary Epoch 1: Train Acc: 92.62% | Test Acc: 92.82%



Epoch 2/2 [Test]: 100%|██████████| 313/313 [00:52<00:00,  5.98it/s, acc=93.5, loss=0.177]    


Summary Epoch 2: Train Acc: 94.22% | Test Acc: 93.54%




