18th b: CNN with Adam vs. AdamW


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define CNN model
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Each stage is a 3x3 convolution with padding 1, a ReLU, and a 2x2
    max-pool. The pooled feature map is flattened per sample and passed
    through two linear layers that produce the class logits.

    Args:
        in_channels: Number of channels in the input images. Defaults to 3.
        num_classes: Number of output logits. Defaults to 10.

    Note:
        ``fc1`` is sized for ``64 * 8 * 8`` features, i.e. a 32x32 input after
        two 2x2 poolings. Other spatial sizes raise a shape error in
        ``forward``.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, in_channels, 32, 32). A single
                unbatched image of shape (in_channels, 32, 32) is also
                accepted and treated as a batch of one.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.

        Raises:
            RuntimeError: If the flattened feature size does not match what
                ``fc1`` expects (for example, a non-32x32 input).
        """
        if x.dim() == 3:
            # Promote a lone image to a batch of one so the per-sample
            # flatten below keeps a leading batch axis.
            x = x.unsqueeze(0)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch axis. The previous
        # view(-1, 64 * 8 * 8) would silently spill surplus features into the
        # batch dimension for wrongly sized inputs; this makes fc1 fail loudly.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# CIFAR-10 input pipeline.
DATA_ROOT = './data'
BATCH_SIZE = 64

# Convert each image to a tensor, then subtract 0.5 and divide by 0.5 on each
# of the three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root=DATA_ROOT,
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Held-out split: iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Train function
def train(model, optimizer, criterion, epochs=10, loader=None):
    """Train ``model`` in place and print the mean batch loss for every epoch.

    The model is moved to CUDA when available (otherwise CPU) and switched to
    training mode. After the last epoch the total wall-clock time is printed.

    Args:
        model: The ``nn.Module`` to optimise.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        epochs: Number of passes over ``loader``.
        loader: Iterable yielding ``(images, labels)`` batches. When ``None``
            the notebook-level ``trainloader`` is used, which preserves the
            original behaviour.

    Returns:
        None.
    """
    if loader is None:
        # Fall back to the global loader defined in the data-loading section.
        loader = trainloader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()

    start_time = time.time()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches while iterating instead of calling len(), so plain
        # iterables work too and an empty loader reports nan rather than
        # raising ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")

    print("Training Time: {:.2f} seconds".format(time.time() - start_time))

# Evaluate function
def evaluate(model, loader=None):
    """Measure top-1 accuracy of ``model`` and print it as a percentage.

    The model is moved to CUDA when available (otherwise CPU) and left in
    evaluation mode. Gradients are disabled during the forward passes.

    Args:
        model: The ``nn.Module`` to score.
        loader: Iterable yielding ``(images, labels)`` batches. When ``None``
            the notebook-level ``testloader`` is used, which preserves the
            original behaviour.

    Returns:
        Accuracy in percent (0-100) as a float, or ``nan`` when ``loader``
        yields no samples.
    """
    if loader is None:
        # Fall back to the global loader defined in the data-loading section.
        loader = testloader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class is the index of the largest logit per row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Accuracy over zero samples is undefined; report nan instead of raising
    # ZeroDivisionError.
    accuracy = 100 * correct / total if total else float("nan")
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Adam vs. AdamW comparison.
# Both runs are reseeded with the same value so each model starts from the
# same global RNG state; without this the two runs differ in their random
# initial weights as well as in the optimizer.
SEED = 42

# Training with Adam
torch.manual_seed(SEED)
model_adam = CNN()
# Only lr is set explicitly; every other hyperparameter is the library default.
optimizer_adam = optim.Adam(model_adam.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
print("Training with Adam...")
train(model_adam, optimizer_adam, criterion, epochs=10)
acc_adam = evaluate(model_adam)

# Training with AdamW
torch.manual_seed(SEED)
model_adamw = CNN()
# Same lr as the Adam run; every other hyperparameter is the library default.
optimizer_adamw = optim.AdamW(model_adamw.parameters(), lr=0.001)
print("Training with AdamW...")
train(model_adamw, optimizer_adamw, criterion, epochs=10)
acc_adamw = evaluate(model_adamw)

# Compare results
print(f"Adam Test Accuracy: {acc_adam:.2f}%")
print(f"AdamW Test Accuracy: {acc_adamw:.2f}%")


Training with Adam...
Epoch 1, Loss: 1.3165
Epoch 2, Loss: 0.9437
Epoch 3, Loss: 0.7624
Epoch 4, Loss: 0.6194
Epoch 5, Loss: 0.4856
Epoch 6, Loss: 0.3635
Epoch 7, Loss: 0.2543
Epoch 8, Loss: 0.1737
Epoch 9, Loss: 0.1179
Epoch 10, Loss: 0.0887
Training Time: 1379.08 seconds
Test Accuracy: 71.62%
Training with AdamW...
Epoch 1, Loss: 1.3238
Epoch 2, Loss: 0.9255
Epoch 3, Loss: 0.7516
Epoch 4, Loss: 0.6114
Epoch 5, Loss: 0.4766
Epoch 6, Loss: 0.3522
Epoch 7, Loss: 0.2505
Epoch 8, Loss: 0.1745
Epoch 9, Loss: 0.1214
Epoch 10, Loss: 0.0967
Training Time: 1006.32 seconds
Test Accuracy: 71.87%
Adam Test Accuracy: 71.62%
AdamW Test Accuracy: 71.87%
