In [1]:
import torch
import torchvision.models as models
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


In [2]:
# class CustomDataset(torch.utils.data.Dataset):
#     def __init__(self):
#         pass
    
#     def __getitem__(self, idx):
#         pass
    
#     def __len__(self):
#         pass

In [3]:
# train_dataset = CustomDataset()
# val_dataset = CustomDataset()

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [4]:
# Define the transformations to apply to the CIFAR-10 data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize the image tensors
])

# Define the training and test datasets
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Define the dataloaders to load the data in batches during training and testing
batch_size = 64

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [5]:
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of testing batches: {len(test_loader)}")

Number of training batches: 782
Number of testing batches: 157


In [6]:
# the network to test
model = models.resnet18(pretrained=False)
num_classes = 10

learning_rate = 1e-3

# Modify the last fully connected layer
fc_input = model.fc.in_features
model.fc = nn.Linear(fc_input, num_classes)

# print(model)

# Step 5: Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
num_params = sum(torch.numel(p) for p in model.parameters())
print(f"Number of parameters: {num_params}")

Number of parameters: 11181642


In [8]:
log_dir = "./logs/scratch"  # Set the directory for storing the logs
writer = SummaryWriter(log_dir)

2023-05-29 21:10:24.709537: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
def model_eval(model, dataloader):
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in dataloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

In [10]:
num_epochs = 15
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

log_train_every = 150
log_test_every = 250

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

        if (i+1) % 100 == 0:
            print(f"[Epoch: {epoch + 1}, Batch: {i + 1}] Loss: {running_loss/100:.3f}")
            running_loss = 0.0
        
        # compute training & testing accuracy every couple of iterations        
        if (i+1) % log_train_every == 0:
            train_accuracy = model_eval(model, train_loader)
            
            # Log the loss
            writer.add_scalar('Loss/train', loss.cpu().item(), epoch * len(train_loader) + i)
        
            # Log the training accuracy
            writer.add_scalar('Accuracy/train', train_accuracy, epoch * len(train_loader) + i)
        
        if (i+1) % log_test_every == 0:
            test_accuracy = model_eval(model, test_loader)
            
            # Log the test accuracy
            writer.add_scalar('Accuracy/test', test_accuracy, epoch * len(train_loader) + i)
        
writer.close()
print("Training finished.")

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[Epoch: 1, Batch: 100] Loss: 1.812
[Epoch: 1, Batch: 200] Loss: 1.634
[Epoch: 1, Batch: 300] Loss: 1.530
[Epoch: 1, Batch: 400] Loss: 1.411
[Epoch: 1, Batch: 500] Loss: 1.338
[Epoch: 1, Batch: 600] Loss: 1.275
[Epoch: 1, Batch: 700] Loss: 1.268
[Epoch: 2, Batch: 100] Loss: 1.168
[Epoch: 2, Batch: 200] Loss: 1.136
[Epoch: 2, Batch: 300] Loss: 1.096
[Epoch: 2, Batch: 400] Loss: 1.095
[Epoch: 2, Batch: 500] Loss: 1.048
[Epoch: 2, Batch: 600] Loss: 1.050
[Epoch: 2, Batch: 700] Loss: 1.015
[Epoch: 3, Batch: 100] Loss: 0.911
[Epoch: 3, Batch: 200] Loss: 0.897
[Epoch: 3, Batch: 300] Loss: 0.900
[Epoch: 3, Batch: 400] Loss: 0.903
[Epoch: 3, Batch: 500] Loss: 0.883
[Epoch: 3, Batch: 600] Loss: 0.863
[Epoch: 3, Batch: 700] Loss: 0.871
[Epoch: 4, Batch: 100] Loss: 0.704
[Epoch: 4, Batch: 200] Loss: 0.761
[Epoch: 4, Batch: 300] Loss: 0.755
[Epoch: 4, Batch: 400] Loss: 0.736
[Epoch: 4, Batch: 500] Loss: 0.755
[Epoch: 4, Batch: 600] Loss: 0.731
[Epoch: 4, Batch: 700] Loss: 0.786
[Epoch: 5, Batch: 10