In [1]:
# Comet imports for logging
from comet_ml import Experiment

# All the necessary torch imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable

# Other imports
import json
import math

In [2]:
# Setup the Comet Experiment
experiment = Experiment(api_key="tHDbEydFQGW7F1MWmIKlEvrly", project_name="example-cnn-logging", workspace="aguerra")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/aguerra/example-cnn-logging/d7a26ceb53ce4fc8a3376b8df62ad86b



In [3]:
# Set the device so that we can easily utilize the GPU power
device = 'cuda' if torch.cuda.is_available() else 'cpu'    #CPU or GPU

In [4]:
# Here is our data pre-processing pipeline, made easy with Pytorch transforms
transform_train = transforms.Compose([
    # The below two transforms are a form of data augmentation
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# How many images we will take in every iteration, known as the batch_size
batch_size = 128

# Load the training dataset, and put it into a dataloader so it can be managed efficiently
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Load the test dataset, and put it into a dataloader so it can be managed efficiently
valset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=2)

# These are the potential labels we can assign our images
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Declaring all the layers of the network
        self.conv1 = nn.Conv2d(3, 32, 5, padding=0)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 32, 5, padding=0)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.bn2 = nn.BatchNorm2d(32)
        
        self.drop1 = nn.Dropout(p=0.3)
        self.fc1 = nn.Linear(25 * 32, 128)
        
        self.drop2 = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(128, 10)
        
    # This is the function of the nn.Module class which tells the model what to do with an input
    def forward(self, x):
        # Start by sending it through the convolutional layer
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.bn1(x)
        # Now introduce the nonlinearity
        x = F.relu(x)
        
        # Repeat for second convolutional layer
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = F.relu(x)
    
        # Now we flatten our input so that we can use fully connnected layers
        x = x.view(-1, 25 * 32)
        
        # Now use dropout with fully connected layers
        x = self.drop1(x)
        x = self.fc1(x)
        x = F.relu(x)
        
        x = self.drop2(x)
        x = self.fc2(x)
    
        # Return the log_softmax of the output to get the probabilities for each class
        return F.log_softmax(x, dim=1)

In [6]:
# Create a randomly initialized model
model = Net().to(device)

# Use negative log-likelihood loss when using a log-softmax for the final activation
criterion = nn.NLLLoss() 

# Instantiate the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.01) 

In [None]:
num_epochs = 25

for epoch in range(num_epochs):
    total = 0
    correct = 0
    avg_train_loss = 0
    # For every batch of the training data
    for i, (images, labels) in enumerate(trainloader):

        images = images.to(device) #Transfer images to GPU
        labels = labels.to(device) #Transfer labels to GPU
        
        # Extra step so pytorch understands what kind of tensors the images/labels are
        images = Variable(images)
        labels = Variable(labels)
        
        # Run the forward pass
        outputs = model(images)
        
        # Compute the loss
        loss = criterion(outputs, labels)

        # Zero out the old gradient
        optimizer.zero_grad()
        # Compute the new gradient based on loss
        loss.backward()
        # Update the weights
        optimizer.step()

         # Track the accuracy
        total += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels).sum().item()
        
        # Add to the total loss for the epoch, normalize by batch size
        avg_train_loss += loss.item() / batch_size
    
    # After one training epoch, compute the validation statistics
    correct_val = 0
    total_val = 0
    avg_val_loss = 0
    for images_val, labels_val in valloader:
        images_val = images_val.to(device)
        labels_val = labels_val.to(device)

        images_val = Variable(images_val)
        labels_val = Variable(labels_val)

        outputs_val = model(images_val)
        _, predicted_val = torch.max(outputs_val.data, 1)
        
        avg_val_loss += criterion(outputs_val.detach(), labels_val).item() / batch_size
        
        total_val += labels_val.size(0)
        correct_val += (predicted_val == labels_val).sum().item()
    
    
    # Now, we log the training statistics to Comet
    experiment.log_metric("Train Loss", avg_train_loss, step=epoch)
    experiment.log_metric("Train Accuracy", (correct / total) * 100, step=epoch)
    experiment.log_metric("Validation Loss", avg_val_loss, step=epoch)
    experiment.log_metric("Validation Accuracy", (correct_val / total_val) * 100, step=epoch)
    
    # We can also save the model after every epoch, useful for early stopping!
    tmp_file = torch.save(model.state_dict(), "tmp/temp.model")
    experiment.log_asset("tmp/temp.model", file_name="model_" + str(epoch) + ".model")
    
    # Can also print to the console 
    print('Epoch [{}/{}], Validation Loss: {:.4f}, Validation Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, avg_val_loss,
                          (correct_val / total_val) * 100))

Epoch [1/25], Validation Loss: 0.9385, Validation Accuracy: 45.24%
Epoch [2/25], Validation Loss: 0.8181, Validation Accuracy: 53.31%
Epoch [3/25], Validation Loss: 0.7994, Validation Accuracy: 52.92%


In [None]:
# end the experiment
experiment.end()