# The workshop demo!

We will convert this file from ordinary PyTorch code that runs on the host CPU into code that runs on the Graphcore accelerator, the IPU.

For conversion, we shall follow the comments marked with "#DEMO". These are instructions to insert and delete lines, plus some general commentary. The actual lines to insert begin with "#I".

However, before doing the conversion, we shall run it on the CPU!

#DEMO: This is an original - make a working copy!  
#DEMO: DELETE next line - we are headed for the IPU  
This runs on the CPU without using the IPU accelerator

The main code is taken from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html and is covered by the licence in the corresponding GitHub repository, reproduced here in the accompanying file, PYTORCH_EXAMPLES_LIBRARY.

#DEMO: and "insert" this line, i.e. delete the #I characters to make the line official  
#I This file has been modified following the instructions at [Pytorch to Poptorch](https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/pytorch_to_poptorch.html?highlight=pytorch%20to%20poptorch) ... 

In [None]:
#DEMO: I have added some recording of the time taken
import time
# List of (label, wall-clock timestamp) pairs; the "Start" entry is the
# reference point that the final cell subtracts from every timestamp.
event_times = [("Start", time.time())]

In [None]:
#DEMO: Make sure that the kernel selected is the one from the python environment you created
import torch
#DEMO: "poptorch" is Graphcore's version of pytorch - it uses their "poplar" library.
#DEMO: INSERT this line:
#I import poptorch
import torchvision
import torchvision.transforms as transforms

#DEMO: this records the time of our first milestone
event_times.append(("Torch imports done", time.time()))

In [None]:
# Convert each image to a tensor, then normalise all three channels
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Number of images per mini-batch; also used when printing batch labels.
batch_size = 4


# Training split of CIFAR-10, downloaded into ./data if not already present.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
#DEMO: Poptorch has its own configuration options - this one is for an optimisation
#DEMO: Insert these lines:
#I opts = poptorch.Options()
#I devIterations = 10
#I opts.deviceIterations(devIterations)  # ??? what does this do - we shall come back to it

#DEMO: Poptorch has its own data loader - ??? how is it different
#DEMO: Delete the next two lines (the torch DataLoader statement)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
#I trainloader = poptorch.DataLoader(options=opts, dataset=trainset, batch_size=batch_size,
#I                                           shuffle=True, num_workers=2)

# Test split of CIFAR-10, using the same transform as the training split.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

#DEMO: again use Poptorch's dataloader
#DEMO: Delete the next two lines (the torch DataLoader statement)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)
#I testloader = poptorch.DataLoader(options=opts, dataset=testset, batch_size=batch_size,
#I                                         shuffle=False, num_workers=2)

# Class names, indexed by the integer label values.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
event_times.append(("Data loaders created", time.time()))

In [None]:
#DEMO: This plots some of the downloaded images, and is unchanged.
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Display a normalised image tensor with matplotlib.

    The tensor is halved and shifted by 0.5 (reversing the 0.5/0.5
    normalisation), converted to a NumPy array, and its axes are
    reordered from (0, 1, 2) to (1, 2, 0) before plotting.
    """
    unnormalized = img / 2 + 0.5
    pixels = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(pixels)
    plt.show()


# Pull one batch of images and labels from the training loader
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Tile the batch into a single grid image and display it
image_grid = torchvision.utils.make_grid(images)
imshow(image_grid)
# Print the class name for each of the first batch_size labels
# (print's default separator is a single space)
print(*(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
event_times.append(("Training batch displayed", time.time()))

In [None]:

import torch.nn as nn
import torch.nn.functional as F

#DEMO: this is the original model
class Net(nn.Module):
    """Small convolutional classifier from the PyTorch CIFAR-10 tutorial.

    Two convolution + max-pool stages feed three fully connected layers,
    ending in 10 output values per input image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names determine the state_dict keys used when the
        # model is saved and reloaded, so they must not be renamed.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        # Convolution -> ReLU -> pooling, applied twice
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten all dimensions except batch
        x = torch.flatten(x, start_dim=1)
        # Hidden fully connected layers, each followed by ReLU
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        # This model has no final max: the loss criterion is CrossEntropy,
        # which includes a SoftMax, and prediction takes a max over these scores.
        return self.fc3(x)

#DEMO: but we are going to wrap that model to tweak the output of the forward() function
#DEMO: DELETE the next line - the variable net will be set, as altered below, to the wrapped version
net = Net()

# Timestamp taken once the base model object has been constructed
event_times.append(("Base model declared", time.time()))

#DEMO: Insert this wrapper, and note the difference in output between training and inference stages
#I class PoptorchNet(nn.Module):
#I     def __init__(self):
#I         super().__init__()
#I         self.model = Net()
#I     
#I     def forward(self, input, target=None):  # No target when evaluating
#I         out = self.model(input)
#I         # https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/pytorch_to_poptorch.html?highlight=pytorch%20to%20poptorch#training
#I         # adds a target parameter
#I         # also compare https://docs.graphcore.ai/projects/tutorials/en/latest/pytorch/basics/README.html#build-the-model
#I         if self.training:
#I             return (torch.nn.functional.softmax(out),
#I                     torch.nn.CrossEntropyLoss(reduction="mean")(out, target))
#I         return out  # because prediction below uses a max on this output
#I     
#I net = PoptorchNet()

In [None]:
#DEMO: We use the original optimizer functions
import torch.optim as optim

#DEMO: Delete the next line - because it is now specified in the model wrapper
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of net
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# NOTE(review): the label below is misspelled ("Optimzer"); it is left as-is
# because it is written to the timing file and shown on the plot at runtime.
event_times.append(("Optimzer declared", time.time()))

#DEMO: Wrap it again - why two steps? Maybe partly because inference does not need an optimizer
#DEMO: INSERT this line for the 2nd wrapping
#I poptorch_model = poptorch.trainingModel(net, options=opts, optimizer=optimizer)

In [None]:

for epoch in range(2):  # loop over the dataset multiple times

    # Loss accumulated since the last progress print
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        #DEMO: For what poptorch.trainingModel does for you in the training loop
        #DEMO: see https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/pytorch_to_poptorch.html?highlight=pytorch#the-training-loop
        # zero the parameter gradients
        #DEMO: delete the next line - it gets done in the __call__ to the model
        optimizer.zero_grad()

        # forward + backward + optimize
        #DEMO: a more complex call to the training model - so delete the next line
        outputs = net(inputs)
        #DEMO: and INSERT this line:
        #I outputs, loss = poptorch_model(inputs,labels)
        #DEMO: and DELETE the next three lines, because they are included in the training model __call__
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        #DEMO: a small adjustment here to deal with the effect of the devIterations optimization parameter
        #DEMO: DELETE the next line
        if i % 2000 == 1999:    # print every 2000 mini-batches
        #DEMO: and replace with:
        #I if i % 2000 == 2000 - devIterations * batch_size:    # print every 2000 mini-batches
            # Report the mean loss over the last 2000 mini-batches, then reset the accumulator
            print(f'[{epoch + 1}, {(i + 1) * batch_size:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
            event_times.append((f'Epoch {epoch + 1}, Image {(i + 1) * batch_size:5d} trained', time.time()))

print('Finished Training')
#DEMO: identify major part of training (in this case)
#DEMO: DELETE the next line
event_times.append(("Training complete", time.time()))
#DEMO: and replace with:
#I event_times.append(("Training complete (including compilation)", time.time()))

In [None]:

# save the model
#DEMO: save the model to a different file. So DELETE the next line and insert the one after that
PATH = './cifar_net.pth'
#I PATH = './cifar_net_graphcore.pth'
# Only the state_dict (the parameters) is written, not the module object
torch.save(net.state_dict(), PATH)
event_times.append(("Model saved", time.time()))

In [None]:
# Fetch the first batch of test images and their labels
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
# The number of labels printed follows batch_size (as the training-batch
# display does) instead of a hard-coded 4, so it stays correct if
# batch_size is changed.
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
event_times.append(("Test batch displayed", time.time()))

In [None]:
# Load the saved model
#DEMO: allow for wrapping, DELETE the next two lines
net = Net()
net.load_state_dict(torch.load(PATH))
#DEMO: and replace with
#I poptorchNet = PoptorchNet()
#I inferenceNet = poptorch.inferenceModel(poptorchNet)
#I inferenceNet.load_state_dict(torch.load(PATH))
#I inferenceNet.eval()
event_times.append(("Model loaded", time.time()))

In [None]:
#DEMO: more of the same, delete the next line
# Run the reloaded network on the test batch that was displayed above
outputs = net(images)
#DEMO: and replace with
#I outputs = inferenceNet(images)

#DEMO: describe the content of the period more accurately, so DELETE the next line
event_times.append(("Test images evaluated", time.time()))
#DEMO: and replace with
#I event_times.append(("Test images evaluated (including compilation)", time.time()))

In [None]:
# For each row of outputs, take the index of the largest score as the predicted class
_, predicted = torch.max(outputs, 1)

# The number of predictions printed follows batch_size rather than a
# hard-coded 4, so it stays correct if batch_size is changed.
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}'
                              for j in range(batch_size)))
event_times.append(("Test image classes displayed", time.time()))

In [None]:
# Running counts of correctly classified and total test images
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        #DEMO: more allowing for the wrapping and new variable name, delete the next line
        outputs = net(images)
        #DEMO: and replace with
        #I outputs = inferenceNet(images)

        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        # labels.size(0) is the number of images in this batch
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# The // operator makes this a whole-number (floored) percentage
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
event_times.append(("Test image class accuracy", time.time()))

In [None]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        #DEMO: more allowing for the wrapping and new variable name, delete the next line
        outputs = net(images)
        #DEMO: and replace with
        #I outputs = inferenceNet(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            # every image counts towards its true class total, right or wrong
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    # percentage of this class's images that were predicted correctly
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

event_times.append(("Test image accuracy by class", time.time()))

In [None]:
import json
# Convert absolute timestamps to seconds elapsed since the first ("Start") event
event_times = [(label, event_time - event_times[0][1]) for label, event_time in event_times]

#DEMO: save the IPU results in their own file, so DELETE the next line
with open("times_cpu.json", "w") as f:
#DEMO: and replace with:
#I with open("times_ipu_deviter{}.json".format(devIterations), "w") as f:
    json.dump(event_times, f)

import matplotlib.pyplot as plt
# Split the (label, seconds) pairs into two parallel tuples.
# NOTE: this rebinds `labels`, which earlier cells use for batch labels.
labels, times = list(zip(*event_times))
x = times
# One y position per event, in the order the events were recorded
y = list(range(len(labels)))

plt.plot(x, y)
#DEMO: update title - it's now the IPU, so DELETE the next line
plt.title("Event progression (snake) - CPU")
#DEMO: and replace with:
#I plt.title("Event progression (snake) - IPU")
plt.xlabel('Seconds')
plt.ylabel('Event progression')
# Label each y tick with the name of the corresponding event
plt.yticks(ticks=y, labels=labels)
plt.show()

#DEMO: More detailed information on converting existing codes
#DEMO: https://docs.graphcore.ai/projects/differences-ipu-gpu/en/latest/index.html