In [None]:
import os
import numpy as np
import glob
import PIL.Image as Image
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# import pandas as pd
# import seaborn as sn

We always check whether we are running on a GPU.

In [None]:
# Query CUDA availability once and reuse the answer for both the device
# handle and the status message.
gpu_available = torch.cuda.is_available()
device = torch.device('cuda') if gpu_available else torch.device('cpu')
status_message = (
    "The code will run on GPU."
    if gpu_available
    else "The code will run on CPU. Go to Edit->Notebook Settings and choose GPU as the hardware accelerator"
)
print(status_message)

We provide you with a class that can load the *hotdog/not hotdog* dataset, which you should read from `/dtu/datasets1/02516/`.

In [None]:
class Hotdog_NotHotdog(torch.utils.data.Dataset):
    """Image-folder dataset for the hotdog / not-hotdog classification task.

    The expected directory layout is ``<data_path>/<train|test>/<class>/*.jpg``.
    Class sub-directories are sorted alphabetically and numbered from 0, so
    the integer label of a class is its position in that sorted list.
    """

    def __init__(self, train, transform, data_path='/zhome/21/8/156030/02516/hotdog_nothotdog'):
        """Index the image files of one split.

        Args:
            train: if truthy use the ``train`` sub-directory, otherwise ``test``.
            transform: callable applied to every PIL image in ``__getitem__``.
            data_path: root directory that contains the ``train`` and ``test``
                folders.
        """
        self.transform = transform
        data_path = os.path.join(data_path, 'train' if train else 'test')
        image_classes = sorted(
            os.path.basename(d)
            for d in glob.glob(os.path.join(data_path, '*'))
            if os.path.isdir(d)
        )
        self.name_to_label = {name: label for label, name in enumerate(image_classes)}
        # glob returns files in arbitrary, filesystem-dependent order; sorting
        # makes sample indices reproducible between runs and machines.
        self.image_paths = sorted(glob.glob(os.path.join(data_path, '*', '*.jpg')))

    def __len__(self):
        """Return the total number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one sample and return ``(transformed_image, label)``."""
        image_path = self.image_paths[idx]

        # Force three channels: grayscale, RGBA or CMYK JPEGs would otherwise
        # produce tensors whose channel count does not match the rest of the
        # batch. The context manager closes the file handle; convert() returns
        # a fully loaded copy, so the image stays usable afterwards.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        # The class name is the name of the directory that holds the file.
        class_name = os.path.basename(os.path.dirname(image_path))
        y = self.name_to_label[class_name]
        X = self.transform(image)
        return X, y

Below is the simple way of converting the images to something that can be fed through a network.
Feel free to use something other than $128\times128$ images.

In [None]:
# Data augmentation settings (rotation, blur, flips, colour jitter).
size = 128                             # images are resized to size x size
degree = 20                            # maximum rotation angle in degrees
color_jit = [0.2, 0.15, 0.1, 0.15]     # brightness, contrast, saturation, hue
affine = [20, (0.2, 0.3), None, 30]    # arguments for an optional RandomAffine step (currently unused)
gaus_kernel = (5, 5)

# Random augmentation is applied to the training split only.
train_transform = transforms.Compose([transforms.RandomRotation(degrees=degree),
                                      transforms.GaussianBlur(gaus_kernel, sigma=(0.01, 2.0)),
                                      transforms.RandomHorizontalFlip(p=0.3),
                                      transforms.RandomVerticalFlip(p=0.3),
                                      transforms.ColorJitter(*color_jit),
                                      transforms.Resize((size, size)),
                                      transforms.ToTensor()])

# The test split must be deterministic: with random rotations, blur, flips and
# colour jitter every evaluation pass would see different images, which makes
# the reported test loss/accuracy noisy and not reproducible.
test_transform = transforms.Compose([transforms.Resize((size, size)),
                                     transforms.ToTensor()])

batch_size = 64
trainset = Hotdog_NotHotdog(train=True, transform=train_transform)
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
testset = Hotdog_NotHotdog(train=False, transform=test_transform)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=3)

Let's look at some images from our data 

In [None]:
# Draw one training batch as a grid that is seven images wide.
images, labels = next(iter(train_loader))
class_names = ['hotdog', 'not hotdog']
n_cols = 7
n_rows = int(len(images) / n_cols) + 1

plt.figure(figsize=(20,20))
for position, (image_tensor, label) in enumerate(zip(images, labels), start=1):
    plt.subplot(n_rows, n_cols, position)
    # imshow expects channels last, so reorder (C, H, W) -> (H, W, C).
    plt.imshow(image_tensor.numpy().transpose(1, 2, 0))
    plt.title(class_names[label.item()])
    plt.axis('off')


Remember to save the state of your model AND optimizer regularly in case the Colab runtime times out.
You can save your model to your Google Drive, so you can retrieve it from there in a new Colab session.

If you only save it inside the Colab runtime, there is no way to get it into a new session.

In [None]:
# Google Drive can only be mounted from inside a Colab runtime. Anywhere else
# (local machine, HPC cluster) the google.colab package does not exist, so the
# mount is skipped instead of aborting a "Run All" with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/gdrive')

Now create a model and train it!


In [None]:
class Network(nn.Module):
    """Small CNN for binary classification of square RGB images.

    Four 3x3 convolutions (8, 8, 16, 16 channels) with one 2x2 max-pool in
    the middle are followed by two fully connected layers and a sigmoid, so
    the network emits one probability per image.

    Args:
        input_size: side length in pixels of the square input images. The
            default of 128 reproduces the original 65536-feature flatten
            layer (16 channels * 64 * 64).
    """

    def __init__(self, input_size=128):
        super(Network, self).__init__()
        if input_size < 2:
            raise ValueError("input_size must be at least 2")
        self.conv1 = nn.Sequential(
                nn.Conv2d(3, 8, 3, 1, padding=1),
                nn.BatchNorm2d(8),
                nn.ReLU(),
                nn.Conv2d(8, 8, 3, 1, padding=1),
                nn.BatchNorm2d(8),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(8, 16, 3, 1, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.Conv2d(16, 16, 3, 1, padding=1),
                nn.ReLU()
        )

        # The padded convolutions keep the spatial size; only the single
        # 2x2 max-pool halves it (rounding down), leaving 16 feature maps.
        pooled_size = input_size // 2
        flat_features = 16 * pooled_size * pooled_size

        self.fc1 = nn.Sequential(
                nn.Linear(flat_features, 500),
                nn.ReLU(),
                nn.Linear(500, 1),
                nn.Sigmoid())

    def forward(self, x):
        """Return a 1-D tensor with one probability per image in the batch."""
        x = self.conv1(x)
        # Flatten everything except the first (minibatch) dimension.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        # Drop only the trailing feature dimension. A bare torch.squeeze would
        # also remove the batch dimension when the batch holds one image,
        # giving a 0-d tensor whose shape no longer matches the targets.
        return x.squeeze(1)

In [None]:
# Build the classifier on the selected device (Module.to returns the module).
model = Network().to(device)
# Binary cross-entropy on the sigmoid probabilities produced by the network.
criterion = nn.BCELoss()
# Two optimizers over the same parameter set, so SGD and Adam can be compared.
optimizer_SGD = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer_Adam = torch.optim.Adam(model.parameters())

In [None]:
#We define the training as a function so we can easily re-use it.
def train(model, optimizer, num_epochs=10):
    """Train ``model`` and evaluate it on the test split after every epoch.

    Uses the notebook-level ``train_loader``, ``test_loader``, ``criterion``
    and ``device``.

    Args:
        model: network returning one probability per sample.
        optimizer: optimizer constructed over ``model.parameters()``.
        num_epochs: number of passes over the training data.

    Returns:
        dict with the keys ``'train_acc'``, ``'test_acc'``, ``'train_loss'``
        and ``'test_loss'``; each maps to a list with one value per epoch.
        Losses are the mean of the per-batch losses.
    """
    def loss_fun(output, target):
        return criterion(output, target)

    def count_correct(output, target):
        # Rounding thresholds the probability at 0.5 and yields the predicted
        # class; it is compared with the target exactly once. (Comparing the
        # resulting correctness mask with the target a second time would
        # count correct class-0 predictions as wrong and vice versa.)
        predicted = output.round()
        return (predicted == target).sum().cpu().item()

    out_dict = {'train_acc': [],
                'test_acc': [],
                'train_loss': [],
                'test_loss': []}

    for epoch in tqdm(range(num_epochs), unit='epoch'):
        # ---- training pass ----
        model.train()
        train_correct = 0
        train_loss = []
        for minibatch_no, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
            data, target = data.to(device), target.to(device).float()
            # Zero the gradients computed for each weight
            optimizer.zero_grad()
            # Forward pass; reshape guards against a model that squeezes a
            # single-sample batch down to a 0-d tensor.
            output = model(data).reshape(-1)
            loss = loss_fun(output, target)
            # Backward pass and weight update
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())
            train_correct += count_correct(output, target)

        # ---- evaluation pass ----
        # Compute the test loss and accuracy
        test_loss = []
        test_correct = 0
        model.eval()
        for data, target in test_loader:
            data, target = data.to(device), target.to(device).float()
            with torch.no_grad():
                output = model(data).reshape(-1)
            test_loss.append(loss_fun(output, target).cpu().item())
            test_correct += count_correct(output, target)

        out_dict['train_acc'].append(train_correct/len(train_loader.dataset))
        out_dict['test_acc'].append(test_correct/len(test_loader.dataset))
        out_dict['train_loss'].append(np.mean(train_loss))
        out_dict['test_loss'].append(np.mean(test_loss))
        print(f"Loss train: {np.mean(train_loss):.3f}\t test: {np.mean(test_loss):.3f}\t",
              f"Accuracy train: {out_dict['train_acc'][-1]*100:.1f}%\t test: {out_dict['test_acc'][-1]*100:.1f}%")
    return out_dict

In [None]:
num_epochs = 10

# Both optimizers update the same `model`. Snapshot the current weights (and
# BatchNorm statistics) so each optimizer starts from the same point;
# otherwise Adam would merely continue from the SGD-trained weights and the
# two training curves would not be comparable.
initial_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

out_dict_SGD = train(model, optimizer_SGD, num_epochs=num_epochs)

# load_state_dict copies values into the existing parameter tensors, so
# optimizer_Adam still refers to the live parameters of `model`.
model.load_state_dict(initial_state)
out_dict_adam = train(model, optimizer_Adam, num_epochs=num_epochs)

In [None]:
# 2x2 grid: one column per optimizer, accuracy on top and loss below.
fig, axs = plt.subplots(2,2)
epochs = np.arange(num_epochs)
fig.suptitle('Training results')

for column, (optimizer_name, history) in enumerate((("SGD", out_dict_SGD), ("Adam", out_dict_adam))):
    accuracy_ax = axs[0, column]
    accuracy_ax.plot(epochs, history['train_acc'], label="Train")
    accuracy_ax.plot(epochs, history['test_acc'], label="Test")
    accuracy_ax.set_title(optimizer_name)
    accuracy_ax.set(xlabel="Epochs", ylabel='Accuracy')

    loss_ax = axs[1, column]
    loss_ax.plot(epochs, history['train_loss'], label="Train")
    loss_ax.plot(epochs, history['test_loss'], label="Test")
    loss_ax.set(xlabel="Epochs", ylabel='Loss')

# Same axis limits on every panel; keep labels only on the outer edges.
for ax in axs.flat:
    ax.set_xlim(0, num_epochs)
    ax.set_ylim(0, 1)
    ax.label_outer()
fig.legend(["Train", "Test"])

In [None]:
# Confusion matrix
# CM[p, t] counts test samples with predicted class p and true class t.
CM = np.zeros((2, 2), dtype=int)

model.eval()
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    with torch.no_grad():
        output = model(data)
    # Rounding the probability gives the predicted class (0 or 1). Indexing
    # with the result of `output.round() == target` would instead tally
    # "was the prediction correct", which is not a confusion matrix.
    predicted = output.round().long().reshape(-1).cpu().numpy()
    actual = target.long().reshape(-1).cpu().numpy()
    # np.add.at accumulates correctly when the same (p, t) pair repeats.
    np.add.at(CM, (predicted, actual), 1)
print(CM)

In [None]:
# Display the count matrix CM as an image using matplotlib's "binary" colormap.
plt.matshow(CM, cmap="binary")

In [None]:
# Run the model on the first test batch.
images, labels = next(iter(test_loader))
data, target = images.to(device), labels.to(device)

with torch.no_grad():
    output = model(data)
# Threshold the probabilities at 0.5 to obtain predicted class indices.
# (Comparing with `target` here would store a correctness mask, not the
# predictions, under the name `predicted`.)
predicted = output.round().long()

In [None]:
# Show the model output for the batch next to the ground-truth labels.
print(predicted, labels, sep="\n")

In [None]:
# Visualise predictions on the first test batch: each title shows the
# predicted class, green when it matches the label and red otherwise.
images, labels = next(iter(test_loader))
data, target = images.to(device), labels.to(device)

with torch.no_grad():
    output = model(data)
# Rounding thresholds the probabilities at 0.5 and yields class indices.
# The titles need the predicted class itself, not a correctness mask.
predicted = output.round().long()

class_names = ['hotdog', 'not hotdog']
plt.figure(figsize=(20,20))

for i in range(len(images)):
    plt.subplot(int(len(images)/7) + 1,7,i+1)
    # imshow expects channels last, so reorder (C, H, W) -> (H, W, C).
    plt.imshow(images[i].permute(1, 2, 0).numpy())
    predicted_class = predicted[i].item()
    title_color = "green" if predicted_class == labels[i].item() else "red"
    plt.title(class_names[predicted_class], dict(color=title_color))
    plt.axis('off')

In [None]:
# Loss and number of correct predictions for the batch currently held in
# `data` / `target`. The accumulators are created here because `test_loss`,
# `test_correct` and `loss_fun` only exist inside train() and are not defined
# at notebook scope; the notebook-level `criterion` is used for the loss.
data, target = data.to(device), target.to(device)
target = target.float()
with torch.no_grad():
    output = model(data)
test_loss = [criterion(output, target).cpu().item()]
# Round to the predicted class and compare with the target exactly once.
predicted = output.round()
test_correct = (predicted == target).sum().cpu().item()