In [1]:
!pip install torchviz
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable
from torchviz import make_dot
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import os
import matplotlib.pyplot as plt
import numpy as np # we always love numpy
import time



In [None]:
import numpy as np
import pandas as pd
import glob
# glob returns matches in an arbitrary, filesystem-dependent order. Sorting keeps
# the row order of labels.csv (and any positional lookup into it) stable across runs.
waldo_images = sorted(glob.glob("/content/drive/My Drive/Colab Notebooks/wheres-waldo/Hey-Waldo/64/waldo/*.jpg"))
not_waldo_images = sorted(glob.glob("/content/drive/My Drive/Colab Notebooks/wheres-waldo/Hey-Waldo/64/notwaldo/*.jpg"))

# One (image_path, label) record per file: 1 for the waldo folder, 0 for notwaldo.
labels = [(image, 1) for image in waldo_images]
labels += [(image, 0) for image in not_waldo_images]
a = np.asarray(labels)
# to_csv also writes the default integer index, so the image path ends up in
# CSV column 1 and the label in CSV column 2.
pd.DataFrame(a).to_csv("/content/drive/My Drive/Colab Notebooks/labels.csv")


In [None]:
import matplotlib.image as mpimg

# Spot-check a single row of the labels file: column 1 holds the image path,
# column 2 holds its label.
csv_frame = pd.read_csv("/content/drive/My Drive/Colab Notebooks/labels.csv")
n = 65
image_name, image_label = csv_frame.iloc[n, 1], csv_frame.iloc[n, 2]

print(image_name, image_label, sep="\n")

def show_label(image, label):
  """Draw `image` with pyplot's `imshow` and use `label` as the plot title."""
  plt.imshow(image)
  plt.title(label)

# Open a new figure, then read the image file at `image_name` and show it
# titled with `image_label`.
plt.figure()
show_label(mpimg.imread(image_name), image_label)

In [None]:
from skimage import io, transform

class WaldoDataset(Dataset):
    """Dataset of images listed in a CSV annotations file.

    Each CSV row is read positionally: column 1 is the image file name
    (joined onto `root_dir`) and column 2 is the label.  Items are returned
    as ``{'image': ..., 'label': ...}`` dicts, optionally passed through
    `transform`.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Load the annotations CSV and remember the image root and transform.

        csv_file:  path of the annotations CSV, read with pandas.
        root_dir:  directory that image file names are joined onto.
        transform: optional callable applied to every sample dict.
        """
        self.annotations_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of rows in the annotations file."""
        return self.annotations_frame.shape[0]

    def __getitem__(self, idx):
        """Return the sample for row `idx` (tensor indices are converted first)."""
        row = idx.tolist() if torch.is_tensor(idx) else idx

        # Image first, then label -- same lookup order as the CSV columns.
        image = io.imread(os.path.join(self.root_dir, self.annotations_frame.iloc[row, 1]))
        label = self.annotations_frame.iloc[row, 2].astype('long')

        sample = {'image': image, 'label': label}
        return self.transform(sample) if self.transform else sample

class ToTensor(object):
    """Sample transform: turn the ndarray image and its label into torch tensors."""

    def __call__(self, sample):
        """Return a new dict with a channel-first image tensor and a label tensor."""
        pixels = sample['image']
        target = sample['label']
        # Move the last axis to the front: (H, W, C) -> (C, H, W).
        channel_first = pixels.transpose(2, 0, 1)
        return {
            'image': torch.from_numpy(channel_first),
            'label': torch.tensor(target),
        }


In [None]:
# root_dir now carries the leading "/" used by every other Drive path in this
# notebook.  labels.csv stores absolute image paths, so os.path.join inside the
# dataset currently discards root_dir; the corrected value matters as soon as the
# CSV holds relative file names.
waldo_dataset = WaldoDataset(
    csv_file="/content/drive/My Drive/Colab Notebooks/labels.csv",
    root_dir="/content/drive/My Drive/Colab Notebooks/wheres-waldo/Hey-Waldo/64/",
    transform=ToTensor(),
)
fig = plt.figure()

print(waldo_dataset)

In [None]:
# Input layout is (channels, height, width); the class index doubles as the label.
image_dims = [3, 64, 64]
classes = ('notwaldo', 'waldo')


class MyCNN(nn.Module):
    """Small CNN classifier: conv -> max-pool -> ReLU -> linear -> ReLU -> linear.

    Every layer with learnable weights is registered in ``__init__`` so that
    ``self.parameters()`` exposes it to the optimizer.  The network returns raw
    class scores, not probabilities: ``nn.CrossEntropyLoss`` (see ``get_loss``)
    turns the scores into probabilities and computes the log loss in a single,
    numerically stable step.
    """

    def __init__(self):
        super().__init__()

        # image_dims[0] is 3: one input channel per colour (R, G, B).
        in_channels = image_dims[0]
        height, width = image_dims[1], image_dims[2]

        # 16 learned 5x5 filters; stride 1 with padding 2 keeps the spatial size.
        n_filters = 16
        self.conv1 = nn.Conv2d(in_channels, n_filters, kernel_size=5, stride=1, padding=2)

        # 2x2 max pooling with stride 2 halves both the height and the width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Total number of values leaving the pooling layer for one image:
        # one (height / 2) x (width / 2) map per filter.
        self.maxpool_output_size = int(n_filters * (height / 2) * (width / 2))

        # First fully connected ("fc") layer, as in a plain MLP.
        hidden_units = 64
        self.fc1 = nn.Linear(self.maxpool_output_size, hidden_units)

        # Non-linearity used between the layers.
        self.activation_func = torch.nn.ReLU()

        # Final fully connected layer: one output score per class.
        self.fc2 = nn.Linear(hidden_units, len(classes))

    def forward(self, x):
        """Run the network on the batch `x` and return one score per class."""
        feature_maps = self.activation_func(self.pool(self.conv1(x)))
        # Flatten each image's pooled feature maps into a single vector.
        flattened = feature_maps.view(-1, self.maxpool_output_size)
        hidden = self.activation_func(self.fc1(flattened))
        return self.fc2(hidden)

    def get_loss(self, learning_rate):
        """Return the ``(loss, optimizer)`` pair used to train this model.

        Kept as a method for convenience; it does not have to live on the class.
        The optimizer is Adam over ``self.parameters()``, i.e. every parameter of
        the layers registered in ``__init__``.
        """
        criterion = nn.CrossEntropyLoss()
        adam = optim.Adam(self.parameters(), lr=learning_rate)
        return criterion, adam

# Pick the device: use the GPU when one is available, otherwise fall back to the
# CPU so the notebook still runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Initialize the model
net = MyCNN()
# Move all of the model's parameters to the chosen device (a no-op on the CPU)
net.to(device)

In [None]:
"""print(type(waldo_dataset))
reduced_waldo_dataset = []
for i in range(100):
  reduced_waldo_dataset.append(waldo_dataset[i])"""


"""reduced_train_size = int(0.75 * len(reduced_waldo_dataset))
reduced_test_size = len(reduced_waldo_dataset) - reduced_train_size
reduced_waldo_dataset_train, reduced_waldo_dataset_test = torch.utils.data.random_split(reduced_waldo_dataset, [reduced_train_size, reduced_test_size])
"""


# NOTE(review): only 5% of the samples are used for training and the remaining
# 95% for testing -- confirm this ratio is intended and not a leftover from a
# quick experiment.
train_size = int(0.05 * len(waldo_dataset))
test_size = len(waldo_dataset) - train_size
waldo_dataset_train, waldo_dataset_test = torch.utils.data.random_split(waldo_dataset, [train_size, test_size])

# shuffle=True draws a new batch order every epoch; without it each epoch
# replays exactly the same sequence of batches.  The test loader stays in a
# fixed order because evaluation does not depend on it.
train_loader = DataLoader(waldo_dataset_train, batch_size=4, shuffle=True, num_workers=2)
test_loader = DataLoader(waldo_dataset_test, batch_size=4)

# Hyperparameters read by train_model.
learning_rate = 1e-2
n_epochs = 2

def train_model(net):
    """ Train the specified network.

        Reads the module-level `learning_rate`, `n_epochs`, `train_loader`,
        `test_loader` and `device`.  Each epoch makes one pass over
        `train_loader` followed by one evaluation pass over `test_loader`.

        Outputs a tuple with the following four elements
        train_hist_x: the x-values (batch number) that the training set was
            evaluated on.
        train_loss_hist: the loss values for the training set corresponding to
            the batch numbers returned in train_hist_x
        test_hist_x: the x-values (batch number) that the test set was
            evaluated on.
        test_loss_hist: the loss values for the test set corresponding to
            the batch numbers returned in test_hist_x
    """
    loss, optimizer = net.get_loss(learning_rate)
    # Define some parameters to keep track of metrics
    print_every = 50
    idx = 0  # global batch counter across all epochs
    train_hist_x = []
    train_loss_hist = []
    test_hist_x = []
    test_loss_hist = []

    # Remember the caller's train/eval mode so it can be restored at the end.
    was_training = net.training

    training_start_time = time.time()
    # Loop for n_epochs
    for epoch in range(n_epochs):
        net.train()
        running_loss = 0.0
        start_time = time.time()

        for i, data in enumerate(train_loader, 0):
            # Get inputs in the right form: float images, integer class labels
            inputs = data['image'].to(device, dtype=torch.float)
            labels = data['label'].to(device, dtype=torch.long)

            # Gradients accumulate in PyTorch, so reset them before each step
            optimizer.zero_grad()

            # Forward pass
            outputs = net(inputs)

            # Compute the loss and its gradient with respect to each parameter
            loss_size = loss(outputs, labels)
            loss_size.backward()

            # Update each parameter using the freshly computed gradients
            optimizer.step()

            # Update statistics
            running_loss += loss_size.item()

            # Report the mean training loss once every `print_every` batches
            if (i % print_every) == print_every - 1:
                print("Epoch {}, Iteration {}\t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, i + 1, running_loss / print_every, time.time() - start_time))
                # Record the value, then reset the running loss and the timer
                train_loss_hist.append(running_loss / print_every)
                train_hist_x.append(idx)
                running_loss = 0.0
                start_time = time.time()
            idx += 1

        # At the end of the epoch, do a pass on the test set.  No parameters are
        # updated here, so run in eval mode and skip building the autograd graph
        # (saves memory and time).
        net.eval()
        total_test_loss = 0
        with torch.no_grad():
            for data in test_loader:
                inputs = data['image'].to(device, dtype=torch.float)
                labels = data['label'].to(device, dtype=torch.long)

                # Forward pass
                test_outputs = net(inputs)
                test_loss_size = loss(test_outputs, labels)
                total_test_loss += test_loss_size.item()
        test_loss_hist.append(total_test_loss / len(test_loader))
        test_hist_x.append(idx)
        print("Validation loss = {:.2f}".format(
            total_test_loss / len(test_loader)))

    net.train(was_training)

    print("Training finished, took {:.2f}s".format(
        time.time() - training_start_time))
    return train_hist_x, train_loss_hist, test_hist_x, test_loss_hist

In [None]:
# Train `net` and keep the four loss-history lists returned by train_model;
# they are plotted further down.
train_hist_x, train_loss_hist, test_hist_x, test_loss_hist = train_model(net)

In [None]:
def get_accuracy(net, loader, device=None):
    """Return the fraction of samples in `loader` that `net` classifies correctly.

    net:    model whose output has one score per class along dimension 1.
    loader: iterable of batches shaped like {'image': tensor, 'label': tensor}.
    device: device the batches are moved to.  Defaults to the device of the
            model's first parameter (CPU for a model without parameters), so the
            function does not depend on a module-level `device` variable.

    Raises ZeroDivisionError when `loader` yields no samples.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    n_correct = 0
    n_total = 0

    # Evaluate in eval mode and restore the caller's mode afterwards.
    was_training = net.training
    net.eval()
    try:
        # Pure inference: no gradients needed, so do not build the autograd graph.
        with torch.no_grad():
            for data in loader:
                # Get inputs in the right form
                inputs = data['image'].to(device, dtype=torch.float)
                labels = data['label'].to(device, dtype=torch.long)

                # Forward pass; the predicted class is the highest-scoring output
                predictions = net(inputs).argmax(dim=1)
                n_correct += (predictions == labels).sum().item()
                n_total += labels.shape[0]
    finally:
        net.train(was_training)
    return n_correct / n_total

# Loss curves: training loss first, then validation loss, on the same axes.
for batch_numbers, loss_values in ((train_hist_x, train_loss_hist),
                                   (test_hist_x, test_loss_hist)):
    plt.plot(batch_numbers, loss_values)
plt.legend(['train loss', 'validation loss'])
plt.xlabel('Batch number')
plt.ylabel('Loss')
plt.show()

# Final accuracy on each split, evaluated in the same order as before.
for message, split_loader in (("Train accuracy is", train_loader),
                              ("Test accuracy is", test_loader)):
    print(message, get_accuracy(net, split_loader))