# Imports

In [None]:
# Install the necessary Python packages
!pip install numpy
!pip install tqdm
!pip install torch
!pip install torchvision
!pip install matplotlib
!pip install Pillow

In [None]:
import argparse, pdb
import numpy as np
import os
import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, TensorDataset, DataLoader
from PIL import Image
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from typing import Callable
import matplotlib.pyplot as plt

In [None]:
# Download image dataset from Google Drive
! pip install gdown
! gdown 1Oeto_5xV_l4zVJIi1fFDsTrzE3y8mKUK
! unzip csci699_p2_dataset.zip

# Section 1: Generate embedding dataset

In [None]:
# Shared settings: input resolution, class names, compute device and dataset locations
IMG_SIZE = 299
LABELS = ["cat", "dog", "neg"]
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

train_image_dir = "/content/datasets/train"
test_image_dir = "/content/datasets/test"

# Preprocessing applied to every image that ImageFolder loads from disk
transform = transforms.Compose(
    [
        # Resize to dimensions for Inception network
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        # Convert PIL Image to tensor
        transforms.ToTensor(),
        # Normalize each colour channel
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Usage details for ImageFolder:
# https://pytorch.org/vision/stable/generated/torchvision.datasets.ImageFolder.html
# NOTE(review): LABELS is assumed to list the classes in the same order as the indices
# ImageFolder assigns — verify against train_img_folder.classes.
train_img_folder = torchvision.datasets.ImageFolder(train_image_dir, transform=transform)

def to_numpy(tensor):
  """Return the contents of `tensor` as a NumPy array.

  The tensor is first detached from the autograd graph and moved to the CPU.
  """
  cpu_tensor = tensor.detach().cpu()
  return cpu_tensor.numpy()

# Accumulators filled by the code below: one entry per processed image (or batch of images)
train_embs, train_labels = [], []
print('number of train images: ', len(train_img_folder))

######### Your code starts here #########
# We want to create a dataset of Inception-v3 embeddings.
# Hint: You can use the pretrained PyTorch Inception model here (https://pytorch.org/hub/pytorch_vision_inception_v3/)
# Iterate through the training images, use the bottleneck layer of Inception to generate an
# embedding for each one, and append the results to the respective lists: train_embs and train_labels.
#
# Format expected by the code after this region:
#   * train_embs entries must be NumPy arrays (see to_numpy) that share every dimension except
#     the first, because they are joined along axis 0 with np.concatenate.
#   * train_labels must be a flat list of numeric class indices.

######### Your code ends here #########

# Stack all embeddings into one tensor and pair each row with its label
train_embs = torch.from_numpy(np.concatenate(train_embs))
# torch.Tensor(...) builds a tensor of the default (floating point) dtype; the training loop
# casts the labels with .long() before computing the loss.
train_labels = torch.Tensor(train_labels)
train_dataset = TensorDataset(train_embs, train_labels)

# Section 2: Train linear classifier

In [None]:
# Define some training hyperparameters, feel free to modify these
num_epochs = 20
lr = 1e-3
batch_size = 64
# Reshuffle the embedding dataset every epoch
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

######### Your code starts here #########
# We want to create a linear classifier which takes the embedding vectors as input.
# The training loop is already provided for you.

# 1. Define a new torch module for the classifier.
# 2. Define an appropriate optimizer from torch.optim.
# 3. Define the loss function for training the classifier.
#
# Requirements imposed by the training loop below:
#   * linear_classifier must live on `device`, because every batch is moved there.
#   * loss_fn is called as loss_fn(model_output, integer_class_labels).
# NOTE(review): despite the name `probs` used in the loop, a loss such as nn.CrossEntropyLoss
# expects raw, unnormalised scores — confirm against the loss you choose.

linear_classifier =
optimizer =
loss_fn =
######### Your code ends here #########


for epoch in range(num_epochs):
  # set model to training mode
  linear_classifier.train()
  # Sum of the per-batch losses for this epoch
  train_loss = 0

  for batch_idx, (embeddings, class_label) in enumerate(train_loader):
    embeddings = embeddings.to(device)
    class_label = class_label.to(device)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    probs = linear_classifier(embeddings)
    # Labels were stored as a floating point tensor, so cast them to integer class indices
    loss = loss_fn(probs, class_label.long())

    loss.backward()
    train_loss += loss.item()
    optimizer.step()

  # Mean of the per-batch losses (the last batch may be smaller than batch_size)
  print('Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader)))

# Section 3: Classify test images

In [None]:
# Preprocessing for the test images; identical to the pipeline used for the training images
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),            # Resize to dimensions for Inception network
    transforms.ToTensor(),                              # Convert PIL Image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

# Load the test dataset
test_dataset = torchvision.datasets.ImageFolder(root=test_image_dir, transform=transform)

######### Your code starts here #########
# Classify all images in the test image folder
# Calculate the accuracy of the model on all of the images.
# test_acc must be a fraction between 0 and 1: the report below multiplies it by 100.

test_acc =
######### Your code ends here ########

# Report how many images were evaluated and the accuracy as a whole-number percentage
print(f"Evaluated on {len(test_dataset)} samples.")
print(f"Accuracy: {test_acc * 100:.0f}%")

# Section 4: Object Detection

In [None]:
def compute_brute_force_classification(
    model: nn.Module,
    raw_img: np.ndarray,
    transforms,
    nH: int = 8,
    nW: int = 8
):
    '''
    This function returns the probabilities of each window.

    As provided, the body is a stub: the region marked below must assign
    `window_predictions` before the function can return a meaningful value.

    Inputs:
        model: Model which is used
        raw_img: H x W x 3 numpy array
        transforms: a sequence of transformations to apply to the image as preprocessing.
                    Inside this function the name shadows the `torchvision.transforms`
                    module imported at the top of the notebook.
        nH: number of windows in the vertical direction
        nW: number of windows in the horizontal direction
    Outputs:
        window_predictions: a (nH, nW, 3) np.array.
                            The last dim (size 3) is the probabilities
                            of each label (cat, dog, neg)
    '''
    ######### Your code starts here #########


    ######### Your code ends here #########

    # Until the region above defines a local `window_predictions`, this line either raises
    # NameError or picks up a notebook-level variable of the same name left by an earlier run.
    return window_predictions

In [None]:
def plot_classification(raw_img, classification_array):
    """Show per-window class probabilities next to the original image.

    Draws a 2x2 grid: panels 1-3 hold one heat map per channel of
    `classification_array`, titled with the matching entry of LABELS, and
    panel 4 holds `raw_img`. The figure is written to "detect.png" and then shown.

    Inputs:
        raw_img: image array; its first two dimensions determine the aspect ratio
        classification_array: 3-dimensional array whose last axis has at least 3 channels
    """
    n_rows, n_cols, _ = classification_array.shape
    img_height, img_width = raw_img.shape[0], raw_img.shape[1]
    aspect_ratio = float(img_height) / img_width
    plt.figure(figsize=(8, 8 * aspect_ratio))

    # Panels 1-3: one heat map per label, stretched so the grid has the image's proportions
    for label_idx in range(3):
        panel = plt.subplot(2, 2, label_idx + 1)
        plt.imshow(classification_array[:, :, label_idx], interpolation='none', cmap='jet')
        plt.title('%s probability' % LABELS[label_idx])
        panel.set_aspect(aspect_ratio * n_cols / n_rows)
        plt.colorbar()

    # Panel 4: the input image itself, for reference
    plt.subplot(2, 2, 4)
    plt.imshow(raw_img)
    plt.savefig("detect.png")
    plt.show()

In [None]:
# Get raw image using PIL
catswithdogs_dir = "/content/datasets/catswithdogs"
# Decode the image file into a NumPy array
raw_img = np.array(Image.open(os.path.join(catswithdogs_dir, "001211.jpg")))

# Define transformations
# Unlike the ImageFolder pipelines, this one starts from a NumPy array, hence ToPILImage first.
transform = transforms.Compose([
    transforms.ToPILImage(),                  # Numpy array to PIL Image first
    transforms.Resize((IMG_SIZE, IMG_SIZE)),  # Resize to dimensions for Inception network
    transforms.ToTensor(),                    # Convert PIL Image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

# nH and nW are not passed, so the function's defaults (8 x 8 windows) are used
window_predictions = compute_brute_force_classification(
    model= # TODO: fill this in, and put a comma after the value (the one here is inside the comment)
    raw_img=raw_img,
    transforms=transform,
)

# Plot one probability map per class next to the image; also saves detect.png
plot_classification(raw_img, window_predictions)

In [None]:
def compute_convolutional_KxK_classification(
    model: nn.Module,
    raw_img: np.ndarray,
    transforms
):
    """
    Computes probabilities for each window based on the convolution layer of Inception

    As provided, the body is a stub: the region marked below must assign
    `predictions` before the function can return a meaningful value.

    Inputs:
      model: model which is used
      raw_img: numpy array of image
      transforms: callable preprocessing pipeline applied to raw_img. Inside this
                  function the name shadows the `torchvision.transforms` module
                  imported at the top of the notebook.

    Outputs:
      predictions: a (K, K, 3) np.array.
    """
    # Apply the pipeline supplied by the caller rather than whatever the notebook-level
    # `transform` variable happens to hold when the function is called.
    transformed_img = transforms(raw_img)

    ######### Your code starts here #########

    ######### Your code ends here #########

    return predictions

In [None]:
# Get raw image, do not apply transform
# (same image file as in the brute-force detection cell)
catswithdogs_dir = "/content/datasets/catswithdogs"
raw_img = np.array(Image.open(os.path.join(catswithdogs_dir, "001211.jpg")))

# Preprocessing pipeline handed to the function below; it starts from a NumPy array
transform = transforms.Compose([
    transforms.ToPILImage(),                  # Numpy array to PIL Image first
    transforms.Resize((IMG_SIZE, IMG_SIZE)),  # Resize to dimensions for Inception network
    transforms.ToTensor(),                    # Convert PIL Image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

window_predictions = compute_convolutional_KxK_classification(
    model= # TODO: fill this in, and put a comma after the value (the one here is inside the comment)
    raw_img=raw_img,
    transforms=transform,
)

# Plot one probability map per class next to the image; also saves detect.png
plot_classification(raw_img, window_predictions)

# Section 5: Saliency Mapping

In [None]:
def compute_and_plot_saliency(
    model: nn.Module,
    raw_img: np.ndarray
):
    """
    This function computes and plots the saliency plot.
    You need to compute the matrix M detailed in section 3.1 in
    K. Simonyan, A. Vedaldi, and A. Zisserman,
    "Deep inside convolutional networks: Visualising image classification models and saliency maps,"
    2013, Available at https://arxiv.org/abs/1312.6034.

    As provided, the body is a stub: the region marked below must define
      M:         the saliency map, in a form accepted by plt.imshow
      top_class: the index into LABELS of the predicted class

    Inputs:
      model: model which is used
      raw_img: numpy array of image
    """
    ######### Your code starts here #########

    ######### Your code ends here #########

    # Code to save the saliency plot
    # Top panel: saliency map; bottom panel: the input image. Also written to saliency.png.
    plt.subplot(2, 1, 1)
    plt.imshow(M)
    plt.title('Saliency with respect to predicted class %s' % LABELS[top_class])
    plt.subplot(2, 1, 2)
    plt.imshow(raw_img)
    plt.savefig("saliency.png")
    plt.show()


In [None]:
# Get raw image, do not apply transform
# (same image file as in the detection cells)
catswithdogs_dir = "/content/datasets/catswithdogs"
raw_img = np.array(Image.open(os.path.join(catswithdogs_dir, "001211.jpg")))

# Displays the saliency map above the image and saves the figure as saliency.png
compute_and_plot_saliency(
    model = # TODO: fill this in, and put a comma after the value (the one here is inside the comment)
    raw_img = raw_img
)