# Exercise 2

**Please Note**: We updated the requirements.txt

Please install the new requirements before editing this exercise.

## Import packages

In [5]:
import os
from vll.utils.download import download_mnist
import numpy as np
import matplotlib.pyplot as plt

import skimage
import skimage.io

import torch
import torch.nn.functional as F
from torchvision import transforms

from models.mnist.simple_cnn import Net

## Task 1
(2 points)

In this task, you will learn some basic tensor operations using the PyTorch library.

Reference for torch: https://pytorch.org/docs/stable/torch.html

In [11]:
import numpy as np
import torch
# Create a numpy array that looks like this: [0, 1, 2, ..., 19]
arr = arr = np.arange(20)

# Convert the numpy array to a torch tensor
tensor = torch.tensor(arr,dtype=torch.float32)
print(tensor)

# Create a tensor that contains random numbers.
# It should have the same size like the numpy array.
# Multiply it with the previous tensor.
rand_tensor = torch.rand_like(tensor)
tensor *= rand_tensor
print(tensor)

# Create a tensor that contains only 1s.
# It should have the same size like the numpy array.
# Substract it from the previous tensor.
tensor -= torch.ones_like(tensor)
print(tensor)

# Get the 5th element using a index.
element = tensor[4]
print(element)

# Create a tensor that contains only 0s.
# It should have the same size like the numpy array.
# Multiply it with the previous tensor without any assignment (in place).
tensor *= 0
print(tensor)

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19.])
tensor([ 0.0000,  0.3274,  0.8937,  2.1220,  2.8266,  1.8614,  5.7118,  1.7785,
         1.2112,  1.1061,  5.9489,  6.0703, 11.8717,  1.6388,  1.3497,  7.3098,
         0.1905, 14.3708,  6.6088,  5.7197])
tensor([-1.0000, -0.6726, -0.1063,  1.1220,  1.8266,  0.8614,  4.7118,  0.7785,
         0.2112,  0.1061,  4.9489,  5.0703, 10.8717,  0.6388,  0.3497,  6.3098,
        -0.8095, 13.3708,  5.6088,  4.7197])
tensor(1.8266)
tensor([-0., -0., -0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., -0., 0., 0., 0.])


In [5]:
from skimage import io, color
import torch
from torchvision import transforms
from PIL import Image
import vll
# Load the image from the last exercise as RGB image.
image_path = "vll/data/data/pepo.jpg"
image = Image.open(image_path).convert("RGB")
# Convert the image to a tensor
transform = transforms.ToTensor()
image_tensor = transform(image)

# Print its shape
print(image_tensor.shape)

# Flatten the image
flattened_image = image_tensor.view(-1)
print(len(flattened_image))

# Add another dimension resulting in a 1x78643 tensor
expanded_image = flattened_image.unsqueeze(0)
print(expanded_image.shape)

# Revert the last action
reverted_image = expanded_image.squeeze(0)
print(reverted_image.shape)

# Reshape the tensor, so that it has the original 2D dimensions
reshaped_image = reverted_image.view(image_tensor.shape)
print(reshaped_image.shape)

# Calculate the sum, mean and max of the tensor
print(torch.sum(reshaped_image))
print(torch.mean(reshaped_image))
print(torch.max(reshaped_image))

torch.Size([3, 512, 512])
786432
torch.Size([1, 786432])
torch.Size([786432])
torch.Size([3, 512, 512])
tensor(358450.)
tensor(0.4558)
tensor(1.)


## Task 2
(2 points)

Use Autograd to perform operations on a tensor and output then gradients.

In [6]:
import torch
# Create a random 2x2 tensor which requires gradients
x = torch.rand((2, 2), requires_grad=True)
print("x:",x)

# Create another tensor by adding 2.0
y = x + 2.0
print("y:", y)

# Create a third tensor z = y^2
z = y ** 2
print("z:", z)

# Compute out as the mean of values in z
out = z.mean()
print("out:", out)

# Perform back propagation on out
out.backward()
# Print the gradients dout/dx
print("Gradients of out with respect to x:")
print(x.grad)

# Create a copy of y whithout gradients
y2 = y.detach().clone()
print("Does y2 require gradients?", y2.requires_grad)

# Perform the mean operation on z
# with gradients globally disabled
with torch.no_grad():
    out2 = z.mean()

print("out2 (with gradients globally disabled):", out2)

x: tensor([[0.4591, 0.6001],
        [0.2659, 0.6594]], requires_grad=True)
y: tensor([[2.4591, 2.6001],
        [2.2659, 2.6594]], grad_fn=<AddBackward0>)
z: tensor([[6.0472, 6.7604],
        [5.1342, 7.0725]], grad_fn=<PowBackward0>)
out: tensor(6.2536, grad_fn=<MeanBackward0>)
Gradients of out with respect to x:
tensor([[1.2295, 1.3000],
        [1.1329, 1.3297]])
Does y2 require gradients? False
out2 (with gradients globally disabled): tensor(6.2536)


## Task 3
(3 points)

Implement a Dataset class for MNIST.


In [13]:
# We first download the MNIST dataset
import os
from torchvision.datasets import MNIST
from torchvision import transforms

# Define the function to download MNIST dataset
def download_mnist(root="./models/mnist"):
    if not os.path.exists(root):
        os.makedirs(root)
    MNIST(root=root, train=True, download=True)
    MNIST(root=root, train=False, download=True)

# Define custom Dataset class for MNIST
class CustomMNISTDataset(torch.utils.data.Dataset):
    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.train = train
        self.transform = transform

        # Download MNIST dataset if not available
        if not os.path.exists(root):
            download_mnist(root)

        # Initialize MNIST dataset
        self.mnist_data = MNIST(root=root, train=train, transform=transform, download=False)

    def __len__(self):
        return len(self.mnist_data)

    def __getitem__(self, idx):
        image, label = self.mnist_data[idx]
        return image, label

# Usage example:
if __name__ == "__main__":
    # Download MNIST dataset if not already available
    download_mnist()

    # Define transformation for the dataset
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert PIL image to tensor
        transforms.Normalize((0.1307,), (0.3081,))  # Normalize the image
    ])

    # Instantiate the custom MNIST dataset class
    mnist_dataset = CustomMNISTDataset(root="./data", train=True, transform=transform)

    # Example usage:
    # mnist_loader = torch.utils.data.DataLoader(mnist_dataset, batch_size=64, shuffle=True)

download_mnist()         

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./models/mnist\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 11390718.17it/s]


Extracting ./models/mnist\MNIST\raw\train-images-idx3-ubyte.gz to ./models/mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./models/mnist\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 277810.58it/s]


Extracting ./models/mnist\MNIST\raw\train-labels-idx1-ubyte.gz to ./models/mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./models/mnist\MNIST\raw\t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./models/mnist\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1152614.76it/s]


Extracting ./models/mnist\MNIST\raw\t10k-images-idx3-ubyte.gz to ./models/mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./models/mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4534760.48it/s]


Extracting ./models/mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./models/mnist\MNIST\raw



In [18]:
class MNIST:
    """
    Dataset class for MNIST
    """

    def __init__(self, root, transform=None):
        """
        root -- path to either "training" or "testing"
        
        transform -- transform (from torchvision.transforms)
                     to be applied to the data
        """
        # save transforms
        self.transform = transform
        
        # TODO: create a list of all subdirectories (named like the classes) 
        #       within the dataset root
        self.classes = sorted(os.listdir(root))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        
        # TODO: create a list of paths to all images
        #       with the ground truth label
        self.images = []
        for cls in self.classes:
            cls_dir = os.path.join(root, cls)
            for image_name in os.listdir(cls_dir):
                image_path = os.path.join(cls_dir, image_name)
                self.images.append((image_path, self.class_to_idx[cls]))
    
    def __len__(self):
        """
        Returns the lenght of the dataset (number of images)
        """
        # TODO: return the length (number of images) of the dataset
        return len(self.images)

    def __getitem__(self, index):
        """
        Loads and returns one image as floating point numpy array
        
        index -- image index in [0, self.__len__() - 1]
        """
        # TODO: load the ith image as an numpy array (dtype=float32)
        image_path, label = self.images[index]
        image = Image.open(image_path).convert('L')  # Convert to grayscale
        image = np.array(image, dtype=np.float32)
        
        # TODO: apply transforms to the image (if there are any)
        if self.transform:
            image = self.transform(image)
        
        # TODO: return a tuple (transformed image, ground truth)
        return image, label




## Task 4
(3 points)

You can now load a pretrained neural network model we provide.
Your last task is to run the model on the MNIST test dataset, plot some example images with the predicted labels and compute the prediction accuracy.

In [30]:
import matplotlib.pyplot as plt
from models.mnist.simple_cnn import Net


def validate(model, data_loader):
    # TODO: Create a 10x10 grid of subplots
   
    
    model.eval()
    correct = 0 # count for correct predictions
    
    with torch.no_grad():
        for i, item in enumerate(data_loader):
            # TODO: unpack item into image and ground truth
            #       and run network on them
            image, label = item
            output = model(image)

            # TODO: get class with highest probability
            _, predicted = torch.max(output, 1)
            
            # TODO: check if prediction is correct
            #       and add it to correct count
            correct += (predicted == label).sum().item()
            
            # plot the first 100 images
            if i < 100:
                # TODO: compute position of ith image in the grid
                plt.subplot(10, 10, i + 1)
                row = i // 10
                col = i % 10
                # TODO: convert image tensor to numpy array
                #       and normalize to [0, 1]
                image_np = image.squeeze().numpy()
                image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())
                
                # TODO: make wrongly predicted images red
                if predicted != label:
                    plt.imshow(image_np, cmap='gray', vmin=0, vmax=1)
                else:
                    plt.imshow(image_np, cmap='gray')
                
                # TODO: disable axis and show image
                plt.axis('off')
                
                # TODO: show the predicted class next to each image
                plt.title(f'Predicted: {predicted.item()}, Actual: {label.item()}')
            
            elif i == 100:
                plt.show()
    
    # TODO: compute and print the prediction accuracy in percent
    accuracy = correct / len(data_loader.dataset)
    print(f"Prediction accuracy: {accuracy * 100:.2f}%")

# create a DataLoader using the implemented MNIST dataset class
data_loader = torch.utils.data.DataLoader(
    MNIST('./models/mnist',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=1, shuffle=True)

# create the neural network
model = Net()

# load the statedict from 'models/mnist/simple_cnn.pt'
model.load_state_dict(torch.load('./models/mnist/simplecnn.pt'))

# validate the model
validate(model, data_loader)

NotADirectoryError: [WinError 267] The directory name is invalid: './models/mnist\\simple_cnn.py'