# Problem 1

## 1-1

In [1]:
from torchvision import transforms
from PIL import Image
import numpy as np
from scipy.ndimage import shift

In [2]:
import torch

### Load the image (1920*2880) and translate it.

In [2]:
# Read the image
img_path = 'P1/testimg.jpg'
original_img = Image.open(img_path)     # Image Class

# Translate the image
img_np = np.array(original_img)            # dtype: [[R, G, B]*2880]
translated_img_np = shift(img_np, shift=[0, -1, 0], mode='nearest')        # shift(shift=[height, width, RGB]): shift image along each axis

# Show the image
translated_img = Image.fromarray(translated_img_np)
translated_img.save('P1/translated_image.jpg')

Calculate losses

In [3]:
# l1: Mean Absolute Error
l1 = np.mean(np.abs(img_np - translated_img_np))
print(f'Loss 1 (MAE): {l1}')

# l2: Mean Square Error
l2 = np.mean(np.square(img_np - translated_img_np))
print(f'Loss 2 (MSE): {l2}')

Loss 1 (MAE): 109.51044071903935
Loss 2 (MSE): 37.839024944540895


## 1-2

Download VGG-16 and show structure.

In [5]:
import torch
from torchvision import models
from torchsummary import summary

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load VGG16
model = models.vgg16(pretrained=True).to(device)
print('VGG16 Model Summary:')
summary = summary(model, input_size=(3, 244, 244), device=device)
summary


VGG16 Model Summary:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 244, 244]           1,792
              ReLU-2         [-1, 64, 244, 244]               0
            Conv2d-3         [-1, 64, 244, 244]          36,928
              ReLU-4         [-1, 64, 244, 244]               0
         MaxPool2d-5         [-1, 64, 122, 122]               0
            Conv2d-6        [-1, 128, 122, 122]          73,856
              ReLU-7        [-1, 128, 122, 122]               0
            Conv2d-8        [-1, 128, 122, 122]         147,584
              ReLU-9        [-1, 128, 122, 122]               0
        MaxPool2d-10          [-1, 128, 61, 61]               0
           Conv2d-11          [-1, 256, 61, 61]         295,168
             ReLU-12          [-1, 256, 61, 61]               0
           Conv2d-13          [-1, 256, 61, 61]         590,080
             ReLU-

Image processing

In [6]:
vgg16_feature = models.vgg16(pretrained=True).features.eval().to(device)

# Define an image processing function
def preprocessIMG(image):
    img_tensor = transforms.Compose([
        transforms.Resize(256),     # Resize the image
        transforms.ToTensor(),      # Translate the image to tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])     # Image Normalization
    ])(image).unsqueeze(0)
    return img_tensor.to(device)


org_img_tensor = preprocessIMG(original_img)
trans_img_tensor = preprocessIMG(translated_img)

# Define an feature extraction function
def FeatureExtract(model, image, layers):
    features = {}
    x = image
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features

# Pick 4 different places for extracting features
a, b, c, d = '4', '9', '12', '30'
layers = {a: f'Layer {a}', b: f'Layer {b}', c: f'Layer {c}', d: f'Layer {d}'}

# Extract features in original image and translated image (type: dictionary)
org_feature = FeatureExtract(vgg16_feature, org_img_tensor, layers)
trans_feature = FeatureExtract(vgg16_feature, trans_img_tensor, layers)

# Define a function that compute the change in feature space
def Difference(org_feature, trans_feature):
    difference = {}
    for layer in org_feature:
        difference[layer] = torch.norm((org_feature[layer] - trans_feature[layer]), p=2).item()
    return difference

diff = Difference(org_feature, trans_feature)
print("Change of selected features:")
for key in diff:
    print(f"{key}: {diff[key]}")


Change of selected features:
Layer 4: 213.1699676513672
Layer 9: 279.9030456542969
Layer 12: 363.60198974609375
Layer 30: 11.8388032913208


## 1-3

In [7]:
def add_gaussian_noise(image, mean=0, var=1000):
    # Convert image to numpy array
    img_array = np.array(image)

    # Generate Gaussian noise
    row, col, ch = img_array.shape
    sigma = var ** 0.5
    gauss = np.random.normal(mean, sigma, (row, col, ch))
    noisy_image = img_array + gauss.reshape(row, col, ch)

    # Clip values to stay in the valid range [0, 255]
    noisy_image = np.clip(noisy_image, 0, 255).astype(np.uint8)
    
    # Convert back to PIL Image
    return Image.fromarray(noisy_image)

# Add noises to boh images
noise_org = add_gaussian_noise(original_img, mean=90)
noise_trans = add_gaussian_noise(translated_img, mean=90)

# Turn image to array
noise_org_np = np.array(noise_org)
noise_trans_np = np.array(noise_trans)

# # Save noise images
# noise_org.save('P1/noise_org.jpg')
# noise_trans.save('P1/noise_trans.jpg')

# Turn noise images to tensor
noise_org_tensor = preprocessIMG(noise_org)
noise_trans_tensor = preprocessIMG(noise_trans)

# Extract features from noise images and original images
num = '30'
layer_name = 'Conv5_3'
layer = {num: layer_name}
org_feature = FeatureExtract(vgg16_feature, org_img_tensor, layer)
trans_feature = FeatureExtract(vgg16_feature, trans_img_tensor, layer)
noise_org_feature = FeatureExtract(vgg16_feature, noise_org_tensor, layer)
noise_trans_feature = FeatureExtract(vgg16_feature, noise_trans_tensor, layer)

# Calculate the perceptual loss
diff_org = Difference(org_feature, noise_org_feature)
diff_trans = Difference(trans_feature, noise_trans_feature)

# Calculate l_1, l_2
l1_org = np.mean(np.abs(img_np - noise_org_np))
l1_trans = np.mean(np.abs(translated_img_np - noise_trans_np))
l2_org = np.mean(np.square(img_np - noise_org_np))
l2_trans = np.mean(np.square(translated_img_np - noise_trans_np))

# Print out l_1, l_2
print(f'l_1 of original image and noise image: {l1_org}')
print(f'l_1 of translated image and noise image: {l1_trans}', end='\n\n')
print(f'l_2 of original image and noise image: {l2_org}')
print(f'l_2 of translated image and noise image: {l2_trans}', end='\n\n')

# Print out perceptual loss
print(f'Perceptual loss of original image: {diff_org[layer_name]}')
print(f'Perceptual loss of translated image: {diff_trans[layer_name]}')

l_1 of original image and noise image: 175.97600507571374
l_1 of translated image and noise image: 175.99028826678241

l_2 of original image and noise image: 102.50470757378473
l_2 of translated image and noise image: 102.49764756944444

Perceptual loss of original image: 132.41009521484375
Perceptual loss of translated image: 134.11668395996094


## 1-4

In [None]:
from torchvision import transforms
from PIL import Image
import numpy as np

# Problem 3

### Create prototype datasetfrom ImageNet

In [None]:
import torch
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split, DataLoader
from PIL import Image, ImageDraw, ImageFont
from torch.utils.data import DataLoader, Subset
import random

# Define transformations
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# Path to ImageNet training set
path = '/data/datasets/community/deeplearning/imagenet/train'
# Load ImageNet training set
imagenet_data = datasets.ImageFolder(root=path, transform=transform)
# imagenet_data
print('Finished Loading imagefolder...')

# Sub-sample classes (e.g., 10 classes)
selected_classes = random.sample(range(1000), 10)  # Randomly select 10 classes

# Collect indices for samples belonging to selected classes
indices = [i for i, (_, class_idx) in enumerate(imagenet_data) if class_idx in selected_classes]

# Create a subset dataset for prototyping
subsampled_data = Subset(imagenet_data, indices)

# Split into train, val, and test sets
train_size = int(0.7 * len(subsampled_data))
val_size = int(0.15 * len(subsampled_data))
test_size = len(subsampled_data) - train_size - val_size
train_data, val_data, test_data = random_split(subsampled_data, [train_size, val_size, test_size])

# Loaders (for visualization and later model training)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Visualizing class distribution using PIL
def visualize_class_distribution_pil(loader, title):
    # Calculate class counts
    class_counts = torch.zeros(10)
    for _, labels in loader:
        for label in labels:
            class_counts[label] += 1

    # Create a blank image for the chart
    width, height = 400, 300
    image = Image.new('RGB', (width, height), 'white')
    draw = ImageDraw.Draw(image)

    # Set up bar chart parameters
    bar_width = 30
    spacing = 10
    max_height = 150  # Max height for the tallest bar

    # Normalize class counts for visualization
    max_count = class_counts.max().item()
    class_counts = class_counts / max_count * max_height

    # Draw bars
    for i, count in enumerate(class_counts):
        x1 = i * (bar_width + spacing) + 50
        y1 = height - 50 - count
        x2 = x1 + bar_width
        y2 = height - 50
        draw.rectangle([x1, y1, x2, y2], fill="blue")

        # Add class labels
        draw.text((x1 + bar_width // 4, height - 40), str(i), fill="black")

    # Add title and labels
    draw.text((width // 2 - 40, 10), title, fill="black")
    draw.text((width // 2 - 40, height - 20), 'Classes', fill="black")
    draw.text((10, height // 2), 'Number of Images', fill="black")

    # Show the image
    image.show()

# Visualize the class distribution using PIL
visualize_class_distribution_pil(train_loader, 'Train Set Distribution')
visualize_class_distribution_pil(val_loader, 'Validation Set Distribution')
visualize_class_distribution_pil(test_loader, 'Test Set Distribution')

Finished Loading imagefolder...


### Design 36-layer Resnet

In [16]:
import torch.nn as nn
import torch.nn.functional as F

# Define Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip connection for matching dimensions
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


# Define ResNet Structure
class CustomResNet36(nn.Module):  # Renamed class to avoid conflict
    def __init__(self, block, num_blocks, num_classes=10):
        super(CustomResNet36, self).__init__()
        self.in_channels = 64
        
        # Initial layer before residual blocks
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        
        # 4 layers with residual blocks
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        
        # Final fully connected layer
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# Function that build 36-layer Resnet
def ResNet36():
    return CustomResNet36(ResidualBlock, [4, 4, 4, 4])

net = ResNet36()
print(net)

CustomResNet36(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runni