# <center> Problem Set 5 Bubbles </center>
<center> Spring 2024 </center>
<center> 3.C01/3.C51, 7.C01/7.C51, 10.C01/10.C51, 20.C01/20.C51 </center>

In [None]:
import os 
import glob 
import PIL
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import imageio
from skimage import io, color

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import torchvision.transforms as transforms
from torch import nn 
from torchvision.models import vgg16
import torch.optim as optim

## Part 1: Classifying  Steel  Surface  Defects

In [None]:
# Fetch the steel surface-defect image archive, then unpack it into the
# current working directory (tar -xf with no target directory).
!wget https://raw.githubusercontent.com/coleygroup/ML4MolEng/main/psets/ps5/data/neu_surface_defects_jpeg.tar.gz
!tar -xf neu_surface_defects_jpeg.tar.gz

### 1.1 (15 points)  Build Image Datasets and DataLoaders

Get all the image filepaths.

In [None]:
# Collect every .jpg path in the dataset folder. glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them: index-based access
# and seeded train/test splits then give the same result on every machine.
files = sorted(glob.glob(os.path.join('neu_surface_defects', "*.jpg")))

Visualize a random image.

In [None]:
# Open one image by a fixed position in `files`, print its path, and leave the
# image as the last expression so the notebook renders it as the cell output.
# NOTE: which image appears depends on the order of `files`.
idx = 30
img = Image.open(files[idx])
print(files[idx])
img

Your ImageDataset class.

In [None]:
# Lookup table from two-letter class code to integer class index; the index
# is simply the position of the code in the tuple below.
label_dict = {
    code: index
    for index, code in enumerate(('Cr', 'In', 'Pa', 'PS', 'RS', 'Sc'))
}

class ImageDataset(Dataset):
    """Dataset over a list of image file paths with an optional transform.

    Template class: the image/label loading inside ``__getitem__`` is left
    to be filled in, so both values are ``None`` as written.
    """

    def __init__(self, image_paths, transform=None):
        """Keep the file paths and the optional per-image transform.

        image_paths: sequence of image file paths (its length is the dataset size)
        transform: optional callable applied to the loaded image
        """
        self.paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of stored paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return the ``(img, label)`` pair for position ``idx``."""
        ################ Code #################

        # read images given file path
        img, label = None, None

        ################ Code #################

        # transform images (skipped when no transform was supplied)
        if self.transform:
            img = self.transform(img)

        return img, label

Transform your dataset, split the data, and define your Datasets and DataLoaders.

In [None]:
################ Code #################




### 1.2 (10 points) Understand the Model Architecture

Define and load a pretrained VGG16 model.

In [None]:
class VGG_fc1(nn.Module):
    """VGG16 truncated after the first fully connected layer.

    Keeps the convolutional feature extractor, the average-pooling layer and
    the first layer of the VGG16 classifier, so ``forward`` returns that
    layer's output as a feature vector.
    """

    def __init__(self, weights):
        """Build the truncated network.

        weights: forwarded unchanged to ``vgg16(weights=...)``.
        """
        super().__init__()
        # Build the backbone once and reuse its pieces. Building a separate
        # vgg16 per attribute loads the weights three times and, with random
        # initialisation, would combine layers from three unrelated models.
        backbone = vgg16(weights=weights)
        self.features = backbone.features  # convolutional layers
        self.avgpool = backbone.avgpool
        self.fc1 = backbone.classifier[0]  # first layer of classifier

    def forward(self, x):
        """Extract first fully connected feature vector"""
        # Apply convolutions
        x = self.features(x)
        # Apply pooling
        x = self.avgpool(x)
        # Flatten everything except the batch axis into one vector per sample
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x
    
# Instantiate the truncated VGG16 with the weights selected by the string
# "VGG16_Weights.DEFAULT".
model = VGG_fc1(weights = "VGG16_Weights.DEFAULT").eval() # turn model into evaluation mode

The model architecture.

In [None]:
# Print the module's text representation (its nested submodules).
print(model)

Choose an image from your training set and visualize 4 channels in each of layers 1, 5, and 10.

In [None]:
################ Code #################



What do you observe?

In [None]:
############### Answer #################



### 1.3 (20 points) Train a Classifier with Transfer Learning

Define a VGG-based transfer learning classifier.

In [None]:
################ Code #################



Train your classifier.

In [None]:
################ Code #################




Compute and plot a test confusion matrix.

In [None]:
################ Code #################



### 1.4 (grad, 5 points) Pre-training questions

Why do you need to resize images to specific shapes and normalize pixel values to specific values for each color channel?

In [None]:
############### Answer #################



What are the benefits of transfer learning versus training the entire stack (CNN + MLP) again? What are the potential limitations of this approach?

In [None]:
############### Answer #################

 

### 1.5 (grad, 20 points) Obtain Saliency Maps

Compute the saliency map for two images of each class.

In [None]:
################ Code #################




Comment on any pattern you observe in the saliency maps.

In [None]:
############### Answer #################



## Part 2: Image Segmentation

### 2.1 (15 points) Build Datasets and DataLoaders

Download and unzip data.

In [None]:
# Fetch the bubble-segmentation archive, then unpack it into the current
# working directory (tar -xf with no target directory).
!wget https://raw.githubusercontent.com/coleygroup/ML4MolEng/main/psets/ps5/data/bubble_segmentation_jpeg.tar.gz
!tar -xf bubble_segmentation_jpeg.tar.gz

Parse data from image filepaths.

In [None]:
# Keep only the entries whose path contains "bubbles" (the input images).
# glob returns matches in arbitrary order, so sort them: index-based access
# and seeded splits are then reproducible.
paths = sorted(path for path in glob.glob("bubble_segmentation/*") if "bubbles" in path)

def load_img(path):
    """Load an image and its mask as arrays divided by 255.

    The mask path is derived from ``path`` by replacing every occurrence of
    "bubbles" with "masks".
    Returns the pair ``(image_array, mask_array)``.
    """
    mask_path = path.replace("bubbles", "masks")
    # assumes 8-bit pixel data, so dividing by 255 maps values to [0, 1] - TODO confirm
    x, y = (np.array(Image.open(source)) / 255 for source in (path, mask_path))
    return x, y

Load one image.

In [None]:
# Load one image/mask pair by a fixed position in `paths` and show the two
# arrays side by side with a gray colormap.
idx = 20
cells, masks = load_img(paths[idx])
fig, axes = plt.subplots(1,2)
axes[0].imshow(cells, cmap='gray')  # left panel: first array returned by load_img
axes[1].imshow(masks, cmap='gray')  # right panel: second array returned by load_img

Your ImageDataset class.

In [None]:
################ Code #################




Split your data and load your DataLoaders.

In [None]:
################ Code #################




Is it necessary to apply random translation to your images?  Briefly justify your answer.

In [None]:
############### Answer #################



### 2.2 (20 points) Train a U-Net Model that Performs Image Segmentation

Implement Dice loss.

In [None]:
def dice_loss(pred, target):
    """Calculate Dice loss.

    Template stub: no computation is implemented here yet, so as written the
    function body consists of this docstring only and the call returns None.

    Parameters
    ----------
        pred:
            predictions from the model
        target:
            ground truth label
            (presumably the same shape as ``pred`` - confirm when implementing)
    """

################ Code #################




The U-Net Model.

In [None]:
class DownSampling(nn.Module):
    """Encoder stage: a ConvBlock followed by 2x2 max pooling with stride 2.

    ``name`` is accepted but not used.
    """

    def __init__(self, in_channels, out_channels, kernel_size, name=None):
        super().__init__()
        self.conv = ConvBlock(in_channels, out_channels, kernel_size)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Return ``(pooled, features)``.

        ``features`` is the ConvBlock output before pooling, returned so the
        caller can reuse it as a skip connection; ``pooled`` is its max-pooled
        version.
        """
        features = self.conv(x)
        return self.max_pool(features), features


class UpSampling(nn.Module):
    """Decoder stage: ConvBlock, stride-2 transposed convolution, additive skip.

    ``name`` is accepted but not used.
    """

    def __init__(self, in_channels, out_channels, kernel_size, name=None):
        super().__init__()
        self.conv = ConvBlock(in_channels, out_channels, kernel_size)
        self.conv_t = nn.ConvTranspose2d(
            out_channels,
            out_channels,
            kernel_size,
            padding=1,
            stride=2,
            output_padding=1,
        )

    def forward(self, x, skip):
        """Upsample ``x`` and add ``skip`` to the result.

        ``skip`` is added in place to the transposed-convolution output, so
        it has to be broadcastable to that output's shape.
        """
        upsampled = self.conv_t(self.conv(x))
        return upsampled.add_(skip)


class ConvBlock(nn.Module):
    """Two stacked Conv2d -> ReLU -> BatchNorm2d stages.

    Both convolutions share ``kernel_size``, ``padding`` and ``stride``. The
    first maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. ``name`` is accepted but not used.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=1, stride=1, name=None):
        super().__init__()

        def stage(channels_in):
            # One convolution, its activation, then batch normalisation
            # (normalisation is applied after the ReLU in this block).
            return [
                nn.Conv2d(channels_in, out_channels, kernel_size,
                          padding=padding, stride=stride),
                nn.ReLU(),
                nn.BatchNorm2d(out_channels),
            ]

        # Both stages are flattened into one Sequential, so the six layers
        # sit at indices 0..5 of ``conv_block``.
        self.conv_block = nn.Sequential(*stage(in_channels), *stage(out_channels))

    def forward(self, x):
        """Run ``x`` through both stages and return the result."""
        return self.conv_block(x)
    
    
class UNet(nn.Module):
    """U-Net style encoder/decoder that outputs a sigmoid-activated mask.

    Four DownSampling stages, one convolutional bridge and four UpSampling
    stages. Each decoder stage receives the pre-pooling feature map of the
    matching encoder stage as its skip input.
    """

    def __init__(self, num_kernel=8, kernel_size=3, dim=1, target_dim=1):
        """UNet

        Arguments:
            num_kernel: int
                number of kernels to use for the first layer; the count
                doubles at each deeper stage (up to 16x in the bridge)
            kernel_size: int
                kernel size passed to every encoder/decoder stage and to
                the bridge convolution
            dim: int
                number of color channels for input images
            target_dim: int
                number of channels for the output mask
        """

        super().__init__()

        self.num_kernel = num_kernel
        self.kernel_size = kernel_size
        self.dim = dim
        # Honour the constructor argument: it used to be overwritten with a
        # literal 1, so any other value was silently ignored.
        self.target_dim = target_dim

        # encode
        self.encode_1 = DownSampling(self.dim, num_kernel, kernel_size)
        self.encode_2 = DownSampling(num_kernel, num_kernel*2, kernel_size)
        self.encode_3 = DownSampling(num_kernel*2, num_kernel*4, kernel_size)
        self.encode_4 = DownSampling(num_kernel*4, num_kernel*8, kernel_size)

        # bridge
        # NOTE(review): padding is fixed at 1 here (and presumably inside the
        # stage blocks), so feature-map sizes likely only line up for
        # kernel_size=3 - confirm before using another kernel size.
        self.bridge = nn.Conv2d(num_kernel*8, num_kernel*16, kernel_size, padding=1, stride=1)

        # decode
        self.decode_4 = UpSampling(num_kernel*16, num_kernel*8, kernel_size)
        self.decode_3 = UpSampling(num_kernel*8, num_kernel*4, kernel_size)
        self.decode_2 = UpSampling(num_kernel*4, num_kernel*2, kernel_size)
        self.decode_1 = UpSampling(num_kernel*2, num_kernel, kernel_size)

        # 1x1 convolution mapping the last feature map to the mask channels
        self.segment = nn.Conv2d(num_kernel, self.target_dim, 1, padding=0, stride=1)
        self.activate = nn.Sigmoid()


    def forward(self, x):
        """Predict a mask for ``x``.

        A 4-D input is used as is. Any other input gets a channel axis
        inserted at position 1 before the network runs, and that axis is
        squeezed out of the prediction again afterwards.

        NOTE(review): with four stride-2 poolings, the spatial size
        presumably has to be divisible by 16 for the skip shapes to match -
        confirm.
        """

        has_channel = x.ndim == 4
        if not has_channel:
            x = x.unsqueeze(1)

        x, skip_1 = self.encode_1(x)
        x, skip_2 = self.encode_2(x)
        x, skip_3 = self.encode_3(x)
        x, skip_4 = self.encode_4(x)

        x = self.bridge(x)

        # Decode in reverse order, pairing each stage with its encoder skip.
        x = self.decode_4(x, skip_4)
        x = self.decode_3(x, skip_3)
        x = self.decode_2(x, skip_2)
        x = self.decode_1(x, skip_1)

        x = self.segment(x)

        pred = self.activate(x)

        if not has_channel:
            pred = pred.squeeze(1)

        return pred


    def args_dict(self):
        """model arguments to be saved
        """

        model_args = {'dim': self.dim,
                      'target_dim': self.target_dim,
                      'num_kernel' : self.num_kernel,
                      'kernel_size' : self.kernel_size}

        return model_args

Example model usage.

In [None]:
# Smoke test with default settings on a random 3-D input (no channel axis);
# UNet.forward inserts and removes the channel axis itself.
# NOTE: this rebinds `model`, replacing the VGG_fc1 instance created earlier.
model = UNet()
y = model(torch.randn(4, 256, 256))

# Expected to match the input shape, i.e. torch.Size([4, 256, 256]).
print(y.shape)

A function to plot a segmentation map.

In [None]:
def plot_seg(img, pred_seg, true_seg, mask_cutoff=0.5):
    """Show an image next to its predicted and ground-truth mask overlays.

    Inputs:
        img: original image, e.g. shape 256 x 256 (size-1 axes are squeezed)
        pred_seg: predicted mask, same spatial shape as ``img``
        true_seg: true mask, same spatial shape as ``img``
        mask_cutoff: mask values larger than this are overlaid on the image
    """
    gray, pred_mask, true_mask = (arr.squeeze() for arr in (img, pred_seg, true_seg))

    fig, axes = plt.subplots(1, 3, sharex='col', sharey='row')
    fig.set_size_inches((15, 15))
    for axis, title in zip(axes, ("Original Image", "Prediction", "Ground Truth")):
        axis.set_title(title)

    # Repeat the single-channel image along a new last axis to get 3 channels.
    rgb = np.stack([gray] * 3, axis=-1)

    def overlay(mask):
        # Threshold the mask, let label2rgb colour it on top of the image,
        # then clip the result to [0, 1] for display.
        blended = color.label2rgb(mask > mask_cutoff, rgb, colors=[(255, 0, 0)],
                                  alpha=0.0025, bg_label=0, bg_color=None)
        return np.clip(blended, 0, 1)

    axes[0].imshow(rgb)
    axes[1].imshow(overlay(pred_mask))
    axes[2].imshow(overlay(true_mask))

    plt.show()

Train your model.

In [None]:
################ Code #################




Show segmentation results for 3 images from the test dataset.

In [None]:
################ Code #################


