# UMC 301: Applied Data Science and Artificial Intelligence
## Assignment 2

### Submission instructions:

1.   The assignment is to be submitted in ONE single notebook.
2.   Submit the .ipynb file and pdf of the same with all cells open through this Teams Assignment.
3. If your IISc email ID is < username >@iisc.ac.in, then name the file < username >_Assgn_2. E.g. jonathan_Assgn_2 for email ID jonathan@iisc.ac.in.
4. Before submission, execute the ’Restart session and run all’ option from the Runtime/Kernel tab. Verify that there are no errors and that you are getting the output you expect.
5. Use the dataset: https://www.dropbox.com/scl/fi/7m0pt1dkmwbkr3byv5r2j/face_images.zip?rlkey=r2gqsdvuyvpqqk4a7bjd9kcvv&st=nj7wl35d&dl=1

**Data Files**

**face_images** : All the face images, cropped and aligned. <br>
**face_image_bbox.csv**: Bounding box information for each image. "x_1" and "y_1" represent the upper left point coordinate of bounding box. "width" and "height" represent the width and height of bounding box. <br>
**face_image_attr.csv**: Attribute labels for each image. There are 40 attributes. "1" represents positive while "-1" represents negative.



# Question 1
Train a ResNet18 model on the above dataset to classify the "Arched_Eyebrows" attribute. Apply appropriate data pre-processing [you may use fewer data points]. Evaluate the model by calculating precision, accuracy, F1 score, ROC curve, confusion matrix, and other relevant metrics. Run experiments with different hyperparameters and discuss the results. <br>
[Note: Marks will be given based on different experiments and discussion]

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import pandas as pd
import torch.optim as optim
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, precision_recall_curve
from matplotlib import pyplot as plt
import seaborn as sns


In [2]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split

In [3]:
# Load an image
from PIL import Image
from torchvision import transforms

def load_image(image_path, transform=None, max_size=None, shape=None):
    """Open an image file and optionally rescale, resize, and transform it.

    Args:
        image_path: Path (or file object) handed to ``Image.open``.
        transform: Optional callable applied last. Its result must support
            ``.unsqueeze(0)``, which is used to add a leading batch dimension.
        max_size: If given, the image is scaled (aspect ratio preserved) so
            that its longer side becomes ``max_size`` pixels.
        shape: If given, the image is resized to exactly this size. It is
            applied after ``max_size`` and passed straight to ``Image.resize``.

    Returns:
        The (possibly resized) PIL image, or the batched output of
        ``transform`` when a transform is supplied.
    """
    image = Image.open(image_path)

    if max_size:
        scale = max_size / max(image.size)
        # The scaled dimensions are floats, but resize() needs whole pixel
        # counts; round them and never let a side collapse to zero.
        size = tuple(max(1, round(dim * scale)) for dim in image.size)
        image = image.resize(size)

    if shape:
        image = image.resize(shape)

    if transform:
        image = transform(image).unsqueeze(0)

    return image

# Define a custom Dataset Class by extending the Dataset Class

In [14]:
x = pd.read_csv('face_images/face_image_eval_partition.csv')

In [15]:
# Re-split the data into custom partitions by row label.
# .loc slices are label-based and include BOTH end points, so the bounds below
# are written as non-overlapping closed ranges:
#   rows 0..59999       -> 0
#   rows 60000..109999  -> 1
#   rows 110000..end    -> 2
x.loc[:59999, 'partition'] = 0
x.loc[60000:109999, 'partition'] = 1
x.loc[110000:, 'partition'] = 2
x = x.reset_index(drop=True)

In [16]:
x.to_csv('face_images/custom_partition.csv', index=False)


In [17]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Images of one partition, labelled with the 'Arched_Eyebrows' attribute."""

    def __init__(self, csv_file_split, csv_attr, transform=None, split=1,
                 img_dir='face_images/face_images/img_align_celeba/'):
        """
        Args:
            csv_file_split (str): Path to the CSV whose first column holds the
                image file names and whose 'partition' column holds the split id.
            csv_attr (str): Path to the attribute CSV; it must contain the
                'image_id' and 'Arched_Eyebrows' columns.
            transform (callable, optional): Optional transform to be applied on
                each loaded image.
            split (int): Partition id to keep (rows where 'partition' == split).
            img_dir (str): Directory that contains the image files.
        """
        self.data = pd.read_csv(csv_file_split)
        self.data = self.data[self.data['partition'] == split]  # Filter by the desired split
        self.transform = transform
        self.img_dir = img_dir
        self.eyebrows_arched = pd.read_csv(csv_attr)

        # Build the image_id -> raw label table once. Filtering the whole
        # attribute frame inside __getitem__ costs a full scan per sample.
        # drop_duplicates keeps the first occurrence, like the old `.values[0]`.
        unique_attr = self.eyebrows_arched.drop_duplicates(subset='image_id')
        self._labels = dict(zip(unique_attr['image_id'], unique_attr['Arched_Eyebrows']))

    def __len__(self):
        """Return the number of images in the selected partition."""
        return len(self.data)

    def _label_for(self, imgid):
        """Return the label for `imgid`, with the raw value -1 mapped to 0.

        Raises:
            KeyError: If `imgid` has no entry in the attribute CSV.
        """
        try:
            raw = self._labels[imgid]
        except KeyError:
            raise KeyError(f"No 'Arched_Eyebrows' label found for image {imgid!r}") from None
        return 0 if raw == -1 else int(raw)

    def __getitem__(self, idx):
        """Return `(image, label)` for the idx-th row of the selected partition."""
        imgid = self.data.iloc[idx, 0]
        label = self._label_for(imgid)
        image = Image.open(os.path.join(self.img_dir, imgid)).convert('RGB')  # Load image
        if self.transform:
            image = self.transform(image)

        return image, label


# Define our transform and DataLoaders

In [18]:
# Preprocessing shared by every split: resize to 224x224, then convert the PIL
# image to a tensor.
# NOTE(review): no mean/std normalisation is applied even though the backbone
# is loaded with pretrained weights -- confirm this is intended.
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)


In [19]:
# One dataset per custom partition id: 0 -> Train, 1 -> Val, 2 -> Test.
Train, Val, Test = (
    CustomDataset(
        csv_file_split='face_images/custom_partition.csv',
        csv_attr='face_images/face_image_attr.csv',
        transform=transform,
        split=partition_id,
    )
    for partition_id in (0, 1, 2)
)

In [20]:
Train.data

Unnamed: 0,image_id,partition
0,000001.jpg,0
1,000002.jpg,0
2,000003.jpg,0
3,000004.jpg,0
4,000005.jpg,0
...,...,...
59995,059996.jpg,0
59996,059997.jpg,0
59997,059998.jpg,0
59998,059999.jpg,0


In [21]:
# Create data loaders
# The test loader yields one image per batch, in dataset order (no shuffling).
test_loader = DataLoader(Test, batch_size=1, shuffle=False)

# Next, load pretrained weights and remove the default fully connected layer and add in the fully connected layer that we require

In [22]:
# ImageNet-pretrained ResNet18 backbone. The `weights=` argument replaces the
# deprecated `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag
# used to select.
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Modify the final layer to match binary classification
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1),  # Single output for binary classification
    nn.Sigmoid(),  # Squash the logit into a probability in (0, 1)
)
resnet18 = resnet18.cuda()  # Requires a CUDA device; there is no CPU fallback here
criterion = nn.BCELoss()  # Binary cross-entropy on probabilities (hence the Sigmoid above)



In [23]:
from tqdm import tqdm
# Hyperparameter grid explored by the training loop: optimizer classes keyed by
# name, candidate learning rates, and candidate batch sizes.
optimizers = dict(Adam=optim.Adam, SGD=optim.SGD, AdamW=optim.AdamW)

learning_rates = [1e-4, 1e-3, 5e-4]
batch_sizes = [16, 32, 64]

# This is our training loop

In [24]:
# Nested results store, filled by the training loop as results[opt][lr][bs].
results = {}

In [25]:
# Hyperparameter sweep: train one model per (optimizer, learning rate, batch
# size) combination, record per-epoch train/validation loss, and save the
# weights of the epoch with the lowest validation loss.
import copy
import os

num_epochs = 20

# torch.save does not create missing directories, so make sure the target
# exists before spending hours on training.
os.makedirs('./output', exist_ok=True)

# Every configuration must start from the same weights. Without this reset each
# run would keep training the model left behind by the previous run, and the
# configurations could not be compared.
initial_weights = copy.deepcopy(resnet18.state_dict())

for opt in optimizers:
    results[opt] = dict()
    for lr in learning_rates:
        results[opt][lr] = dict()
        for bs in batch_sizes:
            run = {
                'train_loss': [],  # Store training loss for each epoch
                'val_loss': [],    # Store validation loss for each epoch
                'best_val_loss': float('inf'),  # Track the best validation loss
                'best_weights': None  # Store the best model weights
            }
            results[opt][lr][bs] = run

            resnet18.load_state_dict(initial_weights)
            optimizer = optimizers[opt](resnet18.parameters(), lr=lr)

            train_loader = DataLoader(Train, batch_size=bs, shuffle=True)
            val_loader = DataLoader(Val, batch_size=bs, shuffle=False)

            for epoch in range(num_epochs):
                resnet18.train()
                running_loss = 0.0

                # Training phase
                for images, labels in tqdm(train_loader, total=len(train_loader)):
                    images, labels = images.cuda(), labels.float().cuda()

                    optimizer.zero_grad()
                    outputs = resnet18(images)
                    loss = criterion(outputs, labels.unsqueeze(1))  # Ensure correct shape for BCE loss
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()

                # Compute and store average training loss for this epoch
                avg_train_loss = running_loss / len(train_loader)
                run['train_loss'].append(avg_train_loss)

                # Validation phase
                resnet18.eval()
                val_running_loss = 0.0

                with torch.no_grad():  # No gradient computation in evaluation mode
                    for images, labels in tqdm(val_loader, total=len(val_loader), colour='red'):
                        images, labels = images.cuda(), labels.float().cuda()
                        outputs = resnet18(images)
                        val_loss = criterion(outputs, labels.unsqueeze(1))  # Validation loss
                        val_running_loss += val_loss.item()

                # Compute and store average validation loss for this epoch
                avg_val_loss = val_running_loss / len(val_loader)
                run['val_loss'].append(avg_val_loss)

                # Check if this is the best validation loss so far
                if avg_val_loss < run['best_val_loss']:
                    run['best_val_loss'] = avg_val_loss
                    # state_dict().copy() is only a shallow copy: its tensors are
                    # the live parameters and would be overwritten by later
                    # epochs. Clone them (onto the CPU, so the snapshots of all
                    # configurations do not pile up in GPU memory).
                    run['best_weights'] = {
                        name: tensor.detach().cpu().clone()
                        for name, tensor in resnet18.state_dict().items()
                    }

                print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss}, Val Loss: {avg_val_loss} , lr={lr}, bs={bs}, opt={opt}")

            # Save the best model weights of this configuration to a separate file
            torch.save(run['best_weights'],
                       f'./output/best_weights-opt_{opt}-lr_{lr}-bs_{bs}.pt')


 41%|████      | 1533/3750 [03:55<05:41,  6.50it/s]


KeyboardInterrupt: 

In [14]:
# Checkpoint the current model, loss, and optimizer state dicts in one file.
torch.save(
    dict(
        model_state_dict=resnet18.state_dict(),
        criterion_state_dict=criterion.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    ),
    './output/resnet18-checkpoint.pt',
)

In [None]:
# Save the whole module object (not just its state_dict) so it can be
# reloaded later with a single torch.load call.
torch.save(resnet18, './output/resnet18-model.pt')

# Used to resume the work by loading a presaved model

In [11]:
# The file holds a fully pickled model object rather than a state_dict, which
# is why weights_only=False is passed.
# NOTE(review): unpickling can execute arbitrary code -- only load checkpoint
# files that you created yourself.
model2 = torch.load('./output/resnet18-model.pt', weights_only=False)

In [12]:
model2 = model2.cuda()

# Next, let's get the relevant metrics

In [None]:
# Evaluate model2 on the test split: collect labels, hard predictions and
# probabilities, then report the confusion matrix, scalar metrics and ROC curve.
model2.eval()

all_labels = []
all_preds = []
all_probs = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, total=len(test_loader), colour='green'):  # Use test_loader here
        outputs = model2(inputs.cuda())
        # The model emits one sigmoid probability per sample, shaped (batch, 1).
        # Flattening and thresholding the whole batch keeps this loop correct
        # for any batch size, not only batch_size=1.
        probs = outputs.reshape(-1).cpu().numpy()

        all_labels.extend(labels.numpy())
        all_preds.extend((probs > 0.5).astype(int))
        all_probs.extend(probs)

# Confusion Matrix
# Named conf_mat (not `cm`) so it cannot shadow `matplotlib.cm`, which is
# imported under that name elsewhere in this notebook.
conf_mat = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(6,4))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Arched', 'Arched'],
            yticklabels=['Not Arched', 'Arched'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix on Test Set')
plt.show()

# Metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
fpr, tpr, thresholds = roc_curve(all_labels, all_probs)
roc_auc = auc(fpr, tpr)

print(f'Test Accuracy: {accuracy:.4f}')
print(f'Test Precision: {precision:.4f}')
print(f'Test Recall: {recall:.4f}')
print(f'Test F1 Score: {f1:.4f}')
print(f'Test ROC AUC: {roc_auc:.4f}')

# ROC Curve
plt.figure(figsize=(6,4))
plt.plot(fpr, tpr, color='darkorange', lw=2,
         label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) on Test Set')
plt.legend(loc="lower right")
plt.show()


In [None]:
# Precision-recall curve on the test set.
# precision_recall_curve returns (precision, recall, thresholds); recall goes
# on the x-axis and precision on the y-axis.
pr_curve = precision_recall_curve(all_labels, all_probs)
pr_precision, pr_recall = pr_curve[0], pr_curve[1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(pr_recall, pr_precision, color='darkorange', lw=2, label='PR curve')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve on Test Set')
ax.legend(loc="lower right")
plt.show()


# Question 2
Capture a photo of yourself, similar to the dataset mentioned above, and *visualize the activation maps* and *filters* at various layers of the model trained in Question 1. Then, conduct an occlusion experiment to examine the impact on the model's predictions. [I hope you won't copy this 😀]








In [335]:
# Load my photo
# load_image applies `transform` and then adds a leading batch dimension
# (unsqueeze(0)), so `myphoto` is already a one-image batch.
myphoto = load_image('sirjanh.jpg', transform=transform)

In [None]:
# myphoto[0] drops the batch dimension; permute(1, 2, 0) moves the first axis
# last so imshow receives a channels-last image (assumes the channels-first
# layout produced by ToTensor).
plt.imshow(myphoto[0].permute(1, 2, 0))

In [337]:
# Put a GPU copy of the photo in myphoto2 and make sure the model is on the
# GPU as well, so both live on the same device for inference.
myphoto2=myphoto.cuda()
model2 = model2.cuda()

In [None]:
# Classify my photo: print the raw sigmoid output, then the class obtained by
# thresholding it at 0.5.
model2.eval()
with torch.no_grad():
    output = model2(myphoto2)

print(output)
print('Arched' if output > 0.5 else 'Not Arched')

In [339]:
# Record the output of every convolution layer during one forward pass of my
# photo. `activations` ends up with one tensor per nn.Conv2d, in call order.
activations = []


def hook_fn(module, input, output):
    """Forward hook: store the layer's output, detached from autograd."""
    activations.append(output.detach())


hooks = [
    module.register_forward_hook(hook_fn)
    for module in model2.modules()
    if isinstance(module, nn.Conv2d)
]

try:
    with torch.no_grad():
        _ = model2(myphoto2)
finally:
    # Always remove the hooks, even if the forward pass fails; otherwise they
    # stay registered and every later forward pass keeps appending tensors.
    for hk in hooks:
        hk.remove()


In [340]:
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
import math

def viz_activation_conv(activations, max_columns=8):
    """Plot every feature map of each recorded activation as a grid of images.

    One figure is drawn per activation tensor.

    Args:
        activations: Iterable of tensors shaped (channels, H, W) or
            (batch, channels, H, W). For batched tensors only the first sample
            is drawn.
        max_columns: Maximum number of feature maps per grid row.

    Raises:
        ValueError: If an activation is neither 3- nor 4-dimensional.
    """
    for lay in activations:
        # Drop only the batch dimension. A bare torch.squeeze() would also
        # collapse a single channel or a 1x1 spatial map and break the
        # (maps, H, W) layout expected below.
        if lay.dim() == 4:
            feature_maps = lay[0]
        elif lay.dim() == 3:
            feature_maps = lay
        else:
            raise ValueError("Unexpected shape for activation layer")

        # Move the whole layer to host memory once instead of once per map.
        feature_maps = feature_maps.detach().cpu().numpy()
        num_maps, height, width = feature_maps.shape
        if num_maps == 0:
            continue  # nothing to draw; also avoids dividing by zero below

        # Calculate adaptive figure size
        aspect_ratio = width / height
        fig_width = min(20, 2 * max_columns)
        fig_height = fig_width / aspect_ratio / (max_columns / math.ceil(num_maps / max_columns))

        # Calculate number of rows and columns
        num_columns = min(num_maps, max_columns)
        num_rows = math.ceil(num_maps / num_columns)

        print(f"Plotting {num_maps} maps in {num_rows} rows and {num_columns} columns")

        fig = plt.figure(figsize=(fig_width, fig_height))
        gs = fig.add_gridspec(num_rows, num_columns)

        for i, fmap in enumerate(feature_maps):
            ax = fig.add_subplot(gs[i // num_columns, i % num_columns])
            ax.imshow(fmap, cmap='viridis', aspect='auto')
            ax.axis('off')

        plt.tight_layout(pad=0.5)
        plt.show()

In [None]:
viz_activation_conv(activations)

In [176]:
import math
def plot_conv_filters(model, figsize=(12, 12), max_filters=64, max_subplots=64):
    """Plot the filters of every Conv2d layer in `model`, one figure per layer.

    Only the first input channel of each filter is drawn.

    Args:
        model: Module whose ``modules()`` are searched for ``torch.nn.Conv2d``.
        figsize: Figure size passed to ``plt.subplots``.
        max_filters: Upper bound on the number of filters drawn per layer.
        max_subplots: Upper bound on the number of grid cells per figure; the
            grid is square with side ``ceil(sqrt(...))``.
    """
    conv_layers = [module for module in model.modules() if isinstance(module, torch.nn.Conv2d)]

    for idx, conv_layer in enumerate(conv_layers):
        weights = conv_layer.weight.detach().cpu().numpy()

        # Limit the number of filters to plot
        num_filters_to_plot = min(weights.shape[0], max_filters)

        # Calculate grid size
        grid_size = min(math.ceil(math.sqrt(num_filters_to_plot)), math.ceil(math.sqrt(max_subplots)))
        if grid_size == 0:
            continue  # a limit of 0 leaves nothing to draw for this layer

        # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid, where
        # plt.subplots would otherwise return a single Axes object.
        fig, axes = plt.subplots(grid_size, grid_size, figsize=figsize, squeeze=False)
        fig.suptitle(f'Filters in Convolutional Layer {idx+1}', fontsize=16)

        # axes.flat walks the grid row by row, matching (i // grid, i % grid).
        for i, ax in enumerate(axes.flat):
            ax.axis('off')
            if i < num_filters_to_plot:
                # For filters with multiple input channels, we'll visualize the first channel
                ax.imshow(weights[i, 0])

        plt.tight_layout()
        plt.subplots_adjust(top=0.9)  # Adjust for the suptitle
        plt.show()


In [None]:
plot_conv_filters(model2)

# Occlusion Experiment

In [None]:
model2(myphoto2)

In [366]:
import numpy as np

In [343]:
# CPU copy of the photo for a quick check of the prediction on the CPU.
temp = myphoto2.clone().cpu()
# NOTE: nn.Module.cpu() moves the module in place and returns the same object,
# so model_req is model2 itself -- model2 now lives on the CPU too.
model_req = model2.cpu()

In [None]:
1-model_req(temp)

In [495]:
def occlusion(model, image, label, occ_size = 50, occ_stride = 6, occ_pixel = 0.4,
              occ_height = None, progress = True):
    """Slide an occluding patch over `image` and record the model's confidence.

    For each patch position the patch area is overwritten with `occ_pixel`, the
    model is run on the modified image, and the probability assigned to `label`
    is stored in the heatmap cell for that position.

    Args:
        model: Callable returning one sigmoid probability (of class 1) for a
            single-image batch.
        image: Image batch laid out as (N, C, H, W).
        label: Class whose probability is recorded: 1 stores p, any other value
            stores 1 - p.
        occ_size: Patch width in pixels.
        occ_stride: Step, in pixels, between patch positions along both axes.
        occ_pixel: Value written into the occluded area.
        occ_height: Patch height in pixels; defaults to ``occ_size // 8``.
        progress: Show a tqdm progress bar over the image rows.

    Returns:
        torch.Tensor of shape (ceil(H / occ_stride), ceil(W / occ_stride));
        row/column order matches the image's row/column order.
    """
    if occ_height is None:
        occ_height = int(occ_size // 8)

    # Tensors are (N, C, H, W): height (rows) comes before width (columns).
    height, width = image.shape[-2], image.shape[-1]
    print(image.shape)

    # Setting the output heatmap height and width
    output_height = int(np.ceil((height)/occ_stride))
    output_width = int(np.ceil((width)/occ_stride))

    # Start from an all-zero heatmap of the size defined above
    heatmap = torch.zeros((output_height, output_width))

    row_starts = range(0, height, occ_stride)
    if progress:
        from tqdm import tqdm
        row_starts = tqdm(row_starts, desc='Progress', leave=True, colour='green')

    # Run in eval mode so repeated forward passes do not update normalisation
    # statistics; the previous mode is restored afterwards.
    was_training = getattr(model, 'training', False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():  # inference only; no autograd graph per position
            for h in row_starts:
                h_start, h_end = h, min(height, h + occ_height)
                for w in range(0, width, occ_stride):
                    w_start, w_end = w, min(width, w + occ_size)

                    input_image = image.clone().detach()

                    # Replace the patch with occ_pixel; rows index dim -2 and
                    # columns index dim -1.
                    input_image[:, :, h_start:h_end, w_start:w_end] = occ_pixel

                    # Run inference on the modified image
                    p_positive = model(input_image).reshape(-1)[0].item()
                    prob = p_positive if label == 1 else 1.0 - p_positive

                    # Setting the heatmap location to the probability value
                    heatmap[h//occ_stride, w//occ_stride] = prob
    finally:
        if was_training:
            model.train()

    return heatmap

In [496]:
# NOTE: `temp` is rebound here -- it now refers to the model (moved back to the
# GPU), not to the CPU copy of the photo it held earlier.
temp = model2.cuda()
pic_req = myphoto2.cuda()

In [None]:
heatmap = occlusion(temp, pic_req, 0)

In [None]:
# Draw the raw occlusion heatmap without tick labels and keep a handle on the
# figure it was drawn into.
imgplot = sns.heatmap(heatmap, xticklabels=False, yticklabels=False)
figure = imgplot.get_figure() 

In [502]:
def overlay_heatmap_on_image(image, heatmap, alpha=0.5, colormap='hot'):
    """
    Overlays the heatmap on the original image.

    Shows three panels side by side: the original image, the heatmap resized
    to the image size, and the blend of the two.

    Args:
    - image (torch.Tensor): The original image tensor in shape (1, C, H, W)
      or (C, H, W)
    - heatmap (torch.Tensor): The heatmap generated from the occlusion experiment
    - alpha (float): The transparency level for the heatmap overlay
    - colormap (str): The colormap to apply to the heatmap

    Returns:
    - None (Displays the image with the heatmap overlay)
    """
    # Drop only the batch dimension, then convert from CHW to HWC for plotting
    image_chw = image.detach()
    if image_chw.dim() == 4:
        image_chw = image_chw[0]
    image_np = image_chw.permute(1, 2, 0).cpu().numpy()
    image_np = np.clip(image_np, 0, 1)  # Ensure image is in valid range (0, 1)

    # Normalize the heatmap to (0, 1); a constant heatmap has zero range, so map
    # it to all zeros instead of dividing by zero.
    heatmap_np = heatmap.detach().cpu().numpy().astype(np.float32)
    value_range = heatmap_np.max() - heatmap_np.min()
    if value_range > 0:
        heatmap_np = (heatmap_np - heatmap_np.min()) / value_range
    else:
        heatmap_np = np.zeros_like(heatmap_np)

    # Resize heatmap to match the image size (using bilinear interpolation)
    heatmap_resized = torch.nn.functional.interpolate(
        torch.from_numpy(heatmap_np).unsqueeze(0).unsqueeze(0),
        size=(image_np.shape[0], image_np.shape[1]),
        mode='bilinear',
        align_corners=False,
    ).squeeze(0).squeeze(0).numpy()

    # Apply a colormap to the heatmap. plt.get_cmap avoids relying on a global
    # named `cm`, which this notebook also uses for other things.
    heatmap_colored = plt.get_cmap(colormap)(heatmap_resized)[..., :3]  # RGBA -> RGB

    # Blend the original image and the heatmap
    blended_image = (1 - alpha) * image_np + alpha * heatmap_colored

    # Plot the original image, heatmap, and blended result
    fig, (ax_image, ax_heatmap, ax_blend) = plt.subplots(1, 3, figsize=(12, 6))

    ax_image.imshow(image_np)
    ax_image.set_title('Original Image')  # no grid is drawn, so the title does not claim one

    ax_heatmap.imshow(heatmap_resized, cmap=colormap)
    ax_heatmap.set_title('Occlusion Heatmap')

    ax_blend.imshow(blended_image)
    ax_blend.set_title('Image with Heatmap Overlay')

    plt.show()

In [503]:
from matplotlib import cm


In [None]:
heatmap.shape

In [None]:
overlay_heatmap_on_image(pic_req, heatmap)