# Import libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torchvision.models import vgg16, VGG16_Weights
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
from PIL import Image
import os
import numpy as np

# Dataset creation using the PyTorch Dataset class

In [None]:
class ImageQualityDataset(Dataset):
    """Image-quality regression dataset backed by a CSV annotation file.

    Rows are read positionally: column 0 holds the quality score, column 1
    the authenticity score and column 3 the image path, which is joined onto
    the current working directory.
    """

    def __init__(self, csv_file, transform=None):
        """
        Args:
            csv_file (string): Path to the CSV file with annotations.
            transform (callable, optional): Applied to each loaded RGB image when given.
        """
        self.transform = transform
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """Returns the number of rows in the annotation table."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Loads one sample.

        Args:
            idx (int): Row index of the sample; a tensor index is converted with ``tolist()``.

        Returns:
            tuple: A tuple (image, labels) where:
                image: The RGB image, or the result of ``transform(image)`` when a transform is set.
                labels (torch.Tensor): Float tensor ``[quality, authenticity]``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Scores live in the first two columns, the relative image path in the fourth.
        scores = [self.data.iloc[idx, 0], self.data.iloc[idx, 1]]
        image_path = os.path.join(os.getcwd(), self.data.iloc[idx, 3])

        image = Image.open(image_path).convert('RGB')
        labels = torch.tensor(scores, dtype=torch.float)

        if self.transform:
            image = self.transform(image)

        return image, labels


# Definitions of the models

In [None]:
class VGG16(nn.Module):
    """Pre-trained VGG16 whose final classifier layer is swapped for a small regression head."""

    def __init__(self, num_outputs=2):
        """
        Builds the network.

        Args:
            num_outputs (int): Number of output features. Defaults to 2 (quality and authenticity).
        """
        super().__init__()
        # Start from torchvision's pre-trained VGG16.
        self.vgg16 = models.vgg16(weights=VGG16_Weights.DEFAULT)

        # Freeze every pre-trained parameter; the layers created below are new
        # and therefore remain trainable.
        self.vgg16.requires_grad_(False)

        # Keep all classifier layers except the last (1000-output) one and
        # append: Linear -> ReLU -> Linear(num_outputs).
        kept_layers = list(self.vgg16.classifier.children())[:-1]
        head_in_features = self.vgg16.classifier[6].in_features
        self.vgg16.classifier = nn.Sequential(
            *kept_layers,
            nn.Linear(head_in_features, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_outputs),
        )

    def forward(self, x):
        """
        Runs the wrapped network on a batch.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor.
        """
        return self.vgg16(x)
    
class QualityPredictor(nn.Module):
    """VGG16-based regressor that predicts two scores and also exposes the features feeding the head."""

    def __init__(self, freeze_backbone=True):
        """
        Args:
            freeze_backbone (bool): When True, the parameters of the VGG16
                ``features`` (convolutional) part are excluded from gradient updates.
        """
        super().__init__()
        # Pre-trained VGG16 used as the backbone.
        backbone = vgg16(weights=VGG16_Weights.DEFAULT)

        if freeze_backbone:
            backbone.features.requires_grad_(False)

        # Reuse the VGG16 stages; fc1 is the original classifier without its
        # last layer, so it feeds 4096 features into the regression head.
        self.features = backbone.features
        self.avgpool = backbone.avgpool
        self.fc1 = backbone.classifier[:-1]

        # New regression head: 4096 -> 128 -> 2 (quality and realness).
        self.regression_head = nn.Sequential(
            nn.Linear(4096, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 2),
        )

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): Input batch.

        Returns:
            tuple: (predictions, features) where ``features`` is the output of
            ``fc1`` and ``predictions`` is the regression head applied to it.
        """
        pooled = self.avgpool(self.features(x))
        features = self.fc1(torch.flatten(pooled, 1))
        return self.regression_head(features), features

# Utility functions for training and evaluation

In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10, device='cuda'):
    """
    Trains the model, running one training pass and one validation pass per epoch.

    The model may return either the prediction tensor directly or a
    tuple/list whose first element is the prediction tensor (as
    ``QualityPredictor`` does); only the predictions are fed to the loss.

    Args:
        model (nn.Module): The model to train.
        dataloaders (dict): Data loaders under the keys 'train' and 'val'.
        criterion (nn.Module): The loss function.
        optimizer (optim.Optimizer): The optimizer.
        num_epochs (int): Number of epochs to train for. Defaults to 10.
        device (str): Device to use for training ('cuda' or 'cpu'). Defaults to 'cuda'.

    Returns:
        nn.Module: The trained model (the same object that was passed in).
    """
    model.to(device)
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:  # Iterate over training and validation phases
            print(f'{phase} phase')
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            for inputs, labels in dataloaders[phase]:  # Iterate over data in the current phase
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):  # Enable gradients only during training
                    outputs = model(inputs)
                    # Models such as QualityPredictor return (predictions, features);
                    # plain models return the predictions tensor itself.
                    if isinstance(outputs, (tuple, list)):
                        outputs = outputs[0]
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # criterion averages over the batch, so weight by batch size
                # to obtain a per-sample average at the end of the phase.
                running_loss += loss.item() * inputs.size(0)

            num_samples = len(dataloaders[phase].dataset)
            # An empty dataset has no meaningful loss; report NaN instead of dividing by zero.
            epoch_loss = running_loss / num_samples if num_samples else float('nan')

            print(f'{phase} Loss: {epoch_loss:.4f}') # Print loss for the current phase

    print("Finished Training")
    return model

def test_model(model, dataloader, criterion, device='cuda'):
    """
    Tests the model on the test dataset.

    The model may return either the prediction tensor directly or a
    tuple/list whose first element is the prediction tensor (as
    ``QualityPredictor`` does); only the predictions are fed to the loss.

    Args:
        model (nn.Module): The trained model.
        dataloader (DataLoader): The test data loader.
        criterion (nn.Module): The loss function.
        device (str): Device to use for testing ('cuda' or 'cpu'). Defaults to 'cuda'.

    Returns:
        float: The average per-sample loss on the test dataset (NaN when the
        dataset is empty).
    """
    model.eval()  # Set the model to evaluation mode
    model.to(device)
    running_loss = 0.0

    with torch.no_grad():  # Disable gradient calculation
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Accept both (predictions, features) tuples and bare prediction tensors.
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]

            loss = criterion(outputs, labels)

            # Weight the batch-averaged loss by batch size for a per-sample average.
            running_loss += loss.item() * inputs.size(0)

    num_samples = len(dataloader.dataset)
    # Avoid a ZeroDivisionError when the dataset is empty.
    test_loss = running_loss / num_samples if num_samples else float('nan')
    print(f'Test Loss: {test_loss:.4f}')
    return test_loss

def get_predictions(model, dataloader, device)-> tuple[torch.Tensor, torch.Tensor]:
    """
    Get predictions from the model.

    The model may return either the prediction tensor directly or a
    tuple/list whose first element is the prediction tensor (as
    ``QualityPredictor`` does).

    Args:
        model (nn.Module): The trained model.
        dataloader (DataLoader): The data loader yielding (inputs, labels) batches.
        device: Device on which the model is evaluated.

    Returns:
        tuple: A tuple (predictions, labels) where:
            predictions (torch.Tensor): Predictions from the model, on the CPU.
            labels (torch.Tensor): Ground truth labels, on the CPU.

    Raises:
        RuntimeError: If the data loader yields no batches (nothing to concatenate).
    """
    model.eval()  # Set the model to evaluation mode
    model.to(device)
    predictions = []
    labels = []

    with torch.no_grad():  # Disable gradient calculation
        for inputs, target in dataloader:
            outputs = model(inputs.to(device))
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]
            # Move each batch to the CPU right away so device memory is not
            # held for the whole pass over the data.
            predictions.append(outputs.cpu())
            labels.append(target.cpu())

    return torch.cat(predictions), torch.cat(labels)

def get_regression_errors(tuple: tuple[torch.Tensor, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Compute signed per-sample errors (prediction minus label) for both scores.

    Args:
        tuple: A pair (predictions, labels) where:
            predictions (torch.Tensor): Predictions from the model.
            labels (torch.Tensor): Ground truth labels.
            Column 0 of each is read as quality, column 1 as authenticity.

    Returns:
        tuple: A pair (quality_errors, authenticity_errors) where:
            quality_errors (torch.Tensor): Column-0 differences.
            authenticity_errors (torch.Tensor): Column-1 differences.
    """
    predictions, labels = tuple
    # One difference vector per score column, in (quality, authenticity) order.
    column_errors = [predictions[:, col] - labels[:, col] for col in (0, 1)]
    return column_errors[0], column_errors[1]

def get_rmse(errors: torch.Tensor) -> torch.Tensor:
    """
    Compute the root mean squared error of an error tensor.

    Args:
        errors (torch.Tensor): Signed errors.

    Returns:
        torch.Tensor: Scalar tensor holding sqrt(mean(errors^2)).
    """
    mean_squared_error = errors.pow(2).mean()
    return mean_squared_error.sqrt()


## Training section

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing: resize, center-crop to 224x224 and normalize with the
# ImageNet channel statistics expected by the pre-trained VGG16 weights
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

annotations_file = 'Dataset/AIGCIQA2023/mos_data.csv'

# Create the dataset
dataset = ImageQualityDataset(csv_file=annotations_file, transform=data_transforms)

# Split the dataset into training, validation, and test sets (70% / 20% / remainder).
# The split is seeded so that re-running this cell in a fresh session yields the
# same partition; otherwise a previously saved checkpoint could later be
# evaluated on samples it was trained on.
SPLIT_SEED = 42
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


# Create data loaders
BATCH_SIZE = 64
EPOCHS = 20
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Create a dictionary containing the data loaders
dataloaders = {
    'train': train_dataloader,
    'val': val_dataloader,
    'test': test_dataloader
}

model = QualityPredictor()
criterion = nn.MSELoss()  # Mean Squared Error Loss (regression)
# Only the regression head is handed to the optimizer, so only it is updated
optimizer = optim.Adam(model.regression_head.parameters(), lr=0.001)

model_path = 'Models/VGG-16_finetuned_regression.pth'

quality_predictor_trained = train_model(model, dataloaders, criterion, optimizer, EPOCHS, device)

# Save the trained model (create the target directory first so saving cannot
# fail after a full training run)
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(quality_predictor_trained.state_dict(), model_path)

# Load the trained model. map_location='cpu' lets a checkpoint saved from the
# GPU load on a CPU-only host; weights_only=True restricts unpickling to tensors.
quality_predictor_trained = QualityPredictor()
quality_predictor_trained.load_state_dict(torch.load(model_path, map_location='cpu', weights_only=True))


## Utilities for pruning 

In [None]:
def compute_feature_map_importance(model, dataloader, device, layer_name, cache_path='importance_scores.npy') -> np.ndarray:
    """Scores each output channel of a convolution layer by the change in
    average RMSE when that channel's weights and bias are zeroed out.

    The score of a channel is ``baseline_rmse - pruned_rmse`` where each RMSE
    is the mean of the quality RMSE and the authenticity RMSE. A positive
    score therefore means that zeroing the channel lowered the error; a
    negative score means that zeroing it raised the error.

    Results are cached on disk: when ``cache_path`` exists it is loaded and
    returned without evaluating the model. The cache is not keyed by model or
    layer, so pass a different ``cache_path`` (or delete the file) when
    scoring another layer or checkpoint.

    Args:
        model: The neural network model.
        dataloader: DataLoader used for evaluation.
        device: Device to run the model on (cuda/cpu).
        layer_name: Name of the layer (as listed by ``model.named_modules()``).
        cache_path: File used to load/save the scores. Defaults to
            'importance_scores.npy'.

    Returns:
        np.ndarray: Array of rows ``[channel_index, score]`` sorted by
        descending score.
    """
    # If a cached result exists, load it instead of recomputing
    if os.path.exists(cache_path):
        return np.load(cache_path)

    model.eval()
    model.to(device)
    importance_scores = []
    layer = dict(model.named_modules())[layer_name]

    def average_rmse():
        # Mean of the quality and authenticity RMSE for the model's current weights.
        quality_errors, authenticity_errors = get_regression_errors(
            get_predictions(model, dataloader, device)
        )
        return (get_rmse(quality_errors) + get_rmse(authenticity_errors)) / 2

    average_baseline_rmse = average_rmse()

    print(f'Average baseline RMSE: {average_baseline_rmse:.4f}')

    with torch.no_grad():
        for i in range(layer.out_channels):
            # Create a backup of the weights and bias
            backup_weights = layer.weight[i, ...].clone()
            backup_bias = layer.bias[i].clone() if layer.bias is not None else None

            try:
                # Zero out the i-th output channel
                layer.weight[i, ...] = 0
                if layer.bias is not None:
                    layer.bias[i] = 0

                average_pruned_rmse = average_rmse()
            finally:
                # Always restore the channel, even if evaluation fails midway
                layer.weight[i, ...] = backup_weights
                if layer.bias is not None:
                    layer.bias[i] = backup_bias

            # Store a plain float so the result converts to a numeric array
            importance_score = (average_baseline_rmse - average_pruned_rmse).item()
            importance_scores.append([i, importance_score])

            print(f'Feature map {i}: Importance score: {importance_score:.4f}')

    sorted_importance_scores = np.array(
        sorted(importance_scores, key=lambda x: x[1], reverse=True)
    )
    np.save(cache_path, sorted_importance_scores)
    return sorted_importance_scores

def find_optimal_feature_subset(model, dataloader, device, layer_name, sorted_importance_scores, model_path='Models/pruned_model.pth'):
    """
    Find a subset of feature maps by enabling channels one at a time and
    keeping the prefix that gives the lowest average RMSE.

    All channels of the layer start zeroed. The given score sequence is
    reversed and its channels are then re-enabled one by one in that order;
    after each addition the model is evaluated. The best-performing subset is
    applied, the model's state dict is saved to ``model_path``, and the
    layer's original weights are restored afterwards (also when an error
    interrupts the search).

    Args:
        model: The neural network model
        dataloader: DataLoader for evaluation
        device: Device to run the model on (cuda/cpu)
        layer_name: Name of the layer to optimize
        sorted_importance_scores: Sequence of (channel_index, importance_score) pairs
        model_path: Where the state dict with the best subset applied is saved

    Returns:
        Dictionary with optimal subset and performance metrics
    """
    model.eval()
    model.to(device)

    # Reverse the sorted importance scores
    sorted_importance_scores = sorted_importance_scores[::-1]
    num_candidates = len(sorted_importance_scores)

    # Get the target layer
    layer = dict(model.named_modules())[layer_name]

    # Create a backup of the original weights and bias
    original_weights = layer.weight.detach().clone()
    original_bias = layer.bias.detach().clone() if layer.bias is not None else None

    def average_rmse():
        # Mean of the quality and authenticity RMSE for the model's current weights.
        quality_errors, authenticity_errors = get_regression_errors(
            get_predictions(model, dataloader, device)
        )
        return (get_rmse(quality_errors) + get_rmse(authenticity_errors)) / 2

    def zero_all_channels():
        layer.weight.fill_(0)
        if layer.bias is not None:
            layer.bias.fill_(0)

    def enable_channel(ch_idx):
        layer.weight[ch_idx, ...] = original_weights[ch_idx, ...]
        if layer.bias is not None:
            layer.bias[ch_idx] = original_bias[ch_idx]

    # Initialize tracking variables
    best_rmse = float('inf')
    best_subset = []
    rmse_history = []
    current_subset = []

    try:
        # In-place edits of parameters must not be tracked by autograd
        with torch.no_grad():
            # Get baseline with no features (all zeroed out)
            zero_all_channels()
            baseline_rmse = average_rmse()

            print(f"Baseline RMSE (no features): {baseline_rmse:.4f}")
            print("------------------")

            # Track performance with empty set
            rmse_history.append(([], baseline_rmse))

            # Iteratively add feature maps
            for idx, (channel_idx, _) in enumerate(sorted_importance_scores):
                channel_idx = int(channel_idx)

                # The subset only ever grows, so enabling the new channel on
                # top of the previous state yields the full current subset.
                current_subset.append(channel_idx)
                enable_channel(channel_idx)

                # Evaluate model with current subset
                current_rmse = average_rmse()

                # Record performance
                rmse_history.append((current_subset.copy(), current_rmse))

                print(f"Iteration {idx+1}/{num_candidates}: " +
                      f"Added channel {channel_idx}, " +
                      f"Subset size: {len(current_subset)}, " +
                      f"RMSE: {current_rmse:.4f}")

                # Update best subset if this one is better
                if current_rmse < best_rmse:
                    best_rmse = current_rmse
                    best_subset = current_subset.copy()
                    print(f"  ✓ New best subset found! RMSE: {best_rmse:.4f}")

            # Guard against an empty score list (nothing to reduce)
            reduction_percentage = (
                (1 - len(best_subset)/num_candidates)*100 if num_candidates else 0.0
            )

            print("\n------------------")
            print(f"Best RMSE: {best_rmse:.4f} with {len(best_subset)} features")
            print(f"Improvement over baseline: {baseline_rmse - best_rmse:.4f}")
            print(f"Feature reduction: {reduction_percentage:.1f}%")

            # Apply the best subset to the model
            zero_all_channels()
            for ch_idx in best_subset:
                enable_channel(ch_idx)

        # Save the pruned model
        torch.save(model.state_dict(), model_path)
    finally:
        # Restore original weights for future use
        with torch.no_grad():
            layer.weight.copy_(original_weights)
            if layer.bias is not None:
                layer.bias.copy_(original_bias)

    return {
        'best_subset': best_subset,
        'best_rmse': best_rmse,
        'baseline_rmse': baseline_rmse,
        'improvement': baseline_rmse - best_rmse,
        'reduction_percentage': reduction_percentage,
        'rmse_history': rmse_history
    }

def remove_noisy_feature_maps(model, dataloader, device, layer_name, sorted_importance_scores, model_path='Models/pruned_model.pth'):
    """
    Remove noisy feature maps from a convolutional layer based on importance scores.
    Feature maps are zeroed out one by one, in the given order, and kept zeroed
    only if the average RMSE drops below the best value seen so far.

    The model is left in its pruned state when the function returns, and its
    state dict is saved to ``model_path``.

    Args:
        model: The neural network model
        dataloader: DataLoader for evaluation
        device: Device to run the model on (cuda/cpu)
        layer_name: Name of the layer to optimize
        sorted_importance_scores: Sequence of (channel_index, importance_score) pairs
        model_path: Where the pruned state dict is saved

    Returns:
        Dictionary with pruning results and performance metrics
    """
    model.eval()
    model.to(device)

    # Get the target layer
    layer = dict(model.named_modules())[layer_name]

    # Create a backup of the original weights and bias
    original_weights = layer.weight.detach().clone()
    original_bias = layer.bias.detach().clone() if layer.bias is not None else None

    def average_rmse():
        # Mean of the quality and authenticity RMSE for the model's current weights.
        quality_errors, authenticity_errors = get_regression_errors(
            get_predictions(model, dataloader, device)
        )
        return (get_rmse(quality_errors) + get_rmse(authenticity_errors)) / 2

    # Initialize tracking variables
    removed_features = []
    rmse_history = []
    num_candidates = len(sorted_importance_scores)

    # Get baseline performance
    average_baseline_rmse = average_rmse()

    print(f"Baseline RMSE: {average_baseline_rmse:.4f}")
    print("------------------")

    # Track initial performance
    rmse_history.append(([], average_baseline_rmse))
    baseline_rmse = average_baseline_rmse  # best RMSE seen so far

    # In-place edits of parameters must not be tracked by autograd
    with torch.no_grad():
        # Iterate over the sorted indices and if removing a feature map improves performance, keep it removed
        for idx, (channel_idx, importance_score) in enumerate(sorted_importance_scores):
            channel_idx = int(channel_idx)

            # Temporarily zero out this feature map
            layer.weight[channel_idx, ...] = 0
            if layer.bias is not None:
                layer.bias[channel_idx] = 0

            # Evaluate model with feature map removed
            average_new_rmse = average_rmse()

            print(f"Iteration {idx+1}/{num_candidates}: " +
                  f"Testing removal of channel {channel_idx}, " +
                  f"Importance: {importance_score:.4f}, " +
                  f"RMSE: {average_new_rmse:.4f}")

            # Decide whether to keep this feature map removed
            if average_new_rmse < baseline_rmse:
                # baseline_rmse never exceeds the initial baseline, so a value
                # below it is the new best without further comparison.
                baseline_rmse = average_new_rmse
                removed_features.append(channel_idx)
                rmse_history.append((removed_features.copy(), baseline_rmse))
                print(f"  ✓ IMPROVING: Zeroing out feature map {channel_idx}")
            else:
                # Restore the feature map
                layer.weight[channel_idx, ...] = original_weights[channel_idx, ...]
                if layer.bias is not None:
                    layer.bias[channel_idx] = original_bias[channel_idx]
                print(f"  ✗ NOT IMPROVING: Keeping feature map {channel_idx}")

            print(f"  Current best RMSE: {baseline_rmse:.4f}")
            print("------------------")

    # Guard against an empty score list (nothing to reduce)
    reduction_percentage = (len(removed_features)/num_candidates)*100 if num_candidates else 0.0

    # Final statistics
    print("\n------------------")
    print(f"Final RMSE: {baseline_rmse:.4f} after removing {len(removed_features)} feature maps")
    print(f"Improvement over baseline: {average_baseline_rmse - baseline_rmse:.4f}")
    print(f"Feature reduction: {reduction_percentage:.1f}%")

    # Save the pruned model
    torch.save(model.state_dict(), model_path)

    return {
        'removed_features': removed_features,
        'baseline_rmse': average_baseline_rmse,
        'final_rmse': baseline_rmse,
        'improvement': average_baseline_rmse - baseline_rmse,
        'reduction_percentage': reduction_percentage,
        'rmse_history': rmse_history
    }

def remove_negative_impact_feature_maps(model, dataloader, device, layer_name, sorted_importance_scores, model_path='Models/negative_impact_pruned_model.pth'):
    """
    Zero out, in one shot, every feature map whose importance score is positive.

    With scores defined as ``baseline_rmse - pruned_rmse`` (as produced by
    ``compute_feature_map_importance``), a positive score means that zeroing
    the channel on its own lowered the error, i.e. the channel has a negative
    impact on performance.

    The pruned state dict is saved to ``model_path``; afterwards the layer's
    original weights are restored (also when an error interrupts the run).

    Args:
        model: The neural network model
        dataloader: DataLoader for evaluation
        device: Device to run the model on (cuda/cpu)
        layer_name: Name of the layer to optimize
        sorted_importance_scores: Sequence of (channel_index, importance_score) pairs
        model_path: Where the pruned state dict is saved

    Returns:
        Dictionary with pruning results and performance metrics
    """
    model.eval()
    model.to(device)

    # Get the target layer
    layer = dict(model.named_modules())[layer_name]

    # Create a backup of the original weights and bias
    original_weights = layer.weight.detach().clone()
    original_bias = layer.bias.detach().clone() if layer.bias is not None else None

    def average_rmse():
        # Mean of the quality and authenticity RMSE for the model's current weights.
        quality_errors, authenticity_errors = get_regression_errors(
            get_predictions(model, dataloader, device)
        )
        return (get_rmse(quality_errors) + get_rmse(authenticity_errors)) / 2

    # Get baseline performance
    baseline_rmse = average_rmse()

    # Initialize tracking variables
    removed_features = []
    num_candidates = len(sorted_importance_scores)

    try:
        # In-place edits of parameters must not be tracked by autograd
        with torch.no_grad():
            # Zero out all feature maps whose removal lowered the error (score > 0)
            for idx, (channel_idx, importance_score) in enumerate(sorted_importance_scores):
                print(f"Iteration {idx} - Channel {channel_idx}: Importance score: {importance_score:.4f}")
                if importance_score > 0:
                    channel_idx = int(channel_idx)
                    layer.weight[channel_idx, ...] = 0
                    if layer.bias is not None:
                        layer.bias[channel_idx] = 0
                    removed_features.append(channel_idx)

        # Evaluate model with feature maps removed
        new_rmse = average_rmse()

        # Save the pruned model
        torch.save(model.state_dict(), model_path)
    finally:
        # Restore original weights for future use
        with torch.no_grad():
            layer.weight.copy_(original_weights)
            if layer.bias is not None:
                layer.bias.copy_(original_bias)

    return {
        'removed_features': removed_features,
        'baseline_rmse': baseline_rmse,
        'final_rmse': new_rmse,
        'improvement': baseline_rmse - new_rmse,
        # Guard against an empty score list (nothing to reduce)
        'reduction_percentage': (len(removed_features)/num_candidates)*100 if num_candidates else 0.0
    }

def remove_channels(model,device,layer_name,channels_indexes)->QualityPredictor:
    """
    Zero out the given output channels (weights and bias) of a layer in place.

    The channels are not physically removed: the layer keeps its shape and the
    selected channels' parameters are set to zero. The model is switched to
    evaluation mode and moved to ``device``.

    Args:
        model: The neural network model
        device: Device to run the model on (cuda/cpu)
        layer_name: Name of the layer to modify (as listed by ``model.named_modules()``)
        channels_indexes: List of channel indexes to zero out

    Returns:
        The same model object, with the channels zeroed
    """
    model.eval()
    model.to(device)

    # Get the target layer
    layer = dict(model.named_modules())[layer_name]

    # In-place edits of parameters must not be tracked by autograd
    with torch.no_grad():
        # Zero out the specified channels
        for channel_idx in channels_indexes:
            layer.weight[channel_idx, ...] = 0
            if layer.bias is not None:
                layer.bias[channel_idx] = 0

    return model

# Creation of different models using different pruning techniques

- Each model is deleted after use to make sure the same model instance is not reused across pruning techniques (first draft; I am not sure whether the weights are correctly restored in each pruning technique).

In [None]:
# LAYER to prune
LAYER = 'features.28'
# Fall back to the CPU when no GPU is available instead of failing on .to('cuda')
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
FINETUNED_WEIGHTS_PATH = 'Models/VGG-16_finetuned_regression.pth'


def _load_finetuned_predictor():
    """Return a fresh QualityPredictor with the fine-tuned weights, in eval mode on DEVICE."""
    predictor = QualityPredictor()
    # map_location='cpu' lets a GPU-saved checkpoint load on any host;
    # weights_only=True restricts unpickling to tensors.
    predictor.load_state_dict(torch.load(FINETUNED_WEIGHTS_PATH, map_location='cpu', weights_only=True))
    predictor.eval()
    predictor.to(DEVICE)
    return predictor


# Base model for importance score computation
base_model = _load_finetuned_predictor()

sorted_importance_scores = compute_feature_map_importance(base_model, train_dataloader, DEVICE, LAYER)
del base_model

# Model for noisy feature maps removal
noisy_pruning_model = _load_finetuned_predictor()

noisy_optimal_subset = remove_noisy_feature_maps(noisy_pruning_model, train_dataloader, DEVICE, LAYER, sorted_importance_scores, model_path='Models/noise_out_pruned_model.pth')

del noisy_pruning_model

# Model for optimal subset selection
optimal_subset_model = _load_finetuned_predictor()

# Find the optimal subset of feature maps
optimal_subset = find_optimal_feature_subset(optimal_subset_model, train_dataloader, DEVICE, LAYER, sorted_importance_scores, model_path='Models/optimal_set_pruned_model.pth')

# Model for negative impact feature maps removal
negative_impact_model = _load_finetuned_predictor()

negative_impact_subset = remove_negative_impact_feature_maps(negative_impact_model, train_dataloader, DEVICE, LAYER, sorted_importance_scores, model_path='Models/negative_impact_pruned_model.pth')

del negative_impact_model


# Testing with already saved weights

In [None]:
NOISY_PRUNED_MODEL_PATH = 'Models/noise_out_pruned_model.pth'
BEST_SUBSET_PRUNED_MODEL_PATH = 'Models/optimal_set_pruned_model.pth'
NEGATIVE_IMPACT_PRUNED_MODEL_PATH = 'Models/negative_impact_pruned_model.pth'

# map_location='cpu' lets checkpoints that were saved from GPU tensors load on
# a CPU-only host; test_model moves each model to `device` before evaluating.
noisy_pruned_model = QualityPredictor()
noisy_pruned_model.load_state_dict(torch.load(NOISY_PRUNED_MODEL_PATH, map_location='cpu', weights_only=True))

best_subset_pruned_model = QualityPredictor()
best_subset_pruned_model.load_state_dict(torch.load(BEST_SUBSET_PRUNED_MODEL_PATH, map_location='cpu', weights_only=True))

negative_impact_pruned_model = QualityPredictor()
negative_impact_pruned_model.load_state_dict(torch.load(NEGATIVE_IMPACT_PRUNED_MODEL_PATH, map_location='cpu', weights_only=True))

baseline_model = QualityPredictor()
baseline_model.load_state_dict(torch.load('Models/VGG-16_finetuned_regression.pth', map_location='cpu', weights_only=True))

# Testing: evaluate every model on the same test split, baseline first
for model_label, model_under_test in (
    ("baseline", baseline_model),
    ("noisy pruned", noisy_pruned_model),
    ("best subset pruned", best_subset_pruned_model),
    ("negative impact pruned", negative_impact_pruned_model),
):
    print(f"Testing the {model_label} model")
    test_model(model_under_test, test_dataloader, criterion, device)
    print("------------------")


# Final Analysis - Comparing the models' zeroed-out weights & correlations between the models' predictions and ground truth

## Zero-out weights analysis

In [None]:
# Create a function that extract the indices of the zeroed out feature maps in a convolutional layer

def get_zeroed_feature_maps(model, layer_name):
    """
    Find the output channels of a layer whose weights are entirely zero.

    Args:
        model (nn.Module): The neural network model.
        layer_name (str): The name of the layer, as listed by ``model.named_modules()``.

    Returns:
        tuple: ``(indices, count)`` where ``indices`` is the ascending list of
        all-zero channel indices and ``count`` is its length.
    """
    target_layer = dict(model.named_modules())[layer_name]

    # Iterating over the weight tensor walks its first dimension (one slice
    # per output channel), so the collected indices are already ascending.
    zeroed_feature_maps = [
        channel
        for channel, channel_weights in enumerate(target_layer.weight)
        if torch.all(channel_weights == 0)
    ]

    return zeroed_feature_maps, len(zeroed_feature_maps)

# Count the all-zero feature maps of layer 'features.28' in each pruned model;
# only the count (second element of the returned pair) is needed here
noisy_num_zeroed = get_zeroed_feature_maps(noisy_pruned_model, 'features.28')[1]
best_subset_num_zeroed = get_zeroed_feature_maps(best_subset_pruned_model, 'features.28')[1]
negative_impact_num_zeroed = get_zeroed_feature_maps(negative_impact_pruned_model, 'features.28')[1]

# Report the counts, one line per model
for model_label, zeroed_count in (
    ("Noisy pruned model", noisy_num_zeroed),
    ("Best subset pruned model", best_subset_num_zeroed),
    ("Negative impact pruned model", negative_impact_num_zeroed),
):
    print(f"{model_label}: {zeroed_count} zeroed out feature maps")


## Correlation analysis

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
from sklearn.metrics import r2_score
import matplotlib.gridspec as gridspec

# Supported ways of merging quality and authenticity into a single score
_COMBINATION_METHODS = ('average', 'weighted', 'euclidean')


def _min_max_normalize(values):
    """Scale values to [0, 1]; a constant array maps to zeros instead of NaN."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return np.zeros_like(values, dtype=float)
    return (values - lowest) / span


def _correlation_stats(true_scores, pred_scores):
    """Return (Spearman rho, p-value, R²) of predictions against ground truth."""
    rho, p_value = spearmanr(true_scores, pred_scores)
    return rho, p_value, r2_score(true_scores, pred_scores)


def _build_combination(true_scores, pred_scores, name, description, color):
    """Bundle a combined score with its correlation stats and plot styling."""
    rho, p_value, r2 = _correlation_stats(true_scores, pred_scores)
    return {
        'true': true_scores,
        'pred': pred_scores,
        'spearman': rho,
        'p_value': p_value,
        'r2': r2,
        'name': name,
        'description': description,
        'color': color,
    }


def _plot_score_panel(ax, true_scores, pred_scores, name, spearman, r2, color, description=None):
    """Draw one true-vs-predicted scatter with identity and best-fit lines."""
    ax.scatter(true_scores, pred_scores, alpha=0.7, color=color)

    # Identity line spanning the joint range of both axes
    low = min(true_scores.min(), pred_scores.min())
    high = max(true_scores.max(), pred_scores.max())
    ax.plot([low, high], [low, high], 'r--', label='Perfect prediction')

    # Least-squares straight line fitted to (true, predicted)
    z = np.polyfit(true_scores, pred_scores, 1)
    fitted = np.poly1d(z)
    x_sorted = np.sort(true_scores)
    ax.plot(x_sorted, fitted(x_sorted), 'g-', label=f'Best fit (y = {z[0]:.3f}x + {z[1]:.3f})')

    heading = f'{name} Correlation\nSpearman ρ = {spearman:.4f}, R² = {r2:.4f}'
    if description:
        heading += f'\n{description}'

    ax.set_xlabel(f'True {name}')
    ax.set_ylabel(f'Predicted {name}')
    ax.set_title(heading)
    ax.grid(alpha=0.3)
    ax.legend()


def plot_correlations(model, dataloader, device, save_path=None, combination_method='average', title=""):
    """
    Computes and plots correlation with multiple ways of combining quality and authenticity metrics.

    The model is run over the whole dataloader without gradients. ``model(inputs)``
    must return a 2-tuple whose first element holds the predictions; column 0
    of both predictions and labels is treated as quality and column 1 as
    authenticity.

    Three combined scores are always computed (and returned), whatever
    ``combination_method`` is plotted:
      * 'average'   - mean of quality and authenticity
      * 'weighted'  - (2 * quality + authenticity) / 3
      * 'euclidean' - norm of the min-max normalized (quality, authenticity)
        pair. Predictions and ground truth are each normalized with their own
        min/max; a constant score normalizes to zeros.

    Args:
        model (nn.Module): The trained model to evaluate
        dataloader (DataLoader): Test dataloader containing images and true scores
        device (str): Device to run the model on ('cuda' or 'cpu')
        save_path (str, optional): Path to save the plot. Missing parent directories
            are created. If None, plot is displayed instead.
        combination_method (str): Method to combine scores ('average', 'weighted', 'euclidean', or 'all')
        title (str): Title placed above the whole figure.

    Returns:
        dict: Dictionary containing correlation metrics. Keys are 'quality',
            'authenticity', 'average', 'weighted' and 'euclidean'; each maps to a
            dict with 'spearman', 'p_value' and 'r2'.

    Raises:
        ValueError: If ``combination_method`` is not supported, or if the
            dataloader yields no batches.
    """
    # Fail fast, before running inference, rather than with a KeyError after
    # half of the figure has already been drawn.
    if combination_method != 'all' and combination_method not in _COMBINATION_METHODS:
        raise ValueError(
            f"Unknown combination_method {combination_method!r}; "
            f"expected one of {_COMBINATION_METHODS + ('all',)}"
        )

    # Set model to evaluation mode
    model.eval()
    model.to(device)

    # Collect predictions and ground truth
    pred_batches = []
    true_batches = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs, _ = model(inputs.to(device))
            pred_batches.append(outputs.cpu())
            # .cpu() is a no-op for CPU labels and keeps .numpy() below valid otherwise
            true_batches.append(labels.cpu())

    if not pred_batches:
        raise ValueError("dataloader produced no batches; nothing to correlate")

    predictions = torch.cat(pred_batches, dim=0).numpy()
    ground_truth = torch.cat(true_batches, dim=0).numpy()

    # Extract quality and authenticity scores
    pred_quality, pred_authenticity = predictions[:, 0], predictions[:, 1]
    true_quality, true_authenticity = ground_truth[:, 0], ground_truth[:, 1]

    # Correlation metrics for the individual scores
    quality_spearman, q_pvalue, quality_r2 = _correlation_stats(true_quality, pred_quality)
    auth_spearman, a_pvalue, auth_r2 = _correlation_stats(true_authenticity, pred_authenticity)

    # Euclidean combination works on [0, 1]-normalized scores so both weigh equally
    true_euclidean = np.sqrt(
        _min_max_normalize(true_quality) ** 2 + _min_max_normalize(true_authenticity) ** 2
    )
    pred_euclidean = np.sqrt(
        _min_max_normalize(pred_quality) ** 2 + _min_max_normalize(pred_authenticity) ** 2
    )

    # Insertion order (average, weighted, euclidean) is the print order below
    combinations = {
        'average': _build_combination(
            (true_quality + true_authenticity) / 2,
            (pred_quality + pred_authenticity) / 2,
            'Average Score',
            'Simple average of quality and authenticity',
            'purple',
        ),
        'weighted': _build_combination(
            (2 * true_quality + true_authenticity) / 3,
            (2 * pred_quality + pred_authenticity) / 3,
            'Weighted Score',
            'Weighted average (2:1 quality:authenticity)',
            'green',
        ),
        'euclidean': _build_combination(
            true_euclidean,
            pred_euclidean,
            'Euclidean Combined Score',
            'Euclidean distance in normalized 2D space',
            'orange',
        ),
    }

    # Lay out the figure: individual scores on top, combined score(s) below
    if combination_method == 'all':
        fig = plt.figure(figsize=(14, 16))
        grid = gridspec.GridSpec(3, 2, height_ratios=[1, 1, 1], figure=fig)
        quality_ax = fig.add_subplot(grid[0, 0])
        auth_ax = fig.add_subplot(grid[0, 1])
        combined_axes = {
            'average': fig.add_subplot(grid[1, :]),  # full width
            'weighted': fig.add_subplot(grid[2, 0]),
            'euclidean': fig.add_subplot(grid[2, 1]),
        }
    else:
        fig = plt.figure(figsize=(14, 12))
        grid = gridspec.GridSpec(2, 4, height_ratios=[1, 1], figure=fig)
        quality_ax = fig.add_subplot(grid[0, 0:2])
        auth_ax = fig.add_subplot(grid[0, 2:4])
        # Single combined panel, centred under the two individual panels
        combined_axes = {combination_method: fig.add_subplot(grid[1, 1:3])}
    fig.suptitle(title)

    _plot_score_panel(
        quality_ax, true_quality, pred_quality,
        'Quality Score', quality_spearman, quality_r2, 'blue',
    )
    _plot_score_panel(
        auth_ax, true_authenticity, pred_authenticity,
        'Authenticity Score', auth_spearman, auth_r2, 'blue',
    )
    for method, ax in combined_axes.items():
        comb = combinations[method]
        _plot_score_panel(
            ax, comb['true'], comb['pred'],
            comb['name'], comb['spearman'], comb['r2'], comb['color'],
            description=comb['description'],
        )

    fig.tight_layout()

    if save_path:
        # savefig does not create directories; make sure the target folder exists
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
    else:
        plt.show()

    # Print summary statistics
    print(f"Quality Score - Spearman ρ: {quality_spearman:.4f} (p-value: {q_pvalue:.4g}), R²: {quality_r2:.4f}")
    print(f"Authenticity Score - Spearman ρ: {auth_spearman:.4f} (p-value: {a_pvalue:.4g}), R²: {auth_r2:.4f}")

    # Print combined statistics (only the plotted method unless 'all')
    for method, comb in combinations.items():
        if combination_method == 'all' or method == combination_method:
            print(f"{comb['name']} - Spearman ρ: {comb['spearman']:.4f} (p-value: {comb['p_value']:.4g}), R²: {comb['r2']:.4f}")

    # Return all metrics, including the combinations that were not plotted
    metrics = {
        'quality': {
            'spearman': quality_spearman,
            'p_value': q_pvalue,
            'r2': quality_r2,
        },
        'authenticity': {
            'spearman': auth_spearman,
            'p_value': a_pvalue,
            'r2': auth_r2,
        },
    }
    for method, comb in combinations.items():
        metrics[method] = {
            'spearman': comb['spearman'],
            'p_value': comb['p_value'],
            'r2': comb['r2'],
        }

    return metrics


# Destination file of the correlation figure for each model
BASELINE_PATH_NAME = 'Plots/baseline_correlation.png'
NOISY_PATH_NAME = 'Plots/noisy_pruned_correlation.png'
BEST_SUBSET_PATH_NAME = 'Plots/best_subset_correlation.png'
NEGATIVE_PATH_NAME = 'Plots/negative_impact_correlation.png'

# Generate one correlation figure per model, all on the same test dataloader.
# The calls are kept as separate statements so the notebook still echoes the
# metrics dict returned by the last one.
plot_correlations(
    model=baseline_model,
    dataloader=test_dataloader,
    device=device,
    save_path=BASELINE_PATH_NAME,
    title="BASELINE MODEL",
)
plot_correlations(
    model=noisy_pruned_model,
    dataloader=test_dataloader,
    device=device,
    save_path=NOISY_PATH_NAME,
    title="NOISY PRUNED MODEL",
)
plot_correlations(
    model=best_subset_pruned_model,
    dataloader=test_dataloader,
    device=device,
    save_path=BEST_SUBSET_PATH_NAME,
    title="BEST SUBSET PRUNED MODEL",
)
plot_correlations(
    model=negative_impact_pruned_model,
    dataloader=test_dataloader,
    device=device,
    save_path=NEGATIVE_PATH_NAME,
    title="NEGATIVE IMPACT PRUNED MODEL",
)