In [6]:
# Install required libraries
!pip install torch torchvision
!pip install tqdm
!pip install pandas

# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.utils.prune as prune
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm



## [3 points] Exercise 3: Your Model, Device, and Data


In this section, you will repeat the simple experiments from Exercise 2 on your own model, device, and data. Additionally, you will choose two of three options for practical benefits to your pruned model's accuracy and latency. You may use a different sparsity level, higher or lower than 33%, if it makes sense for your settings. Make sure to report any changes you made and why you made them. Additionally, report any challenges encountered measuring latency or storage on your device.

### [1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.

Keep performing the same unstructured magnitude pruning of your choice of sparsity level of the remaining weights on the same model (*without re-training or resetting the model*). You will apply the same function as above with the same 0.33 proportion parameter.

Collect values for this table, keeping in mind that you will need to plot the results later. You might want to keep the values in Pandas DataFrames. Sparsity reported should be the percentage of *prunable* parameters pruned. 

| Iteration | Sparsity (%) | Accuracy | Latency (s) | Disk Size (MB) |
| --------- | ------------ | -------- | ----------- | -------------- |
|     0     |   0.0%       |          |             |                |
|     1     |      ?       |          |             |                |
|     2     |              |          |             |                |
|     3     |              |          |             |                |
|     4     |              |          |             |                |
|     5     |              |          |             |                |


In [None]:
# Path to the saved PyTorch model file
saved_model_path = "model_weights_ResNet50_224_resize.pth"  # Replace with your file name

# Load the trained PyTorch model
# Ensure the model architecture matches the one used during training
num_classes = len(labels)  # 29 classes
model = models.resnet50(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Load the saved weights
model.load_state_dict(torch.load(saved_model_path, map_location=torch.device("cpu")))
print(f"Model weights loaded from {saved_model_path}")

# Set the model to evaluation mode
model.eval()


In [8]:
def calculate_sparsity(model, print_results=False):
    """
    Calculate the sparsity level (using the percent of elements that are 0) for:
    - each parameter,
    - all pruned parameters overall, and
    - the model overall.
    
    Report each of these values: 
    - the sparsity level of each parameter, 
    - across all pruned parameters, and 
    - for the model overall. 
    """
    sparsity_per_parameter = {}
    total_zero_count_pruned = 0
    total_element_count_pruned = 0
    total_zero_count_model = 0
    total_element_count_model = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Iterate over all buffers in the model
    for name, buffer in model.named_buffers():
        # Calculate the number of zero elements and total elements in the buffer
        zero_count = (buffer == 0).sum().item()
        total_elements = buffer.numel()
        
        # Calculate the sparsity level for this parameter
        sparsity_per_parameter[name] = zero_count / total_elements * 100

        # Check if this is a pruned parameter by looking for "weight_mask" or "bias_mask" in the name
        if "weight_mask" in name or "bias_mask" in name:
            total_zero_count_pruned += zero_count
            total_element_count_pruned += total_elements

        # Accumulate for overall model sparsity
        total_zero_count_model += zero_count

    # Calculate overall sparsity for pruned parameters and the entire model
    sparsity_pruned_parameters = (total_zero_count_pruned / total_element_count_pruned * 100
                                  if total_element_count_pruned > 0 else 0)
    sparsity_model = total_zero_count_model / total_element_count_model * 100

    # Print or return the results
    if print_results:
        print("Sparsity per parameter:")
        for name, sparsity in sparsity_per_parameter.items():
            print(f"  {name}: {sparsity:.2f}%")
        
        print(f"Sparsity across all pruned parameters: {sparsity_pruned_parameters:.2f}%")
        print(f"Sparsity for the model overall: {sparsity_model:.2f}%")

    # Optionally, return the values for further use
    return {
        "sparsity_per_parameter": sparsity_per_parameter,
        "sparsity_pruned_parameters": sparsity_pruned_parameters,
        "sparsity_model": sparsity_model
    }

In [42]:
def model_repeated_pruning(model_path, sparsity):

    # Create empty DataFrame to store results
    results_df = pd.DataFrame(columns=[
        'iteration',
        'sparsity_model'
    ])

    # Define the labels for the classes (A-Z, del, nothing, space)
    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]

    # Create model
    num_classes = len(labels)  # 29 classes
    model = models.resnet50(weights=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    # Load the saved weights
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    print(f"Model weights loaded from {model_path}")

    # Set the model to evaluation mode
    model.eval()
    #print([m for m in model.named_modules()])

    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.Conv2d, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Store initial results (iteration 0)
    sparsity_results = calculate_sparsity(model)
    
    results_df.loc[0] = {
        'iteration': 0,
        'sparsity_model': sparsity_results['sparsity_model']
    }

    print(f"Iteration 0 - Model Sparsity: {sparsity_results['sparsity_model']:.2f}%")

    for i in range(1, 6):
        print(f"Pruning iteration {i}")
        prune.global_unstructured(prune_params, pruning_method=prune.L1Unstructured, amount=sparsity)
        #torch.save(model.state_dict(), f"data/models/pruned_model_iteration_{i}.pth")
        
        sparsity_results = calculate_sparsity(model)
        
        # Store results in DataFrame
        results_df.loc[i] = {
            'iteration': i,
            'sparsity_model': sparsity_results['sparsity_model']
        }

        print(f"Sparsity for the model overall at Iteration {i}: {sparsity_results['sparsity_model']:.2f}%")
    
    
    # Display the DataFrame
    print("\nFinal Results DataFrame:")
    print(results_df)
    
    return results_df

In [44]:
#[1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.
pruning_results = model_repeated_pruning("models/model_weights_ResNet50_224_resize.pth", 0.33)




Model weights loaded from models/model_weights_ResNet50_224_resize.pth
Iteration 0 - Model Sparsity: 0.00%
Pruning iteration 1
Sparsity for the model overall at Iteration 1: 0.12%
Pruning iteration 2
Sparsity for the model overall at Iteration 2: 0.20%
Pruning iteration 3
Sparsity for the model overall at Iteration 3: 0.26%
Pruning iteration 4
Sparsity for the model overall at Iteration 4: 0.29%
Pruning iteration 5
Sparsity for the model overall at Iteration 5: 0.32%

Final Results DataFrame:
   iteration  sparsity_model
0          0        0.000000
1          1        0.120352
2          2        0.200989
3          3        0.255017
4          4        0.291215
5          5        0.315469


  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


### [2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):

1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    | Structure Pruned | Sparsity Rate | Accuracy | Latency (s) | Disk Size (MB) |
    | ---------------- | ------------- | -------- | ----------- | -------------- |
    | Attention heads? |               |          |             |                |
    | Layers?          |               |          |             |                |
    | Other?           |               |          |             |                |



2. Conduct a sensitivity analysis of pruning (structured or unstructured) different components of your model. For instance, what happens to your model's performance when you prune input embeddings vs hidden layer weights? Do earlier layers seem more or less important than later layers? You are not required to conduct a thorough study, but you should be able to draw a couple concrete conclusions.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    |        Pruning Technique        |  Sparsity Rate  | Accuracy | Latency (s) | Disk Size (MB) |
    | ------------------------------- | --------------- | -------- | ----------- | -------------- |
    | Unstructured, all non-embedding |  30% global     |          |             |                |
    | Structured, attention heads     |  50% per module |          |             |                |



3. Export and run your unpruned and a diverse sample of your pruned models on an inference runtime (ONNX runtime, TensorRT). Check out [the PyTorch ONNX docs](https://pytorch.org/docs/stable/onnx.html) and [this page](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) for reference. Did you run into any challenges? Do you see latency benefits? Was anything surprising? Report inference latency and discuss.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    | Inference Runtime | Sparsity Rate | Latency (s) | Disk Size (MB) |
    | ----------------- | ------------- | ----------- | -------------- |
    | ONNX              |     0%        |             |                |
    | ONNX (pruned)     |    30%        |             |                |

In [10]:
#[2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):
#1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.
#2. 