In [None]:
# Install required libraries
!pip install torch torchvision
!pip install tqdm
!pip install pandas
!pip install -U coremltools


In [3]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from copy import deepcopy
import pandas as pd
import os
from tqdm import tqdm
import coremltools as ct
from coremltools.optimize.torch.pruning import ModuleMagnitudePrunerConfig, MagnitudePruner, MagnitudePrunerConfig

Torch version 2.5.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.4.0 is the most recent version that has been tested.


## [3 points] Exercise 3: Your Model, Device, and Data


In this section, you will repeat the simple experiments from Exercise 2 on your own model, device, and data. Additionally, you will choose two of three options for practical benefits to your pruned model's accuracy and latency. You may use a different sparsity level, higher or lower than 33%, if it makes sense for your settings. Make sure to report any changes you made and why you made them. Additionally, report any challenges encountered measuring latency or storage on your device.

### [1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.

Keep performing the same unstructured magnitude pruning of your choice of sparsity level of the remaining weights on the same model (*without re-training or resetting the model*). You will apply the same function as above with the same 0.33 proportion parameter.

Collect values for this table, keeping in mind that you will need to plot the results later. You might want to keep the values in Pandas DataFrames. Sparsity reported should be the percentage of *prunable* parameters pruned. 

| Iteration | Sparsity (%) | Accuracy | Latency (s) | Disk Size (MB) |
| --------- | ------------ | -------- | ----------- | -------------- |
|     0     |   0.0%       |          |             |                |
|     1     |      ?       |          |             |                |
|     2     |              |          |             |                |
|     3     |              |          |             |                |
|     4     |              |          |             |                |
|     5     |              |          |             |                |


In [4]:
def calculate_sparsity(model, print_results=False):
    """
    Calculate the sparsity level (using the percent of elements that are 0) for:
    - each parameter,
    - all pruned parameters overall, and
    - the model overall.
    
    Report each of these values: 
    - the sparsity level of each parameter, 
    - across all pruned parameters, and 
    - for the model overall. 
    """
    sparsity_per_parameter = {}
    total_zero_count_pruned = 0
    total_element_count_pruned = 0
    total_zero_count_model = 0
    total_element_count_model = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Iterate over all buffers in the model
    for name, buffer in model.named_buffers():
        # Calculate the number of zero elements and total elements in the buffer
        zero_count = (buffer == 0).sum().item()
        total_elements = buffer.numel()
        
        # Calculate the sparsity level for this parameter
        sparsity_per_parameter[name] = zero_count / total_elements * 100

        # Check if this is a pruned parameter by looking for "weight_mask" or "bias_mask" in the name
        if "weight_mask" in name or "bias_mask" in name:
            total_zero_count_pruned += zero_count
            total_element_count_pruned += total_elements

        # Accumulate for overall model sparsity
        total_zero_count_model += zero_count

    # Calculate overall sparsity for pruned parameters and the entire model
    sparsity_pruned_parameters = (total_zero_count_pruned / total_element_count_pruned * 100
                                  if total_element_count_pruned > 0 else 0)
    sparsity_model = total_zero_count_model / total_element_count_model * 100

    # Print or return the results
    if print_results:
        print("Sparsity per parameter:")
        for name, sparsity in sparsity_per_parameter.items():
            print(f"  {name}: {sparsity:.2f}%")
        
        print(f"Sparsity across all pruned parameters: {sparsity_pruned_parameters:.2f}%")
        print(f"Sparsity for the model overall: {sparsity_model:.2f}%")

    # Optionally, return the values for further use
    return {
        "sparsity_per_parameter": sparsity_per_parameter,
        "sparsity_pruned_parameters": sparsity_pruned_parameters,
        "sparsity_model": sparsity_model
    }

In [5]:
def print_size_of_model(model, label=""):
    sd = model.state_dict()
    for item in sd:
        sd[item] = model.state_dict()[item].to_sparse()
    
    torch.save(sd, "temp.pt")
    size=os.path.getsize("temp.pt")
    #print("model: ",label,' \t','Size (MB):', size/1e6)

    os.remove('temp.pt')
    return size

In [6]:
def evaluate_model(model):
    model.eval()

    size = print_size_of_model(model, "sparse")
    
    return size

In [7]:
def sparse_evaluate(model, device):
    model.to(device)

    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]

    # Create model
    num_classes = len(labels)  # 29 classes
    model_copy = models.resnet50(weights=False)
    num_ftrs = model.fc.in_features
    model_copy.fc = nn.Linear(num_ftrs, num_classes)

    prune_params = [(m[1], "weight") for m in model_copy.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    for p in prune_params:
        prune.identity(p[0], "weight")
    # Copy the parameters
    model_copy.load_state_dict(model.state_dict())
    
    copy_params = [(m[1], "weight") for m in model_copy.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    # (we assume the same model architecture)
    for p in copy_params:
        prune.remove(*p)
    
    return evaluate_model(model_copy)

In [8]:
def load_asl_model(model_path):
    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]
    # Create model
    num_classes = len(labels)  # 29 classes
    model = models.resnet50(weights=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    # Load the saved weights
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Set the model to evaluation mode
    model.eval()
    
    return model, device

In [None]:
def save_copy_of_model(model, model_path):
    for p in prune_params:
    prune.remove(*p)

In [17]:
def model_repeated_pruning(model_path, sparsity):

    # Create empty DataFrame to store results
    results_df = pd.DataFrame(columns=[
        'iteration',
        'sparsity_model',
        'size_mb'
    ])

    # Define the labels for the classes (A-Z, del, nothing, space)

    model, device = load_asl_model(model_path)
    print(f"Model weights loaded from {model_path}")

    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    # Store initial results (iteration 0)
    sparsity_results = calculate_sparsity(model)
    size = evaluate_model(model)
    
    results_df.loc[0] = {
        'iteration': 0,
        'sparsity_model': sparsity_results['sparsity_model'],
        'size_mb': size/1e6
    }

    model_path_list = [model_path]

    print(f"Iteration 0 - Model Sparsity: {sparsity_results['sparsity_model']:.2f}%")

    for i in range(1, 6):
        print(f"Pruning iteration {i}")
        prune.global_unstructured(prune_params, pruning_method=prune.L1Unstructured, amount=sparsity)



        #print(prune_params[0][0].weight)
        #print(type(prune_params[0][0]))

        saved_model_path = f"models/pruned_model_iteration_{i}.pth"
        model_path_list.append(saved_model_path)
        torch.save(model.state_dict(), saved_model_path)

        size = sparse_evaluate(model, device)
        sparsity_results = calculate_sparsity(model)

          
        # Store results in DataFrame
        results_df.loc[i] = {
            'iteration': i,
            'sparsity_model': sparsity_results['sparsity_model'],
            'size_mb': size/1e6
        }

        print(f"Sparsity for the model overall at Iteration {i}: {sparsity_results['sparsity_model']:.2f}%, Size MB: {size/1e6}")
    
    
    # Display the DataFrame
    print("\nFinal Results DataFrame:")
    print(results_df)
    
    return results_df, model_path_list

In [18]:
# [1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.
# https://apple.github.io/coremltools/source/coremltools.optimize.torch.pruning.html
pruning_results, model_paths = model_repeated_pruning("./models/model_weights_ResNet50_224_resize.pth", 0.33)


  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


Model weights loaded from ./models/model_weights_ResNet50_224_resize.pth
Iteration 0 - Model Sparsity: 0.00%
Pruning iteration 1




RuntimeError: Error(s) in loading state_dict for ResNet:
	Missing key(s) in state_dict: "conv1.weight_orig", "conv1.weight_mask", "bn1.weight_orig", "bn1.weight_mask", "layer1.0.conv1.weight_orig", "layer1.0.conv1.weight_mask", "layer1.0.bn1.weight_orig", "layer1.0.bn1.weight_mask", "layer1.0.conv2.weight_orig", "layer1.0.conv2.weight_mask", "layer1.0.bn2.weight_orig", "layer1.0.bn2.weight_mask", "layer1.0.conv3.weight_orig", "layer1.0.conv3.weight_mask", "layer1.0.bn3.weight_orig", "layer1.0.bn3.weight_mask", "layer1.0.downsample.0.weight_orig", "layer1.0.downsample.0.weight_mask", "layer1.0.downsample.1.weight_orig", "layer1.0.downsample.1.weight_mask", "layer1.1.conv1.weight_orig", "layer1.1.conv1.weight_mask", "layer1.1.bn1.weight_orig", "layer1.1.bn1.weight_mask", "layer1.1.conv2.weight_orig", "layer1.1.conv2.weight_mask", "layer1.1.bn2.weight_orig", "layer1.1.bn2.weight_mask", "layer1.1.conv3.weight_orig", "layer1.1.conv3.weight_mask", "layer1.1.bn3.weight_orig", "layer1.1.bn3.weight_mask", "layer1.2.conv1.weight_orig", "layer1.2.conv1.weight_mask", "layer1.2.bn1.weight_orig", "layer1.2.bn1.weight_mask", "layer1.2.conv2.weight_orig", "layer1.2.conv2.weight_mask", "layer1.2.bn2.weight_orig", "layer1.2.bn2.weight_mask", "layer1.2.conv3.weight_orig", "layer1.2.conv3.weight_mask", "layer1.2.bn3.weight_orig", "layer1.2.bn3.weight_mask", "layer2.0.conv1.weight_orig", "layer2.0.conv1.weight_mask", "layer2.0.bn1.weight_orig", "layer2.0.bn1.weight_mask", "layer2.0.conv2.weight_orig", "layer2.0.conv2.weight_mask", "layer2.0.bn2.weight_orig", "layer2.0.bn2.weight_mask", "layer2.0.conv3.weight_orig", "layer2.0.conv3.weight_mask", "layer2.0.bn3.weight_orig", "layer2.0.bn3.weight_mask", "layer2.0.downsample.0.weight_orig", "layer2.0.downsample.0.weight_mask", "layer2.0.downsample.1.weight_orig", "layer2.0.downsample.1.weight_mask", "layer2.1.conv1.weight_orig", "layer2.1.conv1.weight_mask", "layer2.1.bn1.weight_orig", "layer2.1.bn1.weight_mask", "layer2.1.conv2.weight_orig", "layer2.1.conv2.weight_mask", "layer2.1.bn2.weight_orig", "layer2.1.bn2.weight_mask", "layer2.1.conv3.weight_orig", "layer2.1.conv3.weight_mask", "layer2.1.bn3.weight_orig", "layer2.1.bn3.weight_mask", "layer2.2.conv1.weight_orig", "layer2.2.conv1.weight_mask", "layer2.2.bn1.weight_orig", "layer2.2.bn1.weight_mask", "layer2.2.conv2.weight_orig", "layer2.2.conv2.weight_mask", "layer2.2.bn2.weight_orig", "layer2.2.bn2.weight_mask", "layer2.2.conv3.weight_orig", "layer2.2.conv3.weight_mask", "layer2.2.bn3.weight_orig", "layer2.2.bn3.weight_mask", "layer2.3.conv1.weight_orig", "layer2.3.conv1.weight_mask", "layer2.3.bn1.weight_orig", "layer2.3.bn1.weight_mask", "layer2.3.conv2.weight_orig", "layer2.3.conv2.weight_mask", "layer2.3.bn2.weight_orig", "layer2.3.bn2.weight_mask", "layer2.3.conv3.weight_orig", "layer2.3.conv3.weight_mask", "layer2.3.bn3.weight_orig", "layer2.3.bn3.weight_mask", "layer3.0.conv1.weight_orig", "layer3.0.conv1.weight_mask", "layer3.0.bn1.weight_orig", "layer3.0.bn1.weight_mask", "layer3.0.conv2.weight_orig", "layer3.0.conv2.weight_mask", "layer3.0.bn2.weight_orig", "layer3.0.bn2.weight_mask", "layer3.0.conv3.weight_orig", "layer3.0.conv3.weight_mask", "layer3.0.bn3.weight_orig", "layer3.0.bn3.weight_mask", "layer3.0.downsample.0.weight_orig", "layer3.0.downsample.0.weight_mask", "layer3.0.downsample.1.weight_orig", "layer3.0.downsample.1.weight_mask", "layer3.1.conv1.weight_orig", "layer3.1.conv1.weight_mask", "layer3.1.bn1.weight_orig", "layer3.1.bn1.weight_mask", "layer3.1.conv2.weight_orig", "layer3.1.conv2.weight_mask", "layer3.1.bn2.weight_orig", "layer3.1.bn2.weight_mask", "layer3.1.conv3.weight_orig", "layer3.1.conv3.weight_mask", "layer3.1.bn3.weight_orig", "layer3.1.bn3.weight_mask", "layer3.2.conv1.weight_orig", "layer3.2.conv1.weight_mask", "layer3.2.bn1.weight_orig", "layer3.2.bn1.weight_mask", "layer3.2.conv2.weight_orig", "layer3.2.conv2.weight_mask", "layer3.2.bn2.weight_orig", "layer3.2.bn2.weight_mask", "layer3.2.conv3.weight_orig", "layer3.2.conv3.weight_mask", "layer3.2.bn3.weight_orig", "layer3.2.bn3.weight_mask", "layer3.3.conv1.weight_orig", "layer3.3.conv1.weight_mask", "layer3.3.bn1.weight_orig", "layer3.3.bn1.weight_mask", "layer3.3.conv2.weight_orig", "layer3.3.conv2.weight_mask", "layer3.3.bn2.weight_orig", "layer3.3.bn2.weight_mask", "layer3.3.conv3.weight_orig", "layer3.3.conv3.weight_mask", "layer3.3.bn3.weight_orig", "layer3.3.bn3.weight_mask", "layer3.4.conv1.weight_orig", "layer3.4.conv1.weight_mask", "layer3.4.bn1.weight_orig", "layer3.4.bn1.weight_mask", "layer3.4.conv2.weight_orig", "layer3.4.conv2.weight_mask", "layer3.4.bn2.weight_orig", "layer3.4.bn2.weight_mask", "layer3.4.conv3.weight_orig", "layer3.4.conv3.weight_mask", "layer3.4.bn3.weight_orig", "layer3.4.bn3.weight_mask", "layer3.5.conv1.weight_orig", "layer3.5.conv1.weight_mask", "layer3.5.bn1.weight_orig", "layer3.5.bn1.weight_mask", "layer3.5.conv2.weight_orig", "layer3.5.conv2.weight_mask", "layer3.5.bn2.weight_orig", "layer3.5.bn2.weight_mask", "layer3.5.conv3.weight_orig", "layer3.5.conv3.weight_mask", "layer3.5.bn3.weight_orig", "layer3.5.bn3.weight_mask", "layer4.0.conv1.weight_orig", "layer4.0.conv1.weight_mask", "layer4.0.bn1.weight_orig", "layer4.0.bn1.weight_mask", "layer4.0.conv2.weight_orig", "layer4.0.conv2.weight_mask", "layer4.0.bn2.weight_orig", "layer4.0.bn2.weight_mask", "layer4.0.conv3.weight_orig", "layer4.0.conv3.weight_mask", "layer4.0.bn3.weight_orig", "layer4.0.bn3.weight_mask", "layer4.0.downsample.0.weight_orig", "layer4.0.downsample.0.weight_mask", "layer4.0.downsample.1.weight_orig", "layer4.0.downsample.1.weight_mask", "layer4.1.conv1.weight_orig", "layer4.1.conv1.weight_mask", "layer4.1.bn1.weight_orig", "layer4.1.bn1.weight_mask", "layer4.1.conv2.weight_orig", "layer4.1.conv2.weight_mask", "layer4.1.bn2.weight_orig", "layer4.1.bn2.weight_mask", "layer4.1.conv3.weight_orig", "layer4.1.conv3.weight_mask", "layer4.1.bn3.weight_orig", "layer4.1.bn3.weight_mask", "layer4.2.conv1.weight_orig", "layer4.2.conv1.weight_mask", "layer4.2.bn1.weight_orig", "layer4.2.bn1.weight_mask", "layer4.2.conv2.weight_orig", "layer4.2.conv2.weight_mask", "layer4.2.bn2.weight_orig", "layer4.2.bn2.weight_mask", "layer4.2.conv3.weight_orig", "layer4.2.conv3.weight_mask", "layer4.2.bn3.weight_orig", "layer4.2.bn3.weight_mask", "fc.weight_orig", "fc.weight_mask". 
	Unexpected key(s) in state_dict: "conv1.weight", "bn1.weight", "layer1.0.conv1.weight", "layer1.0.bn1.weight", "layer1.0.conv2.weight", "layer1.0.bn2.weight", "layer1.0.conv3.weight", "layer1.0.bn3.weight", "layer1.0.downsample.0.weight", "layer1.0.downsample.1.weight", "layer1.1.conv1.weight", "layer1.1.bn1.weight", "layer1.1.conv2.weight", "layer1.1.bn2.weight", "layer1.1.conv3.weight", "layer1.1.bn3.weight", "layer1.2.conv1.weight", "layer1.2.bn1.weight", "layer1.2.conv2.weight", "layer1.2.bn2.weight", "layer1.2.conv3.weight", "layer1.2.bn3.weight", "layer2.0.conv1.weight", "layer2.0.bn1.weight", "layer2.0.conv2.weight", "layer2.0.bn2.weight", "layer2.0.conv3.weight", "layer2.0.bn3.weight", "layer2.0.downsample.0.weight", "layer2.0.downsample.1.weight", "layer2.1.conv1.weight", "layer2.1.bn1.weight", "layer2.1.conv2.weight", "layer2.1.bn2.weight", "layer2.1.conv3.weight", "layer2.1.bn3.weight", "layer2.2.conv1.weight", "layer2.2.bn1.weight", "layer2.2.conv2.weight", "layer2.2.bn2.weight", "layer2.2.conv3.weight", "layer2.2.bn3.weight", "layer2.3.conv1.weight", "layer2.3.bn1.weight", "layer2.3.conv2.weight", "layer2.3.bn2.weight", "layer2.3.conv3.weight", "layer2.3.bn3.weight", "layer3.0.conv1.weight", "layer3.0.bn1.weight", "layer3.0.conv2.weight", "layer3.0.bn2.weight", "layer3.0.conv3.weight", "layer3.0.bn3.weight", "layer3.0.downsample.0.weight", "layer3.0.downsample.1.weight", "layer3.1.conv1.weight", "layer3.1.bn1.weight", "layer3.1.conv2.weight", "layer3.1.bn2.weight", "layer3.1.conv3.weight", "layer3.1.bn3.weight", "layer3.2.conv1.weight", "layer3.2.bn1.weight", "layer3.2.conv2.weight", "layer3.2.bn2.weight", "layer3.2.conv3.weight", "layer3.2.bn3.weight", "layer3.3.conv1.weight", "layer3.3.bn1.weight", "layer3.3.conv2.weight", "layer3.3.bn2.weight", "layer3.3.conv3.weight", "layer3.3.bn3.weight", "layer3.4.conv1.weight", "layer3.4.bn1.weight", "layer3.4.conv2.weight", "layer3.4.bn2.weight", "layer3.4.conv3.weight", "layer3.4.bn3.weight", "layer3.5.conv1.weight", "layer3.5.bn1.weight", "layer3.5.conv2.weight", "layer3.5.bn2.weight", "layer3.5.conv3.weight", "layer3.5.bn3.weight", "layer4.0.conv1.weight", "layer4.0.bn1.weight", "layer4.0.conv2.weight", "layer4.0.bn2.weight", "layer4.0.conv3.weight", "layer4.0.bn3.weight", "layer4.0.downsample.0.weight", "layer4.0.downsample.1.weight", "layer4.1.conv1.weight", "layer4.1.bn1.weight", "layer4.1.conv2.weight", "layer4.1.bn2.weight", "layer4.1.conv3.weight", "layer4.1.bn3.weight", "layer4.2.conv1.weight", "layer4.2.bn1.weight", "layer4.2.conv2.weight", "layer4.2.bn2.weight", "layer4.2.conv3.weight", "layer4.2.bn3.weight", "fc.weight". 

In [None]:
def inspect_weights(model_path)
    
    

In [16]:
path = "./models/"


for i in range(1,6):
    model_path = f"./models/pruned_model_iteration_{i}.pth"
    model, devie = load_asl_model(model_path)
    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    print(model[0])

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


RuntimeError: Error(s) in loading state_dict for ResNet:
	Missing key(s) in state_dict: "conv1.weight", "bn1.weight", "layer1.0.conv1.weight", "layer1.0.bn1.weight", "layer1.0.conv2.weight", "layer1.0.bn2.weight", "layer1.0.conv3.weight", "layer1.0.bn3.weight", "layer1.0.downsample.0.weight", "layer1.0.downsample.1.weight", "layer1.1.conv1.weight", "layer1.1.bn1.weight", "layer1.1.conv2.weight", "layer1.1.bn2.weight", "layer1.1.conv3.weight", "layer1.1.bn3.weight", "layer1.2.conv1.weight", "layer1.2.bn1.weight", "layer1.2.conv2.weight", "layer1.2.bn2.weight", "layer1.2.conv3.weight", "layer1.2.bn3.weight", "layer2.0.conv1.weight", "layer2.0.bn1.weight", "layer2.0.conv2.weight", "layer2.0.bn2.weight", "layer2.0.conv3.weight", "layer2.0.bn3.weight", "layer2.0.downsample.0.weight", "layer2.0.downsample.1.weight", "layer2.1.conv1.weight", "layer2.1.bn1.weight", "layer2.1.conv2.weight", "layer2.1.bn2.weight", "layer2.1.conv3.weight", "layer2.1.bn3.weight", "layer2.2.conv1.weight", "layer2.2.bn1.weight", "layer2.2.conv2.weight", "layer2.2.bn2.weight", "layer2.2.conv3.weight", "layer2.2.bn3.weight", "layer2.3.conv1.weight", "layer2.3.bn1.weight", "layer2.3.conv2.weight", "layer2.3.bn2.weight", "layer2.3.conv3.weight", "layer2.3.bn3.weight", "layer3.0.conv1.weight", "layer3.0.bn1.weight", "layer3.0.conv2.weight", "layer3.0.bn2.weight", "layer3.0.conv3.weight", "layer3.0.bn3.weight", "layer3.0.downsample.0.weight", "layer3.0.downsample.1.weight", "layer3.1.conv1.weight", "layer3.1.bn1.weight", "layer3.1.conv2.weight", "layer3.1.bn2.weight", "layer3.1.conv3.weight", "layer3.1.bn3.weight", "layer3.2.conv1.weight", "layer3.2.bn1.weight", "layer3.2.conv2.weight", "layer3.2.bn2.weight", "layer3.2.conv3.weight", "layer3.2.bn3.weight", "layer3.3.conv1.weight", "layer3.3.bn1.weight", "layer3.3.conv2.weight", "layer3.3.bn2.weight", "layer3.3.conv3.weight", "layer3.3.bn3.weight", "layer3.4.conv1.weight", "layer3.4.bn1.weight", "layer3.4.conv2.weight", "layer3.4.bn2.weight", "layer3.4.conv3.weight", "layer3.4.bn3.weight", "layer3.5.conv1.weight", "layer3.5.bn1.weight", "layer3.5.conv2.weight", "layer3.5.bn2.weight", "layer3.5.conv3.weight", "layer3.5.bn3.weight", "layer4.0.conv1.weight", "layer4.0.bn1.weight", "layer4.0.conv2.weight", "layer4.0.bn2.weight", "layer4.0.conv3.weight", "layer4.0.bn3.weight", "layer4.0.downsample.0.weight", "layer4.0.downsample.1.weight", "layer4.1.conv1.weight", "layer4.1.bn1.weight", "layer4.1.conv2.weight", "layer4.1.bn2.weight", "layer4.1.conv3.weight", "layer4.1.bn3.weight", "layer4.2.conv1.weight", "layer4.2.bn1.weight", "layer4.2.conv2.weight", "layer4.2.bn2.weight", "layer4.2.conv3.weight", "layer4.2.bn3.weight", "fc.weight". 
	Unexpected key(s) in state_dict: "conv1.weight_orig", "conv1.weight_mask", "bn1.weight_orig", "bn1.weight_mask", "layer1.0.conv1.weight_orig", "layer1.0.conv1.weight_mask", "layer1.0.bn1.weight_orig", "layer1.0.bn1.weight_mask", "layer1.0.conv2.weight_orig", "layer1.0.conv2.weight_mask", "layer1.0.bn2.weight_orig", "layer1.0.bn2.weight_mask", "layer1.0.conv3.weight_orig", "layer1.0.conv3.weight_mask", "layer1.0.bn3.weight_orig", "layer1.0.bn3.weight_mask", "layer1.0.downsample.0.weight_orig", "layer1.0.downsample.0.weight_mask", "layer1.0.downsample.1.weight_orig", "layer1.0.downsample.1.weight_mask", "layer1.1.conv1.weight_orig", "layer1.1.conv1.weight_mask", "layer1.1.bn1.weight_orig", "layer1.1.bn1.weight_mask", "layer1.1.conv2.weight_orig", "layer1.1.conv2.weight_mask", "layer1.1.bn2.weight_orig", "layer1.1.bn2.weight_mask", "layer1.1.conv3.weight_orig", "layer1.1.conv3.weight_mask", "layer1.1.bn3.weight_orig", "layer1.1.bn3.weight_mask", "layer1.2.conv1.weight_orig", "layer1.2.conv1.weight_mask", "layer1.2.bn1.weight_orig", "layer1.2.bn1.weight_mask", "layer1.2.conv2.weight_orig", "layer1.2.conv2.weight_mask", "layer1.2.bn2.weight_orig", "layer1.2.bn2.weight_mask", "layer1.2.conv3.weight_orig", "layer1.2.conv3.weight_mask", "layer1.2.bn3.weight_orig", "layer1.2.bn3.weight_mask", "layer2.0.conv1.weight_orig", "layer2.0.conv1.weight_mask", "layer2.0.bn1.weight_orig", "layer2.0.bn1.weight_mask", "layer2.0.conv2.weight_orig", "layer2.0.conv2.weight_mask", "layer2.0.bn2.weight_orig", "layer2.0.bn2.weight_mask", "layer2.0.conv3.weight_orig", "layer2.0.conv3.weight_mask", "layer2.0.bn3.weight_orig", "layer2.0.bn3.weight_mask", "layer2.0.downsample.0.weight_orig", "layer2.0.downsample.0.weight_mask", "layer2.0.downsample.1.weight_orig", "layer2.0.downsample.1.weight_mask", "layer2.1.conv1.weight_orig", "layer2.1.conv1.weight_mask", "layer2.1.bn1.weight_orig", "layer2.1.bn1.weight_mask", "layer2.1.conv2.weight_orig", "layer2.1.conv2.weight_mask", "layer2.1.bn2.weight_orig", "layer2.1.bn2.weight_mask", "layer2.1.conv3.weight_orig", "layer2.1.conv3.weight_mask", "layer2.1.bn3.weight_orig", "layer2.1.bn3.weight_mask", "layer2.2.conv1.weight_orig", "layer2.2.conv1.weight_mask", "layer2.2.bn1.weight_orig", "layer2.2.bn1.weight_mask", "layer2.2.conv2.weight_orig", "layer2.2.conv2.weight_mask", "layer2.2.bn2.weight_orig", "layer2.2.bn2.weight_mask", "layer2.2.conv3.weight_orig", "layer2.2.conv3.weight_mask", "layer2.2.bn3.weight_orig", "layer2.2.bn3.weight_mask", "layer2.3.conv1.weight_orig", "layer2.3.conv1.weight_mask", "layer2.3.bn1.weight_orig", "layer2.3.bn1.weight_mask", "layer2.3.conv2.weight_orig", "layer2.3.conv2.weight_mask", "layer2.3.bn2.weight_orig", "layer2.3.bn2.weight_mask", "layer2.3.conv3.weight_orig", "layer2.3.conv3.weight_mask", "layer2.3.bn3.weight_orig", "layer2.3.bn3.weight_mask", "layer3.0.conv1.weight_orig", "layer3.0.conv1.weight_mask", "layer3.0.bn1.weight_orig", "layer3.0.bn1.weight_mask", "layer3.0.conv2.weight_orig", "layer3.0.conv2.weight_mask", "layer3.0.bn2.weight_orig", "layer3.0.bn2.weight_mask", "layer3.0.conv3.weight_orig", "layer3.0.conv3.weight_mask", "layer3.0.bn3.weight_orig", "layer3.0.bn3.weight_mask", "layer3.0.downsample.0.weight_orig", "layer3.0.downsample.0.weight_mask", "layer3.0.downsample.1.weight_orig", "layer3.0.downsample.1.weight_mask", "layer3.1.conv1.weight_orig", "layer3.1.conv1.weight_mask", "layer3.1.bn1.weight_orig", "layer3.1.bn1.weight_mask", "layer3.1.conv2.weight_orig", "layer3.1.conv2.weight_mask", "layer3.1.bn2.weight_orig", "layer3.1.bn2.weight_mask", "layer3.1.conv3.weight_orig", "layer3.1.conv3.weight_mask", "layer3.1.bn3.weight_orig", "layer3.1.bn3.weight_mask", "layer3.2.conv1.weight_orig", "layer3.2.conv1.weight_mask", "layer3.2.bn1.weight_orig", "layer3.2.bn1.weight_mask", "layer3.2.conv2.weight_orig", "layer3.2.conv2.weight_mask", "layer3.2.bn2.weight_orig", "layer3.2.bn2.weight_mask", "layer3.2.conv3.weight_orig", "layer3.2.conv3.weight_mask", "layer3.2.bn3.weight_orig", "layer3.2.bn3.weight_mask", "layer3.3.conv1.weight_orig", "layer3.3.conv1.weight_mask", "layer3.3.bn1.weight_orig", "layer3.3.bn1.weight_mask", "layer3.3.conv2.weight_orig", "layer3.3.conv2.weight_mask", "layer3.3.bn2.weight_orig", "layer3.3.bn2.weight_mask", "layer3.3.conv3.weight_orig", "layer3.3.conv3.weight_mask", "layer3.3.bn3.weight_orig", "layer3.3.bn3.weight_mask", "layer3.4.conv1.weight_orig", "layer3.4.conv1.weight_mask", "layer3.4.bn1.weight_orig", "layer3.4.bn1.weight_mask", "layer3.4.conv2.weight_orig", "layer3.4.conv2.weight_mask", "layer3.4.bn2.weight_orig", "layer3.4.bn2.weight_mask", "layer3.4.conv3.weight_orig", "layer3.4.conv3.weight_mask", "layer3.4.bn3.weight_orig", "layer3.4.bn3.weight_mask", "layer3.5.conv1.weight_orig", "layer3.5.conv1.weight_mask", "layer3.5.bn1.weight_orig", "layer3.5.bn1.weight_mask", "layer3.5.conv2.weight_orig", "layer3.5.conv2.weight_mask", "layer3.5.bn2.weight_orig", "layer3.5.bn2.weight_mask", "layer3.5.conv3.weight_orig", "layer3.5.conv3.weight_mask", "layer3.5.bn3.weight_orig", "layer3.5.bn3.weight_mask", "layer4.0.conv1.weight_orig", "layer4.0.conv1.weight_mask", "layer4.0.bn1.weight_orig", "layer4.0.bn1.weight_mask", "layer4.0.conv2.weight_orig", "layer4.0.conv2.weight_mask", "layer4.0.bn2.weight_orig", "layer4.0.bn2.weight_mask", "layer4.0.conv3.weight_orig", "layer4.0.conv3.weight_mask", "layer4.0.bn3.weight_orig", "layer4.0.bn3.weight_mask", "layer4.0.downsample.0.weight_orig", "layer4.0.downsample.0.weight_mask", "layer4.0.downsample.1.weight_orig", "layer4.0.downsample.1.weight_mask", "layer4.1.conv1.weight_orig", "layer4.1.conv1.weight_mask", "layer4.1.bn1.weight_orig", "layer4.1.bn1.weight_mask", "layer4.1.conv2.weight_orig", "layer4.1.conv2.weight_mask", "layer4.1.bn2.weight_orig", "layer4.1.bn2.weight_mask", "layer4.1.conv3.weight_orig", "layer4.1.conv3.weight_mask", "layer4.1.bn3.weight_orig", "layer4.1.bn3.weight_mask", "layer4.2.conv1.weight_orig", "layer4.2.conv1.weight_mask", "layer4.2.bn1.weight_orig", "layer4.2.bn1.weight_mask", "layer4.2.conv2.weight_orig", "layer4.2.conv2.weight_mask", "layer4.2.bn2.weight_orig", "layer4.2.bn2.weight_mask", "layer4.2.conv3.weight_orig", "layer4.2.conv3.weight_mask", "layer4.2.bn3.weight_orig", "layer4.2.bn3.weight_mask", "fc.weight_orig", "fc.weight_mask". 

In [79]:
def convert_to_coreml(model_path):

    # Define the labels for the classes (A-Z, del, nothing, space)
    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]
        
    # Load the trained PyTorch model
    # Ensure the model architecture matches the one used during training
    num_classes = len(labels)  # 29 classes
    model = models.resnet50(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    for p in prune_params:
        prune.identity(p[0], "weight")
    # Copy the parameters
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    
    copy_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    # (we assume the same model architecture)
    for p in copy_params:
        prune.remove(*p)

    # Load the saved weights
    print(f"Model weights loaded from {model_path}")

    # Set the model to evaluation mode
    model.eval()

    # 1. Create an example input tensor resized to 224x224
    example_input = torch.rand(1, 3, 224, 224)  # Batch size 1, 3 color channels, 224x224 image

    # 2. Convert the model to TorchScript
    traced_model = torch.jit.trace(model, example_input)
    print("Model converted to TorchScript format")

    # 3. Convert the TorchScript model to CoreML format
    mlmodel = ct.convert(
        traced_model,
        inputs=[ct.ImageType(name="image", shape=example_input.shape, channel_first=True)],
        classifier_config=ct.ClassifierConfig(class_labels=labels, predicted_feature_name="classLabel"),
        convert_to="neuralnetwork",  # Ensures compatibility with .mlmodel format
    )
    print("Model converted to CoreML format")

    # 4. Apply Post-Training Quantization to INT8
    # quantized_mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
    #     mlmodel, nbits=8  # Use 8-bit integer quantization
    # )
    # print("Model quantized to INT8")

    # 5. Save the CoreML model
    mlmodel_path = model_path.replace(".pth", ".mlmodel")  # Desired output filename
    mlmodel.save(mlmodel_path)
    print(f"CoreML model saved as {mlmodel_path}")
    return

In [82]:
# Convert each pytoch model to CoreML for testing
for model_path in model_paths[1:]:
    convert_to_coreml(model_path)

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


Model weights loaded from models/pruned_model_iteration_1.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 6947.04 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 220.43 passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 179.31 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 361.91 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 146.82 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_iteration_1.mlmodel
Model weights loaded from models/pruned_model_iteration_2.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8648.45 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 231.05 passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 182.16 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 389.34 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 165.53 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_iteration_2.mlmodel
Model weights loaded from models/pruned_model_iteration_3.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8987.68 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 196.53 passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 186.10 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 378.14 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 166.19 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_iteration_3.mlmodel
Model weights loaded from models/pruned_model_iteration_4.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8988.77 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 223.01 passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 176.73 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 392.60 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 164.13 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_iteration_4.mlmodel
Model weights loaded from models/pruned_model_iteration_5.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8262.42 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 215.43 passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 175.20 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 392.58 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 158.89 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_iteration_5.mlmodel


| Iteration | Sparsity (%) | Accuracy | Latency (s) | Disk Size (MB) |
| --------- | ------------ | -------- | ----------- | -------------- |
|     0     |   0.0%       |  0.9990476190476191   |             |      847.00    |
|     1     |   32.96%     |  0.9990476190476191   |             |      567.61    |
|     2     |   55.05%     |  0.9990476190476191   |             |      380.44    |
|     3     |   69.84%     |  0.9990476190476191   |             |      255.08    |
|     4     |   79.76%     |  0.9990476190476191   |             |      171.10    |
|     5     |   86.40%     |  0.9990476190476191   |             |      114.85    |


### [2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):

1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    | Structure Pruned | Sparsity Rate | Accuracy | Latency (s) | Disk Size (MB) |
    | ---------------- | ------------- | -------- | ----------- | -------------- |
    | Attention heads? |               |          |             |                |
    | Layers?          |               |          |             |                |
    | Other?           |               |          |             |                |



2. Conduct a sensitivity analysis of pruning (structured or unstructured) different components of your model. For instance, what happens to your model's performance when you prune input embeddings vs hidden layer weights? Do earlier layers seem more or less important than later layers? You are not required to conduct a thorough study, but you should be able to draw a couple concrete conclusions.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    |        Pruning Technique        |  Sparsity Rate  | Accuracy | Latency (s) | Disk Size (MB) |
    | ------------------------------- | --------------- | -------- | ----------- | -------------- |
    | Unstructured, all non-embedding |  30% global     |          |             |                |
    | Structured, attention heads     |  50% per module |          |             |                |



3. Export and run your unpruned and a diverse sample of your pruned models on an inference runtime (ONNX runtime, TensorRT). Check out [the PyTorch ONNX docs](https://pytorch.org/docs/stable/onnx.html) and [this page](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) for reference. Did you run into any challenges? Do you see latency benefits? Was anything surprising? Report inference latency and discuss.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    | Inference Runtime | Sparsity Rate | Latency (s) | Disk Size (MB) |
    | ----------------- | ------------- | ----------- | -------------- |
    | ONNX              |     0%        |             |                |
    | ONNX (pruned)     |    30%        |             |                |

In [None]:
#[2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):
#1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.
#2. 