In [1]:
# Install required libraries
!pip install torch torchvision
!pip install tqdm
!pip install pandas
!pip install torch-pruning
!pip install -U coremltools


Collecting torchvision
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/28/57/4d7ad90be612f5ac6c4bdafcb0ff13e818e14a340a88c8ca00d9ed8c2dad/torchvision-0.20.1-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading torchvision-0.20.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/d0/db/5d9cbfbc7968d79c5c09a0bc0bc3735da079f2fd07cc10498a62b320a480/torch-2.5.1-cp311-none-macosx_11_0_arm64.whl.metadata
  Downloading torch-2.5.1-cp311-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch)
  Obtaining dependency information for sympy==1.13.1 from https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl.metadata
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torchvision-0.20.1-cp311-cp311-macosx_11_0_arm64

In [2]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torch_pruning as tp
from copy import deepcopy
import pandas as pd
import os
import heapq
from tqdm import tqdm
import coremltools as ct
from coremltools.optimize.torch.pruning import ModuleMagnitudePrunerConfig, MagnitudePruner, MagnitudePrunerConfig

Torch version 2.5.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.4.0 is the most recent version that has been tested.


## [3 points] Exercise 3: Your Model, Device, and Data


In this section, you will repeat the simple experiments from Exercise 2 on your own model, device, and data. Additionally, you will choose two of three options for practical benefits to your pruned model's accuracy and latency. You may use a different sparsity level, higher or lower than 33%, if it makes sense for your settings. Make sure to report any changes you made and why you made them. Additionally, report any challenges encountered measuring latency or storage on your device.

### [1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.

Keep performing the same unstructured magnitude pruning of your choice of sparsity level of the remaining weights on the same model (*without re-training or resetting the model*). You will apply the same function as above with the same 0.33 proportion parameter.

Collect values for this table, keeping in mind that you will need to plot the results later. You might want to keep the values in Pandas DataFrames. Sparsity reported should be the percentage of *prunable* parameters pruned. 

| Iteration | Sparsity (%) | Accuracy | Latency (s) | Disk Size (MB) |
| --------- | ------------ | -------- | ----------- | -------------- |
|     0     |   0.0%       |          |             |                |
|     1     |      ?       |          |             |                |
|     2     |              |          |             |                |
|     3     |              |          |             |                |
|     4     |              |          |             |                |
|     5     |              |          |             |                |


In [3]:
def calculate_sparsity(model, print_results=False):
    """
    Calculate the sparsity level (using the percent of elements that are 0) for:
    - each parameter,
    - all pruned parameters overall, and
    - the model overall.
    
    Report each of these values: 
    - the sparsity level of each parameter, 
    - across all pruned parameters, and 
    - for the model overall. 
    """
    sparsity_per_parameter = {}
    total_zero_count_pruned = 0
    total_element_count_pruned = 0
    total_zero_count_model = 0
    total_element_count_model = sum(p.numel() for p in model.parameters() if p.requires_grad)

    # Iterate over all buffers in the model
    for name, buffer in model.named_buffers():
        # Calculate the number of zero elements and total elements in the buffer
        zero_count = (buffer == 0).sum().item()
        total_elements = buffer.numel()
        
        # Calculate the sparsity level for this parameter
        sparsity_per_parameter[name] = zero_count / total_elements * 100

        # Check if this is a pruned parameter by looking for "weight_mask" or "bias_mask" in the name
        if "weight_mask" in name or "bias_mask" in name:
            total_zero_count_pruned += zero_count
            total_element_count_pruned += total_elements

        # Accumulate for overall model sparsity
        total_zero_count_model += zero_count

    # Calculate overall sparsity for pruned parameters and the entire model
    sparsity_pruned_parameters = (total_zero_count_pruned / total_element_count_pruned * 100
                                  if total_element_count_pruned > 0 else 0)
    sparsity_model = total_zero_count_model / total_element_count_model * 100

    # Print or return the results
    if print_results:
        print("Sparsity per parameter:")
        for name, sparsity in sparsity_per_parameter.items():
            print(f"  {name}: {sparsity:.2f}%")
        
        print(f"Sparsity across all pruned parameters: {sparsity_pruned_parameters:.2f}%")
        print(f"Sparsity for the model overall: {sparsity_model:.2f}%")

    # Optionally, return the values for further use
    return {
        "sparsity_per_parameter": sparsity_per_parameter,
        "sparsity_pruned_parameters": sparsity_pruned_parameters,
        "sparsity_model": sparsity_model
    }

In [4]:
def print_size_of_model(model, label=""):
    sd = model.state_dict()
    for item in sd:
        sd[item] = model.state_dict()[item].to_sparse()
    
    torch.save(sd, "temp.pt")
    size=os.path.getsize("temp.pt")
    #print("model: ",label,' \t','Size (MB):', size/1e6)

    os.remove('temp.pt')
    return size

In [5]:
def evaluate_model(model):
    model.eval()

    size = print_size_of_model(model, "sparse")
    
    return size

In [6]:
def sparse_evaluate(model, device):
    model.to(device)
    model_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    model_copy = safe_deepcopy(model, model_params)
    model_copy_params = [(m[1], "weight") for m in model_copy.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    for p in model_copy_params:
        prune.remove(*p)
    
    return evaluate_model(model_copy)

In [7]:
def load_asl_model(model_path):
    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]
    # Create model
    num_classes = len(labels)  # 29 classes
    model = models.resnet50(weights=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    # Load the saved weights
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Set the model to evaluation mode
    model.eval()
    
    return model, device

In [8]:
def safe_deepcopy(model, params_to_prune):

    # detach the parameters in params_to_prune
    for module, name in params_to_prune:
        param = getattr(module, name)
        setattr(module, name, param.detach())

    return deepcopy(model)


In [9]:
def save_copy_of_model(model, model_path):
    
    model_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    model_copy = safe_deepcopy(model, model_params)
    model_copy_params = [(m[1], "weight") for m in model_copy.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    for p in model_copy_params:
        prune.remove(*p)

    torch.save(model_copy.state_dict(), model_path)

In [10]:
def model_repeated_pruning(model_path, sparsity, iterations):

    # Create empty DataFrame to store results
    results_df = pd.DataFrame(columns=[
        'iteration',
        'sparsity_model',
        'size_mb'
    ])

    # Define the labels for the classes (A-Z, del, nothing, space)

    model, device = load_asl_model(model_path)
    print(f"Model weights loaded from {model_path}")

    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    # Store initial results (iteration 0)
    sparsity_results = calculate_sparsity(model)
    size = evaluate_model(model)
    
    results_df.loc[0] = {
        'iteration': 0,
        'sparsity_model': sparsity_results['sparsity_model'],
        'size_mb': size/1e6
    }

    model_path_list = [model_path]

    model, device = load_asl_model(model_path)
    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    print(f"Iteration 0 - Model Sparsity: {sparsity_results['sparsity_model']:.2f}%")

    for i in range(1, 6):
        print(f"Pruning iteration {i}")

        prune.global_unstructured(prune_params, pruning_method=prune.L1Unstructured, amount=sparsity)

        saved_model_path = f"models/pruned_model_{i}.pth"
        save_copy_of_model(model, saved_model_path)
        model_path_list.append(saved_model_path)

        size = sparse_evaluate(model, device)
        sparsity_results = calculate_sparsity(model)


        # Store results in DataFrame
        results_df.loc[i] = {
            'iteration': i,
            'sparsity_model': sparsity_results['sparsity_model'],
            'size_mb': size/1e6
        }

        print(f"Sparsity for the model overall at Iteration {i}: {sparsity_results['sparsity_model']:.2f}%, Size MB: {size/1e6}")
    
    
    # Display the DataFrame
    print("\nFinal Results DataFrame:")
    print(results_df)
    
    return results_df, model_path_list

In [11]:
# [1 point] 1. Repeat Exercise 2.4 (repeated unstructured pruning) for your model, on your device and with your data.
# https://apple.github.io/coremltools/source/coremltools.optimize.torch.pruning.html
pruning_results, model_paths = model_repeated_pruning("./models/model_weights_ResNet50_224_resize.pth", sparsity=0.33, iterations=5)


  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


Model weights loaded from ./models/model_weights_ResNet50_224_resize.pth


  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


Iteration 0 - Model Sparsity: 0.00%
Pruning iteration 1
Sparsity for the model overall at Iteration 1: 32.96%, Size MB: 567.609062
Pruning iteration 2
Sparsity for the model overall at Iteration 2: 55.05%, Size MB: 380.443878
Pruning iteration 3
Sparsity for the model overall at Iteration 3: 69.84%, Size MB: 255.07703
Pruning iteration 4
Sparsity for the model overall at Iteration 4: 79.76%, Size MB: 171.102054
Pruning iteration 5
Sparsity for the model overall at Iteration 5: 86.40%, Size MB: 114.853414

Final Results DataFrame:
   iteration  sparsity_model     size_mb
0          0        0.000000  847.002150
1          1       32.962769  567.609062
2          2       55.047824  380.443878
3          3       69.844811  255.077030
4          4       79.758793  171.102054
5          5       86.401161  114.853414


In [None]:
path = "./models/"


for i in range(1,6):
    model_path = f"./models/pruned_model_{i}.pth"
    print(f"Model: {model_path}")
    model, devie = load_asl_model(model_path)
    print(model)
    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]
    print(prune_params[0][0].weight[2][1])

In [16]:
def convert_to_coreml(model_path):

    # Define the labels for the classes (A-Z, del, nothing, space)
    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]
        
    # Load the trained PyTorch model
    # Ensure the model architecture matches the one used during training
    num_classes = len(labels)  # 29 classes
    model = models.resnet50(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)


    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    model.eval()

    preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224
    transforms.ToTensor(),          # Convert image to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # Normalize for ResNet
    ])


    # 1. Create an example input tensor resized to 224x224
    example_input = torch.rand(1, 3, 224, 224)  # Batch size 1, 3 color channels, 224x224 image

    # 2. Convert the model to TorchScript
    traced_model = torch.jit.trace(model, example_input)
    print("Model converted to TorchScript format")

    # 3. Convert the TorchScript model to CoreML format
    mlmodel = ct.convert(
        traced_model,
        inputs=[ct.ImageType(
        name="image",
        shape=example_input.shape,
        channel_first=True,
        scale=1/255.0,  # Scale input to [0,1]
        bias=[-0.485/0.229, -0.456/0.224, -0.406/0.225],  # Mean divided by std for normalization
        color_layout="RGB",
        )],
        classifier_config=ct.ClassifierConfig(class_labels=labels, predicted_feature_name="classLabel"),
        convert_to="neuralnetwork",
    )
    print("Model converted to CoreML format")

    # 4. Apply Post-Training Quantization to INT8
    # quantized_mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
    #     mlmodel, nbits=8  # Use 8-bit integer quantization
    # )
    # print("Model quantized to INT8")

    # 5. Save the CoreML model
    mlmodel_path = model_path.replace(".pth", ".mlmodel")  # Desired output filename
    mlmodel.save(mlmodel_path)
    print(f"CoreML model saved as {mlmodel_path}")

In [17]:
# Convert each pytoch model to CoreML for testing
for model_path in model_paths[1:]:
    convert_to_coreml(model_path)

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 6265.02 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 240.49 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 191.33 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 421.58 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 149.82 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_1.mlmodel
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8305.81 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 241.20 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 178.63 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 396.86 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 144.19 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_2.mlmodel
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 7631.84 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 227.77 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 174.69 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 402.72 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 139.77 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_3.mlmodel
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8646.10 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 225.59 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 188.55 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 418.53 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 149.14 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_4.mlmodel
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8130.82 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 237.53 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 191.80 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 408.18 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:03<00:00, 144.66 ops/s] 


Model converted to CoreML format
CoreML model saved as models/pruned_model_5.mlmodel


| Iteration | Sparsity (%) | Accuracy | Latency (s) | Disk Size (MB) |
| --------- | ------------ | -------- | ----------- | -------------- |
|     0     |   0.0%       |  0.7304  |  0.0067168  |      847.00    |
|     1     |   32.96%     |  0.7035  |  0.0049252  |      567.61    |
|     2     |   55.05%     |  0.3448  |  0.0052006  |      380.44    |
|     3     |   69.84%     |  0.0379  |  0.0038571  |      255.08    |
|     4     |   79.76%     |  0.0357  |  0.0040812  |      171.10    |
|     5     |   86.40%     |  0.0357  |  0.0037301  |      114.85    |


### [2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):

1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    Channel (dimension) pruning

    | Structure Pruned | Sparsity Rate | Accuracy | Latency (s) | Disk Size (MB) |
    | ---------------- | ------------- | -------- | ----------- | -------------- |
    | Conv Layers      |    10%        |  0.6755  |  0.004057   |     75.6       |
    | Conv Layers      |    20%        |  0.3775  |  0.002604   |     59.8       |
    | Conv Layers      |    30%        |  0.1478  |  0.001871   |     45.5       |


In [None]:
#[2 points] 2. Choose two of the following three options to implement on your model, device, and data (1 point per option):
#1. Implement a structured pruning technique. You may prune dimensions of matrices, attention heads, entire layers, etc. Describe your strategy and report the results in a table, adjusting the "sparsity rate" column and as needed.
# Prune by magnitutude of combined parameters in a Conv2d filer a) 64x64 b) 128x128 c)256x256 and its following batch norm
#2. Unstructured magnitutude pruning of a)64 input layers b)128 input layers c)256 input layers

In [24]:
def model_structured_pruning(model_path, sparsity, pruned_layer):
    model, device = load_asl_model(model_path)
    model.eval()

    labels = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
        'del', 'nothing', 'space'
    ]
        
    # Load the trained PyTorch model
    # Ensure the model architecture matches the one used during training
    num_classes = len(labels)  # 29 classes

    example_inputs = torch.randn(1, 3, 224, 224)

    # 1. Importance criterion
    imp = tp.importance.GroupNormImportance(p=2) # or GroupTaylorImportance(), GroupHessianImportance(), etc.

    # 2. Initialize a pruner with the model and the importance criterion
    ignored_layers = []
    for m in model.modules():
        if isinstance(m, (torch.nn.Linear)):
            ignored_layers.append(m) # DO NOT prune the final classifier!

    print(ignored_layers)

    pruner = tp.pruner.MetaPruner( # We can always choose MetaPruner if sparse training is not required.
        model,
        example_inputs,
        importance=imp,
        pruning_ratio=sparsity, # remove 50% channels, ResNet18 = {64, 128, 256, 512} => ResNet18_Half = {32, 64, 128, 256}
        # pruning_ratio_dict = {model.conv1: 0.2, model.layer2: 0.8}, # customized pruning ratios for layers or blocks
        ignored_layers=ignored_layers,
        round_to=4, # It's recommended to round dims/channels to 4x or 8x for acceleration. Please see: https://docs.nvidia.com/deeplearning/performance/dl-performance-convolutional/index.html
    )

    # 3. Prune Model
    base_macs, base_nparams = tp.utils.count_ops_and_params(model, example_inputs)
    pruner.step()
    macs, nparams = tp.utils.count_ops_and_params(model, example_inputs)
    print(f"MACs: {base_macs/1e9} G -> {macs/1e9} G, #Params: {base_nparams/1e6} M -> {nparams/1e6} M")

    save_path = f"models/structued_pruning_{pruned_layer}.pth"
    torch.save(model.state_dict(), save_path)
    print(f"Saved model to {save_path}")

    # 1. Create an example input tensor resized to 224x224
    example_input = torch.rand(1, 3, 224, 224)  # Batch size 1, 3 color channels, 224x224 image

    # 2. Convert the model to TorchScript
    traced_model = torch.jit.trace(model, example_input)
    print("Model converted to TorchScript format")

    # 3. Convert the TorchScript model to CoreML format
    mlmodel = ct.convert(
        traced_model,
        inputs=[ct.ImageType(
        name="image",
        shape=example_input.shape,
        channel_first=True,
        scale=1/255.0,  # Scale input to [0,1]
        bias=[-0.485/0.229, -0.456/0.224, -0.406/0.225],  # Mean divided by std for normalization
        color_layout="RGB",
        )],
        classifier_config=ct.ClassifierConfig(class_labels=labels, predicted_feature_name="classLabel"),
        convert_to="neuralnetwork",
    )
    print("Model converted to CoreML format")

    # 4. Apply Post-Training Quantization to INT8
    # quantized_mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
    #     mlmodel, nbits=8  # Use 8-bit integer quantization
    # )
    # print("Model quantized to INT8")

    # 5. Save the CoreML model
    mlmodel_path = save_path.replace(".pth", ".mlmodel")  # Desired output filename
    mlmodel.save(mlmodel_path)
    print(f"CoreML model saved as {mlmodel_path}")
    
    return save_path
    

In [25]:
strucuted_model_paths = []
strucuted_model_paths.append(model_structured_pruning(model_path="./models/model_weights_ResNet50_224_resize.pth", sparsity=0.1, pruned_layer="10"))
strucuted_model_paths.append(model_structured_pruning(model_path="./models/model_weights_ResNet50_224_resize.pth", sparsity=0.2, pruned_layer="20"))
strucuted_model_paths.append(model_structured_pruning(model_path="./models/model_weights_ResNet50_224_resize.pth", sparsity=0.3, pruned_layer="30"))

  model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))


[Linear(in_features=2048, out_features=29, bias=True)]
MACs: 4.119935517 G -> 3.267928657 G, #Params: 23.567453 M -> 18.920045 M
Saved model to models/structued_pruning_10.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 5374.18 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 180.14 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 166.92 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 389.96 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:02<00:00, 179.45 ops/s] 


Model converted to CoreML format
CoreML model saved as models/structued_pruning_10.mlmodel
[Linear(in_features=2048, out_features=29, bias=True)]
MACs: 4.119935517 G -> 2.580514873 G, #Params: 23.567453 M -> 14.963713 M
Saved model to models/structued_pruning_20.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 8324.06 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 224.17 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 191.64 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 203.63 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:02<00:00, 225.87 ops/s] 


Model converted to CoreML format
CoreML model saved as models/structued_pruning_20.mlmodel
[Linear(in_features=2048, out_features=29, bias=True)]
MACs: 4.119935517 G -> 1.999082993 G, #Params: 23.567453 M -> 11.389361 M
Saved model to models/structued_pruning_30.pth
Model converted to TorchScript format


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 440/441 [00:00<00:00, 7338.97 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 245.19 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 188.28 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 390.32 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 503/503 [00:01<00:00, 294.45 ops/s] 


Model converted to CoreML format
CoreML model saved as models/structued_pruning_30.mlmodel


2. Conduct a sensitivity analysis of pruning (structured or unstructured) different components of your model. For instance, what happens to your model's performance when you prune input embeddings vs hidden layer weights? Do earlier layers seem more or less important than later layers? You are not required to conduct a thorough study, but you should be able to draw a couple concrete conclusions.

    Fill in the following table with your results (choose any 2-3 pruned models to compare to the unpruned model):

    |        Pruning Technique        |  Sparsity Rate  | Accuracy | Latency (s) | Disk Size (MB) |
    | ------------------------------- | --------------- | -------- | ----------- | -------------- |
    | Unstructured, LayerNorm         |  20%      |          |             |                |
    | Unstructured, LayerNorm         |  40%      |          |             |                |
    | Unstructured, Conv2d            |  20%      |          |             |                |
    | Unstructured, Conv2d            |  40%      |          |             |                |

In [None]:
def model_sensitivity_pruning(model_path, sparsity_list):

    # Create empty DataFrame to store results
    results_df = pd.DataFrame(columns=[
        'iteration',
        'sparsity_model',
        'size_mb'
    ])

    # Define the labels for the classes (A-Z, del, nothing, space)

    model, device = load_asl_model(model_path)
    print(f"Model weights loaded from {model_path}")

    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    # Store initial results (iteration 0)
    sparsity_results = calculate_sparsity(model)
    size = evaluate_model(model)
    
    results_df.loc[0] = {
        'iteration': 0,
        'sparsity_model': sparsity_results['sparsity_model'],
        'size_mb': size/1e6
    }

    model_path_list = [model_path]

    model, device = load_asl_model(model_path)
    prune_params = [(m[1], "weight") for m in model.named_modules() if len(list(m[1].children()))==0 and not isinstance(m[1], (nn.ReLU, nn.MaxPool2d, nn.AdaptiveAvgPool2d))]

    print(f"Iteration 0 - Model Sparsity: {sparsity_results['sparsity_model']:.2f}%")
    for layer in [nn.Conv2d, nn.LayerNorm]
    for sparsity in sparsity_list:
        print(f"Pruning iteration {i}")

        prune.global_unstructured(prune_params, pruning_method=prune.L1Unstructured, amount=sparsity)

        saved_model_path = f"models/pruned_model_{i}.pth"
        save_copy_of_model(model, saved_model_path)
        model_path_list.append(saved_model_path)

        size = sparse_evaluate(model, device)
        sparsity_results = calculate_sparsity(model)


        # Store results in DataFrame
        results_df.loc[i] = {
            'iteration': i,
            'sparsity_model': sparsity_results['sparsity_model'],
            'size_mb': size/1e6
        }

        print(f"Sparsity for the model overall at Iteration {i}: {sparsity_results['sparsity_model']:.2f}%, Size MB: {size/1e6}")
    
    
    # Display the DataFrame
    print("\nFinal Results DataFrame:")
    print(results_df)
    
    return results_df, model_path_list

In [None]:
model_sens_list = model_sensitivity_pruning("./models/model_weights_ResNet50_224_resize.pth", sparsity_list=[0.2,0.4])