In [None]:
import os
import torch
import pandas as pd
from transformers import RobertaModel, DistilBertModel, AutoModel
import torch.nn.utils.prune as prune

In [None]:
# Show which device PyTorch will run on: CUDA when a GPU is visible, CPU otherwise
print(torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))

In [None]:
def load_model(model_type, model_path):
    """Load a model from a local checkpoint file or from the Hugging Face hub.

    Args:
        model_type: Name of a supported pre-trained checkpoint
            ('distilroberta-base' or 'roberta-base'). Only consulted when
            ``model_path`` is falsy or does not exist on disk.
        model_path: Optional path to a file previously written with
            ``torch.save``. When the path exists it takes precedence over
            ``model_type``.

    Returns:
        The loaded model, moved to CUDA when available and to CPU otherwise.

    Raises:
        IOError: If ``torch.load`` fails on ``model_path``.
        ValueError: If no usable path is given and ``model_type`` is not one
            of the supported checkpoint names.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    if model_path and os.path.exists(model_path):
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only point model_path at checkpoints you trust.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which rejects fully pickled modules -- confirm against the installed
        # version if loading a model saved by this notebook fails.
        try:
            model = torch.load(model_path, map_location=device)
        except Exception as e:
            # Chain the original exception so the real cause stays in the traceback.
            raise IOError(f"Error loading model from {model_path}: {e}") from e
    elif model_type in ('distilroberta-base', 'roberta-base'):
        # Both checkpoints use the RoBERTa architecture. AutoModel picks the
        # model class from the checkpoint's own config, so the pretrained
        # weights are actually loaded (DistilBertModel does not match the
        # 'distilroberta-base' checkpoint).
        model = AutoModel.from_pretrained(model_type)
    else:
        raise ValueError("Invalid model type or path")

    return model.to(device)

def check_sparsity(model):
    """Print the overall sparsity of a model and display a per-parameter table.

    Sparsity is the fraction of entries that are exactly zero. Only trainable
    parameters (``requires_grad=True``) are counted.

    Args:
        model: Object exposing ``named_parameters()`` (e.g. a ``torch.nn.Module``).

    Returns:
        None. The overall figure is printed and the per-parameter DataFrame
        (columns 'Layer Name' and 'Sparsity') is shown with the notebook's
        ``display`` function.
    """
    rows = []
    total_params = 0
    nonzero_params = 0
    for name, param in model.named_parameters():
        if not param.requires_grad:  # exclude non-trainable parameters
            continue
        layer_size = param.numel()
        # Convert to a Python int once and reuse it below.
        layer_nonzero = torch.count_nonzero(param).item()
        # A zero-element parameter has no defined sparsity; report 0.0
        # instead of dividing by zero.
        layer_sparsity = 1 - layer_nonzero / layer_size if layer_size else 0.0
        rows.append((name, layer_sparsity))
        total_params += layer_size
        nonzero_params += layer_nonzero
    # A model without trainable parameters would otherwise raise ZeroDivisionError.
    overall_sparsity = 1 - nonzero_params / total_params if total_params else 0.0
    print(f"Overall Sparsity: {overall_sparsity:.4%}")
    layer_sparsity_df = pd.DataFrame(rows, columns=['Layer Name', 'Sparsity'])
    display(layer_sparsity_df)

def mpruner_layerwise(model, pruning_proportion):
    """Magnitude-prune the weight of every ``torch.nn.Linear`` layer in place.

    Each Linear layer is pruned independently with L1 unstructured pruning,
    using ``pruning_proportion`` as the ``amount`` argument. Once all layers
    are pruned, the pruning re-parametrization is removed so the pruned
    values live directly in each layer's ``weight``.

    Args:
        model: Module whose Linear sub-modules are pruned. It is modified in place.
        pruning_proportion: Value forwarded as ``amount`` to
            ``prune.l1_unstructured``.

    Returns:
        The same ``model`` object that was passed in.
    """
    linear_layers = [
        layer for layer in model.modules()
        if isinstance(layer, torch.nn.Linear)
    ]

    # First pass: attach an L1-magnitude pruning mask to each layer's weight
    for layer in linear_layers:
        prune.l1_unstructured(layer, name='weight', amount=pruning_proportion)

    # Second pass: drop the re-parametrization, leaving the pruned weight in place
    for layer in linear_layers:
        prune.remove(layer, 'weight')

    return model

In [None]:
# Model selection.
#   model_type: pre-trained checkpoint name accepted by load_model
#               ('roberta-base' or 'distilroberta-base').
#   model_path: local file saved with torch.save to load instead;
#               leave as None to use the pre-trained checkpoint.
model_type = 'roberta-base'
model_path = None

# Load the model and show which class was instantiated
model = load_model(model_type=model_type, model_path=model_path)
print(type(model))

In [None]:
# Checking sparsity before pruning
# Baseline: prints the overall sparsity and displays the per-parameter table
# for the model as loaded, so it can be compared with the post-pruning numbers.
check_sparsity(model)

In [None]:
# Prune the weights of every Linear layer by the same proportion.
# mpruner_layerwise modifies `model` in place and returns it, so
# `pruned_model` and `model` refer to the same object afterwards.
pruning_rate = 0.2  # Between 0 and 1
pruned_model = mpruner_layerwise(model, pruning_proportion=pruning_rate)

In [None]:
# Checking sparsity after pruning
# Only Linear weights are pruned, so other parameters listed in the table
# (e.g. biases) are expected to keep their pre-pruning sparsity.
check_sparsity(pruned_model)

In [None]:
# Saving the model
# The file name records the base model name and the pruning rate (two decimals).
filename = f"{model_type}-mpruned-layerwise-torch-{pruning_rate:.2f}.pt"
print(f"Saving model to {filename}")
# The whole model object is handed to torch.save (not only a state_dict);
# load_model reads such a file back through its model_path argument.
torch.save(pruned_model, filename)
# Release cached GPU memory that PyTorch is no longer using.
torch.cuda.empty_cache()