In [1]:
!pip install datasets
import torch
import numpy as np
import random
from datasets import load_dataset
from transformers import BertForSequenceClassification, BertTokenizer, Trainer, TrainingArguments
import os
import time

torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load IMDB dataset and create subsets
dataset = load_dataset('imdb')
small_train_dataset = dataset["train"].shuffle(seed=42).select([i for i in range(1500)])
small_test_dataset = dataset["test"].shuffle(seed=42).select([i for i in range(500)])

# Split the training dataset into train and validation sets
train_val_split = small_train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_val_split["train"]
validation_dataset = train_val_split["test"]

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(validation_dataset)}")
print(f"Test set size: {len(small_test_dataset)}")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Training set size: 1200
Validation set size: 300
Test set size: 500


In [2]:
# Load tokenizer and tokenize datasets
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
validation_dataset = validation_dataset.map(tokenize_function, batched=True)
small_test_dataset = small_test_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
validation_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
small_test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

print("Tokenization complete.")

Tokenization complete.


In [3]:
# Load BERT model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
print(f"Model loaded with {sum(p.numel() for p in model.parameters()):,} parameters.")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded with 109,483,778 parameters.


In [4]:
from torch.nn.utils import prune

def calculate_pruning_impact(model):
    total_params = sum(p.numel() for p in model.parameters())
    total_pruned_weights = 0

    for transformer_layer in model.bert.encoder.layer:
        weights = transformer_layer.intermediate.dense.weight.detach().cpu()
        total_pruned_weights += (weights == 0).sum().item()

    remaining_params = total_params - total_pruned_weights
    percent_pruned = (total_pruned_weights / total_params) * 100
    print(f"Total Parameters: {total_params:,}, Pruned Parameters: {total_pruned_weights:,}, Pruned Percentage: {percent_pruned:.2f}%")
    return total_params, total_pruned_weights, remaining_params, percent_pruned

def apply_structured_pruning(layer, amount=0.2):
    prune.ln_structured(layer, name="weight", amount=amount, n=1, dim=0)

In [5]:
print("Calculating original model parameters...")
original_model_memory, _, _, _ = calculate_pruning_impact(model)

# Apply structured pruning to all layers
for i, transformer_layer in enumerate(model.bert.encoder.layer):
    apply_structured_pruning(transformer_layer.intermediate.dense, amount=0.2)
    apply_structured_pruning(transformer_layer.output.dense, amount=0.2)

print("Calculating pruned model parameters...")
_, _, pruned_model_memory, _ = calculate_pruning_impact(model)

Calculating original model parameters...
Total Parameters: 109,483,778, Pruned Parameters: 0, Pruned Percentage: 0.00%
Calculating pruned model parameters...
Total Parameters: 109,483,778, Pruned Parameters: 5,658,624, Pruned Percentage: 5.17%


In [6]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    return {"accuracy": accuracy}

In [7]:
from transformers import EarlyStoppingCallback
from sklearn.metrics import accuracy_score

training_args = TrainingArguments(
    output_dir="./structured_pruned_bert_output",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir="./structured_pruned_logs",
    report_to="none",
    seed=42,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

print("Training structured pruned model...")
trainer.train()

  trainer = Trainer(


Training structured pruned model...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7038,0.69249,0.603333
2,0.6989,0.726501,0.47
3,0.684,0.605552,0.673333
4,0.5899,0.67982,0.726667
5,0.5205,0.540756,0.7
6,0.4812,0.523099,0.773333
7,0.4462,0.530244,0.76
8,0.352,0.590401,0.76


TrainOutput(global_step=1200, training_loss=0.5595565827687582, metrics={'train_runtime': 460.9148, 'train_samples_per_second': 26.035, 'train_steps_per_second': 3.254, 'total_flos': 631466532864000.0, 'train_loss': 0.5595565827687582, 'epoch': 8.0})

In [8]:
# Evaluate pruned model on validation set
pruned_validation_results = trainer.evaluate(eval_dataset=validation_dataset)
pruned_validation_accuracy = pruned_validation_results["eval_accuracy"] * 100
print(f"Pruned Model Validation Accuracy: {pruned_validation_accuracy:.2f}%")

# Evaluate pruned model on test set
pruned_test_results = trainer.evaluate(eval_dataset=small_test_dataset)
pruned_test_accuracy = pruned_test_results["eval_accuracy"] * 100
print(f"Pruned Model Test Accuracy: {pruned_test_accuracy:.2f}%")

Pruned Model Validation Accuracy: 77.33%
Pruned Model Test Accuracy: 73.40%


In [9]:
def measure_inference_time(model, dataset, batch_size=8, num_batches=10):
    model.eval()
    model.to('cuda')
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    times = []
    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            if i >= num_batches:
                break
            inputs = {k: v.to('cuda') for k, v in batch.items() if k in ['input_ids', 'attention_mask']}
            torch.cuda.synchronize()
            start_time = time.time()
            _ = model(**inputs)
            torch.cuda.synchronize()
            end_time = time.time()
            times.append(end_time - start_time)

    avg_time = sum(times) / len(times)
    return avg_time

# Measure inference time for pruned model
pruned_model_avg_time = measure_inference_time(model, small_test_dataset)
print(f"Pruned Model Inference Time: {pruned_model_avg_time:.4f} seconds")

Pruned Model Inference Time: 0.0557 seconds


In [10]:
# Load and train original model
original_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

original_trainer = Trainer(
    model=original_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

print("Training original model...")
original_trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  original_trainer = Trainer(


Training original model...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5577,0.439133,0.826667
2,0.3596,0.480858,0.86
3,0.1913,0.525699,0.85


TrainOutput(global_step=450, training_loss=0.3695038562350803, metrics={'train_runtime': 226.896, 'train_samples_per_second': 52.888, 'train_steps_per_second': 6.611, 'total_flos': 236799949824000.0, 'train_loss': 0.3695038562350803, 'epoch': 3.0})

In [11]:
# Load and train original model
original_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

original_trainer = Trainer(
    model=original_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

print("Training original model...")
original_trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  original_trainer = Trainer(


Training original model...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5808,0.39434,0.83
2,0.3649,0.577436,0.846667
3,0.2008,0.567942,0.863333


TrainOutput(global_step=450, training_loss=0.3821798748440213, metrics={'train_runtime': 173.8256, 'train_samples_per_second': 69.035, 'train_steps_per_second': 8.629, 'total_flos': 236799949824000.0, 'train_loss': 0.3821798748440213, 'epoch': 3.0})

In [12]:
# Measure inference time for original model
original_model_avg_time = measure_inference_time(original_model, small_test_dataset)
print(f"Original Model Inference Time: {original_model_avg_time:.4f} seconds")

Original Model Inference Time: 0.0577 seconds


In [13]:
# Calculate total parameters for original model
original_model_memory = sum(p.numel() for p in original_model.parameters())
print(f"Original Model Parameters: {original_model_memory:,}")

Original Model Parameters: 109,483,778


In [16]:
original_test_results = original_trainer.evaluate(eval_dataset=small_test_dataset)
original_model_accuracy_score = original_test_results["eval_accuracy"] * 100
print(f"Original Model Test Accuracy: {original_model_accuracy_score:.2f}%")

Original Model Test Accuracy: 83.60%


In [17]:
def write_file(
    pruning_type,
    dataset_used,
    num_samples,
    original_model_memory,
    original_model_accuracy_score,
    original_model_avg_time,
    pruned_model_memory,
    pruned_model_accuracy_score,
    pruned_model_avg_time
):
    file_name = f"{pruning_type}_pruning_summary.txt"
    content = (
        f"Pruning Method: {pruning_type.capitalize()} Pruning\n"
        f"Dataset Used: {dataset_used}\n"
        f"Number of Samples for Inference: {num_samples}\n\n"
        f"Original Model Parameters: {original_model_memory:,}\n"
        f"Original Model Accuracy (%): {original_model_accuracy_score:.2f}\n"
        f"Original Model Inference Time (avg seconds): {original_model_avg_time:.4f}\n\n"
        f"Pruned Model Parameters: {pruned_model_memory:,}\n"
        f"Pruned Model Accuracy (%): {pruned_model_accuracy_score:.2f}\n"
        f"Pruned Model Inference Time (avg seconds): {pruned_model_avg_time:.4f}\n"
    )
    file_path = os.path.join(os.getcwd(), file_name)
    with open(file_path, "w") as file:
        file.write(content)
    print(f"Summary saved to: {file_path}")

write_file(
    pruning_type="structured",
    dataset_used="IMDB",
    num_samples=len(small_test_dataset),
    original_model_memory=original_model_memory,
    original_model_accuracy_score=original_model_accuracy_score,
    original_model_avg_time=original_model_avg_time,
    pruned_model_memory=pruned_model_memory,
    pruned_model_accuracy_score=pruned_test_accuracy,
    pruned_model_avg_time=pruned_model_avg_time
)



Summary saved to: /content/structured_pruning_summary.txt
