In [8]:
!pip install datasets
import torch
import numpy as np
import random
from datasets import load_dataset
from transformers import BertForSequenceClassification, BertTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.metrics import accuracy_score
import os
import time

torch.manual_seed(42)
np.random.seed(42)
random.seed(42)



In [9]:
# Load and split the IMDB dataset
dataset = load_dataset('imdb')
small_train_dataset = dataset["train"].shuffle(seed=42).select([i for i in range(1500)])
small_test_dataset = dataset["test"].shuffle(seed=42).select([i for i in range(500)])

# Split the training dataset into training and validation sets
train_val_split = small_train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_val_split["train"]
validation_dataset = train_val_split["test"]

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(validation_dataset)}")
print(f"Test set size: {len(small_test_dataset)}")

Training set size: 1200
Validation set size: 300
Test set size: 500


In [10]:
# Load tokenizer and tokenize datasets
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
validation_dataset = validation_dataset.map(tokenize_function, batched=True)
small_test_dataset = small_test_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
validation_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
small_test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

In [11]:
# Load the model
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
print(f"Model loaded with {sum(p.numel() for p in model.parameters())} parameters.")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded with 109483778 parameters.


In [12]:
# Define pruning functions
from torch.nn.utils import prune

def calculate_pruning_impact(model):
    total_params = sum(p.numel() for p in model.parameters())
    total_intermediate_weights = 0
    total_intermediate_zero_weights = 0

    for transformer_layer in model.bert.encoder.layer:
        weights = transformer_layer.intermediate.dense.weight.detach().cpu()
        total_intermediate_weights += weights.numel()
        total_intermediate_zero_weights += (weights == 0).sum().item()

    remaining_params = total_params - total_intermediate_zero_weights
    percent_deducted = (total_intermediate_zero_weights / total_params) * 100
    print(f"Total Parameters: {total_params}, Zero Parameters: {total_intermediate_zero_weights}, Remaining Parameters: {remaining_params}, Pruned Percentage: {percent_deducted:.2f}%")
    return total_params, total_intermediate_zero_weights, remaining_params, percent_deducted

def prune_layer(layer, amount=0.2):
    prune.l1_unstructured(layer, name="weight", amount=amount)

In [13]:
print("Calculating original model parameters...")
original_model_memory, _, _, _ = calculate_pruning_impact(model)

for layer in model.bert.encoder.layer:
    prune_layer(layer.intermediate.dense, amount=0.2)
    prune_layer(layer.output.dense, amount=0.2)

print("Calculating pruned model parameters...")
_, _, pruned_model_memory, _ = calculate_pruning_impact(model)

Calculating original model parameters...
Total Parameters: 109483778, Zero Parameters: 0, Remaining Parameters: 109483778, Pruned Percentage: 0.00%
Calculating pruned model parameters...
Total Parameters: 109483778, Zero Parameters: 5662308, Remaining Parameters: 103821470, Pruned Percentage: 5.17%


In [14]:
# Training arguments for the pruned model
training_args = TrainingArguments(
    output_dir="./pruned_bert_output",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir="./pruned_logs",
    report_to="none",
    seed=42,
    load_best_model_at_end=True,
)

# Trainer for the pruned model with EarlyStoppingCallback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

trainer.train()



Epoch,Training Loss,Validation Loss
1,0.5614,0.484335
2,0.3568,0.627354
3,0.2013,0.612039


TrainOutput(global_step=450, training_loss=0.3731534915500217, metrics={'train_runtime': 144.556, 'train_samples_per_second': 83.013, 'train_steps_per_second': 10.377, 'total_flos': 236799949824000.0, 'train_loss': 0.3731534915500217, 'epoch': 3.0})

In [16]:
# Training arguments for the original model
original_training_args = TrainingArguments(
    output_dir="./original_bert_output",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir="./original_logs",
    report_to="none",
    seed=42,
    load_best_model_at_end=True,
)

# Load the original model
original_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

#trainer for the original model with EarlyStoppingCallback
original_trainer = Trainer(
    model=original_model,
    args=original_training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

original_trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.7158,0.689144
2,0.5923,0.454334
3,0.3757,0.507343
4,0.1701,0.683319
5,0.0721,0.723657


TrainOutput(global_step=750, training_loss=0.38518287022908526, metrics={'train_runtime': 216.6718, 'train_samples_per_second': 55.383, 'train_steps_per_second': 6.923, 'total_flos': 394666583040000.0, 'train_loss': 0.38518287022908526, 'epoch': 5.0})

In [17]:
# Evaluation function for metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    return {"accuracy": accuracy_score(labels, preds)}

trainer.compute_metrics = compute_metrics
original_trainer.compute_metrics = compute_metrics

# Evaluate both models on the validation set
pruned_validation_results = trainer.evaluate(eval_dataset=validation_dataset)
original_validation_results = original_trainer.evaluate(eval_dataset=validation_dataset)

print(f"Pruned Model Validation Accuracy: {pruned_validation_results['eval_accuracy'] * 100:.2f}%")
print(f"Original Model Validation Accuracy: {original_validation_results['eval_accuracy'] * 100:.2f}%")

Pruned Model Validation Accuracy: 78.00%
Original Model Validation Accuracy: 77.00%


In [18]:
pruned_test_results = trainer.evaluate(eval_dataset=small_test_dataset)
original_test_results = original_trainer.evaluate(eval_dataset=small_test_dataset)

print(f"Pruned Model Test Accuracy: {pruned_test_results['eval_accuracy'] * 100:.2f}%")
print(f"Original Model Test Accuracy: {original_test_results['eval_accuracy'] * 100:.2f}%")

Pruned Model Test Accuracy: 75.00%
Original Model Test Accuracy: 79.40%


In [19]:
def measure_inference_time(model, dataset, batch_size=8, num_batches=10):
    model.eval()
    model.to('cuda')
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    times = []
    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            if i >= num_batches:
                break
            inputs = {k: v.to('cuda') for k, v in batch.items() if k in ['input_ids', 'attention_mask']}
            torch.cuda.synchronize()
            start_time = time.time()
            _ = model(**inputs)
            torch.cuda.synchronize()
            end_time = time.time()
            times.append(end_time - start_time)

    avg_time = sum(times) / len(times)
    return avg_time

pruned_inference_time = measure_inference_time(model, small_test_dataset)
non_pruned_inference_time = measure_inference_time(original_model, small_test_dataset)

print(f"Pruned Model Inference Time: {pruned_inference_time:.4f} seconds")
print(f"Original Model Inference Time: {non_pruned_inference_time:.4f} seconds")

Pruned Model Inference Time: 0.0560 seconds
Original Model Inference Time: 0.0557 seconds


In [20]:
def write_file(
    pruning_type,
    dataset_used,
    num_samples,
    original_model_memory,
    original_model_accuracy_score,
    original_model_avg_time,
    pruned_model_memory,
    pruned_model_accuracy_score,
    pruned_model_avg_time
):
    file_name = f"{pruning_type}_pruning_summary.txt"
    content = (
        f"Pruning Method: {pruning_type.capitalize()} Pruning\n"
        f"Dataset Used: {dataset_used}\n"
        f"Number of Samples for Inference: {num_samples}\n\n"
        f"Original Model Parameters: {original_model_memory:,}\n"
        f"Original Model Accuracy (%): {original_model_accuracy_score:.2f}\n"
        f"Original Model Inference Time (avg seconds): {original_model_avg_time:.4f}\n\n"
        f"Pruned Model Parameters: {pruned_model_memory:,}\n"
        f"Pruned Model Accuracy (%): {pruned_model_accuracy_score:.2f}\n"
        f"Pruned Model Inference Time (avg seconds): {pruned_model_avg_time:.4f}\n"
    )
    file_path = os.path.join(os.getcwd(), file_name)
    with open(file_path, "w") as file:
        file.write(content)
    print(f"Summary saved to: {file_path}")

In [21]:
write_file(
    pruning_type="unstructured",
    dataset_used="IMDB",
    num_samples=len(small_test_dataset),
    original_model_memory=original_model_memory,
    original_model_accuracy_score=original_test_results['eval_accuracy'] * 100,
    original_model_avg_time=non_pruned_inference_time,
    pruned_model_memory=pruned_model_memory,
    pruned_model_accuracy_score=pruned_test_results['eval_accuracy'] * 100,
    pruned_model_avg_time=pruned_inference_time
)

Summary saved to: /content/unstructured_pruning_summary.txt
