In [1]:
!pip install datasets



In [1]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, DistilBertConfig
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import torch
import os

In [2]:
# Hub id of the checkpoint; kept in one place so the weights and the vocabulary
# are always loaded from the same source.
CHECKPOINT = 'distilbert-base-uncased-finetuned-sst-2-english'

# Sequence-classification model and its matching tokenizer.
model = DistilBertForSequenceClassification.from_pretrained(CHECKPOINT)
tokenizer = DistilBertTokenizer.from_pretrained(CHECKPOINT)

In [3]:
# Put the model in evaluation mode. Because this call is the last expression in
# the cell, its return value (the module itself) is rendered as the cell output.
model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [4]:
# The GLUE SST-2 validation split is the common benchmark on which the two
# models' performance is compared.
dataset = load_dataset(
    path="glue",
    name="sst2",
    split="validation",
)

In [5]:
def tokenize_function(examples):
    """Tokenize the ``"sentence"`` entry of ``examples`` with the notebook-level ``tokenizer``.

    Every sequence is padded to, and truncated at, 512 tokens.

    Args:
        examples: Mapping that provides the text under the key ``"sentence"``.

    Returns:
        Whatever the tokenizer call returns for those sentences.
    """
    sentences = examples["sentence"]
    encoded = tokenizer(
        sentences,
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    return encoded

In [6]:
def get_model_size(path):
    """Return the on-disk size, in bytes, of everything stored at ``path``.

    Args:
        path: A directory (walked recursively) or a single file.

    Returns:
        int: Sum of the sizes of all files found. A path that does not exist
        yields 0, because the directory walk then visits nothing.
    """
    # os.walk yields nothing for a plain file, which would silently report 0 bytes,
    # so a single file (e.g. one weights file) is measured directly.
    if os.path.isfile(path):
        return os.path.getsize(path)

    return sum(
        os.path.getsize(os.path.join(dirpath, filename))
        for dirpath, _dirnames, filenames in os.walk(path)
        for filename in filenames
    )


In [7]:
def evaluate_model(model, dataset, num_samples=260, batch_size=None):
    """Compute classification accuracy of ``model`` on the first rows of ``dataset``.

    Args:
        model: Callable module with ``eval()``; it is called as
            ``model(input_ids, attention_mask=...)`` and must return an object
            exposing ``.logits`` with one row of class scores per example.
        dataset: Object supporting ``len()``, ``select(indices)`` and column
            access for ``'input_ids'``, ``'attention_mask'`` and ``'label'``.
        num_samples: Number of leading rows to score. Values larger than the
            dataset are clamped to its length.
        batch_size: Rows per forward pass. ``None`` (default) sends the whole
            subset through the model in a single pass, as before; pass a
            smaller number to bound peak memory.
            NOTE(review): batch composition may influence the numerics of
            dynamically quantized models, so scores obtained with different
            batch sizes may not be bit-identical - confirm before comparing.

    Returns:
        float: Fraction of selected rows whose arg-max prediction equals the label.

    Raises:
        ValueError: If no rows would be evaluated or ``batch_size`` is not positive.
    """
    sample_count = min(num_samples, len(dataset))
    if sample_count <= 0:
        raise ValueError("evaluate_model needs at least one sample to score")
    if batch_size is not None and batch_size <= 0:
        raise ValueError("batch_size must be a positive integer or None")

    model.eval()

    # Take a subset for evaluation and materialise its columns as tensors once
    subset = dataset.select(range(sample_count))
    input_ids = torch.tensor(subset['input_ids'])
    attention_masks = torch.tensor(subset['attention_mask'])
    labels = torch.tensor(subset['label'])

    step = sample_count if batch_size is None else batch_size
    correct = 0
    with torch.no_grad():
        for start in range(0, sample_count, step):
            stop = start + step
            outputs = model(input_ids[start:stop], attention_mask=attention_masks[start:stop])
            predictions = torch.argmax(outputs.logits, dim=1)
            # Count matches with integers so the final ratio is an exact Python float
            correct += int((predictions == labels[start:stop]).sum())

    return correct / sample_count

In [8]:
# Run tokenize_function over the validation split in batched mode.
tokenized_dataset = dataset.map(
    function=tokenize_function,
    batched=True,
)

In [9]:
# Dynamic int8 quantization of the torch.nn.Linear layers via PyTorch. The result is
# bound to its own name so `model` stays available as the baseline for comparison.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

In [10]:
# Baseline: accuracy of the unquantized model on the tokenized validation data
original_accuracy = evaluate_model(model=model, dataset=tokenized_dataset)
print(f"Original Model Accuracy: {original_accuracy}")

Original Model Accuracy: 0.9153846153846154


In [11]:
# Same evaluation for the int8-quantized model, on the same tokenized data
quantized_accuracy = evaluate_model(model=quantized_model, dataset=tokenized_dataset)
print(f"Quantized Model Accuracy: {quantized_accuracy}")

Quantized Model Accuracy: 0.9115384615384615


In [12]:
# Directory that receives everything needed to rebuild the quantized model
quantized_model_path = "/content/distilbert-quantized"

# Save the configuration carried by the quantized model itself, so the stored
# config is the one belonging to the weights written below (no extra hub download).
quantized_model.config.save_pretrained(quantized_model_path)

# Save the tokenizer that is already loaded instead of fetching it a second time
tokenizer.save_pretrained(quantized_model_path)

# Save the quantized weights as a plain state dict next to config and tokenizer
torch.save(quantized_model.state_dict(), os.path.join(quantized_model_path, "pytorch_model.bin"))


In [13]:
# Load the configuration and tokenizer
config = DistilBertConfig.from_pretrained(quantized_model_path)
tokenizer = DistilBertTokenizer.from_pretrained(quantized_model_path)

# Build a freshly initialised (not pretrained) network with the saved architecture.
# It gets its own name so that `model`, the fine-tuned original, is not overwritten.
skeleton_model = DistilBertForSequenceClassification(config)

# Apply the same dynamic quantization so the module layout matches the saved state dict
quantized_model = torch.quantization.quantize_dynamic(skeleton_model, {torch.nn.Linear}, dtype=torch.qint8)

# Load the saved state dictionary; as the last expression, the key-matching report is displayed
quantized_model.load_state_dict(torch.load(f"{quantized_model_path}/pytorch_model.bin"))


  device=storage.device,


<All keys matched successfully>

In [14]:
# Saving the original model for comparison.
# The fine-tuned checkpoint is loaded explicitly instead of relying on whatever the
# name `model` currently refers to, since `model` may have been rebound by another cell
# and would then write the wrong weights under the "original" label.
original_model_path = "/content/distilbert-original"
original_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')
original_model.save_pretrained(original_model_path)

In [15]:
# On-disk footprint of each saved model directory, converted from bytes to MB
# (1 MB = 1024 ** 2 bytes here)
original_size = get_model_size(original_model_path) / (1024 ** 2)
quantized_size = get_model_size(quantized_model_path) / (1024 ** 2)

In [16]:
# Side-by-side summary of size and accuracy for both models (values computed in earlier cells)
print(
    f"Original Model Size: {original_size:.2f} MB",
    f"Original Model Accuracy: {original_accuracy}",
    f"Quantized Model Size: {quantized_size:.2f} MB",
    f"Quantized Model Accuracy: {quantized_accuracy}",
    sep="\n",
)

Original Model Size: 255.43 MB
Original Model Accuracy: 0.9153846153846154
Quantized Model Size: 132.51 MB
Quantized Model Accuracy: 0.9115384615384615
