In [None]:
# Language Model Quantization and Tokenization


In [None]:
## Section 1: Replicating the Andrej Karpathy Tokenizer Code


In [27]:
# This code is based on Andrej Karpathy's implementation of a simple tokenizer.
# The tokenizer works at the character level: it maps each character of a string to an integer index.

class SimpleTokenizer:
    """Character-level tokenizer whose vocabulary is built from a single text.

    Each distinct character of the text given to the constructor is assigned
    an integer index; indices follow the sorted order of the characters.
    """

    def __init__(self, text):
        """Build the vocabulary from ``text`` and keep it as the default input.

        Args:
            text: String whose distinct characters form the vocabulary.
        """
        # Stored so that encode() without an argument encodes this text.
        self.text = text
        # Sorting the character set makes the index assignment deterministic.
        self.char_to_idx = {ch: idx for idx, ch in enumerate(sorted(set(text)))}
        # Inverse mapping, used by decode().
        self.idx_to_char = {idx: ch for ch, idx in self.char_to_idx.items()}

    def encode(self, text=None):
        """Convert a string into a list of vocabulary indices.

        Args:
            text: String to encode. When omitted (``None``), the text passed
                to the constructor is encoded, as in the original interface.

        Returns:
            A list with one integer index per character.

        Raises:
            KeyError: If ``text`` contains a character that is not part of
                the vocabulary built in the constructor.
        """
        # `is None` (not truthiness) so that an explicit "" encodes to [].
        if text is None:
            text = self.text
        return [self.char_to_idx[ch] for ch in text]

    def decode(self, encoded_text):
        """Convert an iterable of vocabulary indices back into a string.

        Args:
            encoded_text: Iterable of integer indices, e.g. from encode().

        Returns:
            The string formed by the characters for those indices.

        Raises:
            KeyError: If an index is not part of the vocabulary.
        """
        return ''.join(self.idx_to_char[idx] for idx in encoded_text)

# Demo: round-trip a short string through the tokenizer
text = "hello world"
tokenizer = SimpleTokenizer(text)

# Map the characters to indices, then map those indices back to characters
encoded_text = tokenizer.encode()
decoded_text = tokenizer.decode(encoded_text)

# Show the index list followed by the reconstructed string
print("Encoded Text:", encoded_text)
print("Decoded Text:", decoded_text)


Encoded Text: [3, 2, 4, 4, 5, 0, 7, 5, 6, 4, 1]
Decoded Text: hello world


In [None]:
## Section 2: Convert a Language Model to int8 Using Optimum


In [28]:
# Install necessary libraries for model conversion and quantization
!pip install transformers optimum[onnxruntime] onnxruntime onnx

# Import required modules
from transformers import DistilBertModel, DistilBertTokenizer
from onnxruntime.quantization import quantize_dynamic, QuantType
import torch
import onnx

# Load DistilBERT model and tokenizer
# NOTE: this rebinds `tokenizer`, which Section 1 used for its SimpleTokenizer
# instance; re-running Section 1's cells after this one changes what it refers to.
model_name = "distilbert-base-uncased"
model = DistilBertModel.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Export the model to ONNX with opset 14
onnx_model_path = "distilbert_base_uncased.onnx"
dummy_input = torch.randint(0, 100, (1, 10))  # Dummy input for ONNX export
# Name the graph input/output and mark batch and sequence dimensions as dynamic;
# otherwise the exported graph is tied to the (1, 10) shape of the dummy input.
torch.onnx.export(
    model,
    dummy_input,
    onnx_model_path,
    opset_version=14,
    input_names=["input_ids"],
    output_names=["last_hidden_state"],
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "last_hidden_state": {0: "batch_size", 1: "sequence_length"},
    },
)

# Perform dynamic quantization on the exported ONNX model
# NOTE(review): the section heading mentions Optimum, but the quantization here
# calls ONNX Runtime's quantize_dynamic directly; weights are stored as signed int8.
quantized_model_path = "quantized_distilbert_base_uncased.onnx"
quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QInt8)

# Load the quantized model to verify it was saved correctly
quantized_model = onnx.load(quantized_model_path)
onnx.checker.check_model(quantized_model)

print(f"Quantized DistilBERT model saved to {quantized_model_path}")






Quantized DistilBERT model saved to quantized_distilbert_base_uncased.onnx
