In [2]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.metrics import accuracy_score
import time

In [4]:
# Seed PyTorch's global RNG before anything random happens. The classification
# head of BertForSequenceClassification is newly initialized (it is not part of
# the bert-base-uncased checkpoint) and the DataLoader below shuffles, so
# without a seed the reported accuracy changes from run to run.
torch.manual_seed(42)

# Load the pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
model = BertForSequenceClassification.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define training and test data (toy examples: two sentences per split).
# Labels are created as tensors directly so each name only ever holds one type.
train_texts = ["This is the first sentence.", "Another sentence for training."]
train_labels = torch.tensor([1, 0])
test_texts = ["A test sentence.", "Yet another test sentence."]
test_labels = torch.tensor([1, 1])

# Tokenize and encode the training data; return_tensors="pt" makes the
# tokenizer hand back PyTorch tensors, so no manual torch.tensor(...) is needed.
train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors="pt")
train_input_ids = train_encodings["input_ids"]
train_attention_mask = train_encodings["attention_mask"]

# Tokenize and encode the test data the same way
test_encodings = tokenizer(test_texts, truncation=True, padding=True, return_tensors="pt")
test_input_ids = test_encodings["input_ids"]
test_attention_mask = test_encodings["attention_mask"]

# Create training dataset
train_dataset = torch.utils.data.TensorDataset(train_input_ids, train_attention_mask, train_labels)

# Fine-tune the BERT model on the training data (a single pass over the loader)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)

model.train()
for batch in train_loader:
    optimizer.zero_grad()
    input_ids, attention_mask, labels = batch
    # Passing labels makes the model compute the loss itself (outputs.loss).
    outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Evaluate the model on the test data
model.eval()
with torch.no_grad():
    outputs = model(input_ids=test_input_ids, attention_mask=test_attention_mask)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=1)
    # scikit-learn's signature is accuracy_score(y_true, y_pred).
    accuracy = accuracy_score(test_labels, predictions)
    print("Model accuracy:", accuracy)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Model accuracy: 1.0


In [5]:
# # Load the pre-trained BERT model and tokenizer
# model_name = "bert-base-uncased"
# model = BertForSequenceClassification.from_pretrained(model_name)
# tokenizer = BertTokenizer.from_pretrained(model_name)

# # Define training and test data
# train_texts = ["This is the first sentence.", "Another sentence for training."]
# train_labels = [1, 0]
# test_texts = ["A test sentence.", "Yet another test sentence."]
# test_labels = [1, 1]

# # Tokenize and encode the training data
# train_encodings = tokenizer(train_texts, truncation=True, padding=True)
# train_labels = torch.tensor(train_labels)

# # Tokenize and encode the test data
# test_encodings = tokenizer(test_texts, truncation=True, padding=True)
# test_labels = torch.tensor(test_labels)

# # Fine-tune the BERT model on the training data
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
# train_dataset = torch.utils.data.TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True)

# model.train()
# for batch in train_loader:
#     optimizer.zero_grad()
#     input_ids, attention_mask, labels = batch
#     outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
#     loss = outputs.loss
#     loss.backward()
#     optimizer.step()

# # Evaluate the original model on the test data
# model.eval()
# with torch.no_grad():
#     input_ids = test_encodings['input_ids']
#     attention_mask = test_encodings['attention_mask']
#     labels = test_labels
#     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
#     logits = outputs.logits
#     predictions = torch.argmax(logits, dim=1)
#     accuracy = accuracy_score(predictions, labels)
#     print("Original model accuracy:", accuracy)

# Save the original (unquantized) model with the Hugging Face serializer.
model.save_pretrained("original_model")

# Also write the raw state_dict to original_model.pt. The size comparison
# further down lists original_model.pt next to quantized_model.pt, and no other
# live cell creates that file, so a fresh Restart & Run All needs it here.
torch.save(model.state_dict(), "original_model.pt")

In [None]:
# Quantize the model: dynamically quantize every torch.nn.Linear layer to int8
quantized_model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)

# Evaluate the quantized model on the same test tensors used for the original model
quantized_model.eval()
with torch.no_grad():
    outputs = quantized_model(input_ids=test_input_ids, attention_mask=test_attention_mask)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=1)
    # Score against test_labels, not the `labels` variable: that name still
    # holds the last training batch from the fine-tuning loop, so using it
    # would compare test predictions with training targets.
    accuracy = accuracy_score(test_labels, predictions)
    print("Quantized model accuracy:", accuracy)

# NOTE: quantized_model.save_pretrained("quantized_model") did not work
# directly on the quantized model.



In [8]:
# Persist the quantized weights as a plain state_dict checkpoint.
quantized_state = quantized_model.state_dict()
torch.save(quantized_state, "quantized_model.pt")

In [11]:
!ls -1has original_model.pt

876104 original_model.pt


In [12]:
!ls -1has quantized_model.pt

360576 quantized_model.pt


In [None]:
# # Calculate model sizes
# original_model_size = torch.save(model.state_dict(), "original_model.pt")
# quantized_model_size = torch.save(quantized_model.state_dict(), "quantized_model.pt")
# print("Original model size:", original_model_size)
# print("Quantized model size:", quantized_model_size)

# # Measure inference time
# num_iterations = 1000

# # Inference time for the original model
# model_start_time = time.time()
# for _ in range(num_iterations):
#     with torch.no_grad():
#         model(input_ids=input_ids, attention_mask=attention_mask)
# model_end_time = time.time()
# model_inference_time = (model_end_time - model_start_time) / num_iterations
# print("Original model average inference time:", model_inference_time)

# # Inference time for the quantized model
# quantized_model_start_time = time.time()
# for _ in range(num_iterations):
#     with torch.no_grad():
#         quantized_model(input_ids=input_ids, attention_mask=attention_mask)
# quantized_model_end_time = time.time()
# quantized_model_inference_time = (quantized_model_end_time - quantized_model_start_time) / num_iterations
# print("Quantized model average inference time:", quantized_model_inference_time)
