<a href="https://colab.research.google.com/github/joepareti54/joepareti54/blob/main/quantized-0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class LinearRegressionDataset(Dataset):
    def __init__(self):
        self.x = torch.tensor([1.0, 2.0, 3.0]).view(-1, 1)
        self.y = torch.tensor([2.0, 4.0, 6.0]).view(-1, 1)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return (self.x[idx], self.y[idx])

train_dataset = LinearRegressionDataset()
train_loader = DataLoader(train_dataset, batch_size=2)

class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)

# Create the model
model = LinearRegression(1, 1)

# Create the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Define the loss function
loss_fn = nn.MSELoss()

# Train the model without quantization
for epoch in range(100):
    for i, data in enumerate(train_loader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

# Quantize the model
quantizer = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
quantized_model = quantizer.eval()

# You can now use the quantized_model for inference
# But you should not train it further



In [7]:
# Create a new data point
new_data = torch.tensor([4.0]).view(-1, 1)

# Use the quantized model to predict the output for the new data point
with torch.no_grad():  # We don't need gradients for inference
    prediction = quantized_model(new_data)

print("The predicted output for the input 4.0 is:", prediction.item())


The predicted output for the input 4.0 is: 7.389178276062012


In [8]:
import time

# Create a new data point
new_data = torch.tensor([4.0]).view(-1, 1)

# Use the non-quantized model to predict the output for the new data point
start_time = time.time()
with torch.no_grad():  # We don't need gradients for inference
    prediction = model(new_data)
end_time = time.time()
print("The predicted output for the input 4.0 using the non-quantized model is:", prediction.item())
print("Time taken for inference with non-quantized model:", end_time - start_time)

# Use the quantized model to predict the output for the new data point
start_time = time.time()
with torch.no_grad():  # We don't need gradients for inference
    prediction = quantized_model(new_data)
end_time = time.time()
print("The predicted output for the input 4.0 using the quantized model is:", prediction.item())
print("Time taken for inference with quantized model:", end_time - start_time)


The predicted output for the input 4.0 using the non-quantized model is: 7.41409158706665
Time taken for inference with non-quantized model: 0.00041556358337402344
The predicted output for the input 4.0 using the quantized model is: 7.389178276062012
Time taken for inference with quantized model: 0.0005991458892822266


In [9]:
import sys

# Print the size of the non-quantized model
print("Size of the non-quantized model: ", sys.getsizeof(model))

# Print the size of the quantized model
print("Size of the quantized model: ", sys.getsizeof(quantized_model))


Size of the non-quantized model:  48
Size of the quantized model:  48


In [10]:
def get_model_size(model):
    size = sum(param.numel() for param in model.parameters())
    return size

# Print the size of the non-quantized model
print("Size of the non-quantized model: ", get_model_size(model))

# Print the size of the quantized model
print("Size of the quantized model: ", get_model_size(quantized_model))


Size of the non-quantized model:  2
Size of the quantized model:  0
