# Distributed PyTorch Modelling, Model Optimization, and Deployment

##  Distributed Torch Architecture

In [1]:
!pip list

Package             Version
------------------- ------------
asttokens           3.0.0
colorama            0.4.6
comm                0.2.2
contourpy           1.3.0
cycler              0.12.1
debugpy             1.8.11
decorator           5.1.1
exceptiongroup      1.2.2
executing           2.1.0
filelock            3.13.1
fonttools           4.55.3
fsspec              2024.2.0
importlib_metadata  8.5.0
importlib_resources 6.5.2
intel-openmp        2021.4.0
ipykernel           6.29.5
ipython             8.18.1
jedi                0.19.2
Jinja2              3.1.3
joblib              1.4.2
jupyter_client      8.6.3
jupyter_core        5.7.2
kiwisolver          1.4.7
MarkupSafe          2.1.5
matplotlib          3.9.4
matplotlib-inline   0.1.7
mkl                 2021.4.0
mpmath              1.3.0
nest-asyncio        1.6.0
networkx            3.2.1
numpy               1.26.3
packaging           24.2
pandas              2.2.3
parso               0.8.4
pillow              10.2.0
pip         

In [2]:
import os
import sys
import tempfile
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP

In [3]:
def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '5555'
    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()

class NNET_Model(nn.Module):
    def __init__(self):
        super(NNET_Model, self).__init__()
        self.net1 = nn.Linear(10, 10)
        self.relu = nn.ReLU()
        self.net2 = nn.Linear(10, 5)

    def forward(self, x):
        return self.net2(self.relu(self.net1(x)))

def nnet_basic(rank, world_size):
    print(f"Running basic DDP example on rank {rank}.")
    setup(rank, world_size)
    # create model and move it to CPU with id rank
    model = NNET_Model().to(rank)
    ddp_model = DDP(model, device_ids=[rank])
    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
    optimizer.zero_grad()
    outputs = ddp_model(torch.randn(20, 10))
    labels = torch.randn(20, 5).to(rank)
    loss_fn(outputs, labels).backward()
    optimizer.step()
    cleanup()

In [4]:
nnet_basic(rank=1,world_size=4)

Running basic DDP example on rank 1.


DistNetworkError: The client socket has timed out after 1800s while trying to connect to (localhost, 5555).

##  Loading Data to Distributed PyTorch

In [5]:
import torch.distributed as dist
def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
import torchvision.datasets as datasets
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4061844.02it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 165049.85it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1553751.27it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 905314.30it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [8]:
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader

def prepare(rank, world_size, batch_size=32, pin_memory=False, num_workers=0):
    dataset = mnist_trainset
    sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=False, drop_last=False)
    dataloader = DataLoader(dataset, batch_size=batch_size, pin_memory=pin_memory, num_workers=num_workers, drop_last=False, shuffle=False, sampler=sampler)
    return dataloader

def main(rank, world_size):
    # setup the process groups
    setup(rank, world_size)
    # prepare the dataloader
    dataloader = prepare(rank, world_size)
    # instantiate the model and move it to the right device
    model = NNET_Model().to(rank)
    # wrap the model with DDP
    model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=True)
    
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
    device_ids = list(range(torch.cuda.device_count()))
    gpus = len(device_ids)
    print('GPU detected')
else:
    DEVICE = torch.device("cpu")
    print('No GPU. switching to CPU')

No GPU. switching to CPU


##  Quantization of Models in PyTorch

In [10]:
import torch.quantization
quantized_model = torch.quantization.quantize_dynamic(model,{torch.nn.Linear},dtype=torch.qint8)

print(quantized_model)

NameError: name 'model' is not defined