# Parallel computing with Pytorch in deep learning

author: Jing Zhang    
e-mail: zhangjingnm@hotmail.com    
date: 2024-09    
reference: http://www.idris.fr/eng/jean-zay/gpu/jean-zay-gpu-torch-multi-eng.html

### Data parallelism

### Model parallelism

### Multi-process configuration with SLURM

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# data augmentation and preprocessing
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  
    transforms.RandomCrop(32, padding=4),  
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]) 
])

# load CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# load pretrained ResNet18 model
model = models.resnet18(weights='ResNet18_Weights.DEFAULT')

# modify last fully connected layer, to adapt for CIFAR-10
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)  # 10 classes in CIFAR-10

# move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001)

def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # backword propagation and optmization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # calculate loss and accuracy
        running_loss += loss.item() * inputs.size(0)
        _, predicted = outputs.max(1) # get predicted class
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    epoch_loss = running_loss / len(test_loader.dataset)
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc

num_epochs = 10

for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)

    print(f'Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10: Train Loss: 1.1410, Train Acc: 60.10, Test Loss: 0.8326, Test Acc: 71.25%
Epoch 2/10: Train Loss: 0.7750, Train Acc: 73.15, Test Loss: 0.7030, Test Acc: 75.96%
Epoch 3/10: Train Loss: 0.6593, Train Acc: 76.96, Test Loss: 0.6416, Test Acc: 77.75%
Epoch 4/10: Train Loss: 0.5870, Train Acc: 79.59, Test Loss: 0.5958, Test Acc: 79.26%
Epoch 5/10: Train Loss: 0.5420, Train Acc: 81.16, Test Loss: 0.5572, Test Acc: 80.90%
Epoch 6/10: Train Loss: 0.4965, Train Acc: 82.73, Test Loss: 0.5543, Test Acc: 81.10%
Epoch 7/10: Train Loss: 0.4654, Train Acc: 83.87, Test Loss: 0.5373, Test Acc: 81.57%
Epoch 8/10: Train Loss: 0.4363, Train Acc: 84.82, Test Loss: 0.5188, Test Acc: 82.19%
Epoch 9/10: Train Loss: 0.4177, Train Acc: 85.38, Test Loss: 0.5282, Test Acc: 81.94%
Epoch 10/10: Train Loss: 0.3896, Train Acc: 86.30, Test Loss: 0.5135, Test Acc: 82.77%


In [14]:
# dell workstation
import torch 

num_gpus = torch.cuda.device_count()
print(f'{num_gpus} GPUs in total on this machine')

for i in range(num_gpus):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

import socket
hostname = socket.gethostname()
print(f"Node name (hostname): {hostname}")



1 GPUs in total on this machine
GPU 0: Quadro RTX 5000
Node name (hostname): DESKTOP-7GS0DEP


In [5]:
# thinkbook
import torch 

num_gpus = torch.cuda.device_count()
print(f'{num_gpus} GPU in total on this machine')

for i in range(num_gpus):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

import socket
hostname = socket.gethostname()
print(f"Node name (hostname): {hostname}")

print('PyTorch version:', torch.__version__)  # PyTorch version
print('CUDA version:', torch.version.cuda)  # CUDA version

1 GPU in total on this machine
GPU 0: NVIDIA GeForce RTX 3050 4GB Laptop GPU
Node name (hostname): Lenovo
PyTorch version: 2.4.1+cu124
CUDA version: 12.4
