# PyTorch on multiple GPUs

For AlexNet:
- Two GTX 580 GPUs with 3 GB memory (on the HPC we can select up to 8 GPUs per node).
- Training the network for 90 epochs takes five to six days on two GTX 580 GPUs.

```
# Resource request: 30 minutes of walltime and a single chunk with
# 4 CPUs, 2 GB of memory and 2 GPUs of type RTX6000.
#PBS -l walltime=0:30:0
#PBS -l select=1:ncpus=4:mem=2gb:ngpus=2:gpu_type=RTX6000


# Load the personal Anaconda module and activate the project environment.
module load anaconda3/personal
source activate interiorcardamage

# Run from the directory the job was submitted from.
cd $PBS_O_WORKDIR
# Single-node launch with 2 worker processes (matching ngpus=2 above).
# The trailing "50 10" are script arguments: 50 epochs, snapshot every 10th.
torchrun --standalone --nproc_per_node=2 multipgu_torchrun.py 50 10
```

- `--standalone` means we are running on a single node.
- `--nproc_per_node` is the number of GPUs (worker processes) to use per node. Setting it to `gpu` (`--nproc_per_node=gpu`) lets torchrun use all GPUs available on the node.
- We run 50 epochs, taking a snapshot every 10th epoch.


### Finding the accuracy on multigpu

In [1]:
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing applied to every image:
#   1. resize to 150x150,
#   2. random horizontal flip (data augmentation),
#   3. convert to a tensor,
#   4. normalise each of the 3 channels with (x - 0.5) / 0.5.
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# NOTE(review): absolute Windows paths -- presumably these need to point at
# the dataset location on whichever machine runs this cell; confirm before
# running on the HPC node.
train_path = 'C:/Users/hanna/Desktop/git/interiorcardamage/Data/train'
test_path = 'C:/Users/hanna/Desktop/git/interiorcardamage/Data/test'

# Evaluation preprocessing: same resize and normalisation as `transformer`
# but WITHOUT the random horizontal flip, so that the accuracy measured on
# the test set is reproducible from run to run.
eval_transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# One sub-directory per class; labels are assigned by ImageFolder.
image_folder_train = torchvision.datasets.ImageFolder(
    root=train_path, transform=transformer
)
image_folder_test = torchvision.datasets.ImageFolder(
    root=test_path, transform=eval_transformer
)

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    image_folder_train,
    batch_size=64,
    shuffle=True,
)
# Evaluation does not benefit from shuffling; keep a fixed order so that
# per-batch results can be compared between runs.
test_loader = torch.utils.data.DataLoader(
    image_folder_test,
    batch_size=32,
    shuffle=False,
)

#print('data loaded')

# Class names are the sub-directory names of the training folder, sorted.
# Only directories are considered, so stray files in the folder
# (e.g. ".DS_Store") do not end up in the class list.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir() if entry.is_dir())

# len(dataset) is the total number of images; len(loader) is the number of
# batches per epoch.
print(f'image folder: {len(image_folder_train)}')
print(f'train loader: {len(train_loader)}')

image folder: 81
train loader: 2


In [3]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

class MyTrainDataset(Dataset):
    """Synthetic dataset of random ``(features, target)`` tensor pairs.

    Every sample is a tuple of a 20-element tensor and a 1-element tensor,
    both drawn with ``torch.rand``. All samples are generated once in the
    constructor and kept in ``self.data``.
    """

    def __init__(self, size):
        """Create ``size`` random samples and store them in memory."""
        self.size = size
        samples = []
        for _ in range(size):
            # Draw the features before the target to keep the RNG order fixed.
            features = torch.rand(20)
            target = torch.rand(1)
            samples.append((features, target))
        self.data = samples

    def __len__(self):
        """Return the number of samples requested at construction."""
        return self.size

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.data[index]

# Build a 2048-sample synthetic dataset; the bare expression below makes the
# notebook cell display its length.
train_set = MyTrainDataset(2048)
len(train_set)


2048