In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.distributed as dist
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.multiprocessing as mp
import torch.distributed as dist

In [11]:
rank = dist.get_rank()
rank

0

In [2]:
transform = transforms.ToTensor()
from torch.utils.data.distributed import DistributedSampler

os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1' #본인이 사용하고 싶은 GPU 넘버를 써주면 됨
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '23097'         # 좀 큰 숫자로 맞추면 됨 작은 숫자는 에러발생!


torch.distributed.init_process_group(backend='nccl', init_method="env://", rank =0, world_size=1)  # rank should be 0 ~ world_size-1

In [12]:
dev0 = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dev1 = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(f'dev1:{dev0} dev2:{dev1}')

dev1:cuda:0 dev2:cuda:1


In [3]:
train_data = datasets.MNIST(root='../PYTORCH_NOTEBOOKS/Data',train=True, download=True,transform = transform)
test_data = datasets.MNIST(root='../PYTORCH_NOTEBOOKS/Data',train=False, download=True,transform = transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../PYTORCH_NOTEBOOKS/Data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [40]:
train_sampler = DistributedSampler(train_data)
train_loader = DataLoader(
    train_data,
    batch_size=100,
    shuffle=False,
    num_workers=8,
    pin_memory=True
    #,sampler=train_sampler
    )
test_sampler = DistributedSampler(test_data)
test_loader = DataLoader(
train_data,
batch_size=500,
shuffle=False,
num_workers=8,
pin_memory=True
#,sampler=test_sampler
)


In [23]:
from torchvision.utils import make_grid
np.set_printoptions(formatter=dict(int=lambda x: f'{x:4}')) # FORMATTING

In [24]:
# FIRST BATCH
for images, labels in train_loader:
    # 60,000 / 100 -> 60 times
    break

In [25]:
class MultilayerPerceptron(nn.Module):

    def __init__(self, dev0,dev1,in_sz=784, out_sz=10, layers=[120,84]):
        super().__init__()
        self.dev0 = dev0
        self.dev1 = dev1
        self.fc1= nn.Linear(in_sz, layers[0]).to(dev0)
        self.fc2= nn.Linear(layers[0],layers[1]).to(dev0)
        self.fc3= nn.Linear(layers[1],out_sz).to(dev1)

    def forward(self,X):
        X = X.to(self.dev0)
        X = self.fc2(self.fc1(X))
        X = X.to(self.dev1)
        X = self.fc3(X).to(self.dev1)

        return F.log_softmax(X,dim=1) # MULTI CLASS CLASSIFICATION

In [26]:

model = MultilayerPerceptron(dev0,dev1)
ddp_model = DDP(model)

In [34]:
# ANN --> CNN
for param in model.parameters():
    print(param.numel()) ## number of elements
# 105000, 214 parameters -> use more efficient CNN
criterion = nn.CrossEntropyLoss().to(dev0)
optimizer = torch.optim.Adam(ddp_model.parameters(), lr =0.001)

94080
120
10080
84
840
10


In [28]:
#flatten the datas
images.shape # --> [100,784] 100 images (28*28) make to 1 dimension (784)
images.view(100,-1).shape # combine the single dim

torch.Size([100, 784])

In [41]:
import time
start_time = time.time()

## Training

epochs =20

# Trackers
train_losses = []
test_losses =[]
train_correct = []
test_correct = []

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    for b, (X_train, y_train) in enumerate(train_loader):
        
        # 10 Neurons
        X_train = X_train.to(dev0)
        y_train = y_train.to(dev0)
        b +=1
        y_pred = ddp_model(X_train.view(100,-1)) ## flatten
        y_pred = y_pred.to(dev0)
        loss = criterion(y_pred, y_train)


        predicted = torch.max(y_pred.data,1)[1] # [0.1,0.0,...0.8] get the best probability
        # and get the real value in that index (flattened(1)) into predicted
        batch_corr = (predicted == y_train).sum() # sum of how many correct count
        trn_corr += batch_corr

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if b%400 == 0:
            accuracy = 100* trn_corr.item() /(b*100)
            print(f'Epoch {i} batch {b} loss {loss.item()} accuracy: {accuracy}')
    
    train_losses.append(loss)
    train_correct.append(trn_corr)

    
    
    
    with torch.no_grad(): ## no backprop & gradient

        for b, (X_test, y_test) in enumerate(test_loader):

            X_test = X_test.to(dev1)
            y_test = y_test.to(dev1)
            y_val = model(X_test.view(500,-1))
            y_val = y_val.to(dev1)
            predicted = torch.max(y_val,1)[1]
            tst_corr += (predicted == y_test).sum() ##(batch_corr)

    loss = criterion(y_val, y_test)
    test_losses.append(loss)
    test_correct.append(tst_corr)


total_time = time.time() -start_time
print(f'Duration :{total_time/60} mins')

Epoch 0 batch 400 loss 0.30057597160339355 accuracy: 92.825
Epoch 1 batch 400 loss 0.298120379447937 accuracy: 92.9075
Epoch 2 batch 400 loss 0.2975993752479553 accuracy: 92.9175
Epoch 3 batch 400 loss 0.2966439127922058 accuracy: 92.9625
Epoch 4 batch 400 loss 0.29550302028656006 accuracy: 92.9975
Epoch 5 batch 400 loss 0.29630303382873535 accuracy: 92.995
Epoch 6 batch 400 loss 0.29794514179229736 accuracy: 93.0325
Epoch 7 batch 400 loss 0.30068671703338623 accuracy: 93.0575
Epoch 8 batch 400 loss 0.3027072250843048 accuracy: 93.06
Epoch 9 batch 400 loss 0.30403760075569153 accuracy: 93.0525
Epoch 10 batch 400 loss 0.3051220178604126 accuracy: 93.055
Epoch 11 batch 400 loss 0.30522048473358154 accuracy: 93.0525
Epoch 12 batch 400 loss 0.305276095867157 accuracy: 93.0525
Epoch 13 batch 400 loss 0.30426540970802307 accuracy: 93.0825
Epoch 14 batch 400 loss 0.30487677454948425 accuracy: 93.105
Epoch 15 batch 400 loss 0.3054546117782593 accuracy: 93.1325
Epoch 16 batch 400 loss 0.3050174