### 35 PyTorch on the GPU

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transform

import time
import torchvision
import pandas as pd
import torch
import json
from torch.utils.tensorboard import SummaryWriter

from IPython.display import display, clear_output



torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter # <-- new

from collections import OrderedDict, namedtuple
from itertools import product


print(torch.__version__)
print(torchvision.__version__)


2.5.1
0.20.1


In [6]:
torch.cuda.is_available()

True

In [7]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer takes 12 feature maps of size 4x4, which is what a
    28x28 input produces: ((28 - 5 + 1) / 2 - 5 + 1) / 2 = 4.
    """

    def __init__(self, channels=1): # default grayscale
        super().__init__()
        # Convolutional stages, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(channels, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected head ending in 10 output units.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run ``t`` through the network and return the output layer's raw values."""
        # Each stage: convolution -> ReLU -> 2x2 max-pool with stride 2.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten into rows of 12*4*4 features, the width fc1 expects.
        t = t.reshape(-1, self.fc1.in_features)

        for hidden in (self.fc1, self.fc2):
            t = F.relu(hidden(t))

        # No activation on the final layer.
        return self.out(t)

In [8]:
class RunBuilder:
    """Expands a grid of hyper-parameter options into individual runs."""

    @staticmethod
    def get_runs(params):
        """Return a list with one ``Run`` namedtuple per combination of values.

        ``params`` maps each hyper-parameter name to an iterable of candidate
        values; the namedtuple's fields follow the mapping's key order.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [9]:
class RunManager:
    """Collects per-epoch metrics across training runs and logs them to TensorBoard.

    Intended call order for each run::

        begin_run
            begin_epoch
                track_loss / track_num_correct   (once per batch)
            end_epoch
        end_run

    Every finished epoch appends one row to ``run_data``; ``save`` writes all
    rows to CSV and JSON.
    """

    def __init__(self):
        # Per-epoch accumulators (reset in begin_epoch).
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state; run_data accumulates rows over all runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Set by begin_run.
        self.model = None
        self.loader = None
        self.tb = None

    def begin_run(self, hyper_params, model, loader):
        """Start a run: reset the run clock and open a TensorBoard writer.

        Args:
            hyper_params: namedtuple of this run's hyper-parameters. Its string
                form becomes the writer's comment and its fields are added to
                every result row.
            model: the network being trained; it is logged as a graph.
            loader: the data loader used for training. One batch is drawn from
                it to log an image grid and to trace the graph.
        """
        self.run_start_time = time.time()
        self.run_params = hyper_params
        self.run_count += 1

        self.model = model
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{hyper_params}')

        images, _ = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.model, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter for the next run."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self, epoch_no):
        """Start the epoch clock and clear the per-epoch accumulators.

        ``epoch_no`` is accepted for the caller's convenience but is not used;
        the manager keeps its own ``epoch_count``.
        """
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finish the epoch: log metrics and histograms, then record and display a result row."""
        now = time.time()
        epoch_duration = now - self.epoch_start_time
        run_duration = now - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.model.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that took no part in backward() has grad == None,
            # which add_histogram cannot plot.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict([
            ("run", self.run_count),
            ("epoch", self.epoch_count),
            ("loss", loss),
            ("accuracy", accuracy),
            ("epoch_duration", epoch_duration),
            ("run duration", run_duration),
        ])
        # Append this run's hyper-parameters as extra columns.
        results.update(self.run_params._asdict())

        self.run_data.append(results)
        df = pd.DataFrame(self.run_data)

        # Replace the previously shown table instead of stacking a new one.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by the batch's size.

        Args:
            loss: scalar tensor with the batch loss (assumed to be the mean
                over the batch).
            batch_size: number of samples in this batch. When omitted,
                ``self.loader.batch_size`` is used, which over-weights a final
                batch that is smaller than the others; pass ``len(labels)``
                to get an exact epoch loss in that case.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose arg-max along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all recorded rows to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame(self.run_data).to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [35]:
# Fashion-MNIST training split (train=True is the torchvision default, spelled
# out here). download=True fetches the files into the root directory when they
# are not already there; ToTensor converts each image to a tensor.
train_set = torchvision.datasets.FashionMNIST(
    './data/FashionMNIST',
    train=True,
    download=True,
    transform=transform.Compose([transform.ToTensor()]),
)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw






In [None]:
# params = OrderedDict(
#     lr = [.01],
#     batch_size = [1000, 2000],
#     num_workers = [0, 1, 2, 4, 8]
#     # shuffle = [True, False]
# )

# m = RunManager()

# for run in RunBuilder.get_runs(params):

#     network = Network()
#     loader = DataLoader(train_set, batch_size=run.batch_size, num_workers=run.num_workers) 
#     optimizer = optim.Adam(network.parameters(), lr=run.lr)

#     m.begin_run(run, network, loader)
#     for epoch in range(1):
#         m.begin_epoch(epoch)

#         for batch in loader:
#             images, labels = batch

#             preds = network(images)
#             loss = F.cross_entropy(preds, labels)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             m.track_loss(loss)
#             m.track_num_correct(preds, labels)

#         m.end_epoch()
#     m.end_run()

# m.save('results')

#### PyTorch and CUDA
##### Moving to GPU

In [10]:
t = torch.ones(1,1,28,28)
network = Network()

In [11]:
t = t.cuda()
network = network.cuda()

In [12]:
gpu_pred = network(t)
gpu_pred.device

device(type='cuda', index=0)

##### Moving to CPU

In [13]:
t = t.cpu()
network = network.cpu()

In [14]:
cpu_pred = network(t)
cpu_pred.device

device(type='cpu')

#### Working with Tensors

In [15]:
# Two 2x2 integer tensors; no device is given, so both start out on the CPU.
t1 = torch.tensor([[1, 2], [3, 4]])
t2 = torch.tensor([[5, 6], [7, 8]])

In [16]:
t1.device, t2.device

(device(type='cpu'), device(type='cpu'))

In [17]:
t1 = t1.to('cuda')
t1.device

device(type='cuda', index=0)

In [18]:
# t1 is now on the GPU while t2 is still on the CPU, so the addition raises.
# The error is caught and printed so the notebook keeps running.
try: 
    t1 + t2
except RuntimeError as e:
    print(e)

Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!


In [19]:
# Same operation with the operands swapped: the operand order makes no
# difference, the device mismatch is still an error.
try:
    t2 + t1
except RuntimeError as e:
    print(e)

Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!


In [20]:
t2 = t2.to('cuda')
t1+t2

tensor([[ 6,  8],
        [10, 12]], device='cuda:0')

#### Working with NN Modules

In [21]:
network = Network()

In [22]:
# List every learnable tensor of the network together with its shape.
for param_name, tensor in network.named_parameters():
    print(f"{param_name} {tensor.shape}")

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([12, 6, 5, 5])
conv2.bias torch.Size([12])
fc1.weight torch.Size([120, 192])
fc1.bias torch.Size([120])
fc2.weight torch.Size([60, 120])
fc2.bias torch.Size([60])
out.weight torch.Size([10, 60])
out.bias torch.Size([10])


In [23]:
# Show which device each parameter currently lives on.
for param_name, tensor in network.named_parameters():
    print(f"{tensor.device}  {param_name}")

cpu  conv1.weight
cpu  conv1.bias
cpu  conv2.weight
cpu  conv2.bias
cpu  fc1.weight
cpu  fc1.bias
cpu  fc2.weight
cpu  fc2.bias
cpu  out.weight
cpu  out.bias


In [25]:
# Move the network's parameters to the GPU, then confirm where each one lives.
network.to('cuda')
for param_name, tensor in network.named_parameters():
    print(f"{tensor.device}  {param_name}")

cuda:0  conv1.weight
cuda:0  conv1.bias
cuda:0  conv2.weight
cuda:0  conv2.bias
cuda:0  fc1.weight
cuda:0  fc1.bias
cuda:0  fc2.weight
cuda:0  fc2.bias
cuda:0  out.weight
cuda:0  out.bias


In [24]:
sample = torch.ones(1,1,28,28)
sample.device

device(type='cpu')

In [26]:
# The network's weights are on the GPU but `sample` is still a CPU tensor,
# so the forward pass raises; print the message instead of stopping.
try:
    network(sample)
except RuntimeError as e:
    print(e)

Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor


In [28]:
try:
    # Put the input on the same device as the network before the forward pass.
    sample = sample.to('cuda')
    print(network(sample))
except RuntimeError as e:
    print(e)

tensor([[-0.0890,  0.0210, -0.1216, -0.0170, -0.0413,  0.0780,  0.0395, -0.1822, -0.0800, -0.0393]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


In [29]:
network

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [30]:
network.to('cpu')

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [31]:
network.to('cuda')

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

#### Using the GPU

In [32]:
class RunManager:
    """Collects per-epoch metrics across training runs and logs them to TensorBoard.

    Device-aware variant: ``begin_run`` moves the sample batch to the run's
    device before tracing the model graph.

    Intended call order for each run::

        begin_run
            begin_epoch
                track_loss / track_num_correct   (once per batch)
            end_epoch
        end_run

    Every finished epoch appends one row to ``run_data``; ``save`` writes all
    rows to CSV and JSON.
    """

    def __init__(self):
        # Per-epoch accumulators (reset in begin_epoch).
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state; run_data accumulates rows over all runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Set by begin_run.
        self.model = None
        self.loader = None
        self.tb = None

    def begin_run(self, hyper_params, model, loader):
        """Start a run: reset the run clock and open a TensorBoard writer.

        Args:
            hyper_params: namedtuple of this run's hyper-parameters. Its string
                form becomes the writer's comment and its fields are added to
                every result row. An optional ``device`` field names the
                device the model lives on.
            model: the network being trained; it is logged as a graph.
            loader: the data loader used for training. One batch is drawn from
                it to log an image grid and to trace the graph.
        """
        self.run_start_time = time.time()
        self.run_params = hyper_params
        self.run_count += 1

        self.model = model
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{hyper_params}')

        images, _ = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(         ## <-------------GPU
            self.model,
            # The sample batch must sit on the model's device for tracing.
            # The device comes from this run's own hyper-parameters rather
            # than from a notebook-level variable; runs without a `device`
            # field are treated as CPU runs.
            images.to(getattr(hyper_params, 'device', 'cpu'))
        )

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter for the next run."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self, epoch_no):
        """Start the epoch clock and clear the per-epoch accumulators.

        ``epoch_no`` is accepted for the caller's convenience but is not used;
        the manager keeps its own ``epoch_count``.
        """
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finish the epoch: log metrics and histograms, then record and display a result row."""
        now = time.time()
        epoch_duration = now - self.epoch_start_time
        run_duration = now - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.model.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that took no part in backward() has grad == None,
            # which add_histogram cannot plot.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict([
            ("run", self.run_count),
            ("epoch", self.epoch_count),
            ("loss", loss),
            ("accuracy", accuracy),
            ("epoch_duration", epoch_duration),
            ("run duration", run_duration),
        ])
        # Append this run's hyper-parameters as extra columns.
        results.update(self.run_params._asdict())

        self.run_data.append(results)
        df = pd.DataFrame(self.run_data)

        # Replace the previously shown table instead of stacking a new one.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by the batch's size.

        Args:
            loss: scalar tensor with the batch loss (assumed to be the mean
                over the batch).
            batch_size: number of samples in this batch. When omitted,
                ``self.loader.batch_size`` is used, which over-weights a final
                batch that is smaller than the others; pass ``len(labels)``
                to get an exact epoch loss in that case.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose arg-max along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all recorded rows to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame(self.run_data).to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [33]:
from torch.utils.data import DataLoader

In [36]:
# Hyper-parameter grid: every combination below becomes one training run.
params = OrderedDict(
    lr = [.01],
    batch_size = [1000, 2000],
    num_workers = [0, 1],
    # Only include 'cuda' when a GPU is usable, so this cell also runs on
    # CPU-only machines instead of failing on the first GPU run.
    device = ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']  # <-
)

m = RunManager()

for run in RunBuilder.get_runs(params):
    # A fresh network, loader and optimizer per run keeps the runs independent.
    device = torch.device(run.device)
    network = Network().to(device)

    loader = DataLoader(train_set, batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch(epoch)

        for images, labels in loader:
            # Move each batch to the device the network lives on.
            images = images.to(device)  # <---
            labels = labels.to(device)  # <---

            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct(preds, labels)

        m.end_epoch()
    m.end_run()

# Writes results.csv and results.json.
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch_duration,run duration,lr,batch_size,num_workers,device
0,1,1,1.072381,0.592133,7.704121,8.481627,0.01,1000,0,cpu
1,2,1,0.958981,0.63715,6.601738,7.512621,0.01,1000,0,cuda
2,3,1,1.162702,0.5581,11.032968,18.859688,0.01,1000,1,cpu
3,4,1,1.011904,0.61225,10.001505,15.22804,0.01,1000,1,cuda
4,5,1,1.22739,0.5463,7.795508,8.966387,0.01,2000,0,cpu
5,6,1,1.343844,0.498617,6.348495,7.542449,0.01,2000,0,cuda
6,7,1,1.364553,0.483533,10.913345,16.555874,0.01,2000,1,cpu
7,8,1,1.345977,0.492267,9.758257,15.685858,0.01,2000,1,cuda


In [38]:
# Rank all runs from the fastest to the slowest epoch.
pd.DataFrame(m.run_data).sort_values(by="epoch_duration")

Unnamed: 0,run,epoch,loss,accuracy,epoch_duration,run duration,lr,batch_size,num_workers,device
5,6,1,1.343844,0.498617,6.348495,7.542449,0.01,2000,0,cuda
1,2,1,0.958981,0.63715,6.601738,7.512621,0.01,1000,0,cuda
0,1,1,1.072381,0.592133,7.704121,8.481627,0.01,1000,0,cpu
4,5,1,1.22739,0.5463,7.795508,8.966387,0.01,2000,0,cpu
7,8,1,1.345977,0.492267,9.758257,15.685858,0.01,2000,1,cuda
3,4,1,1.011904,0.61225,10.001505,15.22804,0.01,1000,1,cuda
6,7,1,1.364553,0.483533,10.913345,16.555874,0.01,2000,1,cpu
2,3,1,1.162702,0.5581,11.032968,18.859688,0.01,1000,1,cpu
