# [5.4 Custom Layers](https://d2l.ai/chapter_deep-learning-computation/custom-layer.html)

## 5.4.1. Layers without Parameters

In [1]:
import torch
from torch import nn
from torch.nn import functional as F


class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of its input from every element."""

    def forward(self, X):
        """Return ``X`` shifted so that its overall mean is zero."""
        # X.mean() reduces over all elements, so one scalar is broadcast back over X.
        overall_mean = X.mean()
        return X - overall_mean

In [2]:
# Build the input with torch.tensor and an explicit float32 dtype.
layer = CenteredLayer()
layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))

tensor([-2., -1.,  0.,  1.,  2.])

- We can now incorporate the custom layer as a component when constructing more complex models.

In [3]:
# Linear(8 -> 128) followed by CenteredLayer: the last layer subtracts the mean
# of its input, so the mean of Y is zero up to floating-point rounding.
net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())
Y = net(torch.rand(4, 8))
Y.mean()

tensor(-1.8626e-09, grad_fn=<MeanBackward0>)

## 5.4.2. Layers with Parameters

- Example: <br>
    Defining layers with parameters (using `nn.Parameter`) that can be adjusted through training. <br>

In [4]:
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU activation.

    Args:
        in_units: Number of input features.
        units: Number of output features.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # Wrapping the tensors in nn.Parameter registers them with the module,
        # so they show up in parameters() and state_dict().
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)``.

        ``X`` must have a last dimension of size ``in_units`` for the matrix
        product with ``weight`` to be defined.
        """
        # Use the parameters themselves rather than `.data`: `.data` yields
        # tensors detached from autograd, so no gradient would ever reach
        # weight or bias and the layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)

In [5]:
linear = MyLinear(5, 3)
linear.weight

Parameter containing:
tensor([[-1.1644,  0.2703,  0.1956],
        [ 1.2594, -0.7831, -0.6759],
        [ 1.2482, -1.7118,  0.3356],
        [-1.4289, -1.1213, -0.9168],
        [ 2.3368, -0.9702,  0.5405]], requires_grad=True)

- We can also construct models from custom layers with `nn.Sequential`.

In [6]:
# Stack two custom layers (64 -> 8 -> 1) and run a batch of 2 random inputs through them.
net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))

tensor([[5.4942],
        [6.4801]])

# [5.5 File I/O](https://d2l.ai/chapter_deep-learning-computation/read-write.html)

## 5.5.1. Loading and Saving Tensors

In [7]:
import torch
from torch import nn
from torch.nn import functional as F

x = torch.arange(4)
torch.save(x, 'x-file')

In [8]:
x2 = torch.load('x-file')
x2

tensor([0, 1, 2, 3])

- Store a **list**/**dictionary** of tensors and read them back into memory

In [9]:
# Save both tensors as one list in a single file, then unpack the list on load.
y = torch.zeros(4)
torch.save([x, y],'x-files')
x2, y2 = torch.load('x-files')
(x2, y2)

(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))

In [10]:
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict')
mydict2 = torch.load('mydict')
mydict2

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

## 5.5.2 Loading and Saving Model Parameters

In [11]:
class MLP(nn.Module):
    """Two-layer perceptron: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer."""
        activations = F.relu(self.hidden(x))
        return self.output(activations)

# Instantiate the network and keep its output Y for a batch of 2 random 20-feature inputs.
net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)

In [12]:
# Save all network parameters 
torch.save(net.state_dict(), 'mlp.params')

In [13]:
# To reinstate a model, we need to generate the architecture in code and then load the parameters from disk.
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))

<All keys matched successfully>

In [14]:
# Double-check that Y_clone == Y: clone was loaded with the parameters saved
# from net, so the same input X should give an element-wise identical output.
clone.eval()
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

# [5.6 GPUs](https://d2l.ai/chapter_deep-learning-computation/use-gpu.html)

In [15]:
# To check the GPU status for NVIDIA GPU
!nvidia-smi

/bin/bash: nvidia-smi: command not found


### 5.6.1 Computing Devices

In [16]:
import torch
from torch import nn

torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')

(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))

In [17]:
# count number of available GPUs
torch.cuda.device_count()

0

In [18]:
# Two convenient functions that allow us to run code even if the requested GPUs do not exist.
def try_gpu(i=0):  #@save
    """Return gpu(i) if exists, otherwise return cpu().

    Args:
        i: Zero-based index of the requested CUDA device.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``0 <= i < torch.cuda.device_count()``,
        otherwise ``torch.device('cpu')``.
    """
    # A negative index never names a device; without the lower bound the count
    # check would pass and an invalid 'cuda:-1' device string would be built.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():  #@save
    """Return every available CUDA device, or ``[cpu]`` when there is none."""
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        # No GPU present: fall back to a one-element list holding the CPU device.
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cpu'), device(type='cpu'), [device(type='cpu')])

### 5.6.2 Tensors and GPUs

In [19]:
x = torch.tensor([1, 2, 3])
x.device

device(type='cpu')

In [20]:
# Create the tensor variable X on the first (default) gpu.
X = torch.ones(2, 3, device=try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [21]:
# Assuming that you have at least two GPUs, the following code will create a random tensor on the second GPU.
# With fewer than two GPUs, try_gpu(1) returns the CPU device, so the tensor is created on the CPU instead.
Y = torch.rand(2, 3, device=try_gpu(1))
Y

tensor([[0.8106, 0.7162, 0.0196],
        [0.3330, 0.6053, 0.6923]])

In [None]:
# --- temp --- 