In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Choose the compute backend in priority order: CUDA GPU, then Apple MPS,
# and finally fall back to the CPU. Later checks only run if earlier ones fail.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
# Sanity check: ask PyTorch directly whether a CUDA device is usable.
torch.cuda.is_available()

True

In [4]:
# Record the installed PyTorch version for reproducibility.
torch.__version__

'1.12.1'

In [7]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The layer sizes are configurable; the defaults reproduce the original
    784 -> 512 -> 512 -> 10 network, so ``NeuralNetwork()`` is unchanged.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits (default ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Flatten defaults to start_dim=1: the leading (batch) axis is kept
        # and all remaining axes are collapsed into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw logits.

        No softmax is applied here; the output has shape
        ``(batch, num_classes)``.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [8]:
# Build the network first, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [11]:
# One random 28x28 sample with a leading dimension of 1, allocated directly
# on the selected device.
x = torch.rand(1, 28, 28, device=device)
x

tensor([[[1.3654e-01, 6.0849e-01, 4.3281e-01, 5.7581e-01, 5.5888e-02,
          6.2762e-01, 2.8965e-03, 6.6817e-01, 5.0711e-01, 5.4059e-01,
          5.0116e-01, 5.0628e-01, 7.6455e-01, 9.3490e-01, 2.7472e-01,
          5.4582e-01, 8.9167e-02, 8.8430e-02, 4.7153e-01, 7.8757e-01,
          1.9433e-01, 4.1996e-01, 8.2949e-01, 2.1357e-02, 1.8820e-01,
          1.4203e-01, 9.3706e-01, 5.3404e-02],
         [3.8734e-01, 3.7776e-02, 6.9770e-01, 1.3807e-01, 1.3632e-01,
          7.8843e-01, 8.1617e-01, 9.8317e-01, 8.6007e-01, 7.3541e-01,
          6.7427e-01, 6.1210e-01, 9.4423e-01, 8.3802e-01, 9.7314e-02,
          1.2987e-01, 8.3355e-01, 3.7858e-01, 5.3575e-01, 2.7350e-01,
          9.8886e-01, 8.4706e-01, 7.9903e-01, 5.5084e-03, 2.6210e-01,
          8.0432e-01, 8.8383e-01, 3.2613e-01],
         [9.6638e-01, 8.7985e-01, 1.0334e-01, 4.9691e-01, 8.1116e-01,
          4.3581e-01, 9.4894e-01, 7.4040e-01, 4.3424e-02, 8.4076e-01,
          3.4638e-01, 3.4871e-01, 2.2971e-01, 8.1079e-01, 8.9857e-

In [12]:
# Confirm the input tensor's dimensions before feeding it to the model.
x.shape

torch.Size([1, 28, 28])

In [13]:
# Forward pass: calling the module runs NeuralNetwork.forward and returns the
# raw logits (no softmax has been applied yet).
output = model(x)
output

tensor([[-0.1199,  0.0102, -0.0337, -0.0741, -0.1079,  0.0557,  0.1443,  0.1039,
          0.0513, -0.0376]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [14]:
# One row per input sample, one column per output logit.
output.shape

torch.Size([1, 10])

In [15]:
# Normalize the logits along dim 1 (the class axis) so each row sums to 1.
pred = nn.Softmax(dim=1)(output)
pred

tensor([[0.0885, 0.1007, 0.0964, 0.0926, 0.0895, 0.1054, 0.1152, 0.1106, 0.1050,
         0.0960]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [17]:
# Index of the largest probability in each row, i.e. the predicted class.
res = torch.argmax(pred, dim=1)

In [18]:
# Show the predicted class index tensor.
res

tensor([6], device='cuda:0')

In [20]:
# .item() extracts the single element of the tensor as a plain Python number.
type(res.item())

int

In [21]:
from torchinfo import summary

In [22]:
# Show the module hierarchy via the model's repr.
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [23]:
# Report per-layer output shapes and parameter counts for an input of shape
# (16, 1, 28, 28); the leading 16 shows up as the first dimension of every
# output shape in the table.
summary(model, input_size=(16, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
NeuralNetwork                            [16, 10]                  --
├─Flatten: 1-1                           [16, 784]                 --
├─Sequential: 1-2                        [16, 10]                  --
│    └─Linear: 2-1                       [16, 512]                 401,920
│    └─ReLU: 2-2                         [16, 512]                 --
│    └─Linear: 2-3                       [16, 512]                 262,656
│    └─ReLU: 2-4                         [16, 512]                 --
│    └─Linear: 2-5                       [16, 10]                  5,130
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
Total mult-adds (M): 10.72
Input size (MB): 0.05
Forward/backward pass size (MB): 0.13
Params size (MB): 2.68
Estimated Total Size (MB): 2.86

In [24]:
# Print the module hierarchy, then walk every learnable parameter.
print(f"Model structure: {model}\n\n")

# named_parameters() yields (qualified name, tensor) pairs; param[:2] keeps
# only the first two entries along dim 0 so the printout stays short.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0292,  0.0342, -0.0269,  ...,  0.0141,  0.0030, -0.0276],
        [-0.0061, -0.0081, -0.0155,  ...,  0.0031, -0.0124,  0.0080]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0291, -0.0051], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0330,  0.0219,  0.0303,  ...,  0.0277, -0.0145,  0.0035],
        [-0.0062, -0.0435, -0.0365,  ...,  0.0406,  0.0416,  0.0060]],
       device='cuda:0', grad_fn=<Sl