In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Pick the compute device once: use the GPU when CUDA is usable, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cpu device


In [11]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear head.

    With the default arguments the architecture is the original hard-coded
    one: 28*28 = 784 flattened input features, two hidden layers of width
    512, and 10 output scores per sample.

    Args:
        in_features: Number of features per sample after flattening.
        hidden_features: Width of each of the two hidden layers.
        num_classes: Number of output scores (logits) per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # nn.Flatten keeps dim 0 (the batch) and collapses the remaining dims.
        self.flatten = nn.Flatten()
        # Attribute names are kept so parameter names such as
        # "linear_relu_stack.0.weight" stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the unnormalised class scores (logits) for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [14]:
# Move the parameters onto the selected device. The input tensor X is created
# with device=device, so leaving the model on the CPU would make the forward
# pass fail with a device-mismatch error whenever CUDA is selected.
model = NeuralNetwork().to(device)
# Bare last expression: the notebook displays the module structure.
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [16]:
# Random stand-in input: a batch holding a single 28x28 sample, allocated
# directly on the selected device.
X = torch.randn((1, 28, 28), device=device)
X

tensor([[[ 2.9345e-01,  2.0813e+00, -2.2888e-01, -4.8922e-01, -1.7449e+00,
           1.1081e+00,  2.3764e+00, -2.4525e-01,  1.2118e+00, -9.5818e-01,
           1.9357e+00,  1.0573e+00,  5.7569e-01,  2.0185e-01,  4.8955e-01,
           8.8850e-01, -2.7712e+00, -9.0686e-01, -3.8500e-01, -2.5258e+00,
           5.2773e-01, -7.3744e-01,  2.1549e+00, -1.7768e-02, -6.0668e-01,
           1.5365e+00,  2.2748e-01, -9.1765e-01],
         [ 1.3392e-01, -1.1576e+00,  3.6619e-01,  1.6462e-02, -1.0017e+00,
           5.0247e-01,  1.0914e-01, -9.5668e-02,  5.1460e-01,  2.7856e-01,
           8.5617e-01, -8.2553e-01,  9.5631e-01, -1.3797e+00, -1.3146e-01,
          -5.9238e-01, -4.1412e-01,  1.2190e-01, -1.1475e-01, -6.9308e-01,
          -1.6181e+00, -5.1963e-01, -1.0019e+00,  4.2974e-01,  1.0147e-01,
           2.2769e+00, -3.2117e-01, -1.5179e-01],
         [-8.2396e-01, -4.0622e-02, -8.6674e-01, -8.5109e-02, -9.7270e-01,
          -6.0344e-01, -1.0144e+00, -7.9973e-01,  4.6056e-01, -3.0441e-01,


In [18]:
# Forward pass gives raw scores; softmax over dim 1 turns each row into
# probabilities, and argmax picks the most likely class index per sample.
logits = model(X)
pred_prob = torch.softmax(logits, dim=1)
y_pred = pred_prob.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([8])


In [19]:
# Sample a 3 x 28 x 28 tensor of uniform values in [0, 1) to push through the
# individual layers below.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [20]:
# nn.Flatten keeps dim 0 and collapses the rest, so each 28x28 slice becomes
# one row of 784 values.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [21]:
# Affine map from the 784 flattened features down to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [22]:
# Show the layer output before and after the activation: ReLU replaces every
# negative entry with 0 and leaves the others unchanged.
print(f"Before ReLU: {hidden1}\n\n")
# hidden1 is rebound here, so re-running only this cell would print values
# that have already been rectified.
hidden1 = nn.functional.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.1931, -0.5386, -0.1493, -0.4333, -0.3847, -0.0966, -0.3285,  0.2509,
          0.4000,  0.0806, -0.0731, -0.1993, -0.4481,  0.2596,  0.0403, -0.1552,
         -0.1365,  0.3221,  0.4855,  0.2462],
        [-0.2708, -0.8799,  0.0557, -0.2140, -0.6057, -0.2967, -0.3166, -0.1734,
          0.3975, -0.1308, -0.1001, -0.1462, -0.3875,  0.1915, -0.2594,  0.0361,
          0.3871, -0.0312,  0.4298,  0.3458],
        [-0.2468, -0.8953, -0.0062, -0.6087, -0.3606, -0.4032, -0.5897,  0.1715,
          0.2538, -0.3338, -0.1557, -0.0956, -0.2841,  0.2968, -0.5802,  0.0110,
         -0.0752,  0.4653,  0.5036,  0.4085]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2509, 0.4000,
         0.0806, 0.0000, 0.0000, 0.0000, 0.2596, 0.0403, 0.0000, 0.0000, 0.3221,
         0.4855, 0.2462],
        [0.0000, 0.0000, 0.0557, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3975,
         0.0000, 0.0000, 0.0000, 0.0000, 0.1915, 0.00

In [23]:
# Chain the modules built above, plus a fresh activation and a 20 -> 10 output
# layer, into one container; data passes through them in the listed order.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# Fresh random batch to run through the whole pipeline.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [26]:
# dim=1 normalises along the second axis, so each row of `logits` becomes a
# set of non-negative values that sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
# Bare last expression: the notebook displays the resulting tensor.
pred_probab

tensor([[0.0972, 0.1176, 0.1117, 0.0861, 0.0819, 0.0886, 0.1030, 0.0980, 0.1132,
         0.1027],
        [0.0930, 0.1164, 0.1117, 0.0886, 0.0827, 0.0933, 0.0995, 0.0990, 0.1070,
         0.1087],
        [0.0981, 0.1212, 0.1150, 0.0823, 0.0855, 0.0799, 0.0978, 0.0994, 0.1072,
         0.1137]], grad_fn=<SoftmaxBackward0>)

In [27]:
# Printing the module lists its registered sub-layers.
print("Model structure: ", model, "\n\n")

# Walk every learnable tensor, reporting its qualified name, its shape and a
# preview of its first two entries along dim 0.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0143,  0.0179, -0.0023,  ...,  0.0244,  0.0167, -0.0012],
        [ 0.0221,  0.0228,  0.0133,  ...,  0.0191, -0.0160, -0.0113]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0231, -0.0275], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0154,  0.0219,  0.0122,  ...,  0.0002, -0.0015,  0.0401],
        [ 0.0092, -0.0051, -0.0244,  ..., -0.0330,  0.0320,  0.0304]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias 