In [2]:
import torch
from torch import nn
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cuda device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [3]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6], device='cuda:0')


In [4]:
input_image = torch.rand(3,28,28)
print(input_image.size())

torch.Size([3, 28, 28])


In [5]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [8]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())
print(hidden1.shape)

torch.Size([3, 20])
torch.Size([3, 20])


In [11]:
print(f"Before ReLU: {hidden1}\n\n")

hidden1_ = hidden1 * (hidden1 > 0)
print(f"{hidden1_}")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")


Before ReLU: tensor([[0.0748, 0.3736, 0.0000, 0.1724, 0.1239, 0.0000, 0.4207, 0.3042, 0.0000,
         0.0000, 0.0000, 0.0263, 0.0000, 0.3385, 0.1005, 0.0000, 0.0000, 0.5949,
         0.0125, 0.0000],
        [0.0000, 0.5874, 0.0000, 0.1899, 0.0860, 0.0000, 0.1350, 0.2278, 0.0000,
         0.0000, 0.0961, 0.0000, 0.0000, 0.4699, 0.0609, 0.0000, 0.0000, 0.3000,
         0.0953, 0.0000],
        [0.0000, 0.4009, 0.0000, 0.5745, 0.1209, 0.0000, 0.2611, 0.2924, 0.0000,
         0.0000, 0.0706, 0.0000, 0.2296, 0.3587, 0.1582, 0.0000, 0.0000, 0.1639,
         0.0000, 0.0000]], grad_fn=<ReluBackward0>)


tensor([[0.0748, 0.3736, 0.0000, 0.1724, 0.1239, 0.0000, 0.4207, 0.3042, 0.0000,
         0.0000, 0.0000, 0.0263, 0.0000, 0.3385, 0.1005, 0.0000, 0.0000, 0.5949,
         0.0125, 0.0000],
        [0.0000, 0.5874, 0.0000, 0.1899, 0.0860, 0.0000, 0.1350, 0.2278, 0.0000,
         0.0000, 0.0961, 0.0000, 0.0000, 0.4699, 0.0609, 0.0000, 0.0000, 0.3000,
         0.0953, 0.0000],
        [0.0000, 0.

In [21]:
x = torch.ones(5) # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [22]:
print(f"{loss}")

1.649733543395996


In [23]:
print(f"{z}")

tensor([ 1.9900, -2.4200,  2.6795], grad_fn=<AddBackward0>)


In [24]:
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x7f8e7e44dc90>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7f8d73cfebc0>


In [25]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[0.2932, 0.0272, 0.3119],
        [0.2932, 0.0272, 0.3119],
        [0.2932, 0.0272, 0.3119],
        [0.2932, 0.0272, 0.3119],
        [0.2932, 0.0272, 0.3119]])
tensor([0.2932, 0.0272, 0.3119])


In [26]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


In [27]:
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


In [28]:
inp = torch.eye(4, 5, requires_grad=True)
out = (inp+1).pow(2).t()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"First call\n{inp.grad}")
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")
inp.grad.zero_()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nCall after zeroing gradients\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])

Call after zeroing gradients
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])
