In [13]:
import torch
import numpy as np

# Pick the best available backend. Apple's MPS backend takes priority over
# CUDA, and the CPU is the fallback when neither accelerator is available.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Seed PyTorch's global RNG so that the random tensors and the randomly
# initialised layers created below can be regenerated on a fresh kernel
# (Restart & Run All) instead of changing on every run.
SEED = 0
torch.manual_seed(SEED)

# From here on, tensors and modules created without an explicit `device=`
# argument are allocated on `device`.
torch.set_default_device(device)

mps


In [14]:
# 3x4 matrix of samples drawn uniformly from [0, 1); it is reused by the
# concatenation and matrix-multiplication cells that follow.
tensor = torch.rand(size=(3, 4))
print(tensor)

tensor([[0.3893, 0.6359, 0.8808, 0.2603],
        [0.2704, 0.5518, 0.8437, 0.9421],
        [0.7444, 0.2635, 0.1945, 0.6280]], device='mps:0')


In [20]:
# Stack along dim 0: a row of ones on top of `tensor` and a row of zeros
# underneath, giving a 5x4 result.
t1 = torch.cat((torch.ones(1, 4), tensor, torch.zeros(1, 4)), 0)
# Stack along dim 1: a column of ones to the left of `tensor` and a column of
# zeros to the right, giving a 3x6 result.
t2 = torch.cat((torch.ones(3, 1), tensor, torch.zeros(3, 1)), 1)
# Print both results separated by a single blank line.
print(t1, t2, sep="\n\n")

tensor([[1.0000, 1.0000, 1.0000, 1.0000],
        [0.3893, 0.6359, 0.8808, 0.2603],
        [0.2704, 0.5518, 0.8437, 0.9421],
        [0.7444, 0.2635, 0.1945, 0.6280],
        [0.0000, 0.0000, 0.0000, 0.0000]], device='mps:0')

tensor([[1.0000, 0.3893, 0.6359, 0.8808, 0.2603, 0.0000],
        [1.0000, 0.2704, 0.5518, 0.8437, 0.9421, 0.0000],
        [1.0000, 0.7444, 0.2635, 0.1945, 0.6280, 0.0000]], device='mps:0')


In [None]:
# Matrix product of the transposed 3x4 `tensor` (4x3) with the 3x6 `t2`,
# which yields a 4x6 matrix.
torch.matmul(tensor.T, t2)

tensor([[1.4041, 0.7788, 0.5929, 0.7159, 0.8236, 0.0000],
        [1.4512, 0.5929, 0.7782, 1.0768, 0.8508, 0.0000],
        [1.9190, 0.7159, 1.0768, 1.5254, 1.1463, 0.0000],
        [1.8304, 0.8236, 0.8508, 1.1463, 1.3496, 0.0000]], device='mps:0')

In [708]:
# Monte-Carlo estimate: multiply tensor.T (4x3) by a very wide matrix of
# uniform [0, 1) samples and average every entry of the product. Because the
# samples average about 0.5, the result should land close to
# (tensor.sum(dim=0) / 2).mean().
# NOTE: the random operand holds 3 * 100_000_000 values, so this cell needs a
# lot of memory and time.
torch.mm(tensor.T, torch.rand(3, 100_000_000)).mean().item()

0.8255351185798645

In [562]:
# Closed-form counterpart of the Monte-Carlo cell: halve each column sum of
# `tensor`, then average the four halved sums into one Python float.
tensor.sum(dim=0).div(2).mean().item()

0.8255819082260132

In [710]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def _one_hot_label(y):
    """Turn the integer class index ``y`` into a length-10 float one-hot vector."""
    encoded = torch.zeros(10, dtype=torch.float)
    # scatter_ writes the value 1 at index ``y`` along dim 0, in place, and
    # returns the same tensor.
    return encoded.scatter_(0, torch.tensor(y), value=1)


# FashionMNIST training split stored under ./data (download=True fetches the
# files when they are not already present). Images are converted with
# ToTensor(); labels are one-hot encoded by the helper above.
ds = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=transforms.Lambda(_one_hot_label),
)

In [713]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU stages, output Linear.

    With the default arguments the network is 784 -> 512 -> 512 -> 10, which
    matches 28x28 inputs and 10 output classes. The layer sizes can be
    overridden to reuse the same architecture on other input shapes.

    Args:
        in_features: Number of values per sample once flattened.
        hidden_features: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the first (batch) dimension into one.
        self.flatten = nn.Flatten()
        # Layers stay positional inside Sequential so parameter names remain
        # "linear_relu_stack.0.weight", "linear_relu_stack.2.weight", ...
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``.

        ``x`` is flattened per sample, so each sample must contain exactly
        ``in_features`` values. No softmax is applied here.
        """
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move its parameters to the selected device, and print
# the module tree.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [719]:
# Run one random 28x28 "image" (batch of size 1) through the model.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Softmax over dim 1 turns the logits of each sample into probabilities;
# argmax over the same dim picks the most likely class index.
to_probabilities = nn.Softmax(dim=1)
pred_probab = to_probabilities(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7], device='mps:0')


In [720]:
# Mini-batch of three random 28x28 "images" used to trace the model layer by
# layer in the following cells.
input_image = torch.rand(size=(3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [721]:
# nn.Flatten keeps dim 0 (the batch) and merges all remaining dims, so each
# 28x28 image becomes a vector of 784 values.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [722]:
# Linear layer mapping each flattened 784-value image to 20 hidden features
# using its randomly initialised weights and bias.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [723]:
# Show the hidden activations before and after ReLU, which zeroes every
# negative entry and leaves the rest untouched.
# NOTE: `hidden1` is overwritten, so re-running this cell alone prints already
# rectified values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
relu_layer = nn.ReLU()
hidden1 = relu_layer(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.0821, -0.2227,  0.1668,  0.0994, -0.0725,  0.5485,  0.1941,  0.5541,
         -0.0943, -0.0867,  0.0338,  0.0634, -0.4099,  0.0293, -0.0606,  0.4025,
          0.2113, -0.1499, -0.1027,  0.8937],
        [-0.0141, -0.3880,  0.0390,  0.1374, -0.1624,  0.3601,  0.3256,  0.2113,
         -0.1905, -0.2056, -0.0089,  0.2821, -0.1429, -0.0777,  0.0265,  0.3149,
          0.3567, -0.0527, -0.5803,  0.6257],
        [-0.0262, -0.4693,  0.0760,  0.2517,  0.0448,  0.4530,  0.0131,  0.4960,
         -0.2585,  0.0574,  0.2073,  0.4530, -0.2400, -0.2312, -0.1886,  0.1444,
          0.1624, -0.1995, -0.1772,  0.6203]], device='mps:0',
       grad_fn=<LinearBackward0>)


After ReLU: tensor([[0.0821, 0.0000, 0.1668, 0.0994, 0.0000, 0.5485, 0.1941, 0.5541, 0.0000,
         0.0000, 0.0338, 0.0634, 0.0000, 0.0293, 0.0000, 0.4025, 0.2113, 0.0000,
         0.0000, 0.8937],
        [0.0000, 0.0000, 0.0390, 0.1374, 0.0000, 0.3601, 0.3256, 0.2113, 0.0000,
         0.0000, 0.0000, 0.28

In [724]:
# Chain the modules built above (flatten -> layer1 -> ReLU) with a new 20 -> 10
# output layer; nn.Sequential feeds data through them in the listed order.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# A fresh random batch of three 28x28 images; the output holds 10 logits each.
input_image = torch.rand(size=(3, 28, 28))
logits = seq_modules(input_image)

In [730]:
# Softmax over dim 1 rescales the logits of each sample (row) into values in
# [0, 1] that sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(input=logits)

In [732]:
# Compare the raw logits, the per-sample softmax (dim=1, every row sums to 1)
# and, for contrast, a softmax over dim=0 (every column sums to 1 across the
# batch).
print(
    f'{logits=}',
    f'{pred_probab=}',
    f'{nn.Softmax(dim=0)(logits)=}',
    sep='\n',
)

logits=tensor([[ 0.1177,  0.2050,  0.4395, -0.2335,  0.1863,  0.1656, -0.3903,  0.1878,
          0.2588,  0.0432],
        [ 0.0981,  0.1806,  0.4436, -0.2735,  0.1769,  0.3117, -0.3011,  0.1822,
          0.2501, -0.0045],
        [ 0.0134,  0.2218,  0.5660, -0.2199,  0.3498,  0.1629, -0.3376,  0.1760,
          0.3138,  0.0840]], device='mps:0', grad_fn=<LinearBackward0>)
pred_probab=tensor([[0.0995, 0.1086, 0.1373, 0.0700, 0.1066, 0.1044, 0.0599, 0.1067, 0.1146,
         0.0924],
        [0.0968, 0.1051, 0.1367, 0.0667, 0.1047, 0.1198, 0.0649, 0.1053, 0.1127,
         0.0873],
        [0.0860, 0.1059, 0.1495, 0.0681, 0.1204, 0.0999, 0.0605, 0.1012, 0.1161,
         0.0923]], device='mps:0', grad_fn=<SoftmaxBackward0>)
nn.Softmax(dim=0)(logits)=tensor([[0.3470, 0.3341, 0.3186, 0.3362, 0.3156, 0.3170, 0.3177, 0.3353, 0.3281,
         0.3339],
        [0.3403, 0.3261, 0.3199, 0.3230, 0.3127, 0.3669, 0.3474, 0.3334, 0.3252,
         0.3183],
        [0.3126, 0.3398, 0.3615, 0.3408, 0.3

In [733]:
# Print the module tree first, followed by two blank lines.
print(f"Model structure: {model}\n\n")

# named_parameters() yields (name, tensor) pairs for every weight and bias.
# Only the first two entries along dim 0 are shown to keep the output short.
named_params = model.named_parameters()
for name, param in named_params:
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-2.6595e-02,  3.5484e-02, -2.4074e-02,  ...,  2.9436e-02,
         -1.0814e-06,  3.4782e-02],
        [ 6.6162e-04, -2.5486e-02, -5.2703e-03,  ...,  3.1411e-02,
          2.4849e-02, -1.2483e-05]], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0263, -0.0042], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0427, -0.0343,  0.0308,  ..., -0.0144,  0.0040, -0.0194],
        [ 0.0383, -0.0039, -0.0211,  ..., -0.0130