In [61]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np

In [2]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [4]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)


Using mps device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)


In [6]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [7]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [8]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.315897  [   64/60000]
loss: 2.299589  [ 6464/60000]
loss: 2.286253  [12864/60000]
loss: 2.270481  [19264/60000]
loss: 2.257290  [25664/60000]
loss: 2.231647  [32064/60000]
loss: 2.237676  [38464/60000]
loss: 2.207334  [44864/60000]
loss: 2.202379  [51264/60000]
loss: 2.168335  [57664/60000]
Test Error: 
 Accuracy: 37.1%, Avg loss: 2.163441 

Epoch 2
-------------------------------
loss: 2.179944  [   64/60000]
loss: 2.165737  [ 6464/60000]
loss: 2.118680  [12864/60000]
loss: 2.127170  [19264/60000]
loss: 2.079838  [25664/60000]
loss: 2.025388  [32064/60000]
loss: 2.050119  [38464/60000]
loss: 1.975375  [44864/60000]
loss: 1.977877  [51264/60000]
loss: 1.907364  [57664/60000]
Test Error: 
 Accuracy: 56.4%, Avg loss: 1.904318 

Epoch 3
-------------------------------
loss: 1.934278  [   64/60000]
loss: 1.904529  [ 6464/60000]
loss: 1.795844  [12864/60000]
loss: 1.835558  [19264/60000]
loss: 1.727200  [25664/60000]
loss: 1.675634  [32064/600

In [12]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [13]:
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth"))sd

SyntaxError: invalid syntax (701043924.py, line 2)

In [15]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[10][0], test_data[10][1]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')


Predicted: "Coat", Actual: "Coat"


In [16]:
pred[0]

tensor([ 0.1232, -1.4327,  2.3718, -0.6713,  2.4326, -1.2177,  1.7662, -2.1951,
         0.6749, -1.4333], device='mps:0')

In [11]:
pred = model(x)

NameError: name 'x' is not defined

In [19]:
p=model.parameters()

In [21]:
for a in p:
    print(a)

Parameter containing:
tensor([[ 0.0179,  0.0346,  0.0131,  ...,  0.0124, -0.0231, -0.0154],
        [ 0.0315, -0.0262,  0.0059,  ..., -0.0356, -0.0062, -0.0026],
        [ 0.0097, -0.0227,  0.0082,  ..., -0.0258, -0.0045,  0.0319],
        ...,
        [-0.0291,  0.0208,  0.0300,  ...,  0.0109,  0.0239,  0.0282],
        [ 0.0011,  0.0347, -0.0112,  ...,  0.0084, -0.0272,  0.0259],
        [-0.0299,  0.0239,  0.0134,  ...,  0.0092,  0.0087, -0.0002]],
       device='mps:0', requires_grad=True)
Parameter containing:
tensor([ 2.6905e-02,  3.6230e-02, -1.6542e-02,  2.7734e-02, -6.8030e-03,
         2.9756e-02,  6.9669e-03, -5.1863e-03, -5.8233e-03, -5.2764e-03,
         2.7640e-02, -2.6019e-02, -2.1792e-02,  3.6611e-03, -1.2398e-03,
        -1.4547e-02, -1.2239e-02,  7.4232e-03,  6.0214e-03,  2.8080e-02,
        -2.6775e-02, -1.1291e-02,  1.0311e-02,  1.5304e-02,  9.4879e-03,
        -1.6575e-02,  1.6566e-02, -3.0821e-02, -1.9482e-02, -1.4785e-03,
         5.6605e-03,  1.0055e-02, -3.3401

In [22]:
type(pred)

torch.Tensor

In [25]:
type(y)

int

In [27]:
type(training_data)

torchvision.datasets.mnist.FashionMNIST

In [28]:
dataloader=DataLoader(training_data, batch_size=batch_size)

In [29]:
for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)
    break

In [30]:
type(X),type(y)

(torch.Tensor, torch.Tensor)

In [31]:
pred=model(X)

In [33]:
type(pred),type(y)

(torch.Tensor, torch.Tensor)

In [34]:
loss=(pred-y).sum()

RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1

In [35]:
pred.shape

torch.Size([64, 10])

In [36]:
y.shape

torch.Size([64])

In [37]:
loss = loss_fn(pred, y)

In [38]:
type(loss)

torch.Tensor

In [40]:
y_onehot=torch.nn.functional.one_hot(y).to(device)

In [42]:
type(y_onehot),y_onehot.shape

(torch.Tensor, torch.Size([64, 10]))

In [43]:
loss_onehot=(pred-y_onehot).sum()

In [44]:
type(loss_onehot)

torch.Tensor

In [51]:
loss.detach().to('cpu').numpy()

array(1.1525317, dtype=float32)

In [52]:
loss_onehot.detach().to('cpu').numpy()

array(-54.646942, dtype=float32)

In [53]:
loss.backward()

In [55]:
loss_onehot.backward(retain_graph=True)

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [58]:
pred_oneval=pred.sum()

In [72]:
m_param_1=list(model.parameters())[0]

In [75]:
m_param_1.grad

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  7.2642e-07,  ...,  7.0276e-05,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -5.4769e-07,  ..., -3.5775e-05,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  1.4980e-06,  ...,  6.7522e-05,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.4090e-06,  ..., -6.9300e-05,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.7388e-06,  ..., -1.2789e-05,
          0.0000e+00,  0.0000e+00]], device='mps:0')

In [77]:
m_param_1

Parameter containing:
tensor([[ 0.0179,  0.0346,  0.0131,  ...,  0.0124, -0.0231, -0.0154],
        [ 0.0315, -0.0262,  0.0059,  ..., -0.0356, -0.0062, -0.0026],
        [ 0.0097, -0.0227,  0.0082,  ..., -0.0258, -0.0045,  0.0319],
        ...,
        [-0.0291,  0.0208,  0.0300,  ...,  0.0109,  0.0239,  0.0282],
        [ 0.0011,  0.0347, -0.0112,  ...,  0.0084, -0.0272,  0.0259],
        [-0.0299,  0.0239,  0.0134,  ...,  0.0092,  0.0087, -0.0002]],
       device='mps:0', requires_grad=True)

In [109]:
m_param_1.shape

torch.Size([512, 784])

In [78]:
pred_oneval.zero_grad()

AttributeError: 'Tensor' object has no attribute 'zero_grad'

In [59]:
pred_oneval.backward()

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [60]:
nn.functional.softmax(torch.rand(5))

  nn.functional.softmax(torch.rand(5))


tensor([0.2309, 0.1988, 0.2506, 0.1604, 0.1593])

In [68]:
t=torch.from_numpy(np.array([[3.0,2.9,-3.8,5.6]]))

In [70]:
nn.functional.softmax(t).sum()

  nn.functional.softmax(t).sum()


tensor(1., dtype=torch.float64)

In [96]:

a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)


In [97]:
Q = 3*a**3 - b**2

In [98]:
external_grad = torch.tensor([1., 1.])
smallq=Q.sum()
# backward(gradient=external_grad)

In [102]:
b.data

tensor([6., 4.])

In [107]:
smallq.backward(retain_graph=True)

In [108]:
a.grad,b.grad

(tensor([36., 81.]), tensor([-12.,  -8.]))

In [106]:
a.grad.zero_(),b.grad.zero_()

(tensor([0., 0.]), tensor([0., 0.]))