In [8]:
'''Import third-party libraries'''
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [4]:
'''Downloading training data from open datasets'''
training_data = datasets.FashionMNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor()
)
test_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data\FashionMNIST\raw\train-images-idx3-ubyte.gz


94.9%





RuntimeError: File not found or corrupted.

In [4]:
'''Data for training preparation'''

batch_size = 64
# Create data loaders
train_dataloader = DataLoader(training_data, batch_size = batch_size)
test_dataloader =  DataLoader(test_data, batch_size = batch_size)

for X, y in test_dataloader:
    print(f"shape of X:{X.shape}")
    print(f"shape and type of y:{y.shape} {y.dtype}")
    break

shape of X:torch.Size([64, 1, 28, 28])
shape and type of y:torch.Size([64]) torch.int64


In [None]:
'''Creaing models'''
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_avaiable() else "cpu"
print(f"Using {device} device") 

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        '''Define the layers of the network'''
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )
    def forward(self, x):
        '''Specify how data will pass through the network'''
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Instantiate a model 
model = NeuralNetwork().to(device)
print(model)

AttributeError: module 'torch' has no attribute 'accelerator'

In [None]:
'''Define loss function and optimizer'''
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)

#1. model.parameters()会返回模型所有可学习参数（就是weights和bias）的一个iterator。这些参数是torch.nn.Parameter
# 的实例，是require_gradients = True，并且在训练过程中会更新的参数。
# 如果想要打印模型的参数，可以使用如下的方式：

print("="*50)
print(model.parameters())
print("="*50)

params = list(model.parameters())

# Print the number of parameter tensors
print(f"Number of parameter tensors: {len(params)}")

# Print each parameter's shape
for param in params:
    print(f"Parameter {param.name}: Shape = {param.shape}")

In [None]:
'''Define the model training process'''
def train(dataloader, model, loss_fn, optimizer):
    # da
    size = len(dataloader.dataset) #dataloader.dataset will get the original dataset object, and we 
    # can directly access the dataset through “dataloader.dataset”，比如dataloader.dataset[0]访问第一条数据
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y) #计算出来的是一个 PyTorch tensor with additional attributes and functions

        #Backpropagation
        loss.backward() 
        optimizer.step()
        optimizer.zero_grad()

        # print loss
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss:{loss:>7f} [{current:>5d} / {size:>5d}]")



IndentationError: expected an indented block (1355122570.py, line 2)

1. loss常用的和函数有：<br>
![属性](./images/loss常用属性.jpg)<br>
![函数](./images/loss常用函数.jpg)<br>

2. For the optimizer:
2.1 why using optimizer.zero_grad() to clear gradients before next iteration ? 
因为在pytorch中，默认是梯度的计算是累积的，这就意味着每次调用loss.backward()，那么计算出来的梯度就会被累加到模型的参数的.grad属性上，而非直接覆盖原有.grad的值；因此，如果不在下一次迭代前清除梯度的话，梯度就会持续累积，导致模型的梯度更新出错；
而在pytorch中，为什么又要默认支持梯度是累积的呢？
一方面是如果需要在多个batches之后才更新梯度的情况下，支持梯度累加之后更新，比如需要训练很大的模型，这个模型无法放到内存中？
另一方面是为了支持有多个loss的情况下，多次调用了loss.backward()函数计算梯度，允许最后累加起来更新参数。
2.2 optimier常用的属性和函数：<br>
![属性](./images/optimizer常用属性.jpg)<br>
![函数](./images/optimizer常用函数.jpg)<br>