In [18]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

In [19]:
# Get the dataset:Fashion mnist from the torchvision.datasets,save it in the 'root'
# Transform is need to convert the images.type = list or other to the torch.Tensor
trans = transforms.ToTensor()

# First code to download the dataset
# mnist_train = torchvision.datasets.FashionMNIST(root='../data',
#                                                 train=True,
#                                                 transform=trans,
#                                                 download=True)
# mnist_test = torchvision.datasets.FashionMNIST(root='../data/',
#                                                 train=False,
#                                                 transform=trans,
#                                                 download=True)

mnist_train = torchvision.datasets.FashionMNIST(root='../data',
                                                train=True,
                                                transform=trans,
                                                download=False)
mnist_test = torchvision.datasets.FashionMNIST(root='../data/',
                                                train=False,
                                                transform=trans,
                                                download=False)

In [20]:
# Generate the dataloader from the datasets, batchSize is need!
device = torch.device('cpu')
batch_size = 256
train_iter = data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter = data.DataLoader(mnist_test,batch_size,shuffle=False)
# for X,y in train_iter:
#     print(X.shape,y.shape)

In [21]:
# Define the network for the softmax regression
# One way is use the torch.nn.Sequential() which is easy but not recommended for big network
# Another way is using the torch.nn.Module() which is robust
import torch.nn as nn 
# net = nn.Sequential(nn.Flatten(),nn.Linear(28*28,10))
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight,0.01)
# net.apply(init_weights)

class MyNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.lay1 = nn.Flatten()
        self.lay2 = nn.Linear(28*28,10)
        
        # Method 2
        for m in self.modules():
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0.01)
                nn.init.constant_(m.bias,0)
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')

    def forward(self,x):
        return self.lay2(self.lay1(x))

# Torch has set the layer's parameters defaultly. If we want to change the parameters we self,there is two way
# 1. use the apply() which using the DFS mechanism to find the layers
# 2. in the init, use the loop of self.modules() to get the finally check
net = MyNet()
net.to(device)

# Method 2 is show as below:
# net.apply(init_weights)
for X,y in train_iter:
    X=X.to(device)
    print(net(X))
    break

tensor([[ 12.2915,  14.3853,  12.0534,  ...,  14.0313,  16.5090,  -2.0760],
        [  6.6129,   4.9342,  16.3623,  ...,  12.2046,  14.1523,   7.1053],
        [ 13.1024,  16.2782,  15.5495,  ...,  10.9177,   6.0375,  -2.5060],
        ...,
        [  3.5284,   2.7789,   5.9629,  ...,   3.7091,   9.2960,   5.1145],
        [  2.4659,  -0.5861,   6.8151,  ...,   1.8687,  11.2332,   6.6702],
        [ -0.5806,   2.3784,  19.4988,  ..., -13.8162,  10.2793,   1.1812]],
       grad_fn=<AddmmBackward0>)


In [22]:
# Define the loss functions,as in the torch is given 
loss = nn.CrossEntropyLoss(reduction='none')

In [23]:
# Define the optimizer as SGD
opt = torch.optim.SGD(net.parameters(),lr=0.1)

In [24]:
# Super parameters
epoches = 10
for epoch in range(epoches):
    net.train()
    for X,y in train_iter:
        X,y=X.to(device),y.to(device)
        
        y_hat = net(X)
        opt.zero_grad()
        l = torch.mean(loss(y_hat,y))
        l.backward()
        opt.step()
    
    val = torch.tensor(0,dtype=torch.float,device=device)
    with torch.no_grad():
        for X,y in test_iter:
            X,y=X.to(device),y.to(device)
            y_hat = net(X)
            l = loss(y_hat,y)
            val += l.sum()
        print('epoch'+str(epoch)+'is')
        print(val/len(mnist_test))

            

epoch0is
tensor(2.1326)
epoch1is
tensor(1.6963)
epoch2is
tensor(1.4781)
epoch3is
tensor(1.3482)
epoch4is
tensor(1.3198)
epoch5is
tensor(1.2067)
epoch6is
tensor(1.1573)
epoch7is
tensor(1.1165)
epoch8is
tensor(1.0697)
epoch9is
tensor(1.0678)


## 新坑
假如torch的版本小于1.12的话，是不支持Apple的mps设备的，可能会出现inf的情况！

比较重要的代码：
明白如何初始化参数，两种方法
如何使用nn.module来构建网络模型
如何将模型迁移到device上
对于train过程中发现的细节，首先在上班部分的model.train()的过程中，先将优化的梯度设置为zero-grad()，在计算出loss之后的梯度下降的过程中backward()，再更新参数step(),来达到梯度下降的结果