# softmax和分类模型



In [2]:
# Load the packages and modules used throughout this notebook.
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
# Extend the module search path before importing d2lzh_pytorch
# (presumably the package lives under this directory — confirm).
sys.path.append("/home/kesci/input")
import d2lzh_pytorch as d2l

# Report the installed PyTorch version.
print(torch.__version__)

1.2.0


## 初始化参数和获取数据

In [3]:
# Mini-batch size; the same value is handed to the training routine later on.
batch_size = 256
# Build the training and test iterators (presumably from the Fashion-MNIST
# copy stored under `root` — confirm against d2l.load_data_fashion_mnist).
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065')

## 定义网络模型

In [4]:
# Number of features per sample: a 28x28 image flattened into one vector.
num_inputs = 28 * 28
# Number of values the linear layer produces for each sample.
num_outputs = 10

class LinearNet(nn.Module):
    """Single linear layer applied to inputs flattened per sample.

    Args:
        num_inputs: Number of features per sample after flattening.
        num_outputs: Number of output values per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and apply the linear layer.

        Args:
            x: Input tensor whose first dimension is the batch, e.g. of
                shape ``(batch, 1, 28, 28)``.

        Returns:
            The output of the linear layer for the flattened input.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors; it only copies when a view is
        # impossible, so contiguous inputs behave exactly as before.
        flat = x.reshape(x.shape[0], -1)
        return self.linear(flat)
    
# net = LinearNet(num_inputs, num_outputs)

class FlattenLayer(nn.Module):
    """Collapse every dimension after the first into a single dimension."""

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``.

        Args:
            x: Input tensor of shape ``(batch, *, *, ...)``.

        Returns:
            A 2-D tensor that keeps the first dimension and merges the rest.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors; it only copies when a view is
        # impossible, so contiguous inputs behave exactly as before.
        return x.reshape(x.shape[0], -1)

from collections import OrderedDict
# Assemble the model from named stages so each one is reachable as an
# attribute (e.g. net.linear). A custom LinearNet(num_inputs, num_outputs)
# could be used in place of the plain nn.Linear stage instead.
net = nn.Sequential()
net.add_module('flatten', FlattenLayer())
net.add_module('linear', nn.Linear(num_inputs, num_outputs))

## 初始化模型参数

In [5]:
# Fill the linear layer's weights with samples from a normal distribution
# (mean 0, standard deviation 0.01).
init.normal_(net.linear.weight, mean=0, std=0.01)
# Set every bias entry to 0. As the last expression of the cell, the
# returned parameter is what the notebook echoes below.
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [6]:
# Print every parameter tensor of the model to inspect the initial values.
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-7.5324e-03,  6.8929e-03,  2.3800e-03,  ...,  1.5972e-02,
         -4.5709e-03,  2.3860e-03],
        [ 5.8065e-03,  8.7452e-04, -5.7572e-03,  ...,  3.9490e-03,
         -4.6580e-03,  1.0868e-02],
        [-7.8826e-03, -5.7571e-03,  4.0761e-03,  ..., -9.6968e-05,
          1.0374e-02, -4.2762e-03],
        ...,
        [ 1.2281e-02,  2.7427e-03,  2.3164e-03,  ...,  5.5495e-03,
         -3.2194e-03, -1.3490e-03],
        [-6.3757e-04, -1.1844e-02, -2.8041e-03,  ..., -1.0192e-02,
         -3.3204e-03,  3.5284e-03],
        [-7.0020e-03,  1.3793e-03,  1.7611e-03,  ...,  1.1572e-03,
         -8.8344e-03,  5.9076e-03]], requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)


## 定义损失函数

In [7]:
# Cross-entropy criterion constructed with its default arguments.
loss = nn.CrossEntropyLoss() # its constructor signature is shown below
# class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')

## 定义优化函数

In [8]:
# Plain SGD over all model parameters with a learning rate of 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1) # its constructor signature is shown below
# class torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False)

## 训练

In [9]:
# Number of epochs to train for; the log below reports one line per epoch.
num_epochs = 5
# NOTE(review): the two positional None arguments look like optional inputs
# that are unnecessary once an optimizer object is supplied (presumably
# params and lr) — confirm against d2l.train_ch3's signature.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.750, test acc 0.780
epoch 2, loss 0.0022, train acc 0.813, test acc 0.813
epoch 3, loss 0.0021, train acc 0.827, test acc 0.789
epoch 4, loss 0.0020, train acc 0.832, test acc 0.821
epoch 5, loss 0.0019, train acc 0.836, test acc 0.822
