## PyTorch理解更多神经网络优化方法
Ref:
1. https://morvanzhou.github.io/tutorials/machine-learning/torch/3-06-optimizer/

### PyTorch优化器

In [12]:
from torch import nn

class Net(nn.Module):
    """Feed-forward network: two hidden stages with dropout, then a linear output.

    Args:
        in_dim: number of input features per sample.
        n_hidden_1: width of the first hidden layer.
        n_hidden_2: width of the second hidden layer.
        out_dim: number of output units.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        # Stage 1: affine -> dropout(p=0.5) -> in-place ReLU.
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1),
            nn.Dropout(p=0.5),
            nn.ReLU(inplace=True),
        )
        # Stage 2: affine -> dropout(p=0.3) -> in-place ReLU.
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.Dropout(p=0.3),
            nn.ReLU(inplace=True),
        )
        # Output stage: plain affine map, no activation.
        self.layer3 = nn.Sequential(
            nn.Linear(n_hidden_2, out_dim),
        )

    def forward(self, x):
        """Pass ``x`` through layer1, layer2 and layer3 in order and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return x



In [18]:
import torch
from torch import nn, optim
import torch.utils.data as Data
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Hyperparameters: mini-batch size, learning rate, number of training epochs.
batch_size, LR, num_epoches = 32, 0.01, 10

# Preprocessing pipeline. ToTensor() converts each image to a tensor (scaled
# to [0, 1] per the torchvision docs); Normalize then applies (x - 0.5) / 0.5
# on the single channel, so the network sees values in roughly [-1, 1].
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# MNIST train / test splits, both read from ./data with the same transform.
# The training split is requested with download=False.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=data_tf,
    download=False,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=data_tf,
)

# Batched loaders; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)


# Build one identically-shaped network per optimizer so that their training
# behaviour can be compared.
model_SGD = Net(28 * 28, 300, 100, 10)
model_adagrad = Net(28 * 28, 300, 100, 10)
model_Momentum = Net(28 * 28, 300, 100, 10)
model_RMSprop = Net(28 * 28, 300, 100, 10)
model_Adam = Net(28 * 28, 300, 100, 10)

# The order here MUST match `optimizers` below: the two lists are zipped
# together during training, so a missing or misplaced entry would make an
# optimizer step on parameters that never received gradients.
models = [model_SGD, model_adagrad, model_Momentum, model_RMSprop, model_Adam]

# All five networks share the same architecture, so printing one is enough.
print(model_SGD)

# Move the networks to the GPU *before* constructing the optimizers, so that
# any optimizer state created at construction time lives on the same device
# as the parameters. Module.cuda() returns the module itself, so the
# model_* names above keep referring to the same objects.
if torch.cuda.is_available():
    models = [m.cuda() for m in models]

# Loss function and one optimizer per model (same base learning rate).
criterion = nn.CrossEntropyLoss()

opt_SGD = optim.SGD(model_SGD.parameters(), lr=LR)
opt_Adagrad = optim.Adagrad(model_adagrad.parameters(), lr=LR)
opt_Momentum = optim.SGD(model_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = optim.RMSprop(model_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = optim.Adam(model_Adam.parameters(), lr=LR, betas=(0.9, 0.99))

# Same order as `models`.
optimizers = [opt_SGD, opt_Adagrad, opt_Momentum, opt_RMSprop, opt_Adam]


# Train every model on the same sequence of mini-batches and record the
# per-step training loss of each one.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for model in models:
    # Done once up front instead of on every step; a no-op when the model
    # already lives on `device`.
    model.to(device)

# loss_history[k] collects the training losses paired with optimizers[k].
loss_history = [[] for _ in optimizers]

for epoch in range(num_epoches):
    print('Epoch:', epoch + 1, 'Training...')
    for img, label in train_loader:
        # Flatten each image to a vector: (batch, ...) -> (batch, features).
        img = img.view(img.size(0), -1).to(device)
        label = label.to(device)

        # The update loop is nested inside the batch loop so that every
        # model takes one optimizer step per mini-batch (not one per epoch).
        for model, opt, l_his in zip(models, optimizers, loss_history):
            output = model(img)
            loss = criterion(output, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            l_his.append(loss.item())



# Model evaluation.
# NOTE: everything below is wrapped in a bare triple-quoted string, so it is
# disabled: none of it executes. Being the last expression of the notebook
# cell, the string itself is merely echoed as the cell's output.
# NOTE(review): the disabled code evaluates a single `model` over test_loader;
# which network that name refers to depends on earlier cell state - confirm
# before re-enabling.
'''
model.eval()
eval_loss = 0
eval_acc = 0
for data in test_loader:
    img, label = data
    img = img.view(img.size(0), -1)
    if torch.cuda.is_available():
        img = img.cuda()
        label = label.cuda()

    out = model(img)
    loss = criterion(out, label)
    eval_loss += loss.data.item() * label.size(0)
    _, pred = torch.max(out, 1)
    num_correct = (pred == label).sum()
    eval_acc += num_correct.item()
print('Test Loss: {:.4f}, Acc: {:.4f}'.format(eval_loss / (len(test_dataset)),
                                              eval_acc / (len(test_dataset))))
'''

Net(
  (layer1): Sequential(
    (0): Linear(in_features=784, out_features=300, bias=True)
    (1): Dropout(p=0.5)
    (2): ReLU(inplace)
  )
  (layer2): Sequential(
    (0): Linear(in_features=300, out_features=100, bias=True)
    (1): Dropout(p=0.3)
    (2): ReLU(inplace)
  )
  (layer3): Sequential(
    (0): Linear(in_features=100, out_features=10, bias=True)
  )
)
Epoch: 1 Training...
Epoch: 2 Training...
Epoch: 3 Training...
Epoch: 4 Training...
Epoch: 5 Training...
Epoch: 6 Training...
Epoch: 7 Training...
Epoch: 8 Training...
Epoch: 9 Training...
Epoch: 10 Training...


"\nmodel.eval()\neval_loss = 0\neval_acc = 0\nfor data in test_loader:\n    img, label = data\n    img = img.view(img.size(0), -1)\n    if torch.cuda.is_available():\n        img = img.cuda()\n        label = label.cuda()\n\n    out = model(img)\n    loss = criterion(out, label)\n    eval_loss += loss.data.item() * label.size(0)\n    _, pred = torch.max(out, 1)\n    num_correct = (pred == label).sum()\n    eval_acc += num_correct.item()\nprint('Test Loss: {:.4f}, Acc: {:.4f}'.format(eval_loss / (len(test_dataset)),\n                                              eval_acc / (len(test_dataset))))\n"

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Legend names, in the same order as the per-optimizer lists in loss_history.
labels = ['SGD', 'Adagrad', 'Momentum', 'RMSprop', 'Adam']

# Draw every optimizer's loss curve on ONE figure so they can be compared.
# zip() pairs each curve with its legend name; iterating loss_history
# directly would try to unpack each loss list into two names and fail.
for curve_name, l_his in zip(labels, loss_history):
    plt.plot(l_his, label=curve_name)

# Decorate and show the figure once, after all curves have been added.
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
# NOTE(review): the y-range is kept as (0, 0.2); cross-entropy over 10 classes
# starts near ln(10) ~= 2.3, so much of each curve may fall outside this
# window - confirm the intended range.
plt.ylim((0, 0.2))
plt.show()