# 3.9 多层感知机的从零开始实现

In [1]:
import torch
import numpy as np
import sys
from torch import nn
from torch.nn import init
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# sys.path.append("..") # 为了导入上层目录的d2lzh_pytorch
#import d2lzh_pytorch as d2l

print(torch.__version__)

1.3.1


## 3.9.1 获取和读取数据

In [2]:
def load_data(dir):
    """
    读入数据
    """
    with open(dir, 'r') as f:
        lines = f.readlines()
        lines = [line.strip('\n').split(',') for line in lines]
    xData = [line[1:] for line in lines]
    xData = np.array(xData)
    xData = xData.astype(float)
    yLabel = [line[0] for line in lines]
    yLabel = np.array(yLabel)
    yLabel = yLabel.astype(int)
    return xData, yLabel


In [3]:
def get_standard_data(xData):
    """
    将训练集和测试集标准化
    """
    transfer = StandardScaler()
    xData = transfer.fit_transform(xData)
#     X_test = transfer.transform(X_test)
    return xData

In [4]:
def random_split_data(xData, yLabel, rate=0.2, random_seed1=1):
    """
    随机分配训练集和测试集，并给予随机种子，方便复现
    一般数据训练集占80%，测试集占20%
    """
    X_train, X_test, y_train, y_test = train_test_split(xData, yLabel, test_size=rate, random_state=random_seed1)
    return X_train, y_train, X_test, y_test

In [5]:
def load_data_iris(batch_size, path='/Users/zhangxiao/Desktop/Datasets/iris.txt'):
    xData, yLabel = load_data(path)
    xData = get_standard_data(xData)
    X_train, y_train, X_test, y_test = random_split_data(xData,yLabel)
    y_train = y_train -1 # torch 只能从0开始做标签否则报错
    y_test = y_test -1 # torch 只能从0开始做标签否则报错
    X_train = torch.tensor(X_train)
    y_train = torch.from_numpy(y_train)
    X_test = torch.from_numpy(X_test)
    y_test = torch.from_numpy(y_test)
    iris_train = torch.utils.data.TensorDataset(X_train, y_train)
    iris_test = torch.utils.data.TensorDataset(X_test, y_test)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(iris_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(iris_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter, X_test

In [6]:
batch_size = 40
train_iter, test_iter, X_test = load_data_iris(batch_size)

# 定义拉平函数

In [7]:
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

## 3.9.4 定义模型

In [8]:
num_inputs, num_outputs, num_hiddens = 4, 3, 64
net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
#         nn.BatchNorm1d(num_hiddens),
#         nn.ReLU(),
#         nn.Linear(num_hiddens, num_hiddens),
#         nn.BatchNorm1d(num_hiddens),
#         nn.ReLU(),
#         nn.Linear(num_hiddens, num_hiddens),
        nn.BatchNorm1d(num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs), 
        )
    
for params in net.parameters():
    init.normal_(params, mean=0, std=1)

## 3.9.5 定义损失函数

In [9]:
loss = torch.nn.CrossEntropyLoss() #里面包括一个softmax计算和一个交叉熵损失函数

# 梯度下降函数

In [10]:
optimizer = torch.optim.Adam(net.parameters(), lr=0.05, weight_decay=0)

# 定义准确率函数

In [11]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        X = X.clone().detach().float()
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n, net(X)
# def evaluate_accuracy(data_iter, net):
#     acc_sum, n = 0.0, 0
#     for X, y in data_iter:
#         if isinstance(net, torch.nn.Module):
#             net.eval() # 评估模式, 这会关闭dropout
#             acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
#             net.train() # 改回训练模式
#         else: # 自定义的模型
#             if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
#                 # 将is_training设置成False
#                 acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
#             else:
#                 acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
#         n += y.shape[0]
#     return acc_sum / n

# 定义训练函数

In [12]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            #X = torch.tensor(X, dtype=torch.float32)
            X = X.clone().detach().float() #新版本需要的对X进行float转化
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到
            
            
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc, pro = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
    return pro, net
    

## 3.9.6 训练模型

In [13]:
num_epochs = 5
pro, net = train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
# print(pro)
# print(net)

epoch 1, loss 0.0643, train acc 0.475, test acc 0.900
epoch 2, loss 0.0209, train acc 0.792, test acc 0.900
epoch 3, loss 0.0059, train acc 0.925, test acc 0.700
epoch 4, loss 0.0086, train acc 0.858, test acc 0.700
epoch 5, loss 0.0053, train acc 0.917, test acc 0.900


In [14]:
# for name, param in net.named_parameters(): 
#     print(name, param.size())

In [15]:
# X_test = X_test.clone().detach().float()
# print(X_test)
# net(X_test)