In [27]:
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import sys
sys.path.append("..") # 为了导⼊上层⽬录的d2lzh_pytorch
import d2lzh_pytorch as d2l
import torch
import numpy as np

In [28]:
# Keep the dataset location in one place and relative to the current user's
# home directory instead of a hard-coded absolute path; torchvision expands
# the leading "~" in `root` itself, so no extra import is needed.
data_root = '~/Datasets/FashionMNIST'
# ToTensor() converts each image to a float tensor when a sample is accessed.
mnist_train = torchvision.datasets.FashionMNIST(
    root=data_root, train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(
    root=data_root, train=False, download=True, transform=transforms.ToTensor())

In [29]:
# Show the dataset class, then the number of training and test examples.
print(type(mnist_train))
n_train, n_test = len(mnist_train), len(mnist_test)
print(n_train, n_test)

<class 'torchvision.datasets.mnist.FashionMNIST'>
60000 10000


In [30]:
# Peek at the first training example; indexing the dataset yields a (feature, label) pair.
feature, label = mnist_train[0]
print(feature.shape, label) # feature layout: channel x height x width

torch.Size([1, 28, 28]) 9


In [31]:
def get_fashion_mnist_labels(labels):
    """Translate numeric class ids into their text names.

    Args:
        labels: iterable of values convertible with ``int()`` (ints, floats,
            0-d tensors, ...), each indexing the ten class names below.

    Returns:
        A list of name strings, one per entry of ``labels``, in the same order.
    """
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankleboot')
    names = []
    for class_id in labels:
        names.append(class_names[int(class_id)])
    return names

In [32]:
def show_fashion_mnist(images, labels):
    """Draw the given images side by side in a single row, each with a title.

    Args:
        images: sized sequence of tensors; each one is reshaped with
            ``view((28, 28))`` before being drawn, so it must hold 28*28 values.
        labels: sequence of titles, paired positionally with ``images``.
    """
    d2l.use_svg_display()
    # The figure handle is not needed, hence the throwaway `_`.
    # squeeze=False keeps the axes in a 2-D array even when there is only one
    # image; with the default, a single subplot comes back as a bare Axes
    # object that cannot be iterated over by zip().
    _, axes_grid = plt.subplots(1, len(images), figsize=(12, 12), squeeze=False)
    for ax, img, lbl in zip(axes_grid[0], images, labels):
        ax.imshow(img.view((28, 28)).numpy())
        ax.set_title(lbl)
        # Hide the tick axes: pixel coordinates add nothing to the preview.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [33]:
# Collect the first ten training samples and display them with text labels.
first_ten = [mnist_train[i] for i in range(10)]
X = [image for image, _ in first_ten]
y = [target for _, target in first_ten]
show_fashion_mnist(X, get_fashion_mnist_labels(y))


RuntimeError: In set_text: could not load glyph

<Figure size 864x864 with 10 Axes>

In [34]:
batch_size = 256
# Use no loader worker processes on Windows and four everywhere else.
num_workers = 0 if sys.platform.startswith('win') else 4
# Only the training loader shuffles; the test loader keeps the dataset order.
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)


In [35]:
# Measure how long one complete pass over the training loader takes;
# the batches themselves are discarded.
start = time.time()
for X, y in train_iter:
    pass
print('%.2f sec' % (time.time() - start))

1.21 sec


## SOFTMAX

In [36]:
# Obtain the loaders from the d2l helper instead of the hand-built ones;
# this rebinds batch_size, train_iter and test_iter from the earlier cells.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)


In [37]:
# Each 28 x 28 image is flattened to 784 inputs; there is one output per class.
num_inputs = 784
num_outputs = 10
# Weights: Gaussian noise (mean 0, std 0.1) drawn with NumPy, stored as float32.
W = torch.tensor(
    np.random.normal(loc=0, scale=0.1, size=(num_inputs, num_outputs)),
    dtype=torch.float32,
)
# Biases start at zero.
b = torch.zeros(num_outputs, dtype=torch.float32)

In [38]:
# Switch on gradient tracking in place for both parameters.
# `b` stays last so the cell still displays it as its output.
W.requires_grad_(True)
b.requires_grad_(True)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

### 多维Tensor操作


In [39]:
# X = torch.tensor([[1,2,3],[4,5,6]])
# print(X.sum(dim=0,keepdim=True))
# print(X.sum(dim=1,keepdim=True))

In [40]:
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Args:
        X: tensor of scores with one example per row (reduction is over dim 1).

    Returns:
        A tensor of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically
    # unchanged (softmax is shift-invariant) but keeps exp() from overflowing
    # to inf, which would otherwise turn the division into inf / inf = NaN.
    # The shift is detached so it acts as a constant during back-propagation.
    row_max = X.max(dim=1, keepdim=True)[0].detach()
    X_exp = (X - row_max).exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    # Broadcasting divides every row by its own normaliser.
    return X_exp / partition

In [41]:
# Sanity check on random input: the second printed tensor holds the row sums
# of the softmax output.
X = torch.rand(2, 5)
X_prop = softmax(X)
print(X_prop, X_prop.sum(dim=1))

tensor([[0.1579, 0.2403, 0.1529, 0.1989, 0.2500],
        [0.1647, 0.2391, 0.1768, 0.2418, 0.1776]]) tensor([1., 1.])


In [42]:
def net(X):
    """Softmax-regression forward pass using the module-level W and b.

    The input is flattened to rows of ``num_inputs`` values, multiplied by W,
    shifted by b, and passed through ``softmax``.
    """
    flat = X.view(-1, num_inputs)
    scores = torch.mm(flat, W) + b
    return softmax(scores)

In [43]:
# Toy predictions for two samples over three classes, plus their true labels.
y_hat = torch.tensor([[0.1, 0.3, 0.6],
                      [0.3, 0.2, 0.5]])
y = torch.tensor([0, 2], dtype=torch.long)
# gather along dim 1 picks, for every row, the entry at that row's label index.
y_hat.gather(1, y.unsqueeze(1))

tensor([[0.1000],
        [0.5000]])

In [44]:
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: 2-D tensor of predicted probabilities, one row per sample.
        y: integer tensor of true class indices, one per row of ``y_hat``.

    Returns:
        A column tensor (one row per sample) holding the negative log of the
        probability assigned to the true class.
    """
    true_class_col = y.view(-1, 1)
    prob_of_truth = y_hat.gather(1, true_class_col)
    return -prob_of_truth.log()

In [45]:
def accuracy(y_hat, y):
    """Fraction of rows whose highest-scoring class equals the true label.

    Args:
        y_hat: 2-D tensor of per-class scores, one row per sample.
        y: tensor of true class indices, one per row.

    Returns:
        The accuracy as a Python float.
    """
    predicted = torch.argmax(y_hat, dim=1)
    hits = predicted.eq(y).float()
    return hits.mean().item()

In [46]:
# Uses the toy y_hat / y tensors from the gather example: only the second
# row's arg-max matches its label.
print(accuracy(y_hat,y))

0.5


In [47]:
def evaluate_accuracy(data_iter, net):
    """Accuracy of ``net`` over every batch produced by ``data_iter``.

    Args:
        data_iter: iterable of ``(X, y)`` batches, ``y`` holding class indices.
        net: callable mapping a batch ``X`` to per-class scores (one row per
            sample).

    Returns:
        Number of correct arg-max predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never back-propagates, so disable autograd: this avoids
    # building a computation graph for every test batch.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [48]:
# print(evaluate_accuracy(test_iter,net))

In [53]:
# Training hyper-parameters: number of passes over the data and the step size.
num_epochs = 5
lr = 0.1
# This function is also saved in the d2lzh package for later reuse.
def train_ch3(net, train_iter, test_iter, loss, num_epochs,
    batch_size,
    params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss / accuracy after every epoch.

    Args:
        net: callable mapping a batch ``X`` to per-class predictions.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        loss: callable ``loss(y_hat, y)``; its result is summed over the batch.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` when no optimizer is given.
        params: parameter tensors updated by ``d2l.sgd`` (manual mode).
        lr: learning rate forwarded to ``d2l.sgd`` (manual mode).
        optimizer: optional optimizer object; when given, its ``zero_grad`` /
            ``step`` are used instead of ``params`` / ``lr``.

    Raises:
        ValueError: if neither ``optimizer`` nor both ``params`` and ``lr``
            are supplied.
    """
    # Fail fast with a clear message instead of an obscure error raised from
    # inside the update step after the first backward pass.
    if optimizer is None and (params is None or lr is None):
        raise ValueError(
            'train_ch3 needs either an optimizer or both params and lr')

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Reset gradients so they do not accumulate across batches.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    # Check every parameter on its own: a parameter that has
                    # not received a gradient yet has grad None even when the
                    # first one already has a gradient.
                    if param.grad is not None:
                        param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                # NOTE(review): presumably a mini-batch SGD step scaled by
                # batch_size - confirm against d2lzh_pytorch.sgd.
                d2l.sgd(params, lr, batch_size)
            else:
                optimizer.step()  # used by the concise softmax-regression implementation
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        # Loss and training accuracy are averaged over the samples seen this epoch.
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
        % (epoch + 1, train_l_sum / n, train_acc_sum / n,
        test_acc))

In [57]:
# Manual-SGD mode: pass the raw parameters and learning rate, no optimizer.
train_ch3(
    net, train_iter, test_iter, cross_entropy,
    num_epochs, batch_size,
    params=[W, b], lr=lr,
)

epoch 1, loss 0.4177, train acc 0.857, test acc 0.839
epoch 2, loss 0.4166, train acc 0.858, test acc 0.837
epoch 3, loss 0.4157, train acc 0.857, test acc 0.838
epoch 4, loss 0.4145, train acc 0.858, test acc 0.838
epoch 5, loss 0.4125, train acc 0.859, test acc 0.841
