In [1]:
import torch
import torchvision
from torchvision import transforms
import numpy as np
import sys

In [2]:
batch_size = 256

# Raw string literal: the Windows path contains backslashes, and sequences
# such as "\s" or "\D" are invalid escapes in a normal string literal (newer
# Python versions warn about them). The resulting path value is unchanged.
data_root = r'F:\study\ml\DataSet\FashionMNIST'

# Training and test splits; ToTensor() turns each image into a tensor
# (the first training sample has shape [1, 28, 28], see the next cell).
mnist_train = torchvision.datasets.FashionMNIST(
    root=data_root, train=True,
    download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(
    root=data_root, train=False,
    download=True, transform=transforms.ToTensor())

In [4]:
# Size of the image tensor of the first training sample (index 0 of the
# sample is the image, index 1 the label).
mnist_train[0][0].size()

torch.Size([1, 28, 28])

In [116]:
# Demonstrate sum() along each axis (keeping the reduced axis) and over all
# elements of a small 2x3 float tensor.
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.float)
(
    torch.sum(x, dim=0, keepdim=True),
    torch.sum(x, dim=1, keepdim=True),
    torch.sum(x),
)

(tensor([[5., 7., 9.]]), tensor([[ 6.],
         [15.]]), tensor(21.))

In [42]:
def softmax(x):
    """Softmax along dim=1, computed in a numerically stable way.

    Args:
        x: tensor with at least two dimensions; the normalisation runs over
            dim=1, so every slice along that dimension sums to 1.

    Returns:
        Tensor of the same shape as ``x`` holding exp(x) normalised along
        dim=1.
    """
    # Subtracting the per-row maximum does not change the mathematical result
    # (the common factor cancels in the ratio) but keeps exp() from
    # overflowing to inf, which would otherwise turn the division into nan.
    row_max = x.max(dim=1, keepdim=True)[0]
    x_exp = (x - row_max).exp()
    partition = x_exp.sum(dim=1, keepdim=True)
    return x_exp / partition

In [49]:
# Apply softmax to a random 2x5 tensor and print the probabilities together
# with the per-row sums.
x = torch.rand(2, 5)
x_prob = softmax(x)
print(x_prob, '\n', torch.sum(x_prob, dim=1))

tensor([[0.1517, 0.1776, 0.1652, 0.3058, 0.1998],
        [0.1770, 0.1462, 0.1900, 0.2445, 0.2423]]) 
 tensor([1., 1.])


In [117]:
def net(x):
    """Softmax-regression forward pass.

    Reshapes ``x`` to (-1, num_inputs), applies the affine map defined by the
    notebook-level ``w`` and ``b``, and pushes the result through ``softmax``.
    """
    flat = x.view((-1, num_inputs))
    logits = torch.mm(flat, w) + b
    return softmax(logits)

In [93]:
# Toy predictions for two samples over three classes, plus their labels.
y_hat = torch.tensor([[0.1, 0.3, 0.6],
                      [0.3, 0.2, 0.5]])
y = torch.tensor([0, 2], dtype=torch.long)
# Pick, for every row, the predicted probability at the label's column.
torch.gather(y_hat, 1, y.view(-1, 1))

tensor([[0.1000],
        [0.5000]])

In [118]:
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: 2-D tensor of predicted probabilities, one row per sample.
        y: integer tensor of class indices, one per row of ``y_hat``.

    Returns:
        Column tensor (one row per sample) with the negative log of the
        probability that ``y_hat`` assigns to the labelled class.
    """
    label_column = y.view(-1, 1)
    picked = y_hat.gather(1, label_column)
    return -torch.log(picked)

In [95]:
# Display the toy labels and predictions side by side.
(y, y_hat)

(tensor([0, 2]), tensor([[0.1000, 0.3000, 0.6000],
         [0.3000, 0.2000, 0.5000]]))

In [94]:
# Per-sample loss for the toy predictions.
cross_entropy(y_hat=y_hat, y=y)

tensor([[2.3026],
        [0.6931]])

In [97]:
# Natural log of one half, as a reference value for the loss above.
np.log(1 / 2)

-0.6931471805599453

In [119]:
def accuracy(y_hat, y):
    """Fraction of rows of ``y_hat`` whose arg-max (over dim=1) equals ``y``.

    Returns:
        A Python float.
    """
    predicted = y_hat.argmax(dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()

In [59]:
# Accuracy of the toy predictions against the toy labels.
print(accuracy(y_hat=y_hat, y=y))

0.5


In [120]:
def evaluate_accuracy(data_iter, net):
    """Classification accuracy of ``net`` over every batch in ``data_iter``.

    Args:
        data_iter: iterable yielding ``(x, y)`` batches, where ``y`` holds the
            class index of each sample.
        net: callable mapping ``x`` to per-class scores; the prediction is
            the arg-max over dim=1.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Nothing is back-propagated during evaluation, so switch autograd off
    # to avoid building a computation graph for every forward pass.
    with torch.no_grad():
        for x, y in data_iter:
            acc_sum += (net(x).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [109]:
# Accuracy of the current model on the test iterator.
print(evaluate_accuracy(data_iter=test_iter, net=net))

0.1


In [121]:
def sgd(x, lm, params):
    """One in-place mini-batch SGD step.

    Args:
        x: the current batch; only ``len(x)`` is used, as the divisor that
            averages the gradient.
        lm: learning rate.
        params: iterable of tensors to update; each is changed in place by
            ``lm * grad / len(x)``. Parameters whose ``.grad`` is ``None``
            are left untouched.
    """
    batch_len = len(x)
    # The update itself must not be tracked by autograd; no_grad() is the
    # supported way to modify leaf tensors in place (instead of going
    # through ``.data``).
    with torch.no_grad():
        for p in params:
            if p.grad is None:
                # No gradient has been computed for this parameter.
                continue
            p.sub_(lm * p.grad / batch_len)

In [148]:
# Training hyperparameters: number of passes over the data and learning rate.
num_epochs = 5
lr = 0.1


def train_ch3(net,
              train_iter,
              test_iter,
              loss,
              num_epochs,
              batch_size,
              params=None,
              lr=None,
              optimizer=None):
    """Train ``net`` and print loss / accuracy after every epoch.

    Args:
        net: callable mapping a batch ``x`` to per-class scores.
        train_iter: iterable of ``(x, y)`` training batches.
        test_iter: iterable of ``(x, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        loss: callable ``loss(y_hat, y)``; its result is summed over the
            batch before back-propagation.
        num_epochs: number of passes over ``train_iter``.
        batch_size: accepted for interface compatibility; not used here
            (``sgd`` derives the divisor from ``len(x)``).
        params: tensors updated by the hand-written ``sgd`` when no
            ``optimizer`` is given.
        lr: learning rate for ``sgd`` when no ``optimizer`` is given.
        optimizer: optional object exposing ``zero_grad()`` and ``step()``;
            when given it takes precedence over ``params`` / ``lr``.
            NOTE(review): the summed (not averaged) loss is back-propagated,
            so the optimizer's learning rate must account for that.

    Raises:
        ValueError: if neither ``optimizer`` nor both ``params`` and ``lr``
            are supplied.
    """
    if optimizer is None and (params is None or lr is None):
        raise ValueError(
            'train_ch3 needs either an optimizer or both params and lr')

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for x, y in train_iter:
            y_hat = net(x)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step, otherwise
            # backward() would accumulate into them.
            if optimizer is not None:
                optimizer.zero_grad()
            else:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()
            l.backward()

            # Parameter update: hand-written SGD unless an optimizer is given.
            if optimizer is None:
                sgd(x, lr, params)
            else:
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d ,loss %.4f,train acc %.3f,test acc %.3f' %
              (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [149]:
# Train with the hand-written SGD on the notebook-level parameters.
# NOTE(review): `w` and `b` are only created in a later cell of this
# notebook, so this cell depends on earlier kernel state.
train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[w, b],
    lr=lr,
)

epoch 1 ,loss 0.4728,train acc 0.841,test acc 0.828
epoch 2 ,loss 0.4653,train acc 0.843,test acc 0.824
epoch 3 ,loss 0.4582,train acc 0.846,test acc 0.832
epoch 4 ,loss 0.4524,train acc 0.847,test acc 0.819
epoch 5 ,loss 0.4477,train acc 0.848,test acc 0.833


In [150]:
# Model dimensions: each image is flattened to 784 values (28 * 28) and there
# are 10 output classes.
num_inputs = 784
num_outputs = 10

# Weights drawn from N(0, 0.01^2), bias initialised to zero; both track
# gradients so they can be trained.
w = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)
b = torch.zeros(num_outputs, dtype=torch.float, requires_grad=True)
b

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [151]:
# Train again, starting from the freshly initialised parameters.
train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[w, b],
    lr=lr,
)

epoch 1 ,loss 0.7826,train acc 0.749,test acc 0.787
epoch 2 ,loss 0.5702,train acc 0.813,test acc 0.809
epoch 3 ,loss 0.5242,train acc 0.828,test acc 0.817
epoch 4 ,loss 0.5008,train acc 0.833,test acc 0.814
epoch 5 ,loss 0.4854,train acc 0.836,test acc 0.826


Predict

In [152]:
def get_fashion_mnist_labels(labels):
    """Translate numeric class indices into their text names.

    Args:
        labels: iterable of values convertible with ``int()``; each is used
            as an index into the list of ten class names.

    Returns:
        List of class-name strings, in the same order as ``labels``.
    """
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankleboot']
    names = []
    for label in labels:
        names.append(text_labels[int(label)])
    return names

In [153]:
import matplotlib.pyplot as plt
%matplotlib auto

Using matplotlib backend: Qt5Agg


In [156]:
# Take the first batch from the test iterator. The built-in next() is the
# Python 3 way to advance an iterator; the `.next()` method is not part of
# the iterator protocol.
x, y = next(iter(test_iter))
true_labels = get_fashion_mnist_labels(y.numpy())
pred_labels = get_fashion_mnist_labels(net(x).argmax(dim=1).numpy())
# Title of each image: true label on the first line, prediction on the second.
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Show the first ten images of the batch in a single row.
fig, axs = plt.subplots(1, 10, figsize=(12, 12))
for ax1, x1, y1 in zip(axs, x[:10], titles[:10]):
    ax1.imshow(x1.view(28, 28).numpy())
    ax1.set_title(y1)
plt.show()
    
    
