In [8]:
import torch
from torch import nn
import torchvision
from torch.utils import data
from torchvision import transforms
import matplotlib.pyplot as plt
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

In [9]:
# Mini-batch size shared by the training and test loaders.
batch_size = 256
# Convert PIL images to float tensors.
trans = transforms.ToTensor()
# Raw string: in a normal literal `\桌`, `\p` and `\d` are invalid escape
# sequences, which newer Python versions warn about (and plan to reject).
# The resulting path value is unchanged.
data_root = r'D:\桌面\pyTorch\data'
# download=False: the dataset is expected to already exist under data_root.
mnist_train = torchvision.datasets.FashionMNIST(root=data_root, train=True, transform=trans, download=False)
mnist_test = torchvision.datasets.FashionMNIST(root=data_root, train=False, transform=trans, download=False)
# Only the training loader is shuffled; both use 4 worker processes.
train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4)

In [21]:
# Layer sizes of the one-hidden-layer MLP: flattened 28x28 input,
# 256 hidden units, 10 output classes.
num_inputs, num_outputs, num_hiddens = 28 * 28, 10, 256

# Weights are standard-normal samples scaled by 0.01; biases start at zero.
# nn.Parameter already marks its tensor as requiring gradients.
W1 = nn.Parameter(0.01 * torch.randn(num_inputs, num_hiddens))
b1 = nn.Parameter(torch.zeros(num_hiddens))
W2 = nn.Parameter(0.01 * torch.randn(num_hiddens, num_outputs))
b2 = nn.Parameter(torch.zeros(num_outputs))

# Parameter list handed to the optimizer; the bare name displays it in the notebook.
params = [W1, b1, W2, b2]
params

[Parameter containing:
 tensor([[ 0.0280, -0.0095,  0.0028,  ..., -0.0146,  0.0120, -0.0131],
         [ 0.0066,  0.0016,  0.0005,  ...,  0.0016,  0.0043,  0.0058],
         [ 0.0017,  0.0143, -0.0135,  ..., -0.0027,  0.0023,  0.0040],
         ...,
         [-0.0020,  0.0052,  0.0163,  ..., -0.0046,  0.0300, -0.0017],
         [ 0.0153,  0.0084,  0.0124,  ..., -0.0105,  0.0132,  0.0062],
         [ 0.0199,  0.0079,  0.0034,  ...,  0.0130,  0.0014,  0.0019]],
        requires_grad=True),
 Parameter containing:
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [22]:
## 0 - 1
def relu(X):
    """Element-wise rectifier: the maximum of each entry of ``X`` and zero.

    The zero tensor is created with ``zeros_like`` so it matches ``X`` in
    shape and dtype.
    """
    return torch.max(X, torch.zeros_like(X))
# Demo input defined in this cell so it runs on a fresh kernel (previously `x`
# had to exist from an earlier, no-longer-present cell). The values match the
# recorded output of this cell: one negative entry to show the clipping.
x = torch.tensor([[-1, 2, 3], [4, 5, 6]])
x, relu(x)

(tensor([[-1,  2,  3],
         [ 4,  5,  6]]),
 tensor([[0, 2, 3],
         [4, 5, 6]]))

In [14]:
def net(X):
    """Forward pass of the from-scratch MLP.

    Flattens ``X`` to rows of ``num_inputs`` features, applies the hidden
    affine map (``W1``, ``b1``) followed by ``relu``, then the output affine
    map (``W2``, ``b2``). The result is returned without a softmax.
    """
    flat = X.reshape((-1, num_inputs))
    hidden = relu(flat @ W1 + b1)
    logits = hidden @ W2 + b2
    return logits
# reduction='none' keeps one loss value per sample; the training loop takes
# .mean() of it for the backward pass and .sum() of it for the running metric.
loss = nn.CrossEntropyLoss(reduction='none')

In [23]:
#device = torch.device('cuda')
#net = net.to(device)
class Accumulator:
    """Keeps ``n`` running float sums, addressed by position."""

    def __init__(self, n):
        # One zero-initialised slot per tracked quantity.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument to the slot at the same index."""
        updated = []
        for running, increment in zip(self.data, args):
            updated.append(running + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to zero, keeping the number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]
def accuracy(y_hat, y):
    """Return, as a float, how many predictions in ``y_hat`` equal ``y``.

    When ``y_hat`` has more than one dimension and more than one column, it is
    reduced to the arg-max index along dimension 1 first; otherwise it is
    compared as given.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(dim=1) if has_class_scores else y_hat
    # Cast to the label dtype before comparing, then again before summing.
    hits = predicted.to(y.dtype) == y
    return float(hits.to(y.dtype).sum())

def evaluate_accuracy(net, data_iter):
    """Return the fraction of correctly predicted samples over ``data_iter``.

    ``net`` may be an ``nn.Module`` (switched to eval mode first) or any
    callable mapping a batch to predictions. Gradients are disabled while
    iterating.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: correct predictions, slot 1: samples seen.
    totals = Accumulator(2)
    with torch.no_grad():
        for features, labels in data_iter:
            totals.add(accuracy(net(features), labels), labels.numel())
    n_correct, n_seen = totals[0], totals[1]
    return n_correct / n_seen
# Training setup for the from-scratch MLP: plain SGD over the hand-made parameters.
num_epochs = 10
lr = 0.1
updater = torch.optim.SGD(params, lr=lr)

for epoch in range(num_epochs):
    # Slots: summed loss, number of correct predictions, number of samples.
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        # `loss` yields per-sample values: step on their mean, log their sum.
        updater.zero_grad()
        l.mean().backward()
        updater.step()
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Per-epoch averages over all samples seen.
    n_samples = metric[2]
    train_loss, train_acc = metric[0] / n_samples, metric[1] / n_samples
    train_metrics = train_loss, train_acc
    print(f'loss:{train_loss}, acc:{train_acc}')

loss:1.036898611577352, acc:0.6375666666666666
loss:0.5987261549631755, acc:0.7904166666666667
loss:0.5170480818430583, acc:0.8187
loss:0.47896006870269775, acc:0.83215
loss:0.4564180746714274, acc:0.83975
loss:0.43108238143920896, acc:0.8493
loss:0.4182767381032308, acc:0.8536166666666667
loss:0.4058627873102824, acc:0.8579166666666667
loss:0.39535801480611166, acc:0.8607833333333333
loss:0.38394451745351155, acc:0.8646333333333334


In [26]:
# High-level API, CPU run
import time

# Same MLP as the from-scratch version, expressed with built-in layers:
# flatten -> 784x256 linear -> ReLU -> 256x10 linear.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

def init_weights(m):
    """Re-initialise the weight of a linear layer from a normal distribution (std 0.01).

    Meant to be passed to ``Module.apply``, which calls it on every
    sub-module. Modules that are not linear layers are left untouched, and
    the bias of a linear layer is not modified.
    """
    # isinstance rather than type equality, so subclasses of nn.Linear
    # are initialised as well.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
# Apply the custom initialisation to every sub-module of the network.
net.apply(init_weights)

# Hyperparameters. batch_size is assigned here but not read again in this cell.
batch_size = 256
lr = 0.1
num_epochs = 10
# Per-sample losses (no reduction) so the loop can take both mean and sum.
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock, which can be adjusted.
start = time.perf_counter()
for epoch in range(num_epochs):
    # Slots: summed loss, number of correct predictions, number of samples.
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        trainer.zero_grad()
        # Step on the batch-mean loss; the summed loss is only logged.
        l.mean().backward()
        trainer.step()
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Per-epoch averages over all samples seen.
    train_metrics = metric[0] / metric[2], metric[1] / metric[2]
    train_loss, train_acc = train_metrics
    print(f'loss:{train_loss}, acc:{train_acc}')
print('CPU cost time:', time.perf_counter() - start)

loss:1.0434727289835612, acc:0.6387
loss:0.5982210199991862, acc:0.7891166666666667
loss:0.5177928411483764, acc:0.81795
loss:0.47709314778645834, acc:0.8314666666666667
loss:0.4526896909713745, acc:0.8391666666666666
loss:0.4343919444402059, acc:0.8462666666666666
loss:0.4173558848063151, acc:0.8531
loss:0.4060263474146525, acc:0.8564833333333334
loss:0.3932321421941121, acc:0.8609833333333333
loss:0.3833125560760498, acc:0.8654
CPU cost time: 70.77964067459106


In [28]:
# High-level API, GPU run
device = torch.device('cuda')

import time

# Same layer stack as the CPU run, built and moved to `device` in one expression.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
).to(device)
def init_weights(m):
    """Re-initialise the weight of a linear layer from a normal distribution (std 0.01).

    Meant to be passed to ``Module.apply``, which calls it on every
    sub-module. Modules that are not linear layers are left untouched, and
    the bias of a linear layer is not modified.
    """
    # isinstance rather than type equality, so subclasses of nn.Linear
    # are initialised as well.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
# Apply the custom initialisation to every sub-module of the network.
net.apply(init_weights)

# Hyperparameters. batch_size is assigned here but not read again in this cell.
batch_size = 256
lr = 0.1
num_epochs = 10
# Per-sample losses (no reduction) so the loop can take both mean and sum.
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)

# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock, which can be adjusted.
start = time.perf_counter()
for epoch in range(num_epochs):
    # Slots: summed loss, number of correct predictions, number of samples.
    metric = Accumulator(3)
    for X, y in train_iter:
        # Move each batch to the same device as the model.
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        trainer.zero_grad()
        # Step on the batch-mean loss; the summed loss is only logged.
        l.mean().backward()
        trainer.step()
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Per-epoch averages over all samples seen.
    train_metrics = metric[0] / metric[2], metric[1] / metric[2]
    train_loss, train_acc = train_metrics
    print(f'loss:{train_loss}, acc:{train_acc}')
print('GPU cost time:', time.perf_counter() - start)

loss:1.037903574625651, acc:0.6416666666666667
loss:0.5977546078999837, acc:0.7898
loss:0.5200334239959716, acc:0.81805
loss:0.4790892908732096, acc:0.8305333333333333
loss:0.4503727414449056, acc:0.8411333333333333
loss:0.43004126262664794, acc:0.8490333333333333
loss:0.4187354042371114, acc:0.8526333333333334
loss:0.40321351121266685, acc:0.8574666666666667
loss:0.39181247444152834, acc:0.8615
loss:0.38217796007792154, acc:0.8646166666666667
GPU cost time: 64.6415102481842
