In [1]:
import torch
from torch import nn
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torch import optim
import time

In [2]:
# Pick the compute device once for the whole notebook: the first CUDA GPU
# when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [3]:
# Build the model: a LeNet-style CNN expressed as one flat nn.Sequential.
lenet = nn.Sequential(
    # Convolutional stage 1: 1 -> 6 channels, 5x5 kernel with 2 pixels of
    # padding, followed by 2x2 average pooling.
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Convolutional stage 2: 6 -> 16 channels, unpadded 5x5 kernel, followed
    # by 2x2 average pooling.
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Classifier head: flatten the 16 x 5 x 5 feature maps (400 values) and
    # map them through 120 and 84 hidden units to 10 output scores.
    nn.Flatten(),
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)

In [16]:
# Sanity check: feed one random single-channel 28x28 image through the network
# one layer at a time and print the shape that comes out of every layer.
X = torch.rand(1, 1, 28, 28, dtype=torch.float32)
for layer in lenet:
    X = layer(X)
    print(type(layer).__name__, 'output shape: \t', X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [4]:
# Load the FashionMNIST training split as tensors.
# download=True fetches the files into `root` on first use and is skipped when
# they are already present, so the notebook also runs from a clean checkout.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Load the held-out FashionMNIST test split the same way.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 128

# Wrap each dataset split in a DataLoader with identical settings
# (sequential order, default options apart from the batch size).
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)


In [6]:
def test_model(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batchs = len(dataloader)
    model.eval()
    test_loss, test_correct = 0, 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            pred = model(inputs)
            test_loss += loss_fn(pred, targets).item()
            test_correct += (pred.argmax(1) == targets).type(torch.float).sum().item()
    test_loss /= num_batchs
    test_correct /= size

    return test_loss, test_correct

In [16]:
def train_model(model, train_dataloader, test_dataloader, creterion, optimizer, scheduler=None, epoch_num=10):
    """Re-initialise ``model`` and train it, reporting metrics after each epoch.

    Args:
        model: ``nn.Module`` to train. Its ``nn.Linear`` and ``nn.Conv2d``
            weights are re-initialised before training and the module is moved
            to the notebook-level ``device``.
        train_dataloader: Loader yielding ``(inputs, targets)`` training batches.
        test_dataloader: Loader passed to ``test_model`` after every epoch.
        creterion: Loss callable ``creterion(pred, targets)`` returning a
            scalar tensor.
        optimizer: Optimizer built over ``model``'s parameters.
        scheduler: Optional learning-rate scheduler. When given, its ``step()``
            is called with no arguments once at the end of every epoch
            (assumes an epoch-level scheduler; verify for schedulers that need
            a metric or per-batch stepping).
        epoch_num: Number of passes over ``train_dataloader``.

    Returns:
        None. Progress is printed once per epoch.
    """
    # Initialisation
    def init_weights(m):
        # Exact type match (not isinstance), so subclasses of these layers are
        # left untouched.
        if type(m) in (nn.Linear, nn.Conv2d):
            # In-place variant; the name without the trailing underscore is
            # deprecated.
            nn.init.xavier_uniform_(m.weight)
    model.apply(init_weights)
    print('training on ', device)
    model.to(device)

    train_size = len(train_dataloader.dataset)
    num_batchs = len(train_dataloader)

    for epoch in range(epoch_num):
        since_time = time.time()
        train_loss, train_correct = 0, 0
        model.train()

        for batch, (inputs, targets) in enumerate(train_dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()

            pred = model(inputs)
            loss = creterion(pred, targets)

            loss.backward()
            optimizer.step()

            # Running totals: sum of batch losses and number of correct
            # predictions made while the weights were still being updated.
            train_loss += loss.item()
            train_correct += (pred.argmax(1) == targets).type(torch.float).sum().item()

        # The timer stops before evaluation, so the interval covers only the
        # training loop of this epoch.
        end_time = time.time()
        train_loss /= num_batchs
        train_correct /= train_size
        _, test_correct = test_model(test_dataloader, model, creterion)
        print(f"epoch: {epoch+1},\t\
                train_loss: {train_loss:>3f},\t\
                train_correct:{train_correct:>3f},\t\
                test_correct:{test_correct:>3f},\t\
                train time:{(end_time - since_time):>3f}")

        # Advance the learning-rate schedule once per epoch, after the
        # optimizer updates of that epoch.
        if scheduler is not None:
            scheduler.step()

    

In [17]:
# Training configuration: number of epochs, loss function, and plain SGD over
# the LeNet parameters with a learning rate of 0.9.
epoch_nums = 10

creterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(params=lenet.parameters(), lr=0.9)

# Launch training; per-epoch metrics are printed by train_model.
train_model(
    model=lenet,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    creterion=creterion,
    optimizer=optimizer,
    epoch_num=epoch_nums,
)

  """


training on  cuda:0
epoch: 1,	                train_loss: 2.160958,	                train_correct:0.164517,	                test_correct:0.532300,	                test time:8.987120
epoch: 2,	                train_loss: 0.895872,	                train_correct:0.639300,	                test_correct:0.659900,	                test time:8.921419
epoch: 3,	                train_loss: 0.650022,	                train_correct:0.744917,	                test_correct:0.738800,	                test time:8.824039
epoch: 4,	                train_loss: 0.553729,	                train_correct:0.785933,	                test_correct:0.783000,	                test time:8.830374
epoch: 5,	                train_loss: 0.494288,	                train_correct:0.813000,	                test_correct:0.818300,	                test time:8.793091
epoch: 6,	                train_loss: 0.455769,	                train_correct:0.829000,	                test_correct:0.832900,	                test time:8.742310
epoch: 7