In [1]:
import time
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms 

In [10]:
# hyper params
batch_size = 256
learning_rate = 1e-2
num_epochs = 100

In [11]:
# downlaod Fashion MNIST dataset
train_dataset = datasets.FashionMNIST(
    root='./datasets', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.FashionMNIST(
    root='./datasets', train=False, transform=transforms.ToTensor())

In [12]:
# using dataset to build data loader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
# check about data
len(train_dataset), len(train_loader)
# for one epoch, we train all the data once. In one epoch we use "for" to data loader to traverse all batchs.
# Here one epoch we train 60000 records, in one epoch we run 938 batchs, each batch contains 64 records(batch size)

(60000, 235)

In [16]:
# logistic regression model
class logistic_regression(nn.Module):
    def __init__(self, in_dim, n_class):
        super(logistic_regression, self).__init__()
        self.fc = nn.Linear(in_dim, n_class)

    def forward(self, x):
        out = self.fc(x)
        # out = torch.sigmoid(out)
        return out

In [17]:
if __name__ == "__main__":
    # init 
    model = logistic_regression(28 * 28, 10)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        model = model.cuda()
    criterion = nn.CrossEntropyLoss() # loss function
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # training
    print("Begin training...")

    for epoch in range(num_epochs):
        print("*" * 10)
        print(f"epoch {epoch + 1}")
        since = time.time()
        running_loss = 0.
        running_acc = 0.

        model.train()
        for i, data in enumerate(train_loader):
            img, label = data
            img = img.view(img.size(0), -1)
            if use_gpu:
                img = img.cuda()
                label = label.cuda()

            # forward
            out = model(img)
            loss = criterion(out, label)
            running_loss += loss.item()
            _, pred = torch.max(out, 1)
            running_acc += (pred == label).float().sum()

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # log
            if (i+1) % 50 == 0:
                print(f"Finish [{i+1} / {len(train_loader)}] batch, accuracy = {(pred == label).float().mean()}")
        print(f"Finish [{epoch+1} / {num_epochs}], running loss = {running_loss}, accuracy = {running_acc / len(train_dataset)}")


Begin training...
**********
epoch 1
Finish [50 / 235] batch, accuracy = 0.62109375
Finish [100 / 235] batch, accuracy = 0.625
Finish [150 / 235] batch, accuracy = 0.69921875
Finish [200 / 235] batch, accuracy = 0.7109375
Finish [1 / 100], running loss = 321.83249312639236, accuracy = 0.6301666498184204
**********
epoch 2
Finish [50 / 235] batch, accuracy = 0.6796875
Finish [100 / 235] batch, accuracy = 0.71875
Finish [150 / 235] batch, accuracy = 0.703125
Finish [200 / 235] batch, accuracy = 0.75
Finish [2 / 100], running loss = 215.75921231508255, accuracy = 0.7149166464805603
**********
epoch 3
Finish [50 / 235] batch, accuracy = 0.7578125
Finish [100 / 235] batch, accuracy = 0.7734375
Finish [150 / 235] batch, accuracy = 0.75
Finish [200 / 235] batch, accuracy = 0.73828125
Finish [3 / 100], running loss = 189.03332167863846, accuracy = 0.7481499910354614
**********
epoch 4
Finish [50 / 235] batch, accuracy = 0.74609375
Finish [100 / 235] batch, accuracy = 0.7421875
Finish [150 / 23

In [23]:
for i in train_loader:
    print(i)
    break

[tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0039],
          ...,
          [0.0000, 0.0000, 0.2039,  ..., 0.7137, 0.7961, 0.3137],
          [0.0000, 0.0000, 0.0000,  ..., 0.3608, 0.3686, 0.0314],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],


        [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  .

In [32]:
i[0].numpy().shape, i[1].numpy().shape
# batch size 64

((64, 1, 28, 28), (64,))

In [36]:
i[0].view(64, -1).shape

torch.Size([64, 784])