<a href="https://colab.research.google.com/github/anjal-ai/PyTorch/blob/master/08_ann_fashion_mnist_pytorch_in_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x7b6ca4791f70>

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # normalize to [-1, 1]
])

In [5]:
train_dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform
)

test_dataset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

100%|██████████| 26.4M/26.4M [00:02<00:00, 10.9MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 169kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.15MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 14.4MB/s]


In [6]:
train_dataset.__len__()

60000

In [7]:
test_dataset.__len__()

10000

In [8]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=True)

In [9]:
class Model(nn.Module):
  def __init__(self, in_features):
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(784, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
    )

  def forward(self, X):
    return self.network(X)

In [10]:
# set learning rate and epochs
learning_rate = 0.01
epochs = 100

In [12]:
# instatiate the model
model = Model(28*28).to(device)

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [13]:
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for batch_feature, batch_label in train_loader:
        batch_feature = batch_feature.view(batch_feature.size(0), -1).to(device)  # flatten to [B, 784]
        batch_label = batch_label.to(device)

        pred = model(batch_feature)
        loss = criterion(pred, batch_label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1:02d}, Loss: {total_loss/len(train_loader):.4f}")

Epoch 01, Loss: 1.3419
Epoch 02, Loss: 0.6681
Epoch 03, Loss: 0.5736
Epoch 04, Loss: 0.5242
Epoch 05, Loss: 0.4924
Epoch 06, Loss: 0.4698
Epoch 07, Loss: 0.4529
Epoch 08, Loss: 0.4399
Epoch 09, Loss: 0.4289
Epoch 10, Loss: 0.4186
Epoch 11, Loss: 0.4105
Epoch 12, Loss: 0.4022
Epoch 13, Loss: 0.3953
Epoch 14, Loss: 0.3889
Epoch 15, Loss: 0.3831
Epoch 16, Loss: 0.3774
Epoch 17, Loss: 0.3722
Epoch 18, Loss: 0.3665
Epoch 19, Loss: 0.3629
Epoch 20, Loss: 0.3576
Epoch 21, Loss: 0.3542
Epoch 22, Loss: 0.3498
Epoch 23, Loss: 0.3454
Epoch 24, Loss: 0.3421
Epoch 25, Loss: 0.3376
Epoch 26, Loss: 0.3349
Epoch 27, Loss: 0.3316
Epoch 28, Loss: 0.3276
Epoch 29, Loss: 0.3252
Epoch 30, Loss: 0.3216
Epoch 31, Loss: 0.3185
Epoch 32, Loss: 0.3156
Epoch 33, Loss: 0.3127
Epoch 34, Loss: 0.3101
Epoch 35, Loss: 0.3072
Epoch 36, Loss: 0.3041
Epoch 37, Loss: 0.3017
Epoch 38, Loss: 0.2990
Epoch 39, Loss: 0.2968
Epoch 40, Loss: 0.2945
Epoch 41, Loss: 0.2922
Epoch 42, Loss: 0.2894
Epoch 43, Loss: 0.2874
Epoch 44, L

In [14]:
# set model to eval mode
model.eval()

Model(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [15]:
total = 0
correct = 0

with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    batch_features = batch_feature.to(device)
    batch_labels = batch_label.to(device)
    outputs = model(batch_features)
    _, predicted = torch.max(outputs, 1)
    total = total + batch_labels.shape[0]
    correct = correct + (predicted == batch_labels).sum().item()
print(correct/total)

0.9791666666666666


In [19]:
print(model)

Model(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [20]:
from torchsummary import summary
summary(model, input_size=(784,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 128]         100,480
              ReLU-2                  [-1, 128]               0
            Linear-3                   [-1, 64]           8,256
              ReLU-4                   [-1, 64]               0
            Linear-5                   [-1, 10]             650
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.42
Estimated Total Size (MB): 0.42
----------------------------------------------------------------
