In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


In [3]:
train_data = datasets.FashionMNIST(
                                    root='data',
                                    download='True',
                                    train='True',
                                    transform=ToTensor(),

)

test_data = datasets.FashionMNIST(
                                    root='data',
                                    download='True',
                                    train='False',
                                    transform=ToTensor(),

)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:03<00:00, 7862749.50it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 1212914.27it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5077844.67it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 9863991.32it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [4]:
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

#### Build a Neural Network architecture

In [11]:
class NeuralNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
                                                nn.Linear(28*28, 512),
                                                nn.ReLU(),
                                                nn.Linear(512, 512),
                                                nn.ReLU(),
                                                nn.Linear(512, 10)
                                            )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


In [12]:
model = NeuralNetwork()

### Hyperparameters

In [18]:
learning_rate = 2e-3
batch_size = 64
epochs = 3


### Loss Function

In [29]:
loss_fn = nn.CrossEntropyLoss()

### Optimizer to update the model params

In [19]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### 3 Steps of optimizationm

- Reset the gradients of model params optimizer.zero_grad()
- Backpropagate the prediction loss with loss.backward(). Record gradient of loss wrt each param.
- Update the params with gradients collected in backward pass optimizer.step()

### Full Training Loop

In [30]:
def train_loop(dataloader, model, loss_fn, optimizer):

    size = len(dataloader.dataset)
    for  batch, (X, y) in enumerate(dataloader):

        #Prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        #Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #print loss every 100 epochs
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'loss: {loss}', f'current: {current}')


In [32]:
def test_loop(dataloader, model, loss_fn):

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct  = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1)==y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f'Accuracy: {100*correct}', f'Avg loss: {test_loss}')



#### Initialise the loss and optimizer, and pass to train and test loop

In [33]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

epochs = 10
for i in range(epochs):
    print(f'Epoch::: {i+1}\n---------------')
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print('training done!!!')


Epoch::: 1
---------------
loss: 1.8821470737457275 current: 0
loss: 1.8111536502838135 current: 6400
loss: 1.638181447982788 current: 12800
loss: 1.6668294668197632 current: 19200
loss: 1.5005906820297241 current: 25600
loss: 1.4504729509353638 current: 32000
loss: 1.43965482711792 current: 38400
loss: 1.3389925956726074 current: 44800
loss: 1.343194603919983 current: 51200
loss: 1.2226999998092651 current: 57600
Accuracy: 63.54666666666666 Avg loss: 1.233265733413859
Epoch::: 2
---------------
loss: 1.3171625137329102 current: 0
loss: 1.2770317792892456 current: 6400
loss: 1.0923062562942505 current: 12800
loss: 1.198470115661621 current: 19200
loss: 1.048308253288269 current: 25600
loss: 1.070151686668396 current: 32000
loss: 1.0853731632232666 current: 38400
loss: 1.0226857662200928 current: 44800
loss: 1.0491219758987427 current: 51200
loss: 0.9650514125823975 current: 57600
Accuracy: 66.875 Avg loss: 0.9678520476385983
Epoch::: 3
---------------
loss: 1.0346461534500122 current: 

### Save and Load torch model

In [34]:
import torchvision.models as models

In [35]:
model = models.vgg16(pretrained=True)
torch.save(model.state_dict(), 'model_weights.pth')

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/vivek.rawat/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:10<00:00, 53.7MB/s] 


### Load model

In [36]:
model = models.vgg16()

model.load_state_dict(torch.load('model_weights.pth'))
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [37]:
torch.save(model, 'model.pth')

model = torch.load('model.pth')