In [1]:
import matplotlib.pyplot as plt
import torch
from torch import nn,optim
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

In [6]:
class Network(nn.Module):
    def __init__(self, input_size, output_size, hidden_sizes, p=0.5):
        super().__init__()
        self.hidden = nn.ModuleList([nn.Linear(input_size, hidden_sizes[0])])
        
        hidden_layers = zip(hidden_sizes[:-1], hidden_sizes[1:])
        self.hidden.extend(nn.Linear(h1, h2) for h1,h2 in hidden_layers)
        
        self.output = nn.Linear(hidden_sizes[-1], output_size)
        
        self.dropout = nn.Dropout(p=p)
        
    def forward(self, x):
        x = x.view(x.shape[0],-1)
        for each in self.hidden:
            x = self.dropout(F.relu(each(x)))
        
        x = self.output(x)
        x = F.log_softmax(x, dim=1)
        return x

In [7]:
model = Network(784, 10, [256,128,64])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [8]:
epochs = 25

train_losses, test_losses = [],[]

for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        log_ps = model(images)
        optimizer.zero_grad()
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss
    else:
        test_loss = 0
        accuracy = 0
        with torch.no_grad():
            model.eval()
            for images, labels in testloader:
                log_ps = model(images)
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                test_loss += criterion(log_ps, labels)
        model.train()
    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))
        
    print('Epoch: {}/{}, Train_loss: {}, Test_loss: {}, Accuracy: {}'.format(e+1,epochs,
                                                                            running_loss/len(trainloader),
                                                                            test_loss/len(testloader),
                                                                            accuracy*100/len(testloader)))        

Epoch: 1/25, Train_loss: 0.8543415665626526, Test_loss: 0.5640466213226318, Accuracy: 79.49840545654297
Epoch: 2/25, Train_loss: 0.6849862933158875, Test_loss: 0.5042765140533447, Accuracy: 82.1158447265625
Epoch: 3/25, Train_loss: 0.6574461460113525, Test_loss: 0.5122542977333069, Accuracy: 81.8869400024414
Epoch: 4/25, Train_loss: 0.6459218859672546, Test_loss: 0.47910070419311523, Accuracy: 82.52388763427734
Epoch: 5/25, Train_loss: 0.6398714780807495, Test_loss: 0.4938146471977234, Accuracy: 82.60350036621094
Epoch: 6/25, Train_loss: 0.6210704445838928, Test_loss: 0.4684646427631378, Accuracy: 84.13614654541016
Epoch: 7/25, Train_loss: 0.6311630606651306, Test_loss: 0.4866037964820862, Accuracy: 83.3996810913086
Epoch: 8/25, Train_loss: 0.6317572593688965, Test_loss: 0.4733387231826782, Accuracy: 83.29020690917969
Epoch: 9/25, Train_loss: 0.6231968998908997, Test_loss: 0.4892164468765259, Accuracy: 82.71297454833984
Epoch: 10/25, Train_loss: 0.6168392896652222, Test_loss: 0.4708630

Saving model

In [11]:
checkpoint = {'input_size': 784,
             'output_size': 10,
             'hidden_sizes': [each.out_features for each in model.hidden],
             'state_dict': model.state_dict()}

torch.save(checkpoint, 'checkpoint.pth')

Loading back a model

In [12]:
def load_model(path):
    check = torch.load(path)
    model = Network(check['input_size'], check['output_size'], check['hidden_sizes'])
    model.load_state_dict(check['state_dict']) 
    return model

In [13]:
print(model)
loaded_model = load_model('checkpoint.pth')
print(loaded_model)

Network(
  (hidden): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
)
Network(
  (hidden): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
)
