# Inference and Validation

We can use the trained network to make a prediction which is typically called **inference**. However, neural network have a tendency to perform well on training data and not generalize on unseen data. This problem is known as **overfitting**. To test the neural network for overfiting, we measure the performance on data not in the training set called the **validation** set

In [15]:
import torch
from torchvision import datasets, transforms


transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)


# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)


In [16]:
from torch import nn, optim
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x))
    
        return x
        
        


In [17]:
model = Classifier()

images, label = next(iter(trainloader))

ps = torch.exp(model(images))

print(ps[0:3])

#TopK returns the index of largest k element. When we pass 1, it will return one index which is the highest element of Tensor.
top_probabilities, top_classses = ps.topk(1, dim = 1)

print(top_class[0:3])
print(top_p[0:3])



tensor([[0.0966, 0.0916, 0.1085, 0.1042, 0.1115, 0.0905, 0.0950, 0.1109, 0.0954,
         0.0958],
        [0.0949, 0.0914, 0.1051, 0.1041, 0.1171, 0.0914, 0.0946, 0.1094, 0.0934,
         0.0988],
        [0.0962, 0.0915, 0.1067, 0.1039, 0.1097, 0.0905, 0.0971, 0.1102, 0.0989,
         0.0953]], grad_fn=<SliceBackward>)
tensor([[9],
        [9],
        [9]])
tensor([[0.1153],
        [0.1149],
        [0.1176]], grad_fn=<SliceBackward>)


Now we check if the predicted classes match the labels. This is as simple as equating the top_class and labels but we need to be careful with shapes. Top class is 2D tensor with (64, 1) while labels is 1D with shape (64). TO get the equatiy work, we need them to have same shape which can be done with the following trick.

**equals = top_class == label.view(*top_class.shape)**


In [18]:
equals.shape

torch.Size([64, 1])

In [21]:
label.shape

torch.Size([64])

In [22]:
equals = top_class == label.view(*top_class.shape)

In [25]:
equals.type(torch.FloatTensor)

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])

Now, we can use the above tensor to calculate accuracy just by taking the mean. 

In [26]:
accuracy = torch.mean(equals.type(torch.FloatTensor))
print('Accuracy is', accuracy.item()*100, '%')

Accuracy is 9.375 %


In [None]:
model = Classifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.003)
epoch = 30
steps = 0

train_losses, test_losses = [], []

for e in range(epoch):
    running_loss = 0    
    for images, label in trainloader:
        
        optimizer.zero_grad()
        
        log_ps = model(images)
        loss = criterion(log_ps, label)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        
        test_loss = 0
        accuracy = 0
        
        with torch.no_grad():
            for images, labels in testloader:
                log_ps = model(images)
                
                test_loss += criterion(log_ps, labels)
                
                ps = torch.exp(log_ps)
                
                top_p, top_class = ps.topk(1, dim= 1)
                
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
            

