In [8]:
from __future__ import print_function
import argparse

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms




class Net(nn.Module):
    """Three-layer fully connected classifier (784 -> 300 -> 100 -> 10).

    Each input sample is flattened to 784 features (e.g. a 1x28x28 MNIST
    image), passed through two ReLU hidden layers, and mapped to
    log-probabilities over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 300)
        self.fc2 = nn.Linear(300, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: tensor whose first dimension is the batch and whose remaining
                dimensions hold 784 values per sample.
        """
        # Flatten using the actual batch size. A hard-coded batch size breaks
        # (or silently mixes samples) for any loader whose batches are not
        # exactly that size, including a smaller final batch.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)
        

In [9]:

def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        log_interval: print a progress line every ``log_interval`` batches.
        model: module returning log-probabilities (the loss is ``F.nll_loss``).
        device: device that each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; must support
            ``len()`` and expose ``.dataset`` for the progress message.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Returns:
        The loss of the LAST batch as a float (not the epoch mean), or NaN
        if the loader yielded no batches.
    """
    model.train()
    loss = None  # stays None when the loader is empty
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    if loss is None:
        # No batch was processed, so there is no loss to report.
        return float('nan')
    return loss.item()

In [10]:
no_classes = 10

def test(args, model, device, test_loader):
    model.train(mode=False)
    model.eval()
    confusion_matrix=torch.zeros(no_classes,no_classes)
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i ,(data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            for t,p in zip(target.view(-1),pred.view(-1)):
                confusion_matrix[t.long(),p.long()] +=1
            
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    
    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss, confusion_matrix, accuracy

In [11]:
def per_class_accuracy(confusion_matrix):
    """Return the diagonal of ``confusion_matrix`` divided by its row sums.

    With rows indexed by true class (as filled in by ``test``), entry ``i`` is
    the fraction of class-``i`` samples that were predicted as class ``i``.
    """
    correct_per_class = torch.diag(confusion_matrix)
    samples_per_class = torch.sum(confusion_matrix, dim=1)
    return correct_per_class / samples_per_class

In [12]:
def rndsplit_simple(x, y):
    """Randomly split paired samples into two halves.

    The first axis of ``x`` and ``y`` is treated as the sample axis. Indices
    are shuffled with NumPy's global RNG (seed it via ``np.random.seed`` for
    a reproducible split); the first ``n // 2`` go to the first half and the
    rest to the second, so the second half gets the extra sample when ``n``
    is odd.

    Args:
        x: array of samples, indexable along axis 0.
        y: array of labels with the same number of rows as ``x``.

    Returns:
        ``(xtr, ytr, xv, yv)``: first-half samples and labels, then
        second-half samples and labels.
    """
    # Take the sample count from x.shape[0] for both the split point and the
    # index range; y.size would over-count whenever y has more than one column.
    num_samples = x.shape[0]
    numtr = num_samples // 2

    inds = np.arange(num_samples)
    np.random.shuffle(inds)
    first_half, second_half = inds[:numtr], inds[numtr:]

    return x[first_half], y[first_half], x[second_half], y[second_half]

In [13]:
def main():
    """Train ``Net`` on MNIST and track the epoch with the lowest test loss.

    Downloads MNIST to ``../data`` if needed, trains with SGD on the CPU for
    a fixed number of epochs, and evaluates on the MNIST test split after
    every epoch. Note that the test split doubles as the validation set here.

    Returns:
        ``(best_confusion_matrix, least_loss, best_epoch, train_loss,
        val_loss, best_accuracy)`` where ``train_loss`` / ``val_loss`` are
        per-epoch lists and the ``best_*`` values belong to the epoch with the
        lowest test loss. If no epoch produced a finite loss (or ``epochs`` is
        0), ``best_epoch`` is ``None``, ``least_loss`` is ``inf``,
        ``best_confusion_matrix`` is ``[]`` and ``best_accuracy`` is 0.
    """
    batch_size = 10
    test_batch_size = 10
    epochs = 10
    lr = 0.01
    momentum = 0.001
    log_interval = 1000
    save_model = False
    device = torch.device("cpu")

    kwargs = {}

    # Same preprocessing for both splits: tensor conversion followed by
    # normalisation with fixed mean/std constants.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True, **kwargs)
    # Evaluation only accumulates sums over the whole split, so sample order
    # is irrelevant and shuffling is unnecessary.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=test_batch_size, shuffle=False, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    best_confusion_matrix = []
    least_loss = float('inf')  # any finite loss improves on this
    best_epoch = None  # defined up front so the return never hits an unbound name
    best_accuracy = 0
    train_loss = []
    val_loss = []
    for epoch in range(1, epochs + 1):
        train_loss.append(
            train(log_interval, model, device, train_loader, optimizer, epoch))
        # test() ignores its first argument.
        test_loss, confusion_matrix, accuracy = test(
            None, model, device, test_loader)
        val_loss.append(test_loss)
        if test_loss < least_loss:
            least_loss = test_loss
            best_epoch = epoch
            best_confusion_matrix = confusion_matrix
            best_accuracy = accuracy

    if save_model:
        # Saves the weights after the final epoch, not those of best_epoch.
        torch.save(model.state_dict(), "FashionMnist_cnn.pt")

    return best_confusion_matrix, least_loss, best_epoch, train_loss, val_loss, best_accuracy
    
    

In [14]:
# Run the full train/evaluate loop. main() returns, in order: the confusion
# matrix, test loss and epoch number of the lowest-test-loss epoch, the
# per-epoch training and test loss lists, and that epoch's accuracy in percent.
best_confusion_matrix,least_loss,best_epoch,train_losses, valid_losses,best_accuracy = main()



Test set: Average loss: 0.1541, Accuracy: 9527/10000 (95%)


Test set: Average loss: 0.1081, Accuracy: 9659/10000 (97%)


Test set: Average loss: 0.0991, Accuracy: 9717/10000 (97%)


Test set: Average loss: 0.0777, Accuracy: 9743/10000 (97%)


Test set: Average loss: 0.0730, Accuracy: 9753/10000 (98%)


Test set: Average loss: 0.0664, Accuracy: 9791/10000 (98%)


Test set: Average loss: 0.0639, Accuracy: 9797/10000 (98%)


Test set: Average loss: 0.0658, Accuracy: 9798/10000 (98%)


Test set: Average loss: 0.0664, Accuracy: 9796/10000 (98%)


Test set: Average loss: 0.0644, Accuracy: 9809/10000 (98%)



In [15]:
import matplotlib.pyplot as plt

# One point per epoch: train_losses holds the loss of the last training batch
# of each epoch, valid_losses the mean loss over the test split.
fig, ax = plt.subplots()
ax.plot(train_losses)
ax.plot(valid_losses)

ax.set_title('model losses')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['training loss', 'validation loss with test set'], loc='upper right')
plt.show()


<Figure size 640x480 with 1 Axes>

In [16]:
# Summary of the epoch with the lowest test loss.
print("best epoch: ", best_epoch, sep='')
print("least loss = ", least_loss, sep='')
print("best accuracy obtained as an average over all classes = ", best_accuracy, sep='')

# Per-class accuracy (fraction in [0, 1]) in class-index order.
class_accuracy = per_class_accuracy(best_confusion_matrix)
for target, accuracy in enumerate(class_accuracy):
    print('class ', target, ' : ', accuracy.item(), sep='')

# Class indices ordered from highest to lowest accuracy; negating the key
# makes the ascending sort descend.
accuracy_rank = sorted(range(len(class_accuracy)), key=lambda k: -class_accuracy[k])
print()
print("Sorted classwise accuracies")
for idx in accuracy_rank:
    print("class ", idx, ': ', class_accuracy[idx].item(), sep='')

# The last entry of the ranking is the class with the lowest accuracy.
hardest_class = accuracy_rank[-1]
print('Hardest class to predict: {} with accuracy {}'.format(
    hardest_class, class_accuracy[hardest_class].item()))

best epoch: 7
least loss = 0.06391231496334077
best accuracy obtained as an average over all classes = 97.97
class 0 : 0.9877551198005676
class 1 : 0.9903083443641663
class 2 : 0.9825581312179565
class 3 : 0.9831683039665222
class 4 : 0.9806517362594604
class 5 : 0.9809417128562927
class 6 : 0.9739039540290833
class 7 : 0.9678988456726074
class 8 : 0.9733059406280518
class 9 : 0.9752230048179626

Sorted classwise accuracies
class 1: 0.9903083443641663
class 0: 0.9877551198005676
class 3: 0.9831683039665222
class 2: 0.9825581312179565
class 5: 0.9809417128562927
class 4: 0.9806517362594604
class 9: 0.9752230048179626
class 6: 0.9739039540290833
class 8: 0.9733059406280518
class 7: 0.9678988456726074
Hardest class to predict: 7 with accuracy 0.9678988456726074
