In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

import torchvision.datasets as datasets
import torchvision.transforms as transforms

from scipy.stats import mode

from matplotlib import pyplot as plt

In [2]:
class NNwithDropout(nn.Module):
    """One-hidden-layer fully connected network with always-on dropout.

    The hidden activations pass through dropout on *every* forward call:
    ``training=True`` is hard-coded in ``forward``, so ``net.eval()`` does not
    disable it. Repeated calls on the same input therefore yield different
    stochastic outputs whenever ``p > 0``.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of the hidden layer.
        output_size: number of output scores per row.
        p: dropout probability applied to the hidden layer (default 0.0).
        use_cuda: when true, the module's parameters are moved to the GPU
            at construction time.
    """

    def __init__(self, input_size, hidden_size, output_size, p=0.0, use_cuda=False):
        super(NNwithDropout, self).__init__()
        self.p = p
        self.use_cuda = use_cuda

        # Layers are created in the same order (fc1, then out) so that
        # parameter initialisation draws from the RNG in the same sequence.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        if use_cuda:
            self.cuda()

    def forward(self, x):
        """Return unnormalised output scores for ``x`` (no softmax is applied)."""
        hidden = self.fc1(x)
        # Dropout is applied before the ReLU and is forced on via training=True.
        hidden = F.dropout(hidden, p=self.p, training=True)
        hidden = F.relu(hidden)
        return self.out(hidden)

In [6]:
def to_vector(t, num_features=28 * 28):
    """Flatten a batch of images into a 2-D tensor of row vectors.

    Args:
        t: tensor whose total element count is a multiple of ``num_features``.
        num_features: length of each output row. Defaults to ``28 * 28``,
            which keeps existing one-argument calls behaving as before.

    Returns:
        A tensor of shape ``(-1, num_features)``. It shares storage with ``t``
        when the memory layout permits; otherwise the data is copied.
    """
    # reshape (unlike view) also accepts non-contiguous input such as a
    # transposed or permuted batch, and still returns a view when it can.
    return t.reshape(-1, num_features)

In [9]:
batch_size = 128
# Use the GPU only when one is actually present; a hard-coded True makes the
# notebook fail at model construction on CPU-only machines.
use_cuda = torch.cuda.is_available()

# Options shared by both loaders. Memory pinning follows use_cuda, so it is
# switched off when everything stays on the CPU.
kwargs = {'num_workers': 1, 'pin_memory': use_cuda, 'batch_size': batch_size}

# Training split: downloaded into ./mnist-data if missing, reshuffled each epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        './mnist-data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ])
    ),
    shuffle=True,
    **kwargs
)

# Test split: read from the same directory, iterated in a fixed order.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        './mnist-data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ])
    ),
    shuffle=False,
    **kwargs
)

In [238]:
# 28*28 input features -> 1024 hidden units -> 10 output scores, dropout probability 0.5.
net = NNwithDropout(
    input_size=28*28,
    hidden_size=1024,
    output_size=10,
    p=0.5,
    use_cuda=use_cuda,
)

In [239]:
# Cross-entropy loss, applied to the raw scores returned by net.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent over all of net's parameters, learning rate 0.1.
optimizer = optim.SGD(params=net.parameters(), lr=0.1)

In [241]:
# Train for 30 passes over train_loader. The value printed per epoch is the
# SUM of the mini-batch losses in that epoch, not a per-batch average.
for epoch in range(30):
    running_loss = 0.0
    for images, labels in train_loader:
        # Flatten each image to a row vector with the shared helper.
        images = to_vector(images)
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Loss after Ep. {}: {}'.format(epoch, running_loss))

Loss after Ep. 0: 45.80068784579635
Loss after Ep. 1: 43.029464315623045
Loss after Ep. 2: 40.64955406636
Loss after Ep. 3: 38.087418584153056
Loss after Ep. 4: 35.977309830486774
Loss after Ep. 5: 34.169311521574855
Loss after Ep. 6: 32.68178976140916
Loss after Ep. 7: 30.914681425318122
Loss after Ep. 8: 30.36680606007576
Loss after Ep. 9: 28.714897906407714
Loss after Ep. 10: 27.627061719074845
Loss after Ep. 11: 26.463005805388093
Loss after Ep. 12: 25.56579397059977
Loss after Ep. 13: 24.21134540066123
Loss after Ep. 14: 23.456627294421196
Loss after Ep. 15: 22.99835648946464
Loss after Ep. 16: 21.996352955698967
Loss after Ep. 17: 21.128285998478532
Loss after Ep. 18: 20.520638460293412
Loss after Ep. 19: 20.415224539116025
Loss after Ep. 20: 19.50062849279493
Loss after Ep. 21: 18.60268199443817
Loss after Ep. 22: 18.01242395211011
Loss after Ep. 23: 17.389047322794795
Loss after Ep. 24: 16.90783824585378
Loss after Ep. 25: 16.30215534940362
Loss after Ep. 26: 16.50446315575391


In [260]:
# accuracy on train set
# Every batch is passed through net num_samples times. Dropout is always active
# in NNwithDropout.forward, so each pass is a separate stochastic sample.
num_samples = 50

train_labels = []
train_predictions = []
for img, lbl in train_loader:
    train_labels.append(lbl)
    img = img.view(-1, 28*28)
    # Honour the same use_cuda flag as the training loop instead of assuming a GPU.
    if use_cuda:
        img = img.cuda()
    with torch.no_grad():
        current_prediction = [net(img).cpu() for _ in range(num_samples)]
    # stack -> (num_samples, batch, classes); transpose -> (batch, num_samples, classes)
    train_predictions.append(torch.stack(current_prediction).transpose(0, 1))

train_predictions = torch.cat(train_predictions)
train_labels = torch.cat(train_labels)
# Average the raw scores over the sample axis, then pick the highest-scoring class.
predicted_train_labels = train_predictions.mean(1).argmax(1)
train_accuracy = (train_labels == predicted_train_labels).sum().float() / len(train_labels)

print('Accuracy on train set with {} samples for prediction: {}'.format(num_samples, train_accuracy))

Accuracy on train set with 50 samples for prediction: 0.996483325958252


In [261]:
# accuracy on test set
# Every batch is passed through net num_samples times. Dropout is always active
# in NNwithDropout.forward, so each pass is a separate stochastic sample.
num_samples = 50

test_labels = []
test_predictions = []
for img, lbl in test_loader:
    test_labels.append(lbl)
    img = img.view(-1, 28*28)
    # Honour the same use_cuda flag as the training loop instead of assuming a GPU.
    if use_cuda:
        img = img.cuda()
    with torch.no_grad():
        current_prediction = [net(img).cpu() for _ in range(num_samples)]
    # stack -> (num_samples, batch, classes); transpose -> (batch, num_samples, classes)
    test_predictions.append(torch.stack(current_prediction).transpose(0, 1))

test_predictions = torch.cat(test_predictions)
test_labels = torch.cat(test_labels)
# Average the raw scores over the sample axis, then pick the highest-scoring class.
predicted_test_labels = test_predictions.mean(1).argmax(1)
test_accuracy = (test_labels == predicted_test_labels).sum().float() / len(test_labels)

print('Accuracy on test set with {} samples for prediction: {}'.format(num_samples, test_accuracy))

Accuracy on test set with 50 samples for prediction: 0.9829000234603882


In [269]:
# Spread of each output score across the stochastic forward passes for the entry
# at index 1 of train_predictions. train_loader shuffles, so which training image
# this corresponds to depends on the iteration order of the evaluation run.
train_predictions[1].std(dim=0)

tensor([0.8238, 0.6741, 0.9447, 1.0293, 0.9717, 1.1997, 1.0119, 1.0451, 0.9248,
        1.0264])