In [16]:
import torch
import torchvision

# Number of samples per mini-batch for the training loader.
batch_size_train = 4

# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.1307 and standard deviation 0.3081.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST training split, downloaded into ../data if it is not there yet.
train_set = torchvision.datasets.MNIST(
    root='../data', train=True, download=True, transform=train_transform)

# Shuffled mini-batch iterator over the training split.
trainloader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size_train, shuffle=True)

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# True when CUDA is available; the rest of this script checks this flag before
# moving the model and the data batches to the GPU.
use_gpu = torch.cuda.is_available()

class BernoulliDropout(nn.Module):
    """Dropout that zeroes each element independently with probability ``p``.

    While the module is in training mode every element of the input is
    dropped (set to zero) with probability ``p``; the surviving elements are
    scaled by ``1 / (1 - p)`` so the expected value of the output equals the
    input. In evaluation mode the input is returned unchanged.

    Args:
        p: probability of dropping an element, in ``[0, 1]``.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1]``.
    """

    def __init__(self, p=0.5):
        super(BernoulliDropout, self).__init__()
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p

    def forward(self, x):
        """Apply the dropout mask to ``x`` and return a tensor of the same shape."""
        # Nothing to drop: evaluation mode or a zero dropout rate.
        if not self.training or self.p == 0:
            return x
        # Everything is dropped; handled separately to avoid dividing by zero.
        if self.p == 1:
            return torch.zeros_like(x)

        keep_prob = 1 - self.p
        # Sample the mask on the same device as the input so the layer works
        # for both CPU and CUDA tensors. An entry is 1 (kept) with probability
        # ``keep_prob`` and 0 (dropped) with probability ``p``.
        mask = (torch.rand(x.shape, device=x.device) < keep_prob).to(x.dtype)
        return x * mask / keep_prob

class GaussianDropout(nn.Module):
    """Multiplicative Gaussian noise layer.

    While the module is in training mode the input is multiplied element-wise
    by noise drawn from ``N(1, alpha)``, where ``alpha = p / (1 - p)`` is the
    *variance* of the noise. In evaluation mode the input is returned
    unchanged.

    Args:
        p: equivalent Bernoulli dropout rate, in ``[0, 1)``.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1)`` (``p == 1`` would make
            ``alpha`` infinite).
    """

    def __init__(self, p=0.5):
        super(GaussianDropout, self).__init__()
        if p < 0 or p >= 1:
            raise ValueError("dropout probability has to be in [0, 1), "
                             "but got {}".format(p))
        alpha = p / (1 - p)
        # Kept as a one-element tensor attribute, as before.
        self.alpha = torch.Tensor([alpha])

    def forward(self, x):
        """Return ``x`` multiplied by freshly sampled ``N(1, alpha)`` noise."""
        if not self.training:
            return x
        # alpha is a variance, so the standard-normal sample is scaled by its
        # square root to obtain noise with variance alpha.
        std = self.alpha.sqrt().item()
        # Sample directly on the input's device instead of relying on a
        # global GPU flag.
        epsilon = torch.randn(x.size(), device=x.device) * std + 1
        return x * epsilon
        
# NOTE: the string literal below holds a disabled variant of Net with two
# linear layers (784 -> 1024 -> 10) that applies ReLU to the output layer as
# well. It is a bare string expression, so it defines nothing at runtime.
'''
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        x = x.view(-1,28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
'''

class Net(nn.Module):
    """Fully connected classifier with layer sizes 784 -> 1024 -> 1024 -> 10.

    The two hidden layers are followed by ReLU; the output layer returns its
    raw activations. No dropout layer is applied.
    """

    def __init__(self):
        super(Net, self).__init__()

        n_inputs = 28 * 28
        n_hidden = 1024
        n_outputs = 10

        self.fc1 = nn.Linear(n_inputs, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_outputs)

    def forward(self, x):
        """Flatten ``x`` into rows of 784 values and return 10 scores per row."""
        hidden = x.view(-1, 28 * 28)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

# Build the classifier, moving it to the GPU when CUDA is available.
net = Net().cuda() if use_gpu else Net()


import torch.optim as optim
# Cross-entropy loss on the network outputs, minimised with SGD
# (learning rate 0.001, momentum 0.9) over all of the network's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)


# Number of mini-batches between two progress reports.
log_interval = 100

# Put the model in training mode explicitly so that re-running this loop after
# an evaluation pass still trains with train-mode behaviour.
net.train()

for epoch in range(2):  # loop over the dataset multiple times

    # Sum of the losses since the last progress report.
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        if use_gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the mean loss over the last `log_interval` mini-batches.
        running_loss += loss.item()
        if (i + 1) % log_interval == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0
print('Finished Training')

# MNIST test split with the same preprocessing as the training data.
testSet = torchvision.datasets.MNIST(
    root='../data',
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,))])
)

# Unshuffled loader over the test split; it reuses the training batch size.
testLoader = torch.utils.data.DataLoader(
    testSet,
    batch_size=batch_size_train,
    shuffle=False,
    num_workers=0
)

total = 0    # number of test samples seen
correct = 0  # number of samples whose predicted class equals the label
print(len(testLoader))

# Switch to evaluation mode so that train-only layers (e.g. dropout) do not
# perturb the predictions.
net.eval()
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testLoader:
        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()
        outputs = net(images)
        # The predicted class is the index of the largest output per sample.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the accuracy over however many images were actually evaluated.
print('Accuracy of the network on the %d test images: %s%%' %
      (total, 100 * correct / total))

[1,   100] loss: 1.995
[1,   200] loss: 1.363
[1,   300] loss: 0.799
[1,   400] loss: 0.587
[1,   500] loss: 0.554
[1,   600] loss: 0.501
[1,   700] loss: 0.468
[1,   800] loss: 0.335
[1,   900] loss: 0.423
[1,  1000] loss: 0.397
[1,  1100] loss: 0.436
[1,  1200] loss: 0.346
[1,  1300] loss: 0.320
[1,  1400] loss: 0.346
[1,  1500] loss: 0.350
[1,  1600] loss: 0.372
[1,  1700] loss: 0.359
[1,  1800] loss: 0.267
[1,  1900] loss: 0.281
[1,  2000] loss: 0.279
[1,  2100] loss: 0.363
[1,  2200] loss: 0.371
[1,  2300] loss: 0.313
[1,  2400] loss: 0.305
[1,  2500] loss: 0.262
[1,  2600] loss: 0.230
[1,  2700] loss: 0.259
[1,  2800] loss: 0.299
[1,  2900] loss: 0.243
[1,  3000] loss: 0.235
[1,  3100] loss: 0.353
[1,  3200] loss: 0.314
[1,  3300] loss: 0.349
[1,  3400] loss: 0.280
[1,  3500] loss: 0.207
[1,  3600] loss: 0.168
[1,  3700] loss: 0.158
[1,  3800] loss: 0.233
[1,  3900] loss: 0.274
[1,  4000] loss: 0.191
[1,  4100] loss: 0.167
[1,  4200] loss: 0.194
[1,  4300] loss: 0.236
[1,  4400] 