# Lab 09-3. Dropout

In [1]:
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [2]:
# params
learning_rate = 1e-3
training_epochs = 15
batch_size = 100
drop_prob = 0.5

**Data Loader**

In [3]:
mnist_train = datasets.MNIST(root='MNIST_data/',
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)

mnist_test  = datasets.MNIST(root='MNIST_data/',
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)

In [4]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

**MNIST_nn_dropout**

In [5]:
# nn Layer
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512,  10, bias=True)
relu    = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=drop_prob)

In [6]:
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4, relu, dropout,
                            linear5).to(device)

In [7]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
total_batch = len(data_loader)
model.train() # set the model to train mode (dropout=True)

for epoch in range(training_epochs):
    
    avg_cost = 0

    for X, Y in data_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch+1:4d}, Cost: {avg_cost:.9f}')

Epoch:    1, Cost: 0.427145123
Epoch:    2, Cost: 0.199661180
Epoch:    3, Cost: 0.166740030
Epoch:    4, Cost: 0.147552133
Epoch:    5, Cost: 0.130847901
Epoch:    6, Cost: 0.126773059
Epoch:    7, Cost: 0.118709169
Epoch:    8, Cost: 0.112484708
Epoch:    9, Cost: 0.104674287
Epoch:   10, Cost: 0.104111642
Epoch:   11, Cost: 0.096737869
Epoch:   12, Cost: 0.095933877
Epoch:   13, Cost: 0.094434753
Epoch:   14, Cost: 0.088721618
Epoch:   15, Cost: 0.088298284


**Test**

In [9]:
import warnings
warnings.filterwarnings(action='ignore')

In [10]:
with torch.no_grad():
    model.eval() # set the model to evaluation mode (dropout=False)

    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print(f"Accuracy: {accuracy.item()*100:.4f}%")

Accuracy: 98.8233%
