# Dropout

## mnist_nn_dropout

In [1]:
import torch
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
    
mnist_train = dsets.MNIST(root = "MNIST_data/", train = True, transform = torchvision.transforms.ToTensor(), download = True)

mnist_test = dsets.MNIST(root = "MNIST_data/", train = False, transform = torchvision.transforms.ToTensor(), download = True)
#DataLoader를 dateset으로 바꾸니 됨
data_loader = torch.utils.data.DataLoader(dataset = mnist_train, batch_size = 100, shuffle = True, drop_last = True)


#parameters
training_epochs = 15
batch_size = 100
lr = 0.001
drop_prob = 0.5

#MNIST data image of shape 28 * 28 = 784
linear1 = torch.nn.Linear(784, 512 ,bias= True).to(device)
linear2 = torch.nn.Linear(512, 512 ,bias= True).to(device)
linear3 = torch.nn.Linear(512, 512 ,bias= True).to(device)
linear4 = torch.nn.Linear(512, 512 ,bias= True).to(device)
linear5 = torch.nn.Linear(512, 10 ,bias= True).to(device)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p = drop_prob)

#initialization
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
torch.nn.init.xavier_uniform(linear4.weight)
torch.nn.init.xavier_uniform(linear5.weight)

#model
model = torch.nn.Sequential(linear1,relu,linear2,relu,linear3).to(device)

# define cost/Loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr)

#Train
total_batch = len(data_loader)
model.train() # set the model to train mode (dropout = True)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # Label is not one-hot encoded
        X = X.view(-1,28 * 28).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
        
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))



Epoch:  0001 cost =  0.279324949
Epoch:  0002 cost =  0.091839999
Epoch:  0003 cost =  0.059177421
Epoch:  0004 cost =  0.040364165
Epoch:  0005 cost =  0.030351555
Epoch:  0006 cost =  0.025038466
Epoch:  0007 cost =  0.019285610
Epoch:  0008 cost =  0.019576581
Epoch:  0009 cost =  0.015468242
Epoch:  0010 cost =  0.013505809
Epoch:  0011 cost =  0.014784114
Epoch:  0012 cost =  0.010054612
Epoch:  0013 cost =  0.012086227
Epoch:  0014 cost =  0.007986077
Epoch:  0015 cost =  0.014596798


In [None]:
#Test the model using test sets
with torch.no_grad():
    model.eval() # set the model to evaluation mode (dropout = False)
    X_test = mnist_test.test_data.view(-1,28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction , 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print("Accuracy: ", accuracy.item())