In [None]:
from __future__ import print_function
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
class MLP(nn.Module):
    '''
    Fully connected classifier head in which every linear layer is preceded
    by an AdvancedDropout layer and every hidden linear layer is followed by
    an in-place ReLU.
    '''

    def __init__(self, node_list):
        '''
        params:
        node_list (int list): layer widths. The first element is the input width,
                                the last element is the output width, and any
                                elements in between are hidden-layer widths.
        '''
        super(MLP, self).__init__()
        self.classifier = self._make_layers(node_list)

    def forward(self, x):
        # Collapse everything after the batch dimension into one feature axis.
        flat = x.view(x.size(0), -1)
        return self.classifier(flat)

    def _make_layers(self, node_list):
        '''
        Assemble the nn.Sequential stack:
        (AdvancedDropout -> Linear -> ReLU) per hidden width, then
        AdvancedDropout -> Linear for the output layer.
        '''
        modules = []
        width = node_list[0]
        for hidden_width in node_list[1:-1]:
            # Modules are created in stack order so parameter initialisation
            # draws from the RNG in the same sequence as the layers appear.
            modules.append(AdvancedDropout(width))
            modules.append(nn.Linear(width, hidden_width))
            modules.append(nn.ReLU(inplace=True))
            width = hidden_width
        modules.append(AdvancedDropout(width))
        modules.append(nn.Linear(width, node_list[-1]))
        return nn.Sequential(*modules)

In [None]:
import torch
import numpy as np
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
import torch.nn.functional as F

class AdvancedDropout(Module):
    '''
    Dropout layer whose soft mask is sampled from a learned, input-dependent
    distribution.

    In training mode a small bottleneck (weight_h / bias_h) maps the input to a
    hidden state, from which a scalar mean ``mu`` and a scalar positive spread
    ``sigma`` are computed (both averaged over the whole batch). A mask
    ``sigmoid(mu + sigma * N(0, 1))`` is sampled per element and multiplied
    into the input, and the result is rescaled by a closed-form estimate of the
    mask's expected value. In eval mode the input is returned unchanged.
    '''

    def __init__(self, num, init_mu=0, init_sigma=1.2, reduction=16):  # defaults varied in experiment 1
        '''
        params:
        num (int): node number
        init_mu (float): initial mu (used as the initial value of bias_mu)
        init_sigma (float): initial sigma (used as the initial value of bias_sigma)
        reduction (int, power of two): reduction of dimension of hidden states h

        raises:
        ValueError: if init_sigma is not strictly positive
        '''
        super(AdvancedDropout, self).__init__()
        if init_sigma <= 0:
            raise ValueError("Sigma has to be larger than 0, but got init_sigma=" + str(init_sigma))
        self.init_mu = init_mu
        self.init_sigma = init_sigma

        # Bottleneck projection: num -> num // reduction hidden units.
        self.weight_h = Parameter(torch.rand([num // reduction, num]).mul(0.01))
        self.bias_h = Parameter(torch.rand([1]).mul(0.01))

        # Heads that map the hidden state to the mask distribution parameters.
        self.weight_mu = Parameter(torch.rand([1, num // reduction]).mul(0.01))
        self.bias_mu = Parameter(torch.Tensor([self.init_mu]))
        self.weight_sigma = Parameter(torch.rand([1, num // reduction]).mul(0.01))
        self.bias_sigma = Parameter(torch.Tensor([self.init_sigma]))

    def forward(self, input):
        '''
        params:
        input (Tensor): 2-D tensor of shape (batch, num)

        returns:
        Tensor with the same shape as ``input``: masked and rescaled in training
        mode, the untouched input in eval mode.
        '''
        if not self.training:
            return input

        c, n = input.size()
        # parameterized prior
        h = F.linear(input, self.weight_h, self.bias_h)
        mu = F.linear(h, self.weight_mu, self.bias_mu).mean()
        sigma = F.softplus(F.linear(h, self.weight_sigma, self.bias_sigma)).mean()
        # mask
        # Sample the noise on the input's own device; the previous hard-coded
        # .cuda() made training-mode forward fail on CPU-only machines.
        noise = torch.randn([c, n], device=input.device)
        epsilon = mu + sigma * noise
        mask = torch.sigmoid(epsilon)

        # Rescale by an estimate of E[sigmoid(epsilon)]. The estimate is built
        # from .data, so no gradient flows through the normaliser.
        # NOTE(review): 3.14 presumably stands in for pi; kept as-is so numeric
        # results match earlier runs (formula changed in experiment 3).
        expected_mask = torch.sigmoid(mu.data / torch.sqrt(1. + 3.14 / 8. * sigma.data ** 2.))
        return input.mul(mask).div(expected_mask)

In [None]:
def toggle_dropout(net, epoch, interval=20):  # interval controls the on/off period for experiment 2
    '''
    Switch every AdvancedDropout layer in ``net.classifier.classifier`` on or
    off depending on the epoch.

    Epochs are grouped into consecutive blocks of ``interval`` epochs; dropout
    is active in even-numbered blocks (0, 2, ...) and inactive in odd ones.
    The layer's ``training`` attribute is assigned directly and one status
    line is printed per AdvancedDropout layer found.

    Note: a later call to ``net.train()`` or ``net.eval()`` resets the
    ``training`` flag on every submodule, overriding what is set here.

    params:
    net: model exposing ``net.classifier.classifier`` as an iterable of layers
    epoch (int): current epoch index
    interval (int): number of epochs in each on/off block
    '''
    block_index = epoch // interval
    is_dropout_active = block_index % 2 == 0
    status_line = 'Dropout: ' + str(is_dropout_active)
    for layer in net.classifier.classifier:
        if not isinstance(layer, AdvancedDropout):
            continue
        layer.training = is_dropout_active
        print(status_line)

In [None]:
def main(nb_epoch=300, lr=0.1, data_root='/data/xiejiyang/data'):
    '''
    Train and evaluate a VGG16-BN backbone with an AdvancedDropout MLP head on
    CIFAR-10, then plot the accuracy and loss curves.

    Side effects:
    - downloads CIFAR-10 into ``data_root`` if it is not already there
    - appends one row per epoch to results_train.csv and results_test.csv
      (the header row is written only when the file is new or empty)
    - overwrites checkpoint.pth with the model state_dict after every epoch
    - shows a matplotlib figure at the end

    params:
    nb_epoch (int): number of epochs
    lr (float): initial learning rate for the backbone and the linear layers
    data_root (str): directory that holds (or will receive) the CIFAR-10 data
    '''
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use GPU 0
    train_accuracies = []
    train_losses = []
    test_accuracies = []
    test_losses = []

    def open_results(path, header):
        # Append so results of earlier runs are kept, but only emit the header
        # when the file is new/empty so re-runs do not inject duplicate headers.
        print("OPENING " + path)
        needs_header = not (os.path.isfile(path) and os.path.getsize(path) > 0)
        handle = open(path, 'a')
        if needs_header:
            handle.write(header)
            handle.flush()
        return handle

    # The with-statement guarantees both CSV files are closed, even if
    # training is interrupted or raises.
    with open_results('results_train.csv', 'epoch,train_acc,train_loss\n') as results_train_file, \
            open_results('results_test.csv', 'epoch,test_acc,test_loss\n') as results_test_file:

        # Data
        print('==> Preparing data..')
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        trainset = torchvision.datasets.CIFAR10(
            root=data_root,
            train=True,
            download=True,
            transform=transform_train
        )
        trainloader = torch.utils.data.DataLoader(
            trainset,
            batch_size=256,
            shuffle=True,
            num_workers=4
        )

        testset = torchvision.datasets.CIFAR10(
            root=data_root,
            train=False,
            download=True,
            transform=transform_test
        )
        testloader = torch.utils.data.DataLoader(
            testset,
            batch_size=256,
            shuffle=False,
            num_workers=4
        )

        # Model
        print('==> Building model..')
        backbone = models.vgg16_bn(pretrained=False)
        # Drop the last two children so only the convolutional feature extractor remains.
        backbone = nn.Sequential(*list(backbone.children())[:-2])

        class model_vgg(nn.Module):
            def __init__(self, model, node_list):
                '''
                params:
                model (nn.Sequential): backbone
                node_list (int list): layer widths for the MLP head; first element is the
                                        input width, last element is the output width,
                                        elements in between are hidden widths
                '''
                super(model_vgg, self).__init__()
                self.features = model
                self.classifier = MLP(node_list)  # fc layers

            def forward(self, x):
                x = self.features(x)
                x = self.classifier(x)
                return x

        net = model_vgg(backbone, [512*1*1, 512, 10])  # construct the model

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        net = net.to(device)  # moves every submodule as well

        criterion = nn.CrossEntropyLoss()

        # set optimizer: the dropout parameters get their own (smaller) learning rate
        dp_params = []
        res_params = []
        for m in net.classifier.classifier:
            if isinstance(m, AdvancedDropout):
                dp_params.append(m.weight_h)
                dp_params.append(m.bias_h)
                dp_params.append(m.weight_mu)
                dp_params.append(m.bias_mu)
                dp_params.append(m.weight_sigma)
                dp_params.append(m.bias_sigma)
            elif isinstance(m, nn.Linear):
                res_params.append(m.weight)
                # nn.Linear always has a `bias` attribute; it is None when bias=False.
                if m.bias is not None:
                    res_params.append(m.bias)

        optimizer = optim.SGD([
            {'params': net.features.parameters(), 'lr': lr},
            {'params': res_params, 'lr': lr},
            {'params': dp_params, 'lr': 1e-4}
        ], momentum=0.9, weight_decay=5e-4)

        def train(epoch):
            net.train()
            # Toggle AFTER net.train(): Module.train() sets training=True on
            # every submodule, so toggling first would be silently undone and
            # dropout would stay active in the "Dropout: False" epochs.
            toggle_dropout(net, epoch)

            running_loss = 0.0
            correct = 0
            total = 0
            num_batches = 0

            for inputs, targets in trainloader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = net(inputs)

                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # .item() keeps plain Python floats in the history lists.
                running_loss += loss.item()
                num_batches += 1
                _, predicted = torch.max(outputs, -1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

            train_acc = 100. * correct / total
            train_loss = running_loss / num_batches
            print('Iteration %d, train_acc = %.4f, train_loss = %.4f' % (epoch, train_acc, train_loss))
            results_train_file.write('%d,%.4f,%.4f\n' % (epoch, train_acc, train_loss))
            results_train_file.flush()

            train_accuracies.append(train_acc)
            train_losses.append(train_loss)

            return train_acc, train_loss

        def test(epoch):
            net.eval()
            running_loss = 0.0
            correct = 0
            total = 0
            num_batches = 0
            # No gradients are needed for evaluation; skipping the autograd
            # graph saves memory and time.
            with torch.no_grad():
                for inputs, targets in testloader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = net(inputs)

                    loss = criterion(outputs, targets)

                    running_loss += loss.item()
                    num_batches += 1
                    _, predicted = torch.max(outputs, -1)
                    total += targets.size(0)
                    correct += predicted.eq(targets).sum().item()

            test_acc = 100. * correct / total
            test_loss = running_loss / num_batches
            print('Iteration %d, test_acc = %.4f, test_loss = %.4f' % (epoch, test_acc, test_loss))
            results_test_file.write('%d,%.4f,%.4f\n' % (epoch, test_acc, test_loss))
            results_test_file.flush()

            test_accuracies.append(test_acc)
            test_losses.append(test_loss)

            return test_acc, test_loss

        for epoch in tqdm(range(0, nb_epoch)):
            print('\nEpoch: %d' % epoch)
            if epoch in [75, 110]:  # learning rate drop (applies to every parameter group)
                for param_group in optimizer.param_groups:
                    param_group['lr'] /= 10
            train(epoch)
            test(epoch)
            torch.save(net.state_dict(), 'checkpoint.pth')

    # Plotting results
    epochs = range(len(train_accuracies))
    plt.figure(figsize=(12, 5))

    # Plot training and test accuracy
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_accuracies, label='Train Accuracy')
    plt.plot(epochs, test_accuracies, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Training and Test Accuracy')

    # Plot training and test loss
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_losses, label='Train Loss')
    plt.plot(epochs, test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Test Loss')

    plt.tight_layout()
    plt.show()

In [None]:
# Run the full experiment defined in main(): training, per-epoch evaluation,
# CSV logging, checkpointing and the final plots. The log below is its output.
main()

OPENING results_train.csv
OPENING results_test.csv
==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
==> Building model..


  0%|          | 0/300 [00:00<?, ?it/s]


Epoch: 0
Dropout: True
Dropout: True
Iteration 0, train_acc = 35.4520, train_loss = 1.7039


  0%|          | 1/300 [00:12<1:00:41, 12.18s/it]

Iteration 0, test_acc = 44.6300, test_loss = 1.4569

Epoch: 1
Dropout: True
Dropout: True
Iteration 1, train_acc = 55.4300, train_loss = 1.2637


  1%|          | 2/300 [00:24<1:00:48, 12.24s/it]

Iteration 1, test_acc = 63.4200, test_loss = 1.0643

Epoch: 2
Dropout: True
Dropout: True
Iteration 2, train_acc = 66.9000, train_loss = 0.9670


  1%|          | 3/300 [00:36<1:00:39, 12.25s/it]

Iteration 2, test_acc = 69.0900, test_loss = 0.8823

Epoch: 3
Dropout: True
Dropout: True
Iteration 3, train_acc = 73.3340, train_loss = 0.7941


  1%|▏         | 4/300 [00:49<1:00:41, 12.30s/it]

Iteration 3, test_acc = 64.2800, test_loss = 1.1317

Epoch: 4
Dropout: True
Dropout: True
Iteration 4, train_acc = 77.0440, train_loss = 0.6922


  2%|▏         | 5/300 [01:01<1:00:39, 12.34s/it]

Iteration 4, test_acc = 72.1600, test_loss = 0.8389

Epoch: 5
Dropout: True
Dropout: True
Iteration 5, train_acc = 79.2020, train_loss = 0.6293


  2%|▏         | 6/300 [01:14<1:00:48, 12.41s/it]

Iteration 5, test_acc = 76.9700, test_loss = 0.6974

Epoch: 6
Dropout: True
Dropout: True
Iteration 6, train_acc = 81.1980, train_loss = 0.5686


  2%|▏         | 7/300 [01:26<1:00:32, 12.40s/it]

Iteration 6, test_acc = 80.6600, test_loss = 0.5975

Epoch: 7
Dropout: True
Dropout: True
Iteration 7, train_acc = 82.1840, train_loss = 0.5411


  3%|▎         | 8/300 [01:38<1:00:15, 12.38s/it]

Iteration 7, test_acc = 71.4800, test_loss = 0.9087

Epoch: 8
Dropout: True
Dropout: True
Iteration 8, train_acc = 83.3460, train_loss = 0.5035


  3%|▎         | 9/300 [01:51<1:00:15, 12.42s/it]

Iteration 8, test_acc = 77.8300, test_loss = 0.6920

Epoch: 9
Dropout: True
Dropout: True
Iteration 9, train_acc = 84.0360, train_loss = 0.4860


  3%|▎         | 10/300 [02:03<59:51, 12.38s/it] 

Iteration 9, test_acc = 78.9600, test_loss = 0.5953

Epoch: 10
Dropout: True
Dropout: True
Iteration 10, train_acc = 84.9980, train_loss = 0.4580


  4%|▎         | 11/300 [02:15<59:30, 12.35s/it]

Iteration 10, test_acc = 75.4400, test_loss = 0.7611

Epoch: 11
Dropout: True
Dropout: True
Iteration 11, train_acc = 85.5640, train_loss = 0.4402


  4%|▍         | 12/300 [02:28<59:13, 12.34s/it]

Iteration 11, test_acc = 73.7800, test_loss = 0.7995

Epoch: 12
Dropout: True
Dropout: True
Iteration 12, train_acc = 86.1640, train_loss = 0.4190


  4%|▍         | 13/300 [02:40<58:56, 12.32s/it]

Iteration 12, test_acc = 79.1700, test_loss = 0.6669

Epoch: 13
Dropout: True
Dropout: True
Iteration 13, train_acc = 86.8060, train_loss = 0.4009


  5%|▍         | 14/300 [02:52<58:39, 12.31s/it]

Iteration 13, test_acc = 80.8200, test_loss = 0.5943

Epoch: 14
Dropout: True
Dropout: True
Iteration 14, train_acc = 86.9140, train_loss = 0.3998


  5%|▌         | 15/300 [03:05<58:28, 12.31s/it]

Iteration 14, test_acc = 79.4800, test_loss = 0.6040

Epoch: 15
Dropout: True
Dropout: True
Iteration 15, train_acc = 87.3480, train_loss = 0.3873


  5%|▌         | 16/300 [03:17<58:20, 12.33s/it]

Iteration 15, test_acc = 80.4500, test_loss = 0.6164

Epoch: 16
Dropout: True
Dropout: True
Iteration 16, train_acc = 87.8960, train_loss = 0.3729


  6%|▌         | 17/300 [03:29<58:15, 12.35s/it]

Iteration 16, test_acc = 79.7500, test_loss = 0.6386

Epoch: 17
Dropout: True
Dropout: True
Iteration 17, train_acc = 87.7020, train_loss = 0.3765


  6%|▌         | 18/300 [03:42<58:02, 12.35s/it]

Iteration 17, test_acc = 79.0300, test_loss = 0.6992

Epoch: 18
Dropout: True
Dropout: True
Iteration 18, train_acc = 88.1460, train_loss = 0.3592


  6%|▋         | 19/300 [03:54<57:50, 12.35s/it]

Iteration 18, test_acc = 82.7700, test_loss = 0.5220

Epoch: 19
Dropout: True
Dropout: True
Iteration 19, train_acc = 88.0980, train_loss = 0.3637


  7%|▋         | 20/300 [04:06<57:45, 12.38s/it]

Iteration 19, test_acc = 84.2200, test_loss = 0.4691

Epoch: 20
Dropout: False
Dropout: False
Iteration 20, train_acc = 88.6380, train_loss = 0.3513


  7%|▋         | 21/300 [04:19<57:43, 12.41s/it]

Iteration 20, test_acc = 73.9000, test_loss = 0.9649

Epoch: 21
Dropout: False
Dropout: False
Iteration 21, train_acc = 88.4460, train_loss = 0.3537


  7%|▋         | 22/300 [04:32<57:54, 12.50s/it]

Iteration 21, test_acc = 80.3700, test_loss = 0.5925

Epoch: 22
Dropout: False
Dropout: False
Iteration 22, train_acc = 88.4680, train_loss = 0.3492


  8%|▊         | 23/300 [04:45<58:20, 12.64s/it]

Iteration 22, test_acc = 85.9100, test_loss = 0.4386

Epoch: 23
Dropout: False
Dropout: False
Iteration 23, train_acc = 89.1600, train_loss = 0.3331


  8%|▊         | 24/300 [04:57<57:46, 12.56s/it]

Iteration 23, test_acc = 84.4400, test_loss = 0.4738

Epoch: 24
Dropout: False
Dropout: False
Iteration 24, train_acc = 88.6540, train_loss = 0.3438


  8%|▊         | 25/300 [05:09<57:14, 12.49s/it]

Iteration 24, test_acc = 83.4300, test_loss = 0.5066

Epoch: 25
Dropout: False
Dropout: False
Iteration 25, train_acc = 89.0620, train_loss = 0.3343


  9%|▊         | 26/300 [05:22<56:50, 12.45s/it]

Iteration 25, test_acc = 85.6400, test_loss = 0.4362

Epoch: 26
Dropout: False
Dropout: False
Iteration 26, train_acc = 89.2980, train_loss = 0.3306


  9%|▉         | 27/300 [05:34<56:29, 12.41s/it]

Iteration 26, test_acc = 84.1800, test_loss = 0.5065

Epoch: 27
Dropout: False
Dropout: False
Iteration 27, train_acc = 89.6160, train_loss = 0.3198


  9%|▉         | 28/300 [05:46<56:06, 12.38s/it]

Iteration 27, test_acc = 74.7900, test_loss = 0.8516

Epoch: 28
Dropout: False
Dropout: False
Iteration 28, train_acc = 89.3800, train_loss = 0.3272


 10%|▉         | 29/300 [05:59<55:42, 12.34s/it]

Iteration 28, test_acc = 83.5100, test_loss = 0.5284

Epoch: 29
Dropout: False
Dropout: False
Iteration 29, train_acc = 89.5600, train_loss = 0.3144


 10%|█         | 30/300 [06:11<55:28, 12.33s/it]

Iteration 29, test_acc = 85.9100, test_loss = 0.4497

Epoch: 30
Dropout: False
Dropout: False
Iteration 30, train_acc = 89.4980, train_loss = 0.3158


 10%|█         | 31/300 [06:23<55:18, 12.34s/it]

Iteration 30, test_acc = 85.2900, test_loss = 0.4647

Epoch: 31
Dropout: False
Dropout: False
Iteration 31, train_acc = 90.2140, train_loss = 0.3030


 11%|█         | 32/300 [06:36<55:09, 12.35s/it]

Iteration 31, test_acc = 83.7400, test_loss = 0.5277

Epoch: 32
Dropout: False
Dropout: False
Iteration 32, train_acc = 89.9360, train_loss = 0.3117


 11%|█         | 33/300 [06:48<54:51, 12.33s/it]

Iteration 32, test_acc = 78.8700, test_loss = 0.6634

Epoch: 33
Dropout: False
Dropout: False
Iteration 33, train_acc = 89.7340, train_loss = 0.3116


 11%|█▏        | 34/300 [07:00<54:42, 12.34s/it]

Iteration 33, test_acc = 84.8100, test_loss = 0.4688

Epoch: 34
Dropout: False
Dropout: False
Iteration 34, train_acc = 90.0080, train_loss = 0.3050


 12%|█▏        | 35/300 [07:13<54:31, 12.34s/it]

Iteration 34, test_acc = 81.7200, test_loss = 0.5804

Epoch: 35
Dropout: False
Dropout: False
Iteration 35, train_acc = 89.9120, train_loss = 0.3025


 12%|█▏        | 36/300 [07:25<54:29, 12.39s/it]

Iteration 35, test_acc = 85.8800, test_loss = 0.4560

Epoch: 36
Dropout: False
Dropout: False
Iteration 36, train_acc = 89.9000, train_loss = 0.3060


 12%|█▏        | 37/300 [07:37<54:16, 12.38s/it]

Iteration 36, test_acc = 84.5300, test_loss = 0.4877

Epoch: 37
Dropout: False
Dropout: False
Iteration 37, train_acc = 90.1140, train_loss = 0.3050


 13%|█▎        | 38/300 [07:50<53:55, 12.35s/it]

Iteration 37, test_acc = 81.0500, test_loss = 0.6429

Epoch: 38
Dropout: False
Dropout: False
Iteration 38, train_acc = 90.3140, train_loss = 0.2957


 13%|█▎        | 39/300 [08:02<53:45, 12.36s/it]

Iteration 38, test_acc = 85.4100, test_loss = 0.4593

Epoch: 39
Dropout: False
Dropout: False
Iteration 39, train_acc = 90.5160, train_loss = 0.2892


 13%|█▎        | 40/300 [08:14<53:33, 12.36s/it]

Iteration 39, test_acc = 84.1700, test_loss = 0.5205

Epoch: 40
Dropout: True
Dropout: True
Iteration 40, train_acc = 90.7500, train_loss = 0.2880


 14%|█▎        | 41/300 [08:27<53:29, 12.39s/it]

Iteration 40, test_acc = 85.4800, test_loss = 0.4516

Epoch: 41
Dropout: True
Dropout: True
Iteration 41, train_acc = 90.7680, train_loss = 0.2842


 14%|█▍        | 42/300 [08:39<53:11, 12.37s/it]

Iteration 41, test_acc = 84.1500, test_loss = 0.5148

Epoch: 42
Dropout: True
Dropout: True
Iteration 42, train_acc = 90.2480, train_loss = 0.2929


 14%|█▍        | 43/300 [08:52<52:53, 12.35s/it]

Iteration 42, test_acc = 80.2400, test_loss = 0.6696

Epoch: 43
Dropout: True
Dropout: True
Iteration 43, train_acc = 90.8740, train_loss = 0.2826


 15%|█▍        | 44/300 [09:04<52:39, 12.34s/it]

Iteration 43, test_acc = 85.9400, test_loss = 0.4459

Epoch: 44
Dropout: True
Dropout: True
Iteration 44, train_acc = 90.4360, train_loss = 0.2870


 15%|█▌        | 45/300 [09:16<52:26, 12.34s/it]

Iteration 44, test_acc = 85.2400, test_loss = 0.4743

Epoch: 45
Dropout: True
Dropout: True
Iteration 45, train_acc = 90.8640, train_loss = 0.2797


 15%|█▌        | 46/300 [09:29<52:15, 12.35s/it]

Iteration 45, test_acc = 75.4200, test_loss = 0.8539

Epoch: 46
Dropout: True
Dropout: True
Iteration 46, train_acc = 90.8240, train_loss = 0.2817


 16%|█▌        | 47/300 [09:41<52:03, 12.35s/it]

Iteration 46, test_acc = 83.8800, test_loss = 0.5038

Epoch: 47
Dropout: True
Dropout: True
Iteration 47, train_acc = 90.7800, train_loss = 0.2832


 16%|█▌        | 48/300 [09:53<51:44, 12.32s/it]

Iteration 47, test_acc = 82.1100, test_loss = 0.5411

Epoch: 48
Dropout: True
Dropout: True
Iteration 48, train_acc = 90.6180, train_loss = 0.2859


 16%|█▋        | 49/300 [10:05<51:31, 12.32s/it]

Iteration 48, test_acc = 85.1000, test_loss = 0.4544

Epoch: 49
Dropout: True
Dropout: True
Iteration 49, train_acc = 90.8240, train_loss = 0.2781


 17%|█▋        | 50/300 [10:18<51:14, 12.30s/it]

Iteration 49, test_acc = 84.1300, test_loss = 0.5146

Epoch: 50
Dropout: True
Dropout: True
Iteration 50, train_acc = 90.9780, train_loss = 0.2740


 17%|█▋        | 51/300 [10:30<51:01, 12.30s/it]

Iteration 50, test_acc = 86.2400, test_loss = 0.4315

Epoch: 51
Dropout: True
Dropout: True
Iteration 51, train_acc = 90.9820, train_loss = 0.2771


 17%|█▋        | 52/300 [10:42<50:48, 12.29s/it]

Iteration 51, test_acc = 83.3900, test_loss = 0.5289

Epoch: 52
Dropout: True
Dropout: True
Iteration 52, train_acc = 90.9460, train_loss = 0.2789


 18%|█▊        | 53/300 [10:55<50:34, 12.28s/it]

Iteration 52, test_acc = 80.4600, test_loss = 0.7409

Epoch: 53
Dropout: True
Dropout: True
Iteration 53, train_acc = 91.0200, train_loss = 0.2756


 18%|█▊        | 54/300 [11:07<50:24, 12.29s/it]

Iteration 53, test_acc = 84.7900, test_loss = 0.4632

Epoch: 54
Dropout: True
Dropout: True
Iteration 54, train_acc = 91.1600, train_loss = 0.2697


 18%|█▊        | 55/300 [11:19<50:06, 12.27s/it]

Iteration 54, test_acc = 86.1700, test_loss = 0.4399

Epoch: 55
Dropout: True
Dropout: True
Iteration 55, train_acc = 91.1720, train_loss = 0.2707


 19%|█▊        | 56/300 [11:31<49:54, 12.27s/it]

Iteration 55, test_acc = 82.2700, test_loss = 0.5882

Epoch: 56
Dropout: True
Dropout: True
Iteration 56, train_acc = 91.0680, train_loss = 0.2691


 19%|█▉        | 57/300 [11:44<49:44, 12.28s/it]

Iteration 56, test_acc = 85.1200, test_loss = 0.4650

Epoch: 57
Dropout: True
Dropout: True
Iteration 57, train_acc = 90.7760, train_loss = 0.2811


 19%|█▉        | 58/300 [11:56<49:43, 12.33s/it]

Iteration 57, test_acc = 85.2300, test_loss = 0.4626

Epoch: 58
Dropout: True
Dropout: True
Iteration 58, train_acc = 91.2300, train_loss = 0.2652


 20%|█▉        | 59/300 [12:08<49:33, 12.34s/it]

Iteration 58, test_acc = 85.0600, test_loss = 0.4857

Epoch: 59
Dropout: True
Dropout: True
Iteration 59, train_acc = 90.9960, train_loss = 0.2741


 20%|██        | 60/300 [12:21<49:20, 12.34s/it]

Iteration 59, test_acc = 83.6600, test_loss = 0.5411

Epoch: 60
Dropout: False
Dropout: False
Iteration 60, train_acc = 91.1680, train_loss = 0.2686


 20%|██        | 61/300 [12:33<49:07, 12.33s/it]

Iteration 60, test_acc = 83.6000, test_loss = 0.5427

Epoch: 61
Dropout: False
Dropout: False
Iteration 61, train_acc = 91.1700, train_loss = 0.2676


 21%|██        | 62/300 [12:46<48:58, 12.35s/it]

Iteration 61, test_acc = 82.8900, test_loss = 0.5701

Epoch: 62
Dropout: False
Dropout: False
Iteration 62, train_acc = 91.2440, train_loss = 0.2660


 21%|██        | 63/300 [12:58<48:43, 12.33s/it]

Iteration 62, test_acc = 80.9900, test_loss = 0.6586

Epoch: 63
Dropout: False
Dropout: False
Iteration 63, train_acc = 91.2780, train_loss = 0.2672


 21%|██▏       | 64/300 [13:10<48:26, 12.32s/it]

Iteration 63, test_acc = 84.6300, test_loss = 0.4946

Epoch: 64
Dropout: False
Dropout: False
Iteration 64, train_acc = 91.1960, train_loss = 0.2693


 22%|██▏       | 65/300 [13:22<48:16, 12.33s/it]

Iteration 64, test_acc = 82.3100, test_loss = 0.6170

Epoch: 65
Dropout: False
Dropout: False
Iteration 65, train_acc = 91.2300, train_loss = 0.2699


 22%|██▏       | 66/300 [13:35<48:03, 12.32s/it]

Iteration 65, test_acc = 84.5500, test_loss = 0.5187

Epoch: 66
Dropout: False
Dropout: False
Iteration 66, train_acc = 91.2640, train_loss = 0.2656


 22%|██▏       | 67/300 [13:47<47:46, 12.30s/it]

Iteration 66, test_acc = 83.2200, test_loss = 0.5562

Epoch: 67
Dropout: False
Dropout: False
Iteration 67, train_acc = 91.4460, train_loss = 0.2611


 23%|██▎       | 68/300 [13:59<47:30, 12.29s/it]

Iteration 67, test_acc = 82.4900, test_loss = 0.5525

Epoch: 68
Dropout: False
Dropout: False
Iteration 68, train_acc = 91.1800, train_loss = 0.2638


 23%|██▎       | 69/300 [14:12<47:18, 12.29s/it]

Iteration 68, test_acc = 79.3100, test_loss = 0.6857

Epoch: 69
Dropout: False
Dropout: False
Iteration 69, train_acc = 91.2960, train_loss = 0.2648


 23%|██▎       | 70/300 [14:24<47:03, 12.27s/it]

Iteration 69, test_acc = 82.9200, test_loss = 0.5373

Epoch: 70
Dropout: False
Dropout: False
Iteration 70, train_acc = 91.5440, train_loss = 0.2578


 24%|██▎       | 71/300 [14:36<47:00, 12.32s/it]

Iteration 70, test_acc = 84.1600, test_loss = 0.5707

Epoch: 71
Dropout: False
Dropout: False
Iteration 71, train_acc = 91.6040, train_loss = 0.2585


 24%|██▍       | 72/300 [14:49<46:47, 12.31s/it]

Iteration 71, test_acc = 87.4600, test_loss = 0.3870

Epoch: 72
Dropout: False
Dropout: False
Iteration 72, train_acc = 91.3240, train_loss = 0.2651


 24%|██▍       | 73/300 [15:01<46:30, 12.29s/it]

Iteration 72, test_acc = 86.2100, test_loss = 0.4235

Epoch: 73
Dropout: False
Dropout: False
Iteration 73, train_acc = 91.5980, train_loss = 0.2586


 25%|██▍       | 74/300 [15:13<46:21, 12.31s/it]

Iteration 73, test_acc = 83.6700, test_loss = 0.5137

Epoch: 74
Dropout: False
Dropout: False
Iteration 74, train_acc = 91.3020, train_loss = 0.2656


 25%|██▌       | 75/300 [15:26<46:17, 12.34s/it]

Iteration 74, test_acc = 86.5300, test_loss = 0.4297

Epoch: 75
Dropout: False
Dropout: False
Iteration 75, train_acc = 95.5680, train_loss = 0.1356


 25%|██▌       | 76/300 [15:38<46:15, 12.39s/it]

Iteration 75, test_acc = 92.1100, test_loss = 0.2414

Epoch: 76
Dropout: False
Dropout: False
Iteration 76, train_acc = 97.0300, train_loss = 0.0914


 26%|██▌       | 77/300 [15:50<45:53, 12.35s/it]

Iteration 76, test_acc = 92.4400, test_loss = 0.2409

Epoch: 77
Dropout: False
Dropout: False
Iteration 77, train_acc = 97.5540, train_loss = 0.0745


 26%|██▌       | 78/300 [16:03<45:39, 12.34s/it]

Iteration 77, test_acc = 92.9500, test_loss = 0.2436

Epoch: 78
Dropout: False
Dropout: False
Iteration 78, train_acc = 97.9840, train_loss = 0.0627


 26%|██▋       | 79/300 [16:15<45:22, 12.32s/it]

Iteration 78, test_acc = 92.9100, test_loss = 0.2481

Epoch: 79
Dropout: False
Dropout: False
Iteration 79, train_acc = 98.2140, train_loss = 0.0563


 27%|██▋       | 80/300 [16:27<45:08, 12.31s/it]

Iteration 79, test_acc = 93.0300, test_loss = 0.2502

Epoch: 80
Dropout: True
Dropout: True
Iteration 80, train_acc = 98.2960, train_loss = 0.0516


 27%|██▋       | 81/300 [16:39<44:55, 12.31s/it]

Iteration 80, test_acc = 93.1400, test_loss = 0.2405

Epoch: 81
Dropout: True
Dropout: True
Iteration 81, train_acc = 98.5380, train_loss = 0.0453


 27%|██▋       | 82/300 [16:52<44:44, 12.31s/it]

Iteration 81, test_acc = 93.1300, test_loss = 0.2547

Epoch: 82
Dropout: True
Dropout: True
Iteration 82, train_acc = 98.6840, train_loss = 0.0405


 28%|██▊       | 83/300 [17:04<44:27, 12.29s/it]

Iteration 82, test_acc = 93.2100, test_loss = 0.2523

Epoch: 83
Dropout: True
Dropout: True
Iteration 83, train_acc = 98.7820, train_loss = 0.0376


 28%|██▊       | 84/300 [17:16<44:17, 12.31s/it]

Iteration 83, test_acc = 93.3000, test_loss = 0.2586

Epoch: 84
Dropout: True
Dropout: True
Iteration 84, train_acc = 98.8140, train_loss = 0.0352


 28%|██▊       | 85/300 [17:29<44:02, 12.29s/it]

Iteration 84, test_acc = 93.4600, test_loss = 0.2562

Epoch: 85
Dropout: True
Dropout: True
Iteration 85, train_acc = 98.9920, train_loss = 0.0316


 29%|██▊       | 86/300 [17:41<43:54, 12.31s/it]

Iteration 85, test_acc = 93.1600, test_loss = 0.2690

Epoch: 86
Dropout: True
Dropout: True
Iteration 86, train_acc = 99.0380, train_loss = 0.0307


 29%|██▉       | 87/300 [17:53<43:37, 12.29s/it]

Iteration 86, test_acc = 93.2900, test_loss = 0.2734

Epoch: 87
Dropout: True
Dropout: True
Iteration 87, train_acc = 99.1200, train_loss = 0.0267


 29%|██▉       | 88/300 [18:06<43:28, 12.30s/it]

Iteration 87, test_acc = 93.1500, test_loss = 0.2783

Epoch: 88
Dropout: True
Dropout: True
Iteration 88, train_acc = 99.2080, train_loss = 0.0240


 30%|██▉       | 89/300 [18:18<43:18, 12.31s/it]

Iteration 88, test_acc = 93.3300, test_loss = 0.2820

Epoch: 89
Dropout: True
Dropout: True
Iteration 89, train_acc = 99.2480, train_loss = 0.0229


 30%|███       | 90/300 [18:30<43:08, 12.33s/it]

Iteration 89, test_acc = 92.9600, test_loss = 0.3014

Epoch: 90
Dropout: True
Dropout: True
Iteration 90, train_acc = 99.2860, train_loss = 0.0228


 30%|███       | 91/300 [18:43<42:55, 12.32s/it]

Iteration 90, test_acc = 92.7600, test_loss = 0.3030

Epoch: 91
Dropout: True
Dropout: True
Iteration 91, train_acc = 99.2720, train_loss = 0.0223


 31%|███       | 92/300 [18:55<42:45, 12.33s/it]

Iteration 91, test_acc = 92.9600, test_loss = 0.2912

Epoch: 92
Dropout: True
Dropout: True
Iteration 92, train_acc = 99.3660, train_loss = 0.0208


 31%|███       | 93/300 [19:07<42:31, 12.33s/it]

Iteration 92, test_acc = 92.9900, test_loss = 0.3065

Epoch: 93
Dropout: True
Dropout: True
Iteration 93, train_acc = 99.3080, train_loss = 0.0213


 31%|███▏      | 94/300 [19:20<42:18, 12.32s/it]

Iteration 93, test_acc = 92.6500, test_loss = 0.3164

Epoch: 94
Dropout: True
Dropout: True
Iteration 94, train_acc = 99.2680, train_loss = 0.0215


 32%|███▏      | 95/300 [19:32<42:00, 12.30s/it]

Iteration 94, test_acc = 93.0500, test_loss = 0.3031

Epoch: 95
Dropout: True
Dropout: True
Iteration 95, train_acc = 99.3700, train_loss = 0.0196


 32%|███▏      | 96/300 [19:44<41:48, 12.30s/it]

Iteration 95, test_acc = 93.1700, test_loss = 0.2989

Epoch: 96
Dropout: True
Dropout: True
Iteration 96, train_acc = 99.4600, train_loss = 0.0167


 32%|███▏      | 97/300 [19:56<41:38, 12.31s/it]

Iteration 96, test_acc = 93.0100, test_loss = 0.3078

Epoch: 97
Dropout: True
Dropout: True
Iteration 97, train_acc = 99.3660, train_loss = 0.0200


 33%|███▎      | 98/300 [20:09<41:26, 12.31s/it]

Iteration 97, test_acc = 92.6100, test_loss = 0.3182

Epoch: 98
Dropout: True
Dropout: True
Iteration 98, train_acc = 99.3560, train_loss = 0.0192


 33%|███▎      | 99/300 [20:21<41:10, 12.29s/it]

Iteration 98, test_acc = 92.8000, test_loss = 0.3055

Epoch: 99
Dropout: True
Dropout: True
Iteration 99, train_acc = 99.4140, train_loss = 0.0177


 33%|███▎      | 100/300 [20:33<40:53, 12.27s/it]

Iteration 99, test_acc = 92.7400, test_loss = 0.3094

Epoch: 100
Dropout: False
Dropout: False
Iteration 100, train_acc = 99.3340, train_loss = 0.0200


 34%|███▎      | 101/300 [20:46<40:45, 12.29s/it]

Iteration 100, test_acc = 92.9600, test_loss = 0.3063

Epoch: 101
Dropout: False
Dropout: False
Iteration 101, train_acc = 99.3360, train_loss = 0.0188


 34%|███▍      | 102/300 [20:58<40:36, 12.31s/it]

Iteration 101, test_acc = 92.5000, test_loss = 0.3451

Epoch: 102
Dropout: False
Dropout: False
Iteration 102, train_acc = 99.4400, train_loss = 0.0165


 34%|███▍      | 103/300 [21:10<40:27, 12.32s/it]

Iteration 102, test_acc = 92.9100, test_loss = 0.3136

Epoch: 103
Dropout: False
Dropout: False
Iteration 103, train_acc = 99.3960, train_loss = 0.0176


 35%|███▍      | 104/300 [21:22<40:10, 12.30s/it]

Iteration 103, test_acc = 92.9200, test_loss = 0.3168

Epoch: 104
Dropout: False
Dropout: False
Iteration 104, train_acc = 99.3920, train_loss = 0.0174


 35%|███▌      | 105/300 [21:35<39:54, 12.28s/it]

Iteration 104, test_acc = 92.9300, test_loss = 0.3134

Epoch: 105
Dropout: False
Dropout: False
Iteration 105, train_acc = 99.4060, train_loss = 0.0180


 35%|███▌      | 106/300 [21:47<39:43, 12.29s/it]

Iteration 105, test_acc = 93.0000, test_loss = 0.3034

Epoch: 106
Dropout: False
Dropout: False
Iteration 106, train_acc = 99.3980, train_loss = 0.0188


 36%|███▌      | 107/300 [21:59<39:26, 12.26s/it]

Iteration 106, test_acc = 92.2000, test_loss = 0.3384

Epoch: 107
Dropout: False
Dropout: False
Iteration 107, train_acc = 99.3860, train_loss = 0.0175


 36%|███▌      | 108/300 [22:11<39:14, 12.26s/it]

Iteration 107, test_acc = 92.9100, test_loss = 0.3189

Epoch: 108
Dropout: False
Dropout: False
Iteration 108, train_acc = 99.4160, train_loss = 0.0176


 36%|███▋      | 109/300 [22:24<38:58, 12.24s/it]

Iteration 108, test_acc = 92.6800, test_loss = 0.3280

Epoch: 109
Dropout: False
Dropout: False
Iteration 109, train_acc = 99.3500, train_loss = 0.0201


 37%|███▋      | 110/300 [22:36<38:47, 12.25s/it]

Iteration 109, test_acc = 92.5300, test_loss = 0.3327

Epoch: 110
Dropout: False
Dropout: False
Iteration 110, train_acc = 99.6000, train_loss = 0.0131


 37%|███▋      | 111/300 [22:48<38:40, 12.28s/it]

Iteration 110, test_acc = 93.1900, test_loss = 0.2944

Epoch: 111
Dropout: False
Dropout: False
Iteration 111, train_acc = 99.7360, train_loss = 0.0089


 37%|███▋      | 112/300 [23:01<38:26, 12.27s/it]

Iteration 111, test_acc = 93.3400, test_loss = 0.2922

Epoch: 112
Dropout: False
Dropout: False
Iteration 112, train_acc = 99.7980, train_loss = 0.0075


 38%|███▊      | 113/300 [23:13<38:17, 12.28s/it]

Iteration 112, test_acc = 93.3500, test_loss = 0.2911

Epoch: 113
Dropout: False
Dropout: False
Iteration 113, train_acc = 99.8080, train_loss = 0.0071


 38%|███▊      | 114/300 [23:25<38:04, 12.28s/it]

Iteration 113, test_acc = 93.3300, test_loss = 0.2917

Epoch: 114
Dropout: False
Dropout: False
Iteration 114, train_acc = 99.8340, train_loss = 0.0059


 38%|███▊      | 115/300 [23:37<37:52, 12.28s/it]

Iteration 114, test_acc = 93.3700, test_loss = 0.2912

Epoch: 115
Dropout: False
Dropout: False
Iteration 115, train_acc = 99.8220, train_loss = 0.0065


 39%|███▊      | 116/300 [23:50<37:43, 12.30s/it]

Iteration 115, test_acc = 93.5100, test_loss = 0.2908

Epoch: 116
Dropout: False
Dropout: False
Iteration 116, train_acc = 99.8580, train_loss = 0.0053


 39%|███▉      | 117/300 [24:02<37:31, 12.30s/it]

Iteration 116, test_acc = 93.4600, test_loss = 0.2908

Epoch: 117
Dropout: False
Dropout: False
Iteration 117, train_acc = 99.8560, train_loss = 0.0051


 39%|███▉      | 118/300 [24:14<37:15, 12.28s/it]

Iteration 117, test_acc = 93.4400, test_loss = 0.2884

Epoch: 118
Dropout: False
Dropout: False
Iteration 118, train_acc = 99.8360, train_loss = 0.0058


 40%|███▉      | 119/300 [24:27<37:01, 12.27s/it]

Iteration 118, test_acc = 93.5400, test_loss = 0.2881

Epoch: 119
Dropout: False
Dropout: False
Iteration 119, train_acc = 99.8800, train_loss = 0.0046


 40%|████      | 120/300 [24:39<36:54, 12.30s/it]

Iteration 119, test_acc = 93.6500, test_loss = 0.2896

Epoch: 120
Dropout: True
Dropout: True
Iteration 120, train_acc = 99.8900, train_loss = 0.0043


 40%|████      | 121/300 [24:51<36:43, 12.31s/it]

Iteration 120, test_acc = 93.5100, test_loss = 0.2902

Epoch: 121
Dropout: True
Dropout: True
Iteration 121, train_acc = 99.8860, train_loss = 0.0046


 41%|████      | 122/300 [25:04<36:30, 12.30s/it]

Iteration 121, test_acc = 93.6900, test_loss = 0.2936

Epoch: 122
Dropout: True
Dropout: True
Iteration 122, train_acc = 99.8920, train_loss = 0.0043


 41%|████      | 123/300 [25:16<36:13, 12.28s/it]

Iteration 122, test_acc = 93.6700, test_loss = 0.2916

Epoch: 123
Dropout: True
Dropout: True
Iteration 123, train_acc = 99.9140, train_loss = 0.0040


 41%|████▏     | 124/300 [25:28<36:02, 12.29s/it]

Iteration 123, test_acc = 93.5500, test_loss = 0.2938

Epoch: 124
Dropout: True
Dropout: True
Iteration 124, train_acc = 99.9120, train_loss = 0.0038


 42%|████▏     | 125/300 [25:40<35:49, 12.28s/it]

Iteration 124, test_acc = 93.6200, test_loss = 0.2916

Epoch: 125
Dropout: True
Dropout: True
Iteration 125, train_acc = 99.9100, train_loss = 0.0037


 42%|████▏     | 126/300 [25:53<35:34, 12.27s/it]

Iteration 125, test_acc = 93.6200, test_loss = 0.2916

Epoch: 126
Dropout: True
Dropout: True
Iteration 126, train_acc = 99.9180, train_loss = 0.0034


 42%|████▏     | 127/300 [26:05<35:23, 12.27s/it]

Iteration 126, test_acc = 93.5800, test_loss = 0.2920

Epoch: 127
Dropout: True
Dropout: True
Iteration 127, train_acc = 99.9340, train_loss = 0.0032


 43%|████▎     | 128/300 [26:17<35:09, 12.26s/it]

Iteration 127, test_acc = 93.6200, test_loss = 0.2946

Epoch: 128
Dropout: True
Dropout: True
Iteration 128, train_acc = 99.9080, train_loss = 0.0037


 43%|████▎     | 129/300 [26:29<35:00, 12.29s/it]

Iteration 128, test_acc = 93.5700, test_loss = 0.2949

Epoch: 129
Dropout: True
Dropout: True
Iteration 129, train_acc = 99.9200, train_loss = 0.0028


 43%|████▎     | 130/300 [26:42<34:52, 12.31s/it]

Iteration 129, test_acc = 93.5200, test_loss = 0.2968

Epoch: 130
Dropout: True
Dropout: True
Iteration 130, train_acc = 99.9280, train_loss = 0.0034


 44%|████▎     | 131/300 [26:54<34:40, 12.31s/it]

Iteration 130, test_acc = 93.6700, test_loss = 0.2954

Epoch: 131
Dropout: True
Dropout: True
Iteration 131, train_acc = 99.9380, train_loss = 0.0029


 44%|████▍     | 132/300 [27:06<34:27, 12.30s/it]

Iteration 131, test_acc = 93.6200, test_loss = 0.2987

Epoch: 132
Dropout: True
Dropout: True
Iteration 132, train_acc = 99.9320, train_loss = 0.0030


 44%|████▍     | 133/300 [27:19<34:12, 12.29s/it]

Iteration 132, test_acc = 93.6200, test_loss = 0.2992

Epoch: 133
Dropout: True
Dropout: True
Iteration 133, train_acc = 99.9220, train_loss = 0.0034


 45%|████▍     | 134/300 [27:31<33:58, 12.28s/it]

Iteration 133, test_acc = 93.6100, test_loss = 0.3000

Epoch: 134
Dropout: True
Dropout: True
Iteration 134, train_acc = 99.9380, train_loss = 0.0029


 45%|████▌     | 135/300 [27:43<33:46, 12.28s/it]

Iteration 134, test_acc = 93.6300, test_loss = 0.2986

Epoch: 135
Dropout: True
Dropout: True
Iteration 135, train_acc = 99.9400, train_loss = 0.0028


 45%|████▌     | 136/300 [27:55<33:34, 12.28s/it]

Iteration 135, test_acc = 93.6800, test_loss = 0.2968

Epoch: 136
Dropout: True
Dropout: True
Iteration 136, train_acc = 99.9440, train_loss = 0.0025


 46%|████▌     | 137/300 [28:08<33:20, 12.27s/it]

Iteration 136, test_acc = 93.6100, test_loss = 0.2993

Epoch: 137
Dropout: True
Dropout: True
Iteration 137, train_acc = 99.9440, train_loss = 0.0028


 46%|████▌     | 138/300 [28:20<33:06, 12.26s/it]

Iteration 137, test_acc = 93.6600, test_loss = 0.2974

Epoch: 138
Dropout: True
Dropout: True
Iteration 138, train_acc = 99.9360, train_loss = 0.0029


 46%|████▋     | 139/300 [28:32<32:55, 12.27s/it]

Iteration 138, test_acc = 93.5800, test_loss = 0.3008

Epoch: 139
Dropout: True
Dropout: True
Iteration 139, train_acc = 99.9440, train_loss = 0.0029


 47%|████▋     | 140/300 [28:45<32:42, 12.27s/it]

Iteration 139, test_acc = 93.6100, test_loss = 0.2991

Epoch: 140
Dropout: False
Dropout: False
Iteration 140, train_acc = 99.9200, train_loss = 0.0032


 47%|████▋     | 141/300 [28:57<32:28, 12.26s/it]

Iteration 140, test_acc = 93.6700, test_loss = 0.3042

Epoch: 141
Dropout: False
Dropout: False
Iteration 141, train_acc = 99.9360, train_loss = 0.0028


 47%|████▋     | 142/300 [29:09<32:16, 12.26s/it]

Iteration 141, test_acc = 93.6200, test_loss = 0.3007

Epoch: 142
Dropout: False
Dropout: False
Iteration 142, train_acc = 99.9400, train_loss = 0.0028


 48%|████▊     | 143/300 [29:21<32:08, 12.28s/it]

Iteration 142, test_acc = 93.5500, test_loss = 0.2996

Epoch: 143
Dropout: False
Dropout: False
Iteration 143, train_acc = 99.9420, train_loss = 0.0025


 48%|████▊     | 144/300 [29:34<31:56, 12.29s/it]

Iteration 143, test_acc = 93.6700, test_loss = 0.3035

Epoch: 144
Dropout: False
Dropout: False
Iteration 144, train_acc = 99.9480, train_loss = 0.0027


 48%|████▊     | 145/300 [29:46<31:46, 12.30s/it]

Iteration 144, test_acc = 93.6500, test_loss = 0.3077

Epoch: 145
Dropout: False
Dropout: False
Iteration 145, train_acc = 99.9460, train_loss = 0.0025


 49%|████▊     | 146/300 [29:58<31:35, 12.31s/it]

Iteration 145, test_acc = 93.6400, test_loss = 0.3032

Epoch: 146
Dropout: False
Dropout: False
Iteration 146, train_acc = 99.9580, train_loss = 0.0024


 49%|████▉     | 147/300 [30:11<31:22, 12.30s/it]

Iteration 146, test_acc = 93.6700, test_loss = 0.3031

Epoch: 147
Dropout: False
Dropout: False
Iteration 147, train_acc = 99.9620, train_loss = 0.0021


 49%|████▉     | 148/300 [30:23<31:08, 12.29s/it]

Iteration 147, test_acc = 93.7100, test_loss = 0.3039

Epoch: 148
Dropout: False
Dropout: False
Iteration 148, train_acc = 99.9580, train_loss = 0.0022


 50%|████▉     | 149/300 [30:35<30:51, 12.26s/it]

Iteration 148, test_acc = 93.7500, test_loss = 0.3030

Epoch: 149
Dropout: False
Dropout: False
Iteration 149, train_acc = 99.9680, train_loss = 0.0021


 50%|█████     | 150/300 [30:47<30:34, 12.23s/it]

Iteration 149, test_acc = 93.6700, test_loss = 0.3028

Epoch: 150
Dropout: False
Dropout: False
Iteration 150, train_acc = 99.9580, train_loss = 0.0021


 50%|█████     | 151/300 [30:59<30:17, 12.20s/it]

Iteration 150, test_acc = 93.6000, test_loss = 0.3052

Epoch: 151
Dropout: False
Dropout: False
Iteration 151, train_acc = 99.9420, train_loss = 0.0022


 51%|█████     | 152/300 [31:12<30:04, 12.19s/it]

Iteration 151, test_acc = 93.7200, test_loss = 0.3066

Epoch: 152
Dropout: False
Dropout: False
Iteration 152, train_acc = 99.9540, train_loss = 0.0023


 51%|█████     | 153/300 [31:24<29:49, 12.17s/it]

Iteration 152, test_acc = 93.6800, test_loss = 0.3030

Epoch: 153
Dropout: False
Dropout: False
Iteration 153, train_acc = 99.9820, train_loss = 0.0017


 51%|█████▏    | 154/300 [31:36<29:39, 12.19s/it]

Iteration 153, test_acc = 93.6200, test_loss = 0.3052

Epoch: 154
Dropout: False
Dropout: False
Iteration 154, train_acc = 99.9580, train_loss = 0.0020


 52%|█████▏    | 155/300 [31:48<29:27, 12.19s/it]

Iteration 154, test_acc = 93.6800, test_loss = 0.3090

Epoch: 155
Dropout: False
Dropout: False
Iteration 155, train_acc = 99.9560, train_loss = 0.0023


 52%|█████▏    | 156/300 [32:00<29:14, 12.18s/it]

Iteration 155, test_acc = 93.7800, test_loss = 0.3064

Epoch: 156
Dropout: False
Dropout: False
Iteration 156, train_acc = 99.9660, train_loss = 0.0020


 52%|█████▏    | 157/300 [32:12<29:02, 12.19s/it]

Iteration 156, test_acc = 93.7700, test_loss = 0.3043

Epoch: 157
Dropout: False
Dropout: False
Iteration 157, train_acc = 99.9660, train_loss = 0.0021


 53%|█████▎    | 158/300 [32:25<28:48, 12.18s/it]

Iteration 157, test_acc = 93.6900, test_loss = 0.3054

Epoch: 158
Dropout: False
Dropout: False
Iteration 158, train_acc = 99.9680, train_loss = 0.0019


 53%|█████▎    | 159/300 [32:37<28:38, 12.18s/it]

Iteration 158, test_acc = 93.7500, test_loss = 0.3048

Epoch: 159
Dropout: False
Dropout: False
Iteration 159, train_acc = 99.9580, train_loss = 0.0020


 53%|█████▎    | 160/300 [32:49<28:30, 12.22s/it]

Iteration 159, test_acc = 93.6400, test_loss = 0.3067

Epoch: 160
Dropout: True
Dropout: True
Iteration 160, train_acc = 99.9580, train_loss = 0.0020


 54%|█████▎    | 161/300 [33:01<28:17, 12.21s/it]

Iteration 160, test_acc = 93.6700, test_loss = 0.3079

Epoch: 161
Dropout: True
Dropout: True
Iteration 161, train_acc = 99.9640, train_loss = 0.0019


 54%|█████▍    | 162/300 [33:13<28:04, 12.21s/it]

Iteration 161, test_acc = 93.6500, test_loss = 0.3082

Epoch: 162
Dropout: True
Dropout: True
Iteration 162, train_acc = 99.9640, train_loss = 0.0019


 54%|█████▍    | 163/300 [33:26<27:52, 12.21s/it]

Iteration 162, test_acc = 93.6300, test_loss = 0.3080

Epoch: 163
Dropout: True
Dropout: True
Iteration 163, train_acc = 99.9620, train_loss = 0.0018


 55%|█████▍    | 164/300 [33:38<27:39, 12.20s/it]

Iteration 163, test_acc = 93.7000, test_loss = 0.3114

Epoch: 164
Dropout: True
Dropout: True
Iteration 164, train_acc = 99.9500, train_loss = 0.0023


 55%|█████▌    | 165/300 [33:50<27:24, 12.19s/it]

Iteration 164, test_acc = 93.6800, test_loss = 0.3078

Epoch: 165
Dropout: True
Dropout: True
Iteration 165, train_acc = 99.9560, train_loss = 0.0021


 55%|█████▌    | 166/300 [34:02<27:14, 12.20s/it]

Iteration 165, test_acc = 93.6700, test_loss = 0.3090

Epoch: 166
Dropout: True
Dropout: True
Iteration 166, train_acc = 99.9660, train_loss = 0.0019


 56%|█████▌    | 167/300 [34:14<27:01, 12.20s/it]

Iteration 166, test_acc = 93.7700, test_loss = 0.3071

Epoch: 167
Dropout: True
Dropout: True
Iteration 167, train_acc = 99.9620, train_loss = 0.0020


 56%|█████▌    | 168/300 [34:27<26:48, 12.19s/it]

Iteration 167, test_acc = 93.6700, test_loss = 0.3094

Epoch: 168
Dropout: True
Dropout: True
Iteration 168, train_acc = 99.9600, train_loss = 0.0021


 56%|█████▋    | 169/300 [34:39<26:37, 12.20s/it]

Iteration 168, test_acc = 93.6000, test_loss = 0.3134

Epoch: 169
Dropout: True
Dropout: True
Iteration 169, train_acc = 99.9680, train_loss = 0.0019


 57%|█████▋    | 170/300 [34:51<26:23, 12.18s/it]

Iteration 169, test_acc = 93.6500, test_loss = 0.3104

Epoch: 170
Dropout: True
Dropout: True
Iteration 170, train_acc = 99.9780, train_loss = 0.0019


 57%|█████▋    | 171/300 [35:03<26:11, 12.18s/it]

Iteration 170, test_acc = 93.7300, test_loss = 0.3094

Epoch: 171
Dropout: True
Dropout: True
Iteration 171, train_acc = 99.9720, train_loss = 0.0016


 57%|█████▋    | 172/300 [35:15<25:58, 12.18s/it]

Iteration 171, test_acc = 93.6600, test_loss = 0.3113

Epoch: 172
Dropout: True
Dropout: True
Iteration 172, train_acc = 99.9680, train_loss = 0.0019


 58%|█████▊    | 173/300 [35:27<25:46, 12.17s/it]

Iteration 172, test_acc = 93.6300, test_loss = 0.3089

Epoch: 173
Dropout: True
Dropout: True
Iteration 173, train_acc = 99.9560, train_loss = 0.0021


 58%|█████▊    | 174/300 [35:40<25:38, 12.21s/it]

Iteration 173, test_acc = 93.7400, test_loss = 0.3122

Epoch: 174
Dropout: True
Dropout: True
Iteration 174, train_acc = 99.9700, train_loss = 0.0019


 58%|█████▊    | 175/300 [35:52<25:27, 12.22s/it]

Iteration 174, test_acc = 93.6700, test_loss = 0.3131

Epoch: 175
Dropout: True
Dropout: True
Iteration 175, train_acc = 99.9820, train_loss = 0.0016


 59%|█████▊    | 176/300 [36:04<25:17, 12.24s/it]

Iteration 175, test_acc = 93.7500, test_loss = 0.3105

Epoch: 176
Dropout: True
Dropout: True
Iteration 176, train_acc = 99.9640, train_loss = 0.0017


 59%|█████▉    | 177/300 [36:17<25:07, 12.26s/it]

Iteration 176, test_acc = 93.7000, test_loss = 0.3134

Epoch: 177
Dropout: True
Dropout: True
Iteration 177, train_acc = 99.9680, train_loss = 0.0017


 59%|█████▉    | 178/300 [36:29<24:54, 12.25s/it]

Iteration 177, test_acc = 93.6100, test_loss = 0.3137

Epoch: 178
Dropout: True
Dropout: True
Iteration 178, train_acc = 99.9820, train_loss = 0.0015


 60%|█████▉    | 179/300 [36:41<24:40, 12.24s/it]

Iteration 178, test_acc = 93.6000, test_loss = 0.3124

Epoch: 179
Dropout: True
Dropout: True
Iteration 179, train_acc = 99.9800, train_loss = 0.0016


 60%|██████    | 180/300 [36:53<24:31, 12.26s/it]

Iteration 179, test_acc = 93.5800, test_loss = 0.3121

Epoch: 180
Dropout: False
Dropout: False
Iteration 180, train_acc = 99.9720, train_loss = 0.0016


 60%|██████    | 181/300 [37:06<24:18, 12.26s/it]

Iteration 180, test_acc = 93.6600, test_loss = 0.3114

Epoch: 181
Dropout: False
Dropout: False
Iteration 181, train_acc = 99.9500, train_loss = 0.0022


 61%|██████    | 182/300 [37:18<24:05, 12.25s/it]

Iteration 181, test_acc = 93.6700, test_loss = 0.3124

Epoch: 182
Dropout: False
Dropout: False
Iteration 182, train_acc = 99.9740, train_loss = 0.0015


 61%|██████    | 183/300 [37:30<23:48, 12.21s/it]

Iteration 182, test_acc = 93.7200, test_loss = 0.3143

Epoch: 183
Dropout: False
Dropout: False
Iteration 183, train_acc = 99.9640, train_loss = 0.0020


 61%|██████▏   | 184/300 [37:42<23:37, 12.22s/it]

Iteration 183, test_acc = 93.8000, test_loss = 0.3116

Epoch: 184
Dropout: False
Dropout: False
Iteration 184, train_acc = 99.9580, train_loss = 0.0019


 62%|██████▏   | 185/300 [37:54<23:25, 12.22s/it]

Iteration 184, test_acc = 93.6800, test_loss = 0.3132

Epoch: 185
Dropout: False
Dropout: False
Iteration 185, train_acc = 99.9680, train_loss = 0.0019


 62%|██████▏   | 186/300 [38:07<23:12, 12.21s/it]

Iteration 185, test_acc = 93.6700, test_loss = 0.3101

Epoch: 186
Dropout: False
Dropout: False
Iteration 186, train_acc = 99.9720, train_loss = 0.0017


 62%|██████▏   | 187/300 [38:19<23:00, 12.21s/it]

Iteration 186, test_acc = 93.6900, test_loss = 0.3140

Epoch: 187
Dropout: False
Dropout: False
Iteration 187, train_acc = 99.9780, train_loss = 0.0015


 63%|██████▎   | 188/300 [38:31<22:45, 12.19s/it]

Iteration 187, test_acc = 93.6500, test_loss = 0.3122

Epoch: 188
Dropout: False
Dropout: False
Iteration 188, train_acc = 99.9700, train_loss = 0.0017


 63%|██████▎   | 189/300 [38:43<22:31, 12.18s/it]

Iteration 188, test_acc = 93.6300, test_loss = 0.3156

Epoch: 189
Dropout: False
Dropout: False
Iteration 189, train_acc = 99.9740, train_loss = 0.0016


 63%|██████▎   | 190/300 [38:55<22:18, 12.17s/it]

Iteration 189, test_acc = 93.7000, test_loss = 0.3150

Epoch: 190
Dropout: False
Dropout: False
Iteration 190, train_acc = 99.9800, train_loss = 0.0016


 64%|██████▎   | 191/300 [39:07<22:07, 12.18s/it]

Iteration 190, test_acc = 93.7500, test_loss = 0.3139

Epoch: 191
Dropout: False
Dropout: False
Iteration 191, train_acc = 99.9760, train_loss = 0.0017


 64%|██████▍   | 192/300 [39:20<21:55, 12.18s/it]

Iteration 191, test_acc = 93.6700, test_loss = 0.3148

Epoch: 192
Dropout: False
Dropout: False
Iteration 192, train_acc = 99.9680, train_loss = 0.0016


 64%|██████▍   | 193/300 [39:32<21:40, 12.15s/it]

Iteration 192, test_acc = 93.6500, test_loss = 0.3158

Epoch: 193
Dropout: False
Dropout: False
Iteration 193, train_acc = 99.9740, train_loss = 0.0015


 65%|██████▍   | 194/300 [39:44<21:30, 12.17s/it]

Iteration 193, test_acc = 93.6600, test_loss = 0.3139

Epoch: 194
Dropout: False
Dropout: False
Iteration 194, train_acc = 99.9680, train_loss = 0.0016


 65%|██████▌   | 195/300 [39:56<21:20, 12.20s/it]

Iteration 194, test_acc = 93.6700, test_loss = 0.3154

Epoch: 195
Dropout: False
Dropout: False
Iteration 195, train_acc = 99.9820, train_loss = 0.0015


 65%|██████▌   | 196/300 [40:08<21:11, 12.22s/it]

Iteration 195, test_acc = 93.7100, test_loss = 0.3153

Epoch: 196
Dropout: False
Dropout: False
Iteration 196, train_acc = 99.9700, train_loss = 0.0016


 66%|██████▌   | 197/300 [40:21<21:02, 12.26s/it]

Iteration 196, test_acc = 93.7000, test_loss = 0.3167

Epoch: 197
Dropout: False
Dropout: False
Iteration 197, train_acc = 99.9760, train_loss = 0.0015


 66%|██████▌   | 198/300 [40:33<20:50, 12.26s/it]

Iteration 197, test_acc = 93.6800, test_loss = 0.3131

Epoch: 198
Dropout: False
Dropout: False
Iteration 198, train_acc = 99.9800, train_loss = 0.0013


 66%|██████▋   | 199/300 [40:45<20:38, 12.26s/it]

Iteration 198, test_acc = 93.7500, test_loss = 0.3153

Epoch: 199
Dropout: False
Dropout: False
Iteration 199, train_acc = 99.9820, train_loss = 0.0014


 67%|██████▋   | 200/300 [40:58<20:24, 12.25s/it]

Iteration 199, test_acc = 93.7200, test_loss = 0.3139

Epoch: 200
Dropout: True
Dropout: True
Iteration 200, train_acc = 99.9660, train_loss = 0.0017


 67%|██████▋   | 201/300 [41:10<20:11, 12.23s/it]

Iteration 200, test_acc = 93.8200, test_loss = 0.3102

Epoch: 201
Dropout: True
Dropout: True
Iteration 201, train_acc = 99.9740, train_loss = 0.0015


 67%|██████▋   | 202/300 [41:22<19:59, 12.24s/it]

Iteration 201, test_acc = 93.7700, test_loss = 0.3132

Epoch: 202
Dropout: True
Dropout: True
Iteration 202, train_acc = 99.9740, train_loss = 0.0017


 68%|██████▊   | 203/300 [41:34<19:46, 12.23s/it]

Iteration 202, test_acc = 93.7400, test_loss = 0.3148

Epoch: 203
Dropout: True
Dropout: True
Iteration 203, train_acc = 99.9740, train_loss = 0.0016


 68%|██████▊   | 204/300 [41:47<19:35, 12.25s/it]

Iteration 203, test_acc = 93.6600, test_loss = 0.3135

Epoch: 204
Dropout: True
Dropout: True
Iteration 204, train_acc = 99.9820, train_loss = 0.0013


 68%|██████▊   | 205/300 [41:59<19:22, 12.24s/it]

Iteration 204, test_acc = 93.6800, test_loss = 0.3140

Epoch: 205
Dropout: True
Dropout: True
Iteration 205, train_acc = 99.9800, train_loss = 0.0015


 69%|██████▊   | 206/300 [42:11<19:10, 12.24s/it]

Iteration 205, test_acc = 93.7900, test_loss = 0.3103

Epoch: 206
Dropout: True
Dropout: True
Iteration 206, train_acc = 99.9840, train_loss = 0.0013


 69%|██████▉   | 207/300 [42:23<18:58, 12.25s/it]

Iteration 206, test_acc = 93.7200, test_loss = 0.3120

Epoch: 207
Dropout: True
Dropout: True
Iteration 207, train_acc = 99.9840, train_loss = 0.0015


 69%|██████▉   | 208/300 [42:36<18:47, 12.26s/it]

Iteration 207, test_acc = 93.8000, test_loss = 0.3116

Epoch: 208
Dropout: True
Dropout: True
Iteration 208, train_acc = 99.9840, train_loss = 0.0013


 70%|██████▉   | 209/300 [42:48<18:33, 12.24s/it]

Iteration 208, test_acc = 93.8400, test_loss = 0.3138

Epoch: 209
Dropout: True
Dropout: True
Iteration 209, train_acc = 99.9840, train_loss = 0.0013


 70%|███████   | 210/300 [43:00<18:20, 12.23s/it]

Iteration 209, test_acc = 93.7600, test_loss = 0.3132

Epoch: 210
Dropout: True
Dropout: True
Iteration 210, train_acc = 99.9700, train_loss = 0.0015


 70%|███████   | 211/300 [43:12<18:08, 12.23s/it]

Iteration 210, test_acc = 93.7100, test_loss = 0.3163

Epoch: 211
Dropout: True
Dropout: True
Iteration 211, train_acc = 99.9780, train_loss = 0.0014


 71%|███████   | 212/300 [43:24<17:57, 12.24s/it]

Iteration 211, test_acc = 93.7700, test_loss = 0.3143

Epoch: 212
Dropout: True
Dropout: True
Iteration 212, train_acc = 99.9860, train_loss = 0.0014


 71%|███████   | 213/300 [43:37<17:48, 12.28s/it]

Iteration 212, test_acc = 93.7000, test_loss = 0.3130

Epoch: 213
Dropout: True
Dropout: True
Iteration 213, train_acc = 99.9740, train_loss = 0.0015


 71%|███████▏  | 214/300 [43:49<17:36, 12.28s/it]

Iteration 213, test_acc = 93.7300, test_loss = 0.3143

Epoch: 214
Dropout: True
Dropout: True
Iteration 214, train_acc = 99.9720, train_loss = 0.0015


 72%|███████▏  | 215/300 [44:01<17:24, 12.29s/it]

Iteration 214, test_acc = 93.6500, test_loss = 0.3172

Epoch: 215
Dropout: True
Dropout: True
Iteration 215, train_acc = 99.9880, train_loss = 0.0014


 72%|███████▏  | 216/300 [44:14<17:09, 12.26s/it]

Iteration 215, test_acc = 93.8100, test_loss = 0.3131

Epoch: 216
Dropout: True
Dropout: True
Iteration 216, train_acc = 99.9800, train_loss = 0.0013


 72%|███████▏  | 217/300 [44:26<16:58, 12.28s/it]

Iteration 216, test_acc = 93.8000, test_loss = 0.3150

Epoch: 217
Dropout: True
Dropout: True
Iteration 217, train_acc = 99.9860, train_loss = 0.0013


 73%|███████▎  | 218/300 [44:38<16:45, 12.26s/it]

Iteration 217, test_acc = 93.7800, test_loss = 0.3156

Epoch: 218
Dropout: True
Dropout: True
Iteration 218, train_acc = 99.9720, train_loss = 0.0016


 73%|███████▎  | 219/300 [44:50<16:33, 12.26s/it]

Iteration 218, test_acc = 93.8500, test_loss = 0.3154

Epoch: 219
Dropout: True
Dropout: True
Iteration 219, train_acc = 99.9820, train_loss = 0.0014


 73%|███████▎  | 220/300 [45:03<16:20, 12.25s/it]

Iteration 219, test_acc = 93.6700, test_loss = 0.3133

Epoch: 220
Dropout: False
Dropout: False
Iteration 220, train_acc = 99.9740, train_loss = 0.0016


 74%|███████▎  | 221/300 [45:15<16:06, 12.24s/it]

Iteration 220, test_acc = 93.6900, test_loss = 0.3154

Epoch: 221
Dropout: False
Dropout: False
Iteration 221, train_acc = 99.9840, train_loss = 0.0013


 74%|███████▍  | 222/300 [45:27<15:55, 12.25s/it]

Iteration 221, test_acc = 93.7600, test_loss = 0.3125

Epoch: 222
Dropout: False
Dropout: False
Iteration 222, train_acc = 99.9780, train_loss = 0.0013


 74%|███████▍  | 223/300 [45:39<15:41, 12.23s/it]

Iteration 222, test_acc = 93.6500, test_loss = 0.3154

Epoch: 223
Dropout: False
Dropout: False
Iteration 223, train_acc = 99.9840, train_loss = 0.0013


 75%|███████▍  | 224/300 [45:51<15:28, 12.22s/it]

Iteration 223, test_acc = 93.8400, test_loss = 0.3151

Epoch: 224
Dropout: False
Dropout: False
Iteration 224, train_acc = 99.9900, train_loss = 0.0011


 75%|███████▌  | 225/300 [46:04<15:17, 12.23s/it]

Iteration 224, test_acc = 93.7500, test_loss = 0.3151

Epoch: 225
Dropout: False
Dropout: False
Iteration 225, train_acc = 99.9800, train_loss = 0.0014


 75%|███████▌  | 226/300 [46:16<15:05, 12.24s/it]

Iteration 225, test_acc = 93.7400, test_loss = 0.3129

Epoch: 226
Dropout: False
Dropout: False
Iteration 226, train_acc = 99.9740, train_loss = 0.0013


 76%|███████▌  | 227/300 [46:28<14:54, 12.25s/it]

Iteration 226, test_acc = 93.7500, test_loss = 0.3132

Epoch: 227
Dropout: False
Dropout: False
Iteration 227, train_acc = 99.9820, train_loss = 0.0013


 76%|███████▌  | 228/300 [46:41<14:42, 12.25s/it]

Iteration 227, test_acc = 93.7700, test_loss = 0.3136

Epoch: 228
Dropout: False
Dropout: False
Iteration 228, train_acc = 99.9700, train_loss = 0.0015


 76%|███████▋  | 229/300 [46:53<14:29, 12.25s/it]

Iteration 228, test_acc = 93.7700, test_loss = 0.3139

Epoch: 229
Dropout: False
Dropout: False
Iteration 229, train_acc = 99.9900, train_loss = 0.0012


 77%|███████▋  | 230/300 [47:05<14:17, 12.25s/it]

Iteration 229, test_acc = 93.7400, test_loss = 0.3152

Epoch: 230
Dropout: False
Dropout: False
Iteration 230, train_acc = 99.9880, train_loss = 0.0012


 77%|███████▋  | 231/300 [47:17<14:05, 12.25s/it]

Iteration 230, test_acc = 93.7300, test_loss = 0.3130

Epoch: 231
Dropout: False
Dropout: False
Iteration 231, train_acc = 99.9900, train_loss = 0.0012


 77%|███████▋  | 232/300 [47:30<13:53, 12.25s/it]

Iteration 231, test_acc = 93.7900, test_loss = 0.3146

Epoch: 232
Dropout: False
Dropout: False
Iteration 232, train_acc = 99.9780, train_loss = 0.0014


 78%|███████▊  | 233/300 [47:42<13:40, 12.24s/it]

Iteration 232, test_acc = 93.7600, test_loss = 0.3156

Epoch: 233
Dropout: False
Dropout: False
Iteration 233, train_acc = 99.9840, train_loss = 0.0012


 78%|███████▊  | 234/300 [47:54<13:28, 12.24s/it]

Iteration 233, test_acc = 93.7900, test_loss = 0.3153

Epoch: 234
Dropout: False
Dropout: False
Iteration 234, train_acc = 99.9900, train_loss = 0.0012


 78%|███████▊  | 235/300 [48:06<13:14, 12.22s/it]

Iteration 234, test_acc = 93.7500, test_loss = 0.3160

Epoch: 235
Dropout: False
Dropout: False
