# ImageNet Training with MobileNet


In [1]:
import os
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
# Just some functions to average stuff, and save the model
from utils_pytorch import *
from sutils import AverageMeter,accuracy

# ---- Training configuration -------------------------------------------------

# Optimizer hyper-parameters (consumed by the SGD solver below)
learning_rate = 0.1
momentum = 0.9
weight_decay = 1e-4

# Data-loader settings
batch_size = 64
workers = 1

# Training-loop settings: number of epochs, and how many batches pass
# between two progress log lines
epochs = 1000
print_freq = 100

# Root folder of the dataset; the loaders read its 'train' and 'val' sub-folders
IMAGENET_PATH = './dataset'

### Define the MobileNet class
#### Architecture
![title](ArchMobileNet.png)
#### Normal convolution and depthwise convolution
![title](MobileNetConvs.png)

In [2]:
class MobileNet(nn.Module):
    """MobileNet-style classifier built from depthwise-separable convolutions.

    The feature extractor is one regular 3x3 convolution followed by 13
    depthwise-separable blocks, reducing the spatial resolution by a factor
    of 32 and producing 1024 channels. The features are globally
    average-pooled and fed to a single linear classification layer.

    Args:
        num_classes: Number of output logits. Defaults to 1000, which
            reproduces the original hard-coded classifier size.

    Notes:
        Global pooling uses ``AdaptiveAvgPool2d(1)`` instead of a fixed
        ``AvgPool2d(7)``. For 224x224 inputs (7x7 final feature map) the
        result is identical, but other input resolutions no longer fail or
        get silently folded into the batch dimension by the reshape in
        ``forward``. The pooling layer has no parameters and keeps its
        position in ``self.model``, so ``state_dict`` keys are unchanged.
    """

    def __init__(self, num_classes=1000):
        super(MobileNet, self).__init__()

        # Normal convolution block followed by Batchnorm (CONV_3x3-->BN-->Relu)
        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        # Depthwise convolution block (CONV_BLK_3x3-->BN-->Relu-->CONV_1x1-->BN-->Relu)
        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                # groups=inp makes the 3x3 convolution depthwise:
                # every input channel is filtered independently.
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                # 1x1 pointwise convolution mixes channels and sets the width.
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(  3,  32, 2),
            conv_dw( 32,  64, 1),
            conv_dw( 64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            # Global average pooling to 1x1, independent of the input size.
            nn.AdaptiveAvgPool2d(1),
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch."""
        x = self.model(x)
        # Flatten per sample; keeping the batch dimension explicit prevents
        # leftover spatial positions from leaking into the batch axis.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

### Initialize the model and move it to the GPU

In [3]:
# Build the network, wrap it in DataParallel so each batch is split across the
# visible GPUs, and move the parameters onto the GPU.
model = torch.nn.DataParallel(MobileNet()).cuda()

### Define Loss

In [4]:
# Cross-entropy classification loss; it is applied directly to the raw logits
# returned by the model, and is placed on the GPU alongside the model.
criterion = nn.CrossEntropyLoss().cuda()

### Define the solver (SGD)

In [5]:
# Stochastic gradient descent over every model parameter, with momentum and
# L2 weight decay taken from the configuration cell.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

### Data loading specifics for ImageNet

In [6]:
# Locations of the two dataset splits under the configured root
traindir = os.path.join(IMAGENET_PATH, 'train')
valdir = os.path.join(IMAGENET_PATH, 'val')

# Per-channel normalisation using the standard ImageNet mean / std
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: random augmentation, then tensor conversion + normalisation
train_transform = transforms.Compose([
    # crop a random region of the image and resize it to 224 x 224
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # mild photometric jitter
    transforms.ColorJitter(brightness=0.05, contrast=0.05,
                           saturation=0.05, hue=0.05),
    transforms.ToTensor(),
    normalize,
])
train_dataset = datasets.ImageFolder(traindir, transform=train_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)

print(train_loader)

# Validation pipeline: deterministic resize + centre crop, no augmentation
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])
val_dataset = datasets.ImageFolder(valdir, transform=val_transform)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=workers,
    pin_memory=True,
)
print(val_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f7a04371358>
<torch.utils.data.dataloader.DataLoader object at 0x7f79250a7080>


### Train

In [7]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch this runs a forward pass, computes the loss, takes one
    optimizer step, and tracks running averages of the timing, loss and
    top-1 / top-5 precision. A progress line is printed every ``print_freq``
    batches.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to optimise; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        epoch: Epoch index, used only in the progress output.

    Returns:
        None.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # `async` is a reserved word since Python 3.7 (the old spelling is a
        # SyntaxError); PyTorch >= 0.4 names the argument `non_blocking`.
        target = target.cuda(non_blocking=True)
        # NOTE(review): the images are left on the host exactly as before;
        # presumably the DataParallel wrapper scatters them to the GPUs.

        # compute output (tensors no longer need a Variable wrapper)
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        n = images.size(0)
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
            
            
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 precision.

    The model is switched to evaluation mode and the whole pass runs under
    ``torch.no_grad()`` so no autograd graph is recorded. Running averages of
    the loss and top-1 / top-5 precision are printed every ``print_freq``
    batches, followed by a final summary line.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        The running-average top-1 precision (``top1.avg``) over the loader.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # `Variable(..., volatile=True)` has no effect since PyTorch 0.4;
    # torch.no_grad() is the supported way to disable graph construction.
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            # `async` is a reserved word since Python 3.7; the argument is
            # called `non_blocking` in PyTorch >= 0.4.
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            n = images.size(0)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg

In [8]:
# Best validation top-1 precision observed so far
best_prec1 = 0

for epoch in range(epochs):
    # Update the optimizer's learning rate for this epoch
    # (helper provided by the utils_pytorch star import).
    adjust_learning_rate(optimizer, epoch, learning_rate)

    # One pass over the training data, then score the validation split
    train(train_loader, model, criterion, optimizer, epoch)
    prec1 = validate(val_loader, model, criterion)

    # Flag a new best result before folding it into the running maximum
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)

    # Persist everything captured here: epoch counter, weights, best score
    # and optimizer state
    checkpoint_state = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer' : optimizer.state_dict(),
    }
    save_checkpoint(checkpoint_state, is_best)

Epoch: [0][0/60]	Time 0.860 (0.860)	Data 0.783 (0.783)	Loss 6.8219 (6.8219)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)




Test: [0/27]	Time 0.457 (0.457)	Loss 0.2767 (0.2767)	Prec@1 93.750 (93.750)	Prec@5 93.750 (93.750)
 * Prec@1 49.070 Prec@5 74.085
Epoch: [1][0/60]	Time 0.595 (0.595)	Data 0.556 (0.556)	Loss 2.7096 (2.7096)	Prec@1 25.000 (25.000)	Prec@5 54.688 (54.688)
Test: [0/27]	Time 0.393 (0.393)	Loss 0.4284 (0.4284)	Prec@1 93.750 (93.750)	Prec@5 93.750 (93.750)
 * Prec@1 53.329 Prec@5 86.203
Epoch: [2][0/60]	Time 0.585 (0.585)	Data 0.547 (0.547)	Loss 2.0455 (2.0455)	Prec@1 46.875 (46.875)	Prec@5 71.875 (71.875)
Test: [0/27]	Time 0.371 (0.371)	Loss 0.8448 (0.8448)	Prec@1 81.250 (81.250)	Prec@5 95.312 (95.312)
 * Prec@1 60.108 Prec@5 86.263
Epoch: [3][0/60]	Time 0.579 (0.579)	Data 0.540 (0.540)	Loss 1.7383 (1.7383)	Prec@1 51.562 (51.562)	Prec@5 82.812 (82.812)
Test: [0/27]	Time 0.380 (0.380)	Loss 0.2705 (0.2705)	Prec@1 93.750 (93.750)	Prec@5 96.875 (96.875)
 * Prec@1 73.905 Prec@5 93.221
Epoch: [4][0/60]	Time 0.596 (0.596)	Data 0.557 (0.557)	Loss 1.5734 (1.5734)	Prec@1 62.500 (62.500)	Prec@5 85.938 (

Epoch: [33][0/60]	Time 0.573 (0.573)	Data 0.534 (0.534)	Loss 0.5307 (0.5307)	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.369 (0.369)	Loss 0.3520 (0.3520)	Prec@1 90.625 (90.625)	Prec@5 98.438 (98.438)
 * Prec@1 94.841 Prec@5 98.920
Epoch: [34][0/60]	Time 0.581 (0.581)	Data 0.543 (0.543)	Loss 0.7626 (0.7626)	Prec@1 76.562 (76.562)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.381 (0.381)	Loss 0.3420 (0.3420)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 94.901 Prec@5 98.860
Epoch: [35][0/60]	Time 0.579 (0.579)	Data 0.540 (0.540)	Loss 0.7398 (0.7398)	Prec@1 82.812 (82.812)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.371 (0.371)	Loss 0.2793 (0.2793)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 94.721 Prec@5 98.920
Epoch: [36][0/60]	Time 0.591 (0.591)	Data 0.553 (0.553)	Loss 0.6488 (0.6488)	Prec@1 79.688 (79.688)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.368 (0.368)	Loss 0.2905 (0.2905)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 94.961 Pr

Test: [0/27]	Time 0.373 (0.373)	Loss 0.2926 (0.2926)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.160
Epoch: [66][0/60]	Time 0.581 (0.581)	Data 0.542 (0.542)	Loss 0.3454 (0.3454)	Prec@1 92.188 (92.188)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.374 (0.374)	Loss 0.2828 (0.2828)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 98.980
Epoch: [67][0/60]	Time 0.584 (0.584)	Data 0.544 (0.544)	Loss 0.3303 (0.3303)	Prec@1 89.062 (89.062)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.372 (0.372)	Loss 0.3050 (0.3050)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [68][0/60]	Time 0.599 (0.599)	Data 0.559 (0.559)	Loss 0.6113 (0.6113)	Prec@1 87.500 (87.500)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.383 (0.383)	Loss 0.2844 (0.2844)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.160
Epoch: [69][0/60]	Time 0.586 (0.586)	Data 0.546 (0.546)	Loss 0.4157 (0.4157)	Prec@1 87.500 (87.500)	Prec@5 

 * Prec@1 95.321 Prec@5 99.100
Epoch: [98][0/60]	Time 0.606 (0.606)	Data 0.567 (0.567)	Loss 0.2882 (0.2882)	Prec@1 89.062 (89.062)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.373 (0.373)	Loss 0.2707 (0.2707)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.561 Prec@5 99.100
Epoch: [99][0/60]	Time 0.585 (0.585)	Data 0.546 (0.546)	Loss 0.6409 (0.6409)	Prec@1 78.125 (78.125)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.371 (0.371)	Loss 0.2904 (0.2904)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.040
Epoch: [100][0/60]	Time 0.574 (0.574)	Data 0.535 (0.535)	Loss 0.5272 (0.5272)	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.380 (0.380)	Loss 0.2874 (0.2874)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.501 Prec@5 99.100
Epoch: [101][0/60]	Time 0.586 (0.586)	Data 0.547 (0.547)	Loss 0.3845 (0.3845)	Prec@1 89.062 (89.062)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.371 (0.371)	Loss 0.2835 (0.2835)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.375 (0.375)	Loss 0.2960 (0.2960)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [131][0/60]	Time 0.593 (0.593)	Data 0.554 (0.554)	Loss 0.4449 (0.4449)	Prec@1 89.062 (89.062)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.366 (0.366)	Loss 0.2957 (0.2957)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [132][0/60]	Time 0.574 (0.574)	Data 0.536 (0.536)	Loss 0.7899 (0.7899)	Prec@1 78.125 (78.125)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.367 (0.367)	Loss 0.3000 (0.3000)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 98.980
Epoch: [133][0/60]	Time 0.582 (0.582)	Data 0.543 (0.543)	Loss 0.7940 (0.7940)	Prec@1 79.688 (79.688)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.377 (0.377)	Loss 0.2917 (0.2917)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [134][0/60]	Time 0.581 (0.581)	Data 0.543 (0.543)	Loss 0.8416 (0.8416)	Prec@1 73.438 (73.438)	Prec@5 

 * Prec@1 95.321 Prec@5 98.980
Epoch: [163][0/60]	Time 0.616 (0.616)	Data 0.572 (0.572)	Loss 0.6366 (0.6366)	Prec@1 84.375 (84.375)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.396 (0.396)	Loss 0.2897 (0.2897)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 98.980
Epoch: [164][0/60]	Time 0.612 (0.612)	Data 0.567 (0.567)	Loss 0.3749 (0.3749)	Prec@1 85.938 (85.938)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.395 (0.395)	Loss 0.2965 (0.2965)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [165][0/60]	Time 0.607 (0.607)	Data 0.564 (0.564)	Loss 0.3848 (0.3848)	Prec@1 89.062 (89.062)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.402 (0.402)	Loss 0.2658 (0.2658)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [166][0/60]	Time 0.626 (0.626)	Data 0.586 (0.586)	Loss 0.6001 (0.6001)	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.382 (0.382)	Loss 0.3060 (0.3060)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.411 (0.411)	Loss 0.2888 (0.2888)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.201 Prec@5 99.100
Epoch: [196][0/60]	Time 0.640 (0.640)	Data 0.598 (0.598)	Loss 0.6657 (0.6657)	Prec@1 76.562 (76.562)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.382 (0.382)	Loss 0.3166 (0.3166)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.040
Epoch: [197][0/60]	Time 0.595 (0.595)	Data 0.556 (0.556)	Loss 0.5316 (0.5316)	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.386 (0.386)	Loss 0.2905 (0.2905)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [198][0/60]	Time 0.585 (0.585)	Data 0.546 (0.546)	Loss 0.4611 (0.4611)	Prec@1 85.938 (85.938)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.390 (0.390)	Loss 0.3111 (0.3111)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [199][0/60]	Time 0.575 (0.575)	Data 0.535 (0.535)	Loss 0.5464 (0.5464)	Prec@1 79.688 (79.688)	Prec@5 

 * Prec@1 95.381 Prec@5 99.100
Epoch: [228][0/60]	Time 0.611 (0.611)	Data 0.566 (0.566)	Loss 0.6109 (0.6109)	Prec@1 79.688 (79.688)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.389 (0.389)	Loss 0.2629 (0.2629)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [229][0/60]	Time 0.721 (0.721)	Data 0.683 (0.683)	Loss 0.5058 (0.5058)	Prec@1 85.938 (85.938)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.373 (0.373)	Loss 0.2928 (0.2928)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [230][0/60]	Time 0.638 (0.638)	Data 0.596 (0.596)	Loss 0.5223 (0.5223)	Prec@1 85.938 (85.938)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.392 (0.392)	Loss 0.2969 (0.2969)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.201 Prec@5 99.100
Epoch: [231][0/60]	Time 0.681 (0.681)	Data 0.642 (0.642)	Loss 0.4948 (0.4948)	Prec@1 81.250 (81.250)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.393 (0.393)	Loss 0.2967 (0.2967)	Prec@1 92.188 (92.188)	Prec@5 

Test: [0/27]	Time 0.512 (0.512)	Loss 0.2781 (0.2781)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [261][0/60]	Time 0.821 (0.821)	Data 0.777 (0.777)	Loss 0.5416 (0.5416)	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.612 (0.612)	Loss 0.2551 (0.2551)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [262][0/60]	Time 0.833 (0.833)	Data 0.782 (0.782)	Loss 0.3308 (0.3308)	Prec@1 92.188 (92.188)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.381 (0.381)	Loss 0.2944 (0.2944)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [263][0/60]	Time 0.609 (0.609)	Data 0.567 (0.567)	Loss 0.4388 (0.4388)	Prec@1 87.500 (87.500)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.396 (0.396)	Loss 0.2822 (0.2822)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [264][0/60]	Time 0.677 (0.677)	Data 0.632 (0.632)	Loss 0.7971 (0.7971)	Prec@1 79.688 (79.688)	Prec@5 

 * Prec@1 95.381 Prec@5 99.100
Epoch: [293][0/60]	Time 0.648 (0.648)	Data 0.604 (0.604)	Loss 0.5340 (0.5340)	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.416 (0.416)	Loss 0.2808 (0.2808)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [294][0/60]	Time 0.616 (0.616)	Data 0.576 (0.576)	Loss 0.3627 (0.3627)	Prec@1 85.938 (85.938)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.439 (0.439)	Loss 0.3069 (0.3069)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [295][0/60]	Time 0.673 (0.673)	Data 0.622 (0.622)	Loss 0.5234 (0.5234)	Prec@1 85.938 (85.938)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.398 (0.398)	Loss 0.2869 (0.2869)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [296][0/60]	Time 0.631 (0.631)	Data 0.590 (0.590)	Loss 0.4349 (0.4349)	Prec@1 89.062 (89.062)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.385 (0.385)	Loss 0.2686 (0.2686)	Prec@1 93.750 (93.750)	Prec@

Test: [0/27]	Time 0.375 (0.375)	Loss 0.2902 (0.2902)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [326][0/60]	Time 0.575 (0.575)	Data 0.532 (0.532)	Loss 0.5616 (0.5616)	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.410 (0.410)	Loss 0.2566 (0.2566)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [327][0/60]	Time 0.632 (0.632)	Data 0.588 (0.588)	Loss 0.8019 (0.8019)	Prec@1 75.000 (75.000)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.380 (0.380)	Loss 0.2731 (0.2731)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.160
Epoch: [328][0/60]	Time 0.573 (0.573)	Data 0.532 (0.532)	Loss 0.4791 (0.4791)	Prec@1 82.812 (82.812)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.377 (0.377)	Loss 0.2833 (0.2833)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [329][0/60]	Time 0.614 (0.614)	Data 0.572 (0.572)	Loss 0.4402 (0.4402)	Prec@1 84.375 (84.375)	Prec@5 

 * Prec@1 95.381 Prec@5 99.100
Epoch: [358][0/60]	Time 0.603 (0.603)	Data 0.564 (0.564)	Loss 0.4116 (0.4116)	Prec@1 89.062 (89.062)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.399 (0.399)	Loss 0.3098 (0.3098)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.201 Prec@5 99.040
Epoch: [359][0/60]	Time 0.630 (0.630)	Data 0.585 (0.585)	Loss 0.4700 (0.4700)	Prec@1 85.938 (85.938)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.378 (0.378)	Loss 0.2980 (0.2980)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.040
Epoch: [360][0/60]	Time 0.585 (0.585)	Data 0.546 (0.546)	Loss 0.5663 (0.5663)	Prec@1 84.375 (84.375)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.376 (0.376)	Loss 0.2740 (0.2740)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.040
Epoch: [361][0/60]	Time 0.612 (0.612)	Data 0.568 (0.568)	Loss 0.7192 (0.7192)	Prec@1 79.688 (79.688)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.372 (0.372)	Loss 0.2849 (0.2849)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.383 (0.383)	Loss 0.2935 (0.2935)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [391][0/60]	Time 0.617 (0.617)	Data 0.576 (0.576)	Loss 0.4544 (0.4544)	Prec@1 87.500 (87.500)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.397 (0.397)	Loss 0.2814 (0.2814)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 98.980
Epoch: [392][0/60]	Time 0.649 (0.649)	Data 0.601 (0.601)	Loss 0.5000 (0.5000)	Prec@1 82.812 (82.812)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.393 (0.393)	Loss 0.2779 (0.2779)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.160
Epoch: [393][0/60]	Time 0.627 (0.627)	Data 0.588 (0.588)	Loss 0.5211 (0.5211)	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.382 (0.382)	Loss 0.2739 (0.2739)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [394][0/60]	Time 0.594 (0.594)	Data 0.548 (0.548)	Loss 0.5019 (0.5019)	Prec@1 89.062 (89.062)	Prec@

 * Prec@1 95.381 Prec@5 99.100
Epoch: [423][0/60]	Time 0.594 (0.594)	Data 0.553 (0.553)	Loss 0.4708 (0.4708)	Prec@1 87.500 (87.500)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.397 (0.397)	Loss 0.3011 (0.3011)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.201 Prec@5 98.980
Epoch: [424][0/60]	Time 0.644 (0.644)	Data 0.604 (0.604)	Loss 0.7873 (0.7873)	Prec@1 82.812 (82.812)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.513 (0.513)	Loss 0.2742 (0.2742)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [425][0/60]	Time 0.808 (0.808)	Data 0.763 (0.763)	Loss 0.2941 (0.2941)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.632 (0.632)	Loss 0.2724 (0.2724)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [426][0/60]	Time 0.893 (0.893)	Data 0.842 (0.842)	Loss 0.5379 (0.5379)	Prec@1 82.812 (82.812)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.528 (0.528)	Loss 0.2698 (0.2698)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.378 (0.378)	Loss 0.2856 (0.2856)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 98.980
Epoch: [456][0/60]	Time 0.602 (0.602)	Data 0.558 (0.558)	Loss 0.3553 (0.3553)	Prec@1 92.188 (92.188)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.410 (0.410)	Loss 0.2838 (0.2838)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [457][0/60]	Time 0.596 (0.596)	Data 0.550 (0.550)	Loss 0.4215 (0.4215)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.429 (0.429)	Loss 0.2637 (0.2637)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [458][0/60]	Time 0.589 (0.589)	Data 0.551 (0.551)	Loss 0.5500 (0.5500)	Prec@1 79.688 (79.688)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.368 (0.368)	Loss 0.2793 (0.2793)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.501 Prec@5 99.040
Epoch: [459][0/60]	Time 0.597 (0.597)	Data 0.552 (0.552)	Loss 0.9059 (0.9059)	Prec@1 76.562 (76.562)	Prec@5 

 * Prec@1 95.321 Prec@5 99.040
Epoch: [488][0/60]	Time 0.609 (0.609)	Data 0.567 (0.567)	Loss 0.3535 (0.3535)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.414 (0.414)	Loss 0.2693 (0.2693)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [489][0/60]	Time 0.631 (0.631)	Data 0.592 (0.592)	Loss 0.6377 (0.6377)	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.441 (0.441)	Loss 0.2893 (0.2893)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [490][0/60]	Time 0.626 (0.626)	Data 0.582 (0.582)	Loss 0.5524 (0.5524)	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.424 (0.424)	Loss 0.2845 (0.2845)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.160
Epoch: [491][0/60]	Time 0.639 (0.639)	Data 0.594 (0.594)	Loss 0.3853 (0.3853)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.406 (0.406)	Loss 0.2753 (0.2753)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.411 (0.411)	Loss 0.2899 (0.2899)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 98.980
Epoch: [521][0/60]	Time 0.642 (0.642)	Data 0.601 (0.601)	Loss 0.5965 (0.5965)	Prec@1 82.812 (82.812)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.378 (0.378)	Loss 0.2983 (0.2983)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [522][0/60]	Time 0.642 (0.642)	Data 0.596 (0.596)	Loss 0.4667 (0.4667)	Prec@1 87.500 (87.500)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.420 (0.420)	Loss 0.2783 (0.2783)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.040
Epoch: [523][0/60]	Time 0.659 (0.659)	Data 0.614 (0.614)	Loss 0.5607 (0.5607)	Prec@1 82.812 (82.812)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.392 (0.392)	Loss 0.2885 (0.2885)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [524][0/60]	Time 0.616 (0.616)	Data 0.573 (0.573)	Loss 0.6873 (0.6873)	Prec@1 79.688 (79.688)	Prec@5 

 * Prec@1 95.441 Prec@5 99.040
Epoch: [553][0/60]	Time 0.653 (0.653)	Data 0.604 (0.604)	Loss 0.6383 (0.6383)	Prec@1 82.812 (82.812)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.397 (0.397)	Loss 0.2695 (0.2695)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [554][0/60]	Time 0.692 (0.692)	Data 0.648 (0.648)	Loss 0.3944 (0.3944)	Prec@1 87.500 (87.500)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.476 (0.476)	Loss 0.2988 (0.2988)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.141 Prec@5 99.040
Epoch: [555][0/60]	Time 0.630 (0.630)	Data 0.590 (0.590)	Loss 0.4882 (0.4882)	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.404 (0.404)	Loss 0.2676 (0.2676)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [556][0/60]	Time 0.616 (0.616)	Data 0.576 (0.576)	Loss 0.8515 (0.8515)	Prec@1 76.562 (76.562)	Prec@5 89.062 (89.062)
Test: [0/27]	Time 0.395 (0.395)	Loss 0.2800 (0.2800)	Prec@1 93.750 (93.750)	Prec@

Test: [0/27]	Time 0.377 (0.377)	Loss 0.2716 (0.2716)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [586][0/60]	Time 1.036 (1.036)	Data 0.960 (0.960)	Loss 0.4421 (0.4421)	Prec@1 82.812 (82.812)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.427 (0.427)	Loss 0.2757 (0.2757)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.160
Epoch: [587][0/60]	Time 0.577 (0.577)	Data 0.538 (0.538)	Loss 0.5657 (0.5657)	Prec@1 79.688 (79.688)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.403 (0.403)	Loss 0.2778 (0.2778)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [588][0/60]	Time 0.590 (0.590)	Data 0.552 (0.552)	Loss 0.4641 (0.4641)	Prec@1 82.812 (82.812)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.377 (0.377)	Loss 0.2902 (0.2902)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 98.980
Epoch: [589][0/60]	Time 0.591 (0.591)	Data 0.551 (0.551)	Loss 0.5901 (0.5901)	Prec@1 81.250 (81.250)	Prec@5 

 * Prec@1 95.321 Prec@5 99.100
Epoch: [618][0/60]	Time 0.616 (0.616)	Data 0.574 (0.574)	Loss 0.4537 (0.4537)	Prec@1 85.938 (85.938)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.445 (0.445)	Loss 0.3150 (0.3150)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.040
Epoch: [619][0/60]	Time 0.629 (0.629)	Data 0.582 (0.582)	Loss 0.5787 (0.5787)	Prec@1 76.562 (76.562)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.447 (0.447)	Loss 0.2611 (0.2611)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [620][0/60]	Time 0.672 (0.672)	Data 0.627 (0.627)	Loss 0.8682 (0.8682)	Prec@1 78.125 (78.125)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.404 (0.404)	Loss 0.2957 (0.2957)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [621][0/60]	Time 0.644 (0.644)	Data 0.594 (0.594)	Loss 0.9267 (0.9267)	Prec@1 75.000 (75.000)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.417 (0.417)	Loss 0.2691 (0.2691)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.393 (0.393)	Loss 0.2744 (0.2744)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.501 Prec@5 99.160
Epoch: [651][0/60]	Time 0.591 (0.591)	Data 0.551 (0.551)	Loss 0.4758 (0.4758)	Prec@1 89.062 (89.062)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.374 (0.374)	Loss 0.2757 (0.2757)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [652][0/60]	Time 0.593 (0.593)	Data 0.553 (0.553)	Loss 0.5961 (0.5961)	Prec@1 84.375 (84.375)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.365 (0.365)	Loss 0.2640 (0.2640)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.160
Epoch: [653][0/60]	Time 0.591 (0.591)	Data 0.550 (0.550)	Loss 0.5910 (0.5910)	Prec@1 87.500 (87.500)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.374 (0.374)	Loss 0.2831 (0.2831)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [654][0/60]	Time 0.591 (0.591)	Data 0.549 (0.549)	Loss 0.3259 (0.3259)	Prec@1 90.625 (90.625)	Prec@5 

 * Prec@1 95.261 Prec@5 99.160
Epoch: [683][0/60]	Time 0.656 (0.656)	Data 0.615 (0.615)	Loss 0.3827 (0.3827)	Prec@1 90.625 (90.625)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.411 (0.411)	Loss 0.2931 (0.2931)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [684][0/60]	Time 0.628 (0.628)	Data 0.588 (0.588)	Loss 0.3320 (0.3320)	Prec@1 85.938 (85.938)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.402 (0.402)	Loss 0.2738 (0.2738)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [685][0/60]	Time 0.600 (0.600)	Data 0.559 (0.559)	Loss 0.3801 (0.3801)	Prec@1 92.188 (92.188)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.385 (0.385)	Loss 0.2645 (0.2645)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [686][0/60]	Time 0.599 (0.599)	Data 0.554 (0.554)	Loss 0.5379 (0.5379)	Prec@1 79.688 (79.688)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.376 (0.376)	Loss 0.2882 (0.2882)	Prec@1 92.188 (92.188)	Prec@5 

Test: [0/27]	Time 0.451 (0.451)	Loss 0.2937 (0.2937)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.201 Prec@5 99.160
Epoch: [716][0/60]	Time 0.614 (0.614)	Data 0.574 (0.574)	Loss 0.4456 (0.4456)	Prec@1 85.938 (85.938)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.450 (0.450)	Loss 0.3047 (0.3047)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [717][0/60]	Time 0.590 (0.590)	Data 0.551 (0.551)	Loss 0.5014 (0.5014)	Prec@1 81.250 (81.250)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.416 (0.416)	Loss 0.2995 (0.2995)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 98.980
Epoch: [718][0/60]	Time 0.600 (0.600)	Data 0.558 (0.558)	Loss 0.3428 (0.3428)	Prec@1 90.625 (90.625)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.391 (0.391)	Loss 0.3054 (0.3054)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [719][0/60]	Time 0.637 (0.637)	Data 0.593 (0.593)	Loss 0.4437 (0.4437)	Prec@1 85.938 (85.938)	Prec@5 

 * Prec@1 95.381 Prec@5 99.040
Epoch: [748][0/60]	Time 0.619 (0.619)	Data 0.576 (0.576)	Loss 0.5328 (0.5328)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.372 (0.372)	Loss 0.2755 (0.2755)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [749][0/60]	Time 0.583 (0.583)	Data 0.544 (0.544)	Loss 0.3212 (0.3212)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.385 (0.385)	Loss 0.2880 (0.2880)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [750][0/60]	Time 0.578 (0.578)	Data 0.538 (0.538)	Loss 0.5986 (0.5986)	Prec@1 84.375 (84.375)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.379 (0.379)	Loss 0.2916 (0.2916)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [751][0/60]	Time 0.588 (0.588)	Data 0.550 (0.550)	Loss 0.3287 (0.3287)	Prec@1 85.938 (85.938)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.379 (0.379)	Loss 0.2783 (0.2783)	Prec@1 93.750 (93.750)	Prec@

Test: [0/27]	Time 0.407 (0.407)	Loss 0.2835 (0.2835)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [781][0/60]	Time 0.608 (0.608)	Data 0.566 (0.566)	Loss 0.3213 (0.3213)	Prec@1 85.938 (85.938)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.392 (0.392)	Loss 0.2828 (0.2828)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [782][0/60]	Time 0.642 (0.642)	Data 0.598 (0.598)	Loss 0.5799 (0.5799)	Prec@1 87.500 (87.500)	Prec@5 92.188 (92.188)
Test: [0/27]	Time 0.392 (0.392)	Loss 0.2742 (0.2742)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [783][0/60]	Time 0.632 (0.632)	Data 0.585 (0.585)	Loss 0.6147 (0.6147)	Prec@1 81.250 (81.250)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.413 (0.413)	Loss 0.2682 (0.2682)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [784][0/60]	Time 0.614 (0.614)	Data 0.567 (0.567)	Loss 0.5574 (0.5574)	Prec@1 87.500 (87.500)	Prec@

 * Prec@1 95.201 Prec@5 99.040
Epoch: [813][0/60]	Time 0.630 (0.630)	Data 0.591 (0.591)	Loss 0.6227 (0.6227)	Prec@1 84.375 (84.375)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.427 (0.427)	Loss 0.2735 (0.2735)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [814][0/60]	Time 0.633 (0.633)	Data 0.592 (0.592)	Loss 0.4240 (0.4240)	Prec@1 87.500 (87.500)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.389 (0.389)	Loss 0.2778 (0.2778)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [815][0/60]	Time 0.643 (0.643)	Data 0.601 (0.601)	Loss 0.6178 (0.6178)	Prec@1 81.250 (81.250)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.386 (0.386)	Loss 0.2857 (0.2857)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [816][0/60]	Time 0.639 (0.639)	Data 0.595 (0.595)	Loss 0.3823 (0.3823)	Prec@1 85.938 (85.938)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.397 (0.397)	Loss 0.2763 (0.2763)	Prec@1 93.750 (93.750)	Prec@

Test: [0/27]	Time 0.387 (0.387)	Loss 0.2739 (0.2739)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [846][0/60]	Time 0.620 (0.620)	Data 0.576 (0.576)	Loss 0.3949 (0.3949)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.410 (0.410)	Loss 0.3056 (0.3056)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.040
Epoch: [847][0/60]	Time 0.609 (0.609)	Data 0.566 (0.566)	Loss 0.4252 (0.4252)	Prec@1 90.625 (90.625)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.403 (0.403)	Loss 0.3011 (0.3011)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [848][0/60]	Time 0.603 (0.603)	Data 0.565 (0.565)	Loss 0.3675 (0.3675)	Prec@1 90.625 (90.625)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.384 (0.384)	Loss 0.2840 (0.2840)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.040
Epoch: [849][0/60]	Time 0.617 (0.617)	Data 0.571 (0.571)	Loss 0.5044 (0.5044)	Prec@1 84.375 (84.375)	Prec@

 * Prec@1 95.381 Prec@5 99.040
Epoch: [878][0/60]	Time 0.617 (0.617)	Data 0.574 (0.574)	Loss 0.5799 (0.5799)	Prec@1 82.812 (82.812)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.389 (0.389)	Loss 0.2977 (0.2977)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.040
Epoch: [879][0/60]	Time 0.592 (0.592)	Data 0.554 (0.554)	Loss 0.4442 (0.4442)	Prec@1 84.375 (84.375)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.417 (0.417)	Loss 0.2879 (0.2879)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [880][0/60]	Time 0.662 (0.662)	Data 0.622 (0.622)	Loss 0.4060 (0.4060)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.406 (0.406)	Loss 0.2867 (0.2867)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.100
Epoch: [881][0/60]	Time 0.622 (0.622)	Data 0.574 (0.574)	Loss 0.4555 (0.4555)	Prec@1 87.500 (87.500)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.414 (0.414)	Loss 0.2893 (0.2893)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.432 (0.432)	Loss 0.2801 (0.2801)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [911][0/60]	Time 0.691 (0.691)	Data 0.646 (0.646)	Loss 0.4417 (0.4417)	Prec@1 89.062 (89.062)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.414 (0.414)	Loss 0.2916 (0.2916)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.160
Epoch: [912][0/60]	Time 0.632 (0.632)	Data 0.586 (0.586)	Loss 0.4391 (0.4391)	Prec@1 85.938 (85.938)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.384 (0.384)	Loss 0.2860 (0.2860)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.381 Prec@5 99.100
Epoch: [913][0/60]	Time 0.585 (0.585)	Data 0.547 (0.547)	Loss 0.2849 (0.2849)	Prec@1 87.500 (87.500)	Prec@5 100.000 (100.000)
Test: [0/27]	Time 0.381 (0.381)	Loss 0.3135 (0.3135)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.141 Prec@5 99.100
Epoch: [914][0/60]	Time 0.589 (0.589)	Data 0.550 (0.550)	Loss 0.4297 (0.4297)	Prec@1 84.375 (84.375)	Prec@

 * Prec@1 95.381 Prec@5 99.160
Epoch: [943][0/60]	Time 0.637 (0.637)	Data 0.596 (0.596)	Loss 0.4188 (0.4188)	Prec@1 89.062 (89.062)	Prec@5 98.438 (98.438)
Test: [0/27]	Time 0.441 (0.441)	Loss 0.2874 (0.2874)	Prec@1 92.188 (92.188)	Prec@5 98.438 (98.438)
 * Prec@1 95.261 Prec@5 99.040
Epoch: [944][0/60]	Time 0.589 (0.589)	Data 0.551 (0.551)	Loss 0.5713 (0.5713)	Prec@1 79.688 (79.688)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.413 (0.413)	Loss 0.2915 (0.2915)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.100
Epoch: [945][0/60]	Time 0.666 (0.666)	Data 0.620 (0.620)	Loss 0.5141 (0.5141)	Prec@1 85.938 (85.938)	Prec@5 93.750 (93.750)
Test: [0/27]	Time 0.389 (0.389)	Loss 0.2668 (0.2668)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.040
Epoch: [946][0/60]	Time 0.645 (0.645)	Data 0.606 (0.606)	Loss 0.3905 (0.3905)	Prec@1 87.500 (87.500)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.386 (0.386)	Loss 0.2867 (0.2867)	Prec@1 93.750 (93.750)	Prec@5 

Test: [0/27]	Time 0.386 (0.386)	Loss 0.2670 (0.2670)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.501 Prec@5 99.040
Epoch: [976][0/60]	Time 0.587 (0.587)	Data 0.547 (0.547)	Loss 0.6261 (0.6261)	Prec@1 81.250 (81.250)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.507 (0.507)	Loss 0.2750 (0.2750)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.040
Epoch: [977][0/60]	Time 0.846 (0.846)	Data 0.789 (0.789)	Loss 0.5295 (0.5295)	Prec@1 81.250 (81.250)	Prec@5 96.875 (96.875)
Test: [0/27]	Time 0.570 (0.570)	Loss 0.2897 (0.2897)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.441 Prec@5 99.100
Epoch: [978][0/60]	Time 0.912 (0.912)	Data 0.863 (0.863)	Loss 0.5090 (0.5090)	Prec@1 89.062 (89.062)	Prec@5 95.312 (95.312)
Test: [0/27]	Time 0.450 (0.450)	Loss 0.2987 (0.2987)	Prec@1 93.750 (93.750)	Prec@5 98.438 (98.438)
 * Prec@1 95.321 Prec@5 99.160
Epoch: [979][0/60]	Time 0.617 (0.617)	Data 0.573 (0.573)	Loss 0.4909 (0.4909)	Prec@1 90.625 (90.625)	Prec@5 

## Save model

There are two ways to save the model: save only its parameters (the `state_dict`), or save the whole model object.

**Reference:**

http://pytorch.org/docs/0.3.1/notes/serialization.html#recommend-saving-models

### Only save the parameters (Recommended)

In [9]:
# Write only the model's parameters (its state_dict) to disk, rather than
# serializing the whole model object.
# NOTE(review): the file name says "100e", but the configuration cell sets
# epochs = 1000 -- confirm the name matches the run that produced these weights.
torch.save(
    obj=model.state_dict(),
    f="mobilenet_params_100e.pth.tar",
)

### Save the whole model