## SIMCLR Implementation


### Import Libraries

In [None]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
import pickle
import random
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
from PIL import Image
# from torchvision import datasets, transforms, models
# from torch.optim.lr_scheduler import StepLR

import numpy as np
import pandas as pd

# import shutil, time, os, requests, random, copy

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

from sklearn.manifold import TSNE
from torchlars import LARS
# from pytorch_metric_learning import losses
from lightly.loss import NTXentLoss

# move one directory up to import from src
if os.getcwd().split("/")[-1] == "notebooks":
    os.chdir("..")

# from src.setdevice import set_device

### Set Seeds

In [None]:
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    random.seed(seed)
set_seed()

In [None]:
def set_device():
    if torch.cuda.is_available():  # check if NVIDIA GPU is available
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():  # check if Apple's Metal is available
        device = torch.device("mps")
        print(
            f"Checking pytorch is built with mps activated: {torch.backends.mps.is_built()}"
        )
    else:
        device = torch.device("cpu")

    if device == torch.device("cuda") or device == torch.device("mps"):
        print(f"Running on {device} GPU...")
    else:
        print("Running on CPU...")

    return device
device = set_device()

Running on cuda GPU...


### Define f(·) encoder - ResNet-20


In [None]:
"""
It is the neural network model base encoder f(·) that extracts representation vectors 
from augmented data examples. We choose to use the ResNet20 architecture.
"""

# Residual Block
class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=inplanes,
            out_channels=planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, out_channels=planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        if inplanes > planes or stride > 1:
            # Option B
            self.downsample = nn.Sequential(
                nn.Conv2d(
                    in_channels=inplanes,
                    out_channels=planes,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    bias=False,
                ),
                nn.BatchNorm2d(planes),
            )
        else:
            self.downsample = None
        self.stride = stride

    def forward(self, x):
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity if self.downsample is None else self.downsample(identity)
        out = F.relu(out)
        return out


# ResNet
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.inplanes = 16
        self.conv0 = nn.Conv2d(
            in_channels=3,
            out_channels=self.inplanes,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn0 = nn.BatchNorm2d(self.inplanes)
        self.layer1 = self._make_layer(block, planes=16, blocks=layers[0], stride=1)
        self.layer2 = self._make_layer(block, planes=32, blocks=layers[1], stride=2)
        self.layer3 = self._make_layer(block, planes=64, blocks=layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # self.identity = nn.Identity()
        # self.fc = nn.Linear(in_features=64, out_features=num_classes)

    def _make_layer(self, block, planes, blocks, stride):
        layers = []
        for i in range(blocks):
            layers.append(
                block(
                    inplanes=(self.inplanes if i == 0 else planes),
                    planes=planes,
                    stride=stride if i == 0 else 1,
                )
            )
        self.inplanes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn0(self.conv0(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        # x = self.identity(x)
        x = x.view(x.size(0), -1)
        # x = self.fc(x)
        # x = self.identity(x)
        return x


### Define g(·) projection head - MLP with linear layers

In [None]:
"""
It is the neural network projection head g(·) that maps representations to 
the space where contrastive loss is applied. There is the option to use a linear
projection head or a nonlinear projection head (2 layer MLP).
"""

class LinearLayer(nn.Module):
    def __init__(self, in_features, out_features, use_bias=False, use_bn=False):
        super(LinearLayer, self).__init__()

        self.use_bn = use_bn
        self.linear = nn.Linear(
            in_features=in_features,
            out_features=out_features,
            bias=use_bias and not use_bn,
        )
        self.bn = nn.BatchNorm1d(out_features)

    def forward(self, x):
        x = self.linear(x)
        x = self.bn(x) if self.use_bn else x
        return x


class ProjectionHead(nn.Module):
    def __init__(
        self,
        in_features,
        hidden_features,
        out_features,
        head_type="nonlinear",
        use_bn=True,
    ):
        super(ProjectionHead, self).__init__()

        if head_type == "linear":
            self.layers = LinearLayer(
                in_features=in_features,
                out_features=out_features,
                use_bias=False,
                use_bn=True,
            )
        elif head_type == "nonlinear":
            self.layers = nn.Sequential(
                LinearLayer(
                    in_features=in_features,
                    out_features=hidden_features,
                    use_bias=True,
                    use_bn=use_bn,
                ),
                nn.ReLU(),
                LinearLayer(
                    in_features=hidden_features,
                    out_features=out_features,
                    use_bias=False,
                    use_bn=use_bn,
                ),
            )

    def forward(self, x):
        x = self.layers(x)
        return x


### Model

In [None]:
class SimCLR(nn.Module):
    """
    Creates SimCLR model given encoder
    Args:
      encoder (nn.Module): Encoder
      projection_n_in (int): Number of input features of the projection head
      projection_n_hidden (int): Number of hidden features of the projection head
      projection_n_out (int): Number of output features of the projection head
      projection_use_bn (bool): Whether to use batch norm in the projection head
    """

    def __init__(
        self,
        encoder: nn.Module = ResNet(BasicBlock, [3, 3, 3]),
        projection_n_in: int = 64, # TODO CHECK ON THIS
        projection_n_hidden: int = 64,  # TODO CHECK ON THIS
        projection_n_out: int = 128,
        projection_use_bn: bool = True,
    ):
        super().__init__()

        self.encoder = ResNet(BasicBlock, [3, 3, 3])
        self.projection = ProjectionHead(
            in_features=projection_n_in,
            hidden_features=projection_n_hidden,
            out_features=projection_n_out,
            head_type="nonlinear",
            use_bn=projection_use_bn,
        )

    def forward(self, x):
        feature = self.encoder(x)
        x = self.projection(feature)
        return F.normalize(feature, dim=-1), x


In [None]:
model = SimCLR().to(device)

### Pre-processing functions for augmenting images

In [None]:
s = 0.5
train_transform = transforms.Compose(
            [
                transforms.RandomResizedCrop(
                    (32, 32),
                    scale=(0.08, 1.0),
                    ratio=(0.75, 1.3333333333333333),
                ),
                transforms.RandomHorizontalFlip(),
                transforms.RandomApply(
                    [transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)], p=0.8
                ),
                transforms.RandomGrayscale(p=0.2),
                # transforms.RandomApply([transforms.GaussianBlur(kernel_size = 3, sigma=(0.1, 2.0))], p=0.5),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
            ]
        )

test_transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
            ]
        )

### Setting up hyperparameters

In [None]:
model = SimCLR().to(device)

In [None]:
# Tunable hyperparameters
LRS = [0.5, 1, 1.5]
TEMPS = [0.1, 0.5, 1.0]
BATCHES = [256, 512, 1024, 2048, 4096]

# for now
# LR = 0.5
TEMP = 0.5
BATCH = 1024
LR = 0.01 * 0.3*BATCH/256 #0.075 * np.sqrt(BATCH)
EPOCHS = 100
DECAY = 1e-6

#############################################
# loss function
criterion = NTXentLoss(temperature=TEMP)

# Add optimizer
optimizer = LARS(optim.Adam(model.parameters(), lr=LR, weight_decay=DECAY))
# optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=DECAY)

# "decay the learning rate with the cosine decay schedule without restarts"
# SCHEDULER OR LINEAR EWARMUP
warmupscheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lambda epoch: (epoch + 1) / 10.0, verbose=True
)

# # SCHEDULER FOR COSINE DECAY
mainscheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, 500, eta_min=0.05, last_epoch=-1, verbose=True
)
#############################################

Adjusting learning rate of group 0 to 1.2000e-03.
Epoch 00000: adjusting learning rate of group 0 to 1.2000e-02.


### Setting up dataset and dataloader

In [None]:
# Alternative class for dataloader
class getC10Pair(CIFAR10):

    def __getitem__(self, idx):

        img, label = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)

        if self.transform is not None:
            img1 = self.transform(img)
            img2 = self.transform(img)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return img1, img2, label

train_data = getC10Pair(root='data', train=True, transform=train_transform, download=True)
train_loader = DataLoader(train_data, batch_size=BATCH, shuffle=True, num_workers=4,
                            drop_last=True)

memory_data = getC10Pair(root='data', train=True, transform=test_transform, download=True)
memory_loader = DataLoader(memory_data, batch_size=BATCH, shuffle=False, num_workers=4)

test_data = getC10Pair(root='data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test_data, batch_size=BATCH, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


### Training

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~KEEP THIS~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~KEEP THIS~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
current_epoch = 0

# DECAY_EPOCHS = 10

results = {'train_loss': [], 'val_loss': [], 'test_acc@1': [], 'test_acc@5': []}
save_name_pre = 'B{}_E{}'.format(BATCH, EPOCHS)

if not os.path.exists('./results'):
    os.mkdir('results')

best_acc = 0.0
for epoch in range(EPOCHS):
    # if epoch % DECAY_EPOCHS == 0 and epoch != 0:
    #     LR = LR * DECAY
    #     for param_group in optimizer.param_groups:
    #         param_group['lr'] = LR
    #     print("Current learning rate has decayed to %f" %param_group['lr'])

    print("")
    model.train()
    total_loss, total_num, train_bar = 0.0, 0, tqdm(train_loader)

    for x_1, x_2, target in train_bar:
        x_1, x_2 = x_1.cuda(non_blocking=True), x_2.cuda(non_blocking=True)

        feature_1, out_1 = model(x_1)
        feature_2, out_2 = model(x_2)

        loss = criterion(out_1, out_2)
        
        # # [2*B, D]
        # out = torch.cat([out_1, out_2], dim=0)
        # # [2*B, 2*B]
        # sim_matrix = torch.exp(torch.mm(out, out.t().contiguous()) / temperature)
        # mask = (torch.ones_like(sim_matrix) - torch.eye(2 * batch_size, device=sim_matrix.device)).bool()
        # # [2*B, 2*B-1]
        # sim_matrix = sim_matrix.masked_select(mask).view(2 * batch_size, -1)

        # # compute loss
        # pos_sim = torch.exp(torch.sum(out_1 * out_2, dim=-1) / temperature)
        # # [2*B]
        # pos_sim = torch.cat([pos_sim, pos_sim], dim=0)
        # loss = (- torch.log(pos_sim / sim_matrix.sum(dim=-1))).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_num += BATCH
        total_loss += loss.item() * BATCH
        train_bar.set_description('Train Epoch: [{}/{}] Loss: {:.4f}'.format(epoch+1, EPOCHS, total_loss / total_num))
        
        train_loss = total_loss/total_num

    results['train_loss'].append(train_loss)

    if epoch < 20:
        warmupscheduler.step()
    if epoch >= 20:
        mainscheduler.step()

    lr = optimizer.param_groups[0]["lr"]

    optimizer.step()

    model.eval()
    total_loss_val, total_top1, total_top5, total_num, feature_bank = 0.0, 0.0, 0.0, 0, []

    with torch.no_grad():
        # generate feature bank
        for data, _, target in tqdm(memory_loader, desc='Feature extracting'):
            feature, out = model(data.cuda(non_blocking=True))
            feature_bank.append(feature)
        # [D, N]
        feature_bank = torch.cat(feature_bank, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_loader.dataset.targets, device=feature_bank.device)

        # loop test data to predict the label by weighted knn search
        test_bar = tqdm(test_loader)
        for data, data2, target in test_bar:
            data, data2, target = data.cuda(non_blocking=True), data2.cuda(non_blocking=True), target.cuda(non_blocking=True)
            feature, out = model(data)
            feature2, out2 = model(data2)

            total_num += data.size(0)

            loss_val = criterion(out, out2)
            total_loss_val += loss_val.item() * BATCH

            val_loss = total_loss_val/total_num


            # compute cos similarity between each feature vector and feature bank ---> [B, N]
            sim_matrix = torch.mm(feature, feature_bank)
            # [B, K]
            sim_weight, sim_indices = sim_matrix.topk(k=200, dim=-1)
            # [B, K]
            sim_labels = torch.gather(feature_labels.expand(data.size(0), -1), dim=-1, index=sim_indices)
            sim_weight = (sim_weight / TEMP).exp()

            # counts for each class
            one_hot_label = torch.zeros(data.size(0) * 200, 10, device=sim_labels.device)
            # [B*K, C]
            one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
            # weighted score ---> [B, C]
            pred_scores = torch.sum(one_hot_label.view(data.size(0), -1, 10) * sim_weight.unsqueeze(dim=-1), dim=1)

            pred_labels = pred_scores.argsort(dim=-1, descending=True)

            total_top1 += torch.sum((pred_labels[:, :1] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            total_top5 += torch.sum((pred_labels[:, :5] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%'
                                     .format(epoch+1, EPOCHS, total_top1 / total_num * 100, total_top5 / total_num * 100))
            
    results['val_loss'].append(val_loss)

    test_acc_1 = total_top1 / total_num * 100
    test_acc_5 = total_top5 / total_num * 100

    results['test_acc@1'].append(test_acc_1)
    results['test_acc@5'].append(test_acc_5)

    # save statistics
    # data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    # data_frame.to_csv('results/{}_statistics.csv'.format(save_name_pre), index_label='epoch')

    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        # torch.save(model.state_dict(), '/workspaces/RepLearning/saved_models/{}_best_model.pth'.format(save_name_pre))
        torch.save(model.state_dict(), './results/{}_lars_adam_with_schedulers_best_model.pth'.format(save_name_pre))
     
    # save_model(model, optimizer, mainscheduler, current_epoch, "SimCLR_CIFAR10_RN20_bestacc.pt")
    # save every epoch as backup
    # torch.save(model.state_dict(), f'./results/backup/{save_name_pre}_e{epoch}.pth')

    # current_epoch+=1




Train Epoch: [1/100] Loss: 6.9873: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.4000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.35it/s]
Test Epoch: [1/100] Acc@1:37.41% Acc@5:86.92%: 100%|██████████| 10/10 [00:01<00:00,  6.48it/s]





Train Epoch: [2/100] Loss: 6.7828: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 3.6000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.48it/s]
Test Epoch: [2/100] Acc@1:38.61% Acc@5:88.11%: 100%|██████████| 10/10 [00:01<00:00,  6.53it/s]





Train Epoch: [3/100] Loss: 6.7270: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 4.8000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.64it/s]
Test Epoch: [3/100] Acc@1:41.01% Acc@5:89.25%: 100%|██████████| 10/10 [00:01<00:00,  6.38it/s]





Train Epoch: [4/100] Loss: 6.6671: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 6.0000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.39it/s]
Test Epoch: [4/100] Acc@1:42.26% Acc@5:89.80%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [5/100] Loss: 6.6382: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 7.2000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.32it/s]
Test Epoch: [5/100] Acc@1:43.78% Acc@5:90.71%: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]





Train Epoch: [6/100] Loss: 6.6099: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 8.4000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.53it/s]
Test Epoch: [6/100] Acc@1:44.29% Acc@5:90.35%: 100%|██████████| 10/10 [00:01<00:00,  6.43it/s]





Train Epoch: [7/100] Loss: 6.5703: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 9.6000e-03.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.47it/s]
Test Epoch: [7/100] Acc@1:46.18% Acc@5:92.01%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [8/100] Loss: 6.5488: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.0800e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.50it/s]
Test Epoch: [8/100] Acc@1:47.05% Acc@5:92.23%: 100%|██████████| 10/10 [00:01<00:00,  6.32it/s]





Train Epoch: [9/100] Loss: 6.5076: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.2000e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.64it/s]
Test Epoch: [9/100] Acc@1:49.65% Acc@5:93.04%: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]





Train Epoch: [10/100] Loss: 6.4864: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.3200e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.53it/s]
Test Epoch: [10/100] Acc@1:50.26% Acc@5:93.81%: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]





Train Epoch: [11/100] Loss: 6.4609: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 1.4400e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [11/100] Acc@1:50.58% Acc@5:93.67%: 100%|██████████| 10/10 [00:01<00:00,  6.38it/s]





Train Epoch: [12/100] Loss: 6.4492: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.5600e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [12/100] Acc@1:52.54% Acc@5:94.29%: 100%|██████████| 10/10 [00:01<00:00,  6.36it/s]





Train Epoch: [13/100] Loss: 6.4270: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.6800e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.20it/s]
Test Epoch: [13/100] Acc@1:53.31% Acc@5:94.81%: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]





Train Epoch: [14/100] Loss: 6.4100: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 1.8000e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.39it/s]
Test Epoch: [14/100] Acc@1:53.98% Acc@5:94.90%: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]





Train Epoch: [15/100] Loss: 6.4001: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.9200e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.57it/s]
Test Epoch: [15/100] Acc@1:55.91% Acc@5:95.00%: 100%|██████████| 10/10 [00:01<00:00,  6.53it/s]





Train Epoch: [16/100] Loss: 6.3886: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0400e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [16/100] Acc@1:56.06% Acc@5:95.51%: 100%|██████████| 10/10 [00:01<00:00,  6.41it/s]





Train Epoch: [17/100] Loss: 6.3794: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 2.1600e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.58it/s]
Test Epoch: [17/100] Acc@1:57.99% Acc@5:95.84%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [18/100] Loss: 6.3658: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.2800e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.45it/s]
Test Epoch: [18/100] Acc@1:58.27% Acc@5:96.05%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [19/100] Loss: 6.3594: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 2.4000e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.58it/s]
Test Epoch: [19/100] Acc@1:58.70% Acc@5:95.90%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [20/100] Loss: 6.3516: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.5200e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.26it/s]
Test Epoch: [20/100] Acc@1:59.74% Acc@5:96.15%: 100%|██████████| 10/10 [00:01<00:00,  6.54it/s]





Train Epoch: [21/100] Loss: 6.3425: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00001: adjusting learning rate of group 0 to 1.2000e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.58it/s]
Test Epoch: [21/100] Acc@1:60.81% Acc@5:96.54%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [22/100] Loss: 6.3039: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00002: adjusting learning rate of group 0 to 1.2002e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.55it/s]
Test Epoch: [22/100] Acc@1:61.45% Acc@5:96.67%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [23/100] Loss: 6.2953: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Epoch 00003: adjusting learning rate of group 0 to 1.2003e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.64it/s]
Test Epoch: [23/100] Acc@1:62.64% Acc@5:96.78%: 100%|██████████| 10/10 [00:01<00:00,  6.38it/s]





Train Epoch: [24/100] Loss: 6.2864: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Epoch 00004: adjusting learning rate of group 0 to 1.2006e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.35it/s]
Test Epoch: [24/100] Acc@1:62.32% Acc@5:96.81%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [25/100] Loss: 6.2832: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00005: adjusting learning rate of group 0 to 1.2009e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.60it/s]
Test Epoch: [25/100] Acc@1:62.75% Acc@5:96.63%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [26/100] Loss: 6.2759: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00006: adjusting learning rate of group 0 to 1.2014e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.50it/s]
Test Epoch: [26/100] Acc@1:63.57% Acc@5:97.18%: 100%|██████████| 10/10 [00:01<00:00,  6.46it/s]





Train Epoch: [27/100] Loss: 6.2727: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Epoch 00007: adjusting learning rate of group 0 to 1.2018e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [27/100] Acc@1:63.66% Acc@5:96.97%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [28/100] Loss: 6.2732: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00008: adjusting learning rate of group 0 to 1.2024e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.54it/s]
Test Epoch: [28/100] Acc@1:63.80% Acc@5:97.04%: 100%|██████████| 10/10 [00:01<00:00,  6.48it/s]





Train Epoch: [29/100] Loss: 6.2592: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00009: adjusting learning rate of group 0 to 1.2030e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.50it/s]
Test Epoch: [29/100] Acc@1:63.44% Acc@5:96.88%: 100%|██████████| 10/10 [00:01<00:00,  6.41it/s]





Train Epoch: [30/100] Loss: 6.2629: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Epoch 00010: adjusting learning rate of group 0 to 1.2037e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [30/100] Acc@1:64.48% Acc@5:97.18%: 100%|██████████| 10/10 [00:01<00:00,  6.53it/s]





Train Epoch: [31/100] Loss: 6.2589: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00011: adjusting learning rate of group 0 to 1.2045e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.55it/s]
Test Epoch: [31/100] Acc@1:63.91% Acc@5:97.22%: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]





Train Epoch: [32/100] Loss: 6.2529: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00012: adjusting learning rate of group 0 to 1.2054e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.47it/s]
Test Epoch: [32/100] Acc@1:64.32% Acc@5:97.33%: 100%|██████████| 10/10 [00:01<00:00,  6.34it/s]





Train Epoch: [33/100] Loss: 6.2550: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00013: adjusting learning rate of group 0 to 1.2063e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.39it/s]
Test Epoch: [33/100] Acc@1:64.86% Acc@5:97.39%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [34/100] Loss: 6.2474: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00014: adjusting learning rate of group 0 to 1.2073e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.61it/s]
Test Epoch: [34/100] Acc@1:64.88% Acc@5:97.36%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [35/100] Loss: 6.2488: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00015: adjusting learning rate of group 0 to 1.2084e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.55it/s]
Test Epoch: [35/100] Acc@1:65.52% Acc@5:97.38%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [36/100] Loss: 6.2462: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00016: adjusting learning rate of group 0 to 1.2096e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.33it/s]
Test Epoch: [36/100] Acc@1:65.65% Acc@5:97.60%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [37/100] Loss: 6.2441: 100%|██████████| 48/48 [00:23<00:00,  2.02it/s]


Epoch 00017: adjusting learning rate of group 0 to 1.2108e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.45it/s]
Test Epoch: [37/100] Acc@1:65.76% Acc@5:97.53%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [38/100] Loss: 6.2443: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00018: adjusting learning rate of group 0 to 1.2121e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.41it/s]
Test Epoch: [38/100] Acc@1:65.71% Acc@5:97.38%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [39/100] Loss: 6.2444: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Epoch 00019: adjusting learning rate of group 0 to 1.2135e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.50it/s]
Test Epoch: [39/100] Acc@1:65.46% Acc@5:97.37%: 100%|██████████| 10/10 [00:01<00:00,  6.47it/s]





Train Epoch: [40/100] Loss: 6.2384: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00020: adjusting learning rate of group 0 to 1.2150e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.29it/s]
Test Epoch: [40/100] Acc@1:66.36% Acc@5:97.73%: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]





Train Epoch: [41/100] Loss: 6.2375: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00021: adjusting learning rate of group 0 to 1.2165e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.46it/s]
Test Epoch: [41/100] Acc@1:66.10% Acc@5:97.54%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [42/100] Loss: 6.2389: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00022: adjusting learning rate of group 0 to 1.2181e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.55it/s]
Test Epoch: [42/100] Acc@1:65.87% Acc@5:97.36%: 100%|██████████| 10/10 [00:01<00:00,  6.58it/s]





Train Epoch: [43/100] Loss: 6.2359: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Epoch 00023: adjusting learning rate of group 0 to 1.2198e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [43/100] Acc@1:66.32% Acc@5:97.58%: 100%|██████████| 10/10 [00:01<00:00,  6.31it/s]





Train Epoch: [44/100] Loss: 6.2314: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00024: adjusting learning rate of group 0 to 1.2216e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [44/100] Acc@1:66.45% Acc@5:97.55%: 100%|██████████| 10/10 [00:01<00:00,  6.54it/s]





Train Epoch: [45/100] Loss: 6.2314: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00025: adjusting learning rate of group 0 to 1.2234e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.47it/s]
Test Epoch: [45/100] Acc@1:66.87% Acc@5:97.82%: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]





Train Epoch: [46/100] Loss: 6.2264: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00026: adjusting learning rate of group 0 to 1.2253e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [46/100] Acc@1:66.93% Acc@5:97.79%: 100%|██████████| 10/10 [00:01<00:00,  6.44it/s]





Train Epoch: [47/100] Loss: 6.2301: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00027: adjusting learning rate of group 0 to 1.2273e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.48it/s]
Test Epoch: [47/100] Acc@1:66.63% Acc@5:97.58%: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]





Train Epoch: [48/100] Loss: 6.2259: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00028: adjusting learning rate of group 0 to 1.2293e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.03it/s]
Test Epoch: [48/100] Acc@1:67.01% Acc@5:97.68%: 100%|██████████| 10/10 [00:01<00:00,  6.43it/s]





Train Epoch: [49/100] Loss: 6.2229: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00029: adjusting learning rate of group 0 to 1.2315e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [49/100] Acc@1:67.10% Acc@5:97.79%: 100%|██████████| 10/10 [00:01<00:00,  6.31it/s]





Train Epoch: [50/100] Loss: 6.2240: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00030: adjusting learning rate of group 0 to 1.2337e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.47it/s]
Test Epoch: [50/100] Acc@1:67.64% Acc@5:97.83%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [51/100] Loss: 6.2216: 100%|██████████| 48/48 [00:23<00:00,  2.03it/s]


Epoch 00031: adjusting learning rate of group 0 to 1.2359e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.30it/s]
Test Epoch: [51/100] Acc@1:67.15% Acc@5:97.57%: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]





Train Epoch: [52/100] Loss: 6.2225: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00032: adjusting learning rate of group 0 to 1.2383e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.41it/s]
Test Epoch: [52/100] Acc@1:67.55% Acc@5:97.96%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [53/100] Loss: 6.2192: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Epoch 00033: adjusting learning rate of group 0 to 1.2407e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [53/100] Acc@1:67.57% Acc@5:97.74%: 100%|██████████| 10/10 [00:01<00:00,  6.48it/s]





Train Epoch: [54/100] Loss: 6.2218: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00034: adjusting learning rate of group 0 to 1.2432e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.46it/s]
Test Epoch: [54/100] Acc@1:67.41% Acc@5:97.83%: 100%|██████████| 10/10 [00:01<00:00,  6.39it/s]





Train Epoch: [55/100] Loss: 6.2157: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00035: adjusting learning rate of group 0 to 1.2458e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.34it/s]
Test Epoch: [55/100] Acc@1:66.84% Acc@5:97.60%: 100%|██████████| 10/10 [00:01<00:00,  6.46it/s]





Train Epoch: [56/100] Loss: 6.2201: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00036: adjusting learning rate of group 0 to 1.2484e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.58it/s]
Test Epoch: [56/100] Acc@1:68.03% Acc@5:97.85%: 100%|██████████| 10/10 [00:01<00:00,  6.61it/s]





Train Epoch: [57/100] Loss: 6.2181: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00037: adjusting learning rate of group 0 to 1.2511e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.48it/s]
Test Epoch: [57/100] Acc@1:67.75% Acc@5:97.75%: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]





Train Epoch: [58/100] Loss: 6.2124: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00038: adjusting learning rate of group 0 to 1.2539e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.28it/s]
Test Epoch: [58/100] Acc@1:67.71% Acc@5:97.92%: 100%|██████████| 10/10 [00:01<00:00,  6.40it/s]





Train Epoch: [59/100] Loss: 6.2121: 100%|██████████| 48/48 [00:23<00:00,  2.08it/s]


Epoch 00039: adjusting learning rate of group 0 to 1.2568e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.28it/s]
Test Epoch: [59/100] Acc@1:67.35% Acc@5:97.90%: 100%|██████████| 10/10 [00:01<00:00,  6.40it/s]





Train Epoch: [60/100] Loss: 6.2136: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00040: adjusting learning rate of group 0 to 1.2597e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.46it/s]
Test Epoch: [60/100] Acc@1:67.52% Acc@5:97.65%: 100%|██████████| 10/10 [00:01<00:00,  6.25it/s]





Train Epoch: [61/100] Loss: 6.2143: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00041: adjusting learning rate of group 0 to 1.2627e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [61/100] Acc@1:68.15% Acc@5:97.99%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [62/100] Loss: 6.2111: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00042: adjusting learning rate of group 0 to 1.2658e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.58it/s]
Test Epoch: [62/100] Acc@1:67.84% Acc@5:97.70%: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]





Train Epoch: [63/100] Loss: 6.2085: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00043: adjusting learning rate of group 0 to 1.2689e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.53it/s]
Test Epoch: [63/100] Acc@1:67.82% Acc@5:97.86%: 100%|██████████| 10/10 [00:01<00:00,  6.43it/s]





Train Epoch: [64/100] Loss: 6.2100: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00044: adjusting learning rate of group 0 to 1.2721e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [64/100] Acc@1:68.18% Acc@5:97.81%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [65/100] Loss: 6.2121: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00045: adjusting learning rate of group 0 to 1.2754e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.27it/s]
Test Epoch: [65/100] Acc@1:68.16% Acc@5:97.85%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [66/100] Loss: 6.2069: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Epoch 00046: adjusting learning rate of group 0 to 1.2788e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.41it/s]
Test Epoch: [66/100] Acc@1:68.22% Acc@5:97.99%: 100%|██████████| 10/10 [00:01<00:00,  6.31it/s]





Train Epoch: [67/100] Loss: 6.2103: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00047: adjusting learning rate of group 0 to 1.2822e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.48it/s]
Test Epoch: [67/100] Acc@1:68.43% Acc@5:97.97%: 100%|██████████| 10/10 [00:01<00:00,  6.39it/s]





Train Epoch: [68/100] Loss: 6.2057: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00048: adjusting learning rate of group 0 to 1.2858e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.59it/s]
Test Epoch: [68/100] Acc@1:67.80% Acc@5:97.91%: 100%|██████████| 10/10 [00:01<00:00,  6.41it/s]





Train Epoch: [69/100] Loss: 6.2114: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00049: adjusting learning rate of group 0 to 1.2893e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.42it/s]
Test Epoch: [69/100] Acc@1:68.11% Acc@5:97.88%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [70/100] Loss: 6.2065: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00050: adjusting learning rate of group 0 to 1.2930e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.47it/s]
Test Epoch: [70/100] Acc@1:68.28% Acc@5:97.96%: 100%|██████████| 10/10 [00:01<00:00,  6.36it/s]





Train Epoch: [71/100] Loss: 6.2089: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00051: adjusting learning rate of group 0 to 1.2967e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [71/100] Acc@1:68.26% Acc@5:97.88%: 100%|██████████| 10/10 [00:01<00:00,  6.46it/s]





Train Epoch: [72/100] Loss: 6.2087: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00052: adjusting learning rate of group 0 to 1.3005e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [72/100] Acc@1:68.21% Acc@5:97.93%: 100%|██████████| 10/10 [00:01<00:00,  6.40it/s]





Train Epoch: [73/100] Loss: 6.2078: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00053: adjusting learning rate of group 0 to 1.3044e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.35it/s]
Test Epoch: [73/100] Acc@1:68.23% Acc@5:97.91%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [74/100] Loss: 6.2056: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00054: adjusting learning rate of group 0 to 1.3083e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.49it/s]
Test Epoch: [74/100] Acc@1:68.06% Acc@5:97.68%: 100%|██████████| 10/10 [00:01<00:00,  6.43it/s]





Train Epoch: [75/100] Loss: 6.2029: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00055: adjusting learning rate of group 0 to 1.3123e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.39it/s]
Test Epoch: [75/100] Acc@1:69.10% Acc@5:97.90%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [76/100] Loss: 6.2015: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00056: adjusting learning rate of group 0 to 1.3164e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [76/100] Acc@1:68.50% Acc@5:97.94%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [77/100] Loss: 6.2030: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00057: adjusting learning rate of group 0 to 1.3206e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.44it/s]
Test Epoch: [77/100] Acc@1:68.45% Acc@5:97.87%: 100%|██████████| 10/10 [00:01<00:00,  6.15it/s]





Train Epoch: [78/100] Loss: 6.2038: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00058: adjusting learning rate of group 0 to 1.3248e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.48it/s]
Test Epoch: [78/100] Acc@1:69.19% Acc@5:98.08%: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]





Train Epoch: [79/100] Loss: 6.1990: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00059: adjusting learning rate of group 0 to 1.3291e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.21it/s]
Test Epoch: [79/100] Acc@1:68.25% Acc@5:97.89%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [80/100] Loss: 6.2014: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00060: adjusting learning rate of group 0 to 1.3334e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [80/100] Acc@1:68.52% Acc@5:97.83%: 100%|██████████| 10/10 [00:01<00:00,  6.46it/s]





Train Epoch: [81/100] Loss: 6.1974: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00061: adjusting learning rate of group 0 to 1.3379e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.56it/s]
Test Epoch: [81/100] Acc@1:69.01% Acc@5:97.97%: 100%|██████████| 10/10 [00:01<00:00,  6.42it/s]





Train Epoch: [82/100] Loss: 6.2025: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00062: adjusting learning rate of group 0 to 1.3424e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.60it/s]
Test Epoch: [82/100] Acc@1:68.10% Acc@5:97.80%: 100%|██████████| 10/10 [00:01<00:00,  6.38it/s]





Train Epoch: [83/100] Loss: 6.2024: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s]


Epoch 00063: adjusting learning rate of group 0 to 1.3469e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.51it/s]
Test Epoch: [83/100] Acc@1:68.70% Acc@5:97.84%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [84/100] Loss: 6.1986: 100%|██████████| 48/48 [00:23<00:00,  2.03it/s]


Epoch 00064: adjusting learning rate of group 0 to 1.3516e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.35it/s]
Test Epoch: [84/100] Acc@1:69.24% Acc@5:98.12%: 100%|██████████| 10/10 [00:01<00:00,  6.38it/s]





Train Epoch: [85/100] Loss: 6.2004: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00065: adjusting learning rate of group 0 to 1.3563e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.33it/s]
Test Epoch: [85/100] Acc@1:69.04% Acc@5:97.88%: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]





Train Epoch: [86/100] Loss: 6.1957: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00066: adjusting learning rate of group 0 to 1.3610e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [86/100] Acc@1:69.42% Acc@5:97.86%: 100%|██████████| 10/10 [00:01<00:00,  6.34it/s]





Train Epoch: [87/100] Loss: 6.2025: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00067: adjusting learning rate of group 0 to 1.3659e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.56it/s]
Test Epoch: [87/100] Acc@1:68.50% Acc@5:97.83%: 100%|██████████| 10/10 [00:01<00:00,  6.56it/s]





Train Epoch: [88/100] Loss: 6.1971: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00068: adjusting learning rate of group 0 to 1.3708e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.53it/s]
Test Epoch: [88/100] Acc@1:69.46% Acc@5:98.00%: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]





Train Epoch: [89/100] Loss: 6.1972: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00069: adjusting learning rate of group 0 to 1.3758e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.43it/s]
Test Epoch: [89/100] Acc@1:69.87% Acc@5:98.04%: 100%|██████████| 10/10 [00:01<00:00,  6.48it/s]





Train Epoch: [90/100] Loss: 6.1925: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00070: adjusting learning rate of group 0 to 1.3808e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.41it/s]
Test Epoch: [90/100] Acc@1:68.71% Acc@5:98.06%: 100%|██████████| 10/10 [00:01<00:00,  6.54it/s]





Train Epoch: [91/100] Loss: 6.2013: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00071: adjusting learning rate of group 0 to 1.3859e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.41it/s]
Test Epoch: [91/100] Acc@1:69.19% Acc@5:97.85%: 100%|██████████| 10/10 [00:01<00:00,  6.50it/s]





Train Epoch: [92/100] Loss: 6.1968: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00072: adjusting learning rate of group 0 to 1.3911e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.22it/s]
Test Epoch: [92/100] Acc@1:68.58% Acc@5:97.84%: 100%|██████████| 10/10 [00:01<00:00,  6.51it/s]





Train Epoch: [93/100] Loss: 6.1973: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00073: adjusting learning rate of group 0 to 1.3964e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  8.96it/s]
Test Epoch: [93/100] Acc@1:68.89% Acc@5:97.90%: 100%|██████████| 10/10 [00:01<00:00,  6.44it/s]





Train Epoch: [94/100] Loss: 6.1958: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00074: adjusting learning rate of group 0 to 1.4017e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.50it/s]
Test Epoch: [94/100] Acc@1:68.54% Acc@5:97.87%: 100%|██████████| 10/10 [00:01<00:00,  6.49it/s]





Train Epoch: [95/100] Loss: 6.1955: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00075: adjusting learning rate of group 0 to 1.4071e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [95/100] Acc@1:68.91% Acc@5:97.84%: 100%|██████████| 10/10 [00:01<00:00,  6.45it/s]





Train Epoch: [96/100] Loss: 6.1980: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s]


Epoch 00076: adjusting learning rate of group 0 to 1.4125e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.52it/s]
Test Epoch: [96/100] Acc@1:69.31% Acc@5:97.96%: 100%|██████████| 10/10 [00:01<00:00,  6.39it/s]





Train Epoch: [97/100] Loss: 6.1961: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s]


Epoch 00077: adjusting learning rate of group 0 to 1.4181e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.35it/s]
Test Epoch: [97/100] Acc@1:69.39% Acc@5:97.95%: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]





Train Epoch: [98/100] Loss: 6.1985: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00078: adjusting learning rate of group 0 to 1.4236e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.56it/s]
Test Epoch: [98/100] Acc@1:69.06% Acc@5:97.98%: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]





Train Epoch: [99/100] Loss: 6.1976: 100%|██████████| 48/48 [00:23<00:00,  2.03it/s]


Epoch 00079: adjusting learning rate of group 0 to 1.4293e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.38it/s]
Test Epoch: [99/100] Acc@1:69.49% Acc@5:97.89%: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]





Train Epoch: [100/100] Loss: 6.1970: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s]


Epoch 00080: adjusting learning rate of group 0 to 1.4350e-02.


Feature extracting: 100%|██████████| 49/49 [00:05<00:00,  9.37it/s]
Test Epoch: [100/100] Acc@1:69.76% Acc@5:98.13%: 100%|██████████| 10/10 [00:01<00:00,  6.53it/s]


In [None]:
# save statistics
data_frame = pd.DataFrame(data=results, index=range(1, epoch+2))
data_frame.to_csv('results/{}_lars_adam_with_schedulers_statistics.csv'.format(save_name_pre), index_label='epoch')