In [1]:
import os, sys, time, glob, random, argparse
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn

# XAutoDL 
from xautodl.config_utils import load_config, dict2config, configure2str
from xautodl.datasets import get_datasets, get_nas_search_loaders
from xautodl.procedures import (
    prepare_seed,
    prepare_logger,
    save_checkpoint,
    copy_checkpoint,
    get_optim_scheduler,
)
from xautodl.utils import get_model_infos, obtain_accuracy
from xautodl.log_utils import AverageMeter, time_string, convert_secs2time
from xautodl.models import get_search_spaces

from custom_models import get_cell_based_tiny_net
from custom_search_cells import NAS201SearchCell as SearchCell
from xautodl.models.cell_searchs.genotypes import Structure

# NB201
from nas_201_api import NASBench201API as API

import scipy.stats as stats

2022-11-03 06:42:08.571947: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Experiment options. They are declared argparse-style but parsed from an empty
# argv, so inside the notebook every option takes the default written here.
parser = argparse.ArgumentParser("Random search for NAS.")

# --- dataset ---
parser.add_argument(
    "--data_path",
    type=str,
    default='../cifar.python',
    help="The path to dataset",
)
parser.add_argument(
    "--dataset",
    type=str,
    default='cifar10',
    choices=["cifar10", "cifar100", "ImageNet16-120"],
    help="Choose between Cifar10/100 and ImageNet-16.",
)

# --- search space: channels and number-of-cells ---
parser.add_argument(
    "--search_space_name",
    type=str,
    default='nas-bench-201',
    help="The search space name.",
)
parser.add_argument(
    "--config_path",
    type=str,
    default='./MY.config',
    help="The path to the configuration.",
)
parser.add_argument(
    "--max_nodes",
    type=int,
    default=4,
    help="The maximum number of nodes.",
)
parser.add_argument(
    "--channel",
    type=int,
    default=16,
    help="The number of channels.",
)
parser.add_argument(
    "--num_cells",
    type=int,
    default=5,
    help="The number of cells in one stage.",
)
parser.add_argument(
    "--select_num",
    type=int,
    default=100,
    help="The number of selected architectures to evaluate.",
)
parser.add_argument(
    "--track_running_stats",
    type=int,
    default=0,
    choices=[0, 1],
    help="Whether use track_running_stats or not in the BN layer.",
)

# --- logging / bookkeeping ---
parser.add_argument(
    "--workers",
    type=int,
    default=4,
    help="number of data loading workers",
)
parser.add_argument(
    "--save_dir",
    type=str,
    default='./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric',
    help="Folder to save checkpoints and log.",
)
# The benchmark file is not loaded in this notebook; to enable it, point the
# default at e.g. '../NAS-Bench-201-v1_1-096897.pth'.
parser.add_argument(
    "--arch_nas_dataset",
    type=str,
    default=None,
    help="The path to load the architecture dataset (tiny-nas-benchmark).",
)
parser.add_argument(
    "--print_freq",
    type=int,
    default=200,
    help="print frequency (default: 200)",
)
parser.add_argument(
    "--rand_seed",
    type=int,
    default=None,
    help="manual seed",
)

args = parser.parse_args(args=[])

# Without a usable manual seed, draw one at random; it is printed below so the
# run can still be identified afterwards.
seed_is_unset = args.rand_seed is None or args.rand_seed < 0
if seed_is_unset:
    args.rand_seed = random.randint(1, 100000)

print(args.rand_seed)
print(args)

# `xargs` is an alias of the very same Namespace object, not a copy.
xargs = args

92767
Namespace(arch_nas_dataset=None, channel=16, config_path='./MY.config', data_path='../cifar.python', dataset='cifar10', max_nodes=4, num_cells=5, print_freq=200, rand_seed=92767, save_dir='./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric', search_space_name='nas-bench-201', select_num=100, track_running_stats=0, workers=4)


In [3]:
# Later cells call .cuda() on the model, the criterion and the batches, so stop
# right away when no GPU is visible.
assert torch.cuda.is_available(), "CUDA is not available."
# cuDNN stays enabled, but its autotuner (benchmark) is switched off and
# deterministic algorithms are requested, trading some speed for repeatability.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
# The torch CPU thread count is tied to the `--workers` option.
torch.set_num_threads(xargs.workers)
# Seed the run with the seed chosen in the argument cell
# (xautodl helper; presumably seeds random/numpy/torch — TODO confirm).
prepare_seed(xargs.rand_seed)
# Logger for the search phase; it reports its directory as args.save_dir.
logger = prepare_logger(args)

Main Function with logger : Logger(dir=cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric, use-tf=False, writer=None)
Arguments : -------------------------------
arch_nas_dataset : None
channel          : 16
config_path      : ./MY.config
data_path        : ../cifar.python
dataset          : cifar10
max_nodes        : 4
num_cells        : 5
print_freq       : 200
rand_seed        : 92767
save_dir         : ./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric
search_space_name : nas-bench-201
select_num       : 100
track_running_stats : 0
workers          : 4
Python  Version  : 3.7.13 (default, Mar 29 2022, 02:18:16)  [GCC 7.5.0]
Pillow  Version  : 9.0.1
PyTorch Version  : 1.12.0
cuDNN   Version  : 8302
CUDA available   : True
CUDA GPU numbers : 2
CUDA_VISIBLE_DEVICES : None


In [4]:
# Search-phase setup: data loaders, the search network, its optimizer/scheduler,
# and (if a previous run left an info file) checkpoint resume.

# -1 is the third positional argument of get_datasets
# (presumably "no cutout" — TODO confirm against xautodl).
train_data, valid_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
# Load the optimisation config and inject the dataset-dependent fields.
config = load_config(xargs.config_path, {"class_num": class_num, "xshape": xshape}, logger)
# Only the first (search) and last (validation) loaders are used; the middle
# return value is discarded.
search_loader, _, valid_loader = get_nas_search_loaders(train_data,
                                                        valid_data,
                                                        xargs.dataset,
                                                        "../configs/nas-benchmark/",
                                                        (config.batch_size, config.test_batch_size),
                                                        xargs.workers)
logger.log("||||||| {:10s} ||||||| Search-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}".format(
            xargs.dataset, len(search_loader), len(valid_loader), config.batch_size))
logger.log("||||||| {:10s} ||||||| Config={:}".format(xargs.dataset, config))

# Build the cell-based search network from the notebook options.
search_space = get_search_spaces("cell", xargs.search_space_name)
model_config = dict2config(
    {
        "name": "RANDOM",
        "C": xargs.channel,
        "N": xargs.num_cells,
        "max_nodes": xargs.max_nodes,
        "num_classes": class_num,
        "space": search_space,
        "affine": False,
        "track_running_stats": bool(xargs.track_running_stats),
    },
    None,
)
search_model = get_cell_based_tiny_net(model_config)

# Optimizer, LR scheduler and loss are all derived from the loaded config.
w_optimizer, w_scheduler, criterion = get_optim_scheduler(search_model.parameters(), config)

logger.log("w-optimizer : {:}".format(w_optimizer))
logger.log("w-scheduler : {:}".format(w_scheduler))
logger.log("criterion   : {:}".format(criterion))
# The NAS-Bench-201 API is deliberately not loaded here; `api` stays None.
# if xargs.arch_nas_dataset is None:
api = None
# else:
#     api = API(xargs.arch_nas_dataset)
logger.log("{:} create API = {:} done".format(time_string(), api))

# Paths managed by the logger for the resume-info file and the model checkpoints.
last_info, model_base_path, model_best_path = (
    logger.path("info"),
    logger.path("model"),
    logger.path("best"),
)
# `network` wraps the same `search_model` object, so state loaded into
# `search_model` below is what `network` runs with.
network, criterion = torch.nn.DataParallel(search_model).cuda(), criterion.cuda()

if last_info.exists():  # automatically resume from previous checkpoint
    logger.log(
        "=> loading checkpoint of the last-info '{:}' start".format(last_info)
    )
    # NOTE: `last_info` is rebound from the file path to the loaded object here,
    # so the log message at the end of this branch formats that object, not the path.
    last_info = torch.load(last_info)
    start_epoch = last_info["epoch"]
    checkpoint = torch.load(last_info["last_checkpoint"])
    genotypes = checkpoint["genotypes"]
    valid_accuracies = checkpoint["valid_accuracies"]
    search_model.load_state_dict(checkpoint["search_model"])
    w_scheduler.load_state_dict(checkpoint["w_scheduler"])
    w_optimizer.load_state_dict(checkpoint["w_optimizer"])
    logger.log(
        "=> loading checkpoint of the last-info '{:}' start with {:}-th epoch.".format(
            last_info, start_epoch
        )
    )
else:
    # Fresh run: start at epoch 0 with empty bookkeeping.
    logger.log("=> do not find the last-info file : {:}".format(last_info))
    start_epoch, valid_accuracies, genotypes = 0, {"best": -1}, {}

Files already downloaded and verified
Files already downloaded and verified
./MY.config
Configure(scheduler='cos', LR=0.025, eta_min=0.001, epochs=50, warmup=0, optim='SGD', decay=0.0005, momentum=0.9, nesterov=True, criterion='Softmax', batch_size=64, test_batch_size=512, class_num=10, xshape=(1, 3, 32, 32))
||||||| cifar10    ||||||| Search-Loader-Num=391, Valid-Loader-Num=49, batch size=64
||||||| cifar10    ||||||| Config=Configure(scheduler='cos', LR=0.025, eta_min=0.001, epochs=50, warmup=0, optim='SGD', decay=0.0005, momentum=0.9, nesterov=True, criterion='Softmax', batch_size=64, test_batch_size=512, class_num=10, xshape=(1, 3, 32, 32))
w-optimizer : SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    initial_lr: 0.025
    lr: 0.025
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0005
)
w-scheduler : CosineAnnealingLR(warmup=0, max-epoch=50, current::epoch=0, iter=0.00, type=cosine, T-max=50, eta-min=0.001)
criterion   : CrossEntropyLoss(

In [5]:
def acc_confidence_robustness_metrics(network, inputs, targets):
    """Return the top-1 accuracy of ``network`` on a single batch.

    Despite the name, only the accuracy metric is currently computed; the
    confidence/sensitivity/robustness variants are disabled.

    The network is put into train mode before the forward pass and is left in
    that mode. The forward pass itself runs without gradient tracking.

    Args:
        network: callable module whose forward returns a pair; the second
            element is used as the logits.
        inputs: batch fed to ``network``.
        targets: ground-truth labels passed to ``obtain_accuracy``.

    Returns:
        The top-1 value reported by ``obtain_accuracy`` as a Python number.
    """
    network.train()
    with torch.no_grad():
        _features, logits = network(inputs)
        # Top-5 is still requested so the helper is called exactly as before,
        # but only the top-1 value is reported.
        top1, _top5 = obtain_accuracy(logits.data, targets.data, topk=(1, 5))
        return top1.item()
        
#         # confidence
#         prob = torch.nn.functional.softmax(logits, dim=1)
#         one_hot_idx = torch.nn.functional.one_hot(targets)
#         confidence = (prob[one_hot_idx==1].sum()) / inputs.size(0) * 100 # in percent
        
#         # sensitivity
#         _, noisy_logits = network(inputs + torch.randn_like(inputs)*0.1)
#         kl_loss = torch.nn.KLDivLoss(reduction="batchmean")
#         sensitivity = kl_loss(torch.nn.functional.log_softmax(noisy_logits, dim=1), torch.nn.functional.softmax(logits, dim=1))
        
#         # robustness
#         original_weights = deepcopy(network.state_dict())
#         for m in network.modules():
#             if isinstance(m, SearchCell):
#                 for p in m.parameters():
#                     p.add_(torch.randn_like(p) * p.std()*0.3)
            
#         _, noisy_logits = network(inputs)
#         kl_loss = torch.nn.KLDivLoss(reduction="batchmean")
#         robustness = -kl_loss(torch.nn.functional.log_softmax(noisy_logits, dim=1), torch.nn.functional.softmax(logits, dim=1))
#         network.load_state_dict(original_weights)
                
#         return acc.item(), confidence.item(), sensitivity.item(), robustness.item()
    
def step_sim_metric(network, criterion, inputs, targets, lr=0.025, num_small_steps=3):
    """Compare one large SGD step with several small SGD steps on the same batch.

    Starting from the current weights, the function takes (a) a single plain-SGD
    step with learning rate ``lr`` and (b) ``num_small_steps`` consecutive steps
    with learning rate ``lr / num_small_steps``. For every 4-D ``weight`` tensor
    that the large step changed, the two weight updates are flattened per output
    channel and their cosine similarity is averaged over channels. The result is
    the mean of those per-tensor averages, in percent.

    The network weights are always restored to their values at call time, also
    when an exception is raised. The network is left in train mode and parameter
    gradients from the last backward pass remain populated.

    Args:
        network: module whose forward returns a pair; the second element is
            used as the logits.
        criterion: loss callable invoked as ``criterion(logits, targets)``.
        inputs: input batch; only the first 64 samples are used.
        targets: labels; only the first 64 entries are used.
        lr: learning rate of the single large step.
        num_small_steps: number of small steps (each uses ``lr / num_small_steps``).

    Returns:
        Mean cosine similarity between the two updates, scaled by 100.

    Raises:
        ValueError: if ``num_small_steps`` is smaller than 1.
        RuntimeError: if no 4-D ``weight`` tensor was changed by the large step,
            so that there is nothing to compare.
    """
    if num_small_steps < 1:
        raise ValueError("num_small_steps must be >= 1, got {}".format(num_small_steps))

    inputs, targets = inputs[:64], targets[:64]  # smaller batches
    original_dict = deepcopy(network.state_dict())

    try:
        network.train()

        # single large step
        optim_large_step = torch.optim.SGD(network.parameters(), lr=lr)
        optim_large_step.zero_grad()
        _, logits = network(inputs)
        criterion(logits, targets).backward()
        optim_large_step.step()
        large_step_dict = deepcopy(network.state_dict())

        # multiple small steps, starting again from the original weights
        network.load_state_dict(original_dict)
        optim_small_step = torch.optim.SGD(network.parameters(), lr=lr / num_small_steps)
        for _ in range(num_small_steps):
            optim_small_step.zero_grad()
            _, logits = network(inputs)
            criterion(logits, targets).backward()
            optim_small_step.step()
        small_step_dict = deepcopy(network.state_dict())
    finally:
        # Always undo the probe steps, including on the error paths, so the
        # caller never continues with perturbed weights.
        network.load_state_dict(original_dict)

    scores = []
    for key, large_value in large_step_dict.items():
        original_value = original_dict[key]
        # Only 4-D "weight" tensors (e.g. convolution kernels) are compared.
        if 'weight' not in key or original_value.dim() != 4:
            continue
        # Skip tensors the large step left untouched (no gradient reached them).
        if torch.equal(original_value, large_value):
            continue
        out_channels = large_value.size(0)
        large_step = (large_value - original_value).reshape(out_channels, -1)
        small_step = (small_step_dict[key] - original_value).reshape(out_channels, -1)
        score = torch.nn.functional.cosine_similarity(large_step, small_step, dim=1)
        scores.append(score.mean().item() * 100)  # in percent

    if not scores:
        raise RuntimeError(
            "step_sim_metric: no 4-D weight tensor was updated by the SGD step, "
            "so the step similarity is undefined"
        )
    return np.mean(scores)

In [6]:
# start training: timers and the epoch budget taken from the config
start_time = time.time()
search_time = AverageMeter()
epoch_time = AverageMeter()
total_epoch = config.epochs + config.warmup

################# initialize
# Every searchable cell of the network, in module-traversal order.
cells = [module for module in network.modules() if isinstance(module, SearchCell)]
num_cells = len(cells)
print("total number of nodes:{}".format(num_cells*xargs.max_nodes))

# Candidate operations come from the first cell; a second copy without "none"
# is kept alongside.
op_names = deepcopy(cells[0].op_names)
op_names_wo_none = deepcopy(op_names)
if "none" in op_names_wo_none:
    op_names_wo_none.remove("none")

# Initial architecture: each node receives a skip connection from its direct
# predecessor and "none" from every earlier node.
genotypes = [
    tuple(
        ("skip_connect" if node - src == 1 else "none", src)
        for src in range(node)
    )
    for node in range(1, xargs.max_nodes)
]
init_arch = Structure(genotypes)

# All cells share the same initial architecture object.
for cell in cells:
    cell.arch_cache = init_arch

### gen possible connections of a target node
# For each target node, enumerate every assignment of one operation per incoming
# edge (one edge from each earlier node) and keep all assignments except the one
# where every edge is "none".
possible_connections = {}
for target_node_idx in range(1, xargs.max_nodes):
    # Grow the partial edge lists one source node at a time. The operation of the
    # newest source node is the outer loop, which fixes the enumeration order.
    partial_inputs = [[]]
    for src_node in range(target_node_idx):
        partial_inputs = [
            prefix + [(op_name, src_node)]
            for op_name in op_names
            for prefix in partial_inputs
        ]
    possible_connections[target_node_idx] = [
        tuple(edges)
        for edges in partial_inputs
        if any(op_name != "none" for op_name, _ in edges)
    ]
    print("target_node:{}".format(target_node_idx), len(possible_connections[target_node_idx]))
        
### train while generating random architectures by mutating connections of a target node
#
# For every cell in turn:
#   1. re-initialise the parameters of that cell,
#   2. train the whole network for a few epochs, giving the cell a freshly sampled
#      random architecture at the start of each epoch,
#   3. score random candidate architectures for the cell, each on one validation
#      batch, and fix the cell to the best-scoring candidate.

# Search budget (the values match the "loop1_ep10_sample200" tag in the default save_dir).
NUM_ARCH_LOOPS = 1
TRAIN_EPOCHS_PER_CELL = 10
NUM_EVAL_SAMPLES = 200


def _sample_random_arch():
    """Sample one cell architecture: a random input combination for every node."""
    return Structure(
        [random.choice(possible_connections[n]) for n in range(1, xargs.max_nodes)]
    )


for arch_loop in range(NUM_ARCH_LOOPS):
    for target_cell_idx in range(num_cells):
        target_cell = cells[target_cell_idx]
        print("\n\n Searching with a cell #{}".format(target_cell_idx))
        # Reset every sub-module of the target cell that knows how to re-initialise itself.
        for m in target_cell.modules():
            if hasattr(m, 'reset_parameters'):
                m.reset_parameters()

        ## training
        for ep in range(TRAIN_EPOCHS_PER_CELL):
            # One random architecture for the target cell per epoch.
            arch = _sample_random_arch()
            target_cell.arch_cache = arch

            data_time, batch_time = AverageMeter(), AverageMeter()
            base_losses, base_top1, base_top5 = AverageMeter(), AverageMeter(), AverageMeter()
            network.train()
            end = time.time()
            print_freq = xargs.print_freq
            # The loader also yields an "arch" split; only the base split is used here.
            for step, (base_inputs, base_targets, _arch_inputs, _arch_targets) in enumerate(search_loader):
                ######### forward/backward/optim
                base_targets = base_targets.cuda(non_blocking=True)
                # measure data loading time
                data_time.update(time.time() - end)
                w_optimizer.zero_grad()
                _, logits = network(base_inputs)
                base_loss = criterion(logits, base_targets)
                base_loss.backward()
                nn.utils.clip_grad_norm_(network.parameters(), 5)
                w_optimizer.step()

                ######### logging
                base_prec1, base_prec5 = obtain_accuracy(logits.data, base_targets.data, topk=(1, 5))
                base_losses.update(base_loss.item(), base_inputs.size(0))
                base_top1.update(base_prec1.item(), base_inputs.size(0))
                base_top5.update(base_prec5.item(), base_inputs.size(0))
                batch_time.update(time.time() - end)
                end = time.time()
                if step % print_freq == 0 or step + 1 == len(search_loader):
                    Sstr = ("*Train* "+ time_string()+" Ep:{:} [{:03d}/{:03d}]".format(ep, step, len(search_loader)))
                    Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(batch_time=batch_time, data_time=data_time)
                    Wstr = "Base [Loss {loss.val:.3f} ({loss.avg:.3f})  Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})]".format(loss=base_losses, top1=base_top1, top5=base_top5)
                    logger.log(Sstr + " " + Tstr + " " + Wstr)

            logger.log("Ep:{:} ends : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%".format(ep, base_losses.avg, base_top1.avg, base_top5.avg))

        ## evaluation
        network.train()
        archs, metric_accs = [], []
        loader_iter = iter(valid_loader)
        for search_iter in range(NUM_EVAL_SAMPLES):
            ###### random gen
            arch = _sample_random_arch()
            target_cell.arch_cache = arch
            ###### measure metrics
            # Each candidate is scored on the next validation batch. Restart the
            # loader only when it is exhausted; any other error (interrupt, worker
            # failure, ...) must propagate instead of being silently swallowed.
            try:
                inputs, targets = next(loader_iter)
            except StopIteration:
                loader_iter = iter(valid_loader)
                inputs, targets = next(loader_iter)
            inputs, targets = inputs.cuda(non_blocking=True), targets.cuda(non_blocking=True)
            valid_acc = acc_confidence_robustness_metrics(network, inputs, targets)
            archs.append(arch)
            metric_accs.append(valid_acc)

        # Candidates are ranked by accuracy only; `rank_agg` is the place where
        # further metric ranks could be combined.
        rank_accs = stats.rankdata(metric_accs)
        rank_agg = rank_accs
        best_idx = int(np.argmax(rank_agg))
        best_arch, best_acc = archs[best_idx], metric_accs[best_idx]
        logger.log("Found best op for target cell:{}".format(target_cell_idx))
        logger.log(": {:} with accuracy={:.2f}%".format(best_arch, best_acc))
        # Freeze the target cell to the winner before moving on to the next cell.
        target_cell.arch_cache = best_arch
            
# Collect the architecture finally assigned to every cell, in cell order.
best_archs = [cell.arch_cache for cell in cells]

# Persist the network weights together with the selected architectures.
output_path = os.path.join(xargs.save_dir, "output.pth")
torch.save({"model":search_model.state_dict(), "best_archs":best_archs}, output_path)

# Log the architecture of every searchable cell, then close the search-phase logger.
for module in search_model.modules():
    if isinstance(module, SearchCell):
        logger.log(module.arch_cache)

logger.close()

total number of nodes:60
target_node:1 4
target_node:2 24
target_node:3 124


 Searching with a cell #0
*Train* [2022-11-03 06:42:16] Ep:0 [000/391] Time 2.82 (2.82) Data 0.13 (0.13) Base [Loss 2.266 (2.266)  Prec@1 15.62 (15.62) Prec@5 59.38 (59.38)]
*Train* [2022-11-03 06:42:37] Ep:0 [200/391] Time 0.20 (0.12) Data 0.00 (0.00) Base [Loss 1.641 (1.835)  Prec@1 28.12 (30.21) Prec@5 89.06 (83.95)]
*Train* [2022-11-03 06:43:01] Ep:0 [390/391] Time 0.13 (0.12) Data 0.00 (0.00) Base [Loss 1.447 (1.686)  Prec@1 52.50 (36.32) Prec@5 97.50 (87.51)]
Ep:0 ends : loss=1.69, accuracy@1=36.32%, accuracy@5=87.51%
*Train* [2022-11-03 06:43:01] Ep:1 [000/391] Time 0.28 (0.28) Data 0.20 (0.20) Base [Loss 2.862 (2.862)  Prec@1 9.38 (9.38) Prec@5 57.81 (57.81)]
*Train* [2022-11-03 06:43:18] Ep:1 [200/391] Time 0.07 (0.09) Data 0.00 (0.00) Base [Loss 1.334 (1.567)  Prec@1 54.69 (42.44) Prec@5 98.44 (89.89)]
*Train* [2022-11-03 06:43:40] Ep:1 [390/391] Time 0.20 (0.10) Data 0.00 (0.00) Base [Loss 1.488 (1

*Train* [2022-11-03 06:54:10] Ep:6 [000/391] Time 0.30 (0.30) Data 0.21 (0.21) Base [Loss 2.491 (2.491)  Prec@1 23.44 (23.44) Prec@5 73.44 (73.44)]
*Train* [2022-11-03 06:54:32] Ep:6 [200/391] Time 0.07 (0.11) Data 0.00 (0.00) Base [Loss 0.706 (0.839)  Prec@1 73.44 (70.47) Prec@5 98.44 (97.61)]
*Train* [2022-11-03 06:54:53] Ep:6 [390/391] Time 0.08 (0.11) Data 0.00 (0.00) Base [Loss 0.726 (0.809)  Prec@1 75.00 (71.86) Prec@5 97.50 (97.84)]
Ep:6 ends : loss=0.81, accuracy@1=71.86%, accuracy@5=97.84%
*Train* [2022-11-03 06:54:54] Ep:7 [000/391] Time 0.34 (0.34) Data 0.19 (0.19) Base [Loss 1.240 (1.240)  Prec@1 50.00 (50.00) Prec@5 96.88 (96.88)]
*Train* [2022-11-03 06:55:19] Ep:7 [200/391] Time 0.14 (0.13) Data 0.00 (0.00) Base [Loss 1.189 (0.808)  Prec@1 51.56 (71.65) Prec@5 95.31 (97.92)]
*Train* [2022-11-03 06:55:39] Ep:7 [390/391] Time 0.08 (0.12) Data 0.00 (0.00) Base [Loss 0.740 (0.787)  Prec@1 72.50 (72.37) Prec@5 97.50 (98.13)]
Ep:7 ends : loss=0.79, accuracy@1=72.37%, accuracy@5

*Train* [2022-11-03 07:06:23] Ep:1 [390/391] Time 0.13 (0.11) Data 0.00 (0.00) Base [Loss 0.752 (0.784)  Prec@1 72.50 (73.23) Prec@5 100.00 (97.80)]
Ep:1 ends : loss=0.78, accuracy@1=73.23%, accuracy@5=97.80%
*Train* [2022-11-03 07:06:23] Ep:2 [000/391] Time 0.28 (0.28) Data 0.18 (0.18) Base [Loss 0.900 (0.900)  Prec@1 65.62 (65.62) Prec@5 96.88 (96.88)]
*Train* [2022-11-03 07:06:46] Ep:2 [200/391] Time 0.09 (0.11) Data 0.00 (0.00) Base [Loss 1.011 (0.705)  Prec@1 65.62 (75.48) Prec@5 93.75 (98.51)]
*Train* [2022-11-03 07:07:09] Ep:2 [390/391] Time 0.12 (0.12) Data 0.00 (0.00) Base [Loss 0.758 (0.689)  Prec@1 75.00 (76.06) Prec@5 100.00 (98.56)]
Ep:2 ends : loss=0.69, accuracy@1=76.06%, accuracy@5=98.56%
*Train* [2022-11-03 07:07:09] Ep:3 [000/391] Time 0.31 (0.31) Data 0.20 (0.20) Base [Loss 0.822 (0.822)  Prec@1 70.31 (70.31) Prec@5 100.00 (100.00)]
*Train* [2022-11-03 07:07:31] Ep:3 [200/391] Time 0.07 (0.11) Data 0.00 (0.00) Base [Loss 0.681 (0.735)  Prec@1 78.12 (74.35) Prec@5 100

*Train* [2022-11-03 07:18:30] Ep:7 [390/391] Time 0.08 (0.11) Data 0.00 (0.00) Base [Loss 0.482 (0.629)  Prec@1 87.50 (78.25) Prec@5 100.00 (98.79)]
Ep:7 ends : loss=0.63, accuracy@1=78.25%, accuracy@5=98.79%
*Train* [2022-11-03 07:18:30] Ep:8 [000/391] Time 0.28 (0.28) Data 0.19 (0.19) Base [Loss 3.027 (3.027)  Prec@1 10.94 (10.94) Prec@5 62.50 (62.50)]
*Train* [2022-11-03 07:18:54] Ep:8 [200/391] Time 0.09 (0.12) Data 0.00 (0.00) Base [Loss 0.558 (0.687)  Prec@1 87.50 (76.25) Prec@5 98.44 (98.34)]
*Train* [2022-11-03 07:19:15] Ep:8 [390/391] Time 0.12 (0.11) Data 0.00 (0.00) Base [Loss 0.746 (0.650)  Prec@1 77.50 (77.49) Prec@5 97.50 (98.60)]
Ep:8 ends : loss=0.65, accuracy@1=77.49%, accuracy@5=98.60%
*Train* [2022-11-03 07:19:15] Ep:9 [000/391] Time 0.29 (0.29) Data 0.20 (0.20) Base [Loss 1.469 (1.469)  Prec@1 46.88 (46.88) Prec@5 92.19 (92.19)]
*Train* [2022-11-03 07:19:38] Ep:9 [200/391] Time 0.07 (0.12) Data 0.00 (0.00) Base [Loss 0.640 (0.635)  Prec@1 78.12 (77.88) Prec@5 98.44 

*Train* [2022-11-03 07:30:37] Ep:3 [200/391] Time 0.07 (0.12) Data 0.00 (0.00) Base [Loss 0.713 (0.637)  Prec@1 73.44 (78.04) Prec@5 98.44 (98.73)]
*Train* [2022-11-03 07:31:00] Ep:3 [390/391] Time 0.08 (0.12) Data 0.00 (0.00) Base [Loss 0.666 (0.619)  Prec@1 80.00 (78.66) Prec@5 97.50 (98.82)]
Ep:3 ends : loss=0.62, accuracy@1=78.66%, accuracy@5=98.82%
*Train* [2022-11-03 07:31:00] Ep:4 [000/391] Time 0.34 (0.34) Data 0.15 (0.15) Base [Loss 0.675 (0.675)  Prec@1 76.56 (76.56) Prec@5 96.88 (96.88)]
*Train* [2022-11-03 07:31:21] Ep:4 [200/391] Time 0.09 (0.11) Data 0.00 (0.00) Base [Loss 0.651 (0.598)  Prec@1 76.56 (78.99) Prec@5 95.31 (99.01)]
*Train* [2022-11-03 07:31:44] Ep:4 [390/391] Time 0.11 (0.11) Data 0.00 (0.00) Base [Loss 0.953 (0.603)  Prec@1 75.00 (79.14) Prec@5 95.00 (98.93)]
Ep:4 ends : loss=0.60, accuracy@1=79.14%, accuracy@5=98.93%
*Train* [2022-11-03 07:31:44] Ep:5 [000/391] Time 0.30 (0.30) Data 0.17 (0.17) Base [Loss 0.759 (0.759)  Prec@1 71.88 (71.88) Prec@5 100.00 

*Train* [2022-11-03 07:42:53] Ep:9 [200/391] Time 0.12 (0.12) Data 0.00 (0.00) Base [Loss 0.572 (0.599)  Prec@1 73.44 (79.42) Prec@5 100.00 (98.96)]
*Train* [2022-11-03 07:43:16] Ep:9 [390/391] Time 0.13 (0.12) Data 0.00 (0.00) Base [Loss 0.489 (0.586)  Prec@1 80.00 (79.74) Prec@5 100.00 (98.99)]
Ep:9 ends : loss=0.59, accuracy@1=79.74%, accuracy@5=98.99%
Found best op for target cell:7
: Structure(4 nodes with |nor_conv_1x1~0|+|avg_pool_3x3~0|nor_conv_3x3~1|+|nor_conv_3x3~0|nor_conv_3x3~1|none~2|) with accuracy=76.76%


 Searching with a cell #8
*Train* [2022-11-03 07:43:36] Ep:0 [000/391] Time 0.37 (0.37) Data 0.18 (0.18) Base [Loss 0.773 (0.773)  Prec@1 75.00 (75.00) Prec@5 98.44 (98.44)]
*Train* [2022-11-03 07:43:57] Ep:0 [200/391] Time 0.11 (0.11) Data 0.00 (0.00) Base [Loss 0.474 (0.578)  Prec@1 84.38 (80.32) Prec@5 100.00 (98.95)]
*Train* [2022-11-03 07:44:19] Ep:0 [390/391] Time 0.08 (0.11) Data 0.00 (0.00) Base [Loss 0.752 (0.574)  Prec@1 72.50 (80.58) Prec@5 100.00 (98.91)]
E

*Train* [2022-11-03 07:55:24] Ep:5 [000/391] Time 0.28 (0.28) Data 0.15 (0.15) Base [Loss 0.638 (0.638)  Prec@1 73.44 (73.44) Prec@5 96.88 (96.88)]
*Train* [2022-11-03 07:55:50] Ep:5 [200/391] Time 0.14 (0.13) Data 0.00 (0.00) Base [Loss 0.652 (0.552)  Prec@1 78.12 (80.98) Prec@5 100.00 (98.97)]
*Train* [2022-11-03 07:56:14] Ep:5 [390/391] Time 0.13 (0.13) Data 0.00 (0.00) Base [Loss 0.584 (0.551)  Prec@1 75.00 (81.16) Prec@5 100.00 (99.05)]
Ep:5 ends : loss=0.55, accuracy@1=81.16%, accuracy@5=99.05%
*Train* [2022-11-03 07:56:14] Ep:6 [000/391] Time 0.26 (0.26) Data 0.17 (0.17) Base [Loss 3.666 (3.666)  Prec@1 15.62 (15.62) Prec@5 51.56 (51.56)]
*Train* [2022-11-03 07:56:36] Ep:6 [200/391] Time 0.13 (0.11) Data 0.00 (0.00) Base [Loss 0.537 (0.729)  Prec@1 84.38 (75.09) Prec@5 98.44 (98.02)]
*Train* [2022-11-03 07:56:58] Ep:6 [390/391] Time 0.07 (0.11) Data 0.00 (0.00) Base [Loss 0.613 (0.649)  Prec@1 77.50 (77.68) Prec@5 100.00 (98.52)]
Ep:6 ends : loss=0.65, accuracy@1=77.68%, accurac

*Train* [2022-11-03 08:08:28] Ep:0 [390/391] Time 0.15 (0.12) Data 0.00 (0.00) Base [Loss 0.583 (0.548)  Prec@1 80.00 (81.45) Prec@5 100.00 (98.94)]
Ep:0 ends : loss=0.55, accuracy@1=81.45%, accuracy@5=98.94%
*Train* [2022-11-03 08:08:28] Ep:1 [000/391] Time 0.33 (0.33) Data 0.17 (0.17) Base [Loss 0.461 (0.461)  Prec@1 84.38 (84.38) Prec@5 100.00 (100.00)]
*Train* [2022-11-03 08:08:53] Ep:1 [200/391] Time 0.13 (0.13) Data 0.00 (0.00) Base [Loss 0.642 (0.513)  Prec@1 76.56 (82.33) Prec@5 98.44 (99.28)]
*Train* [2022-11-03 08:09:17] Ep:1 [390/391] Time 0.12 (0.12) Data 0.00 (0.00) Base [Loss 0.711 (0.506)  Prec@1 77.50 (82.58) Prec@5 95.00 (99.26)]
Ep:1 ends : loss=0.51, accuracy@1=82.58%, accuracy@5=99.26%
*Train* [2022-11-03 08:09:17] Ep:2 [000/391] Time 0.38 (0.38) Data 0.20 (0.20) Base [Loss 0.580 (0.580)  Prec@1 79.69 (79.69) Prec@5 96.88 (96.88)]
*Train* [2022-11-03 08:09:40] Ep:2 [200/391] Time 0.08 (0.12) Data 0.00 (0.00) Base [Loss 0.493 (0.501)  Prec@1 84.38 (82.82) Prec@5 100.

*Train* [2022-11-03 08:21:22] Ep:6 [390/391] Time 0.15 (0.12) Data 0.00 (0.00) Base [Loss 0.411 (0.490)  Prec@1 85.00 (83.04) Prec@5 100.00 (99.25)]
Ep:6 ends : loss=0.49, accuracy@1=83.04%, accuracy@5=99.25%
*Train* [2022-11-03 08:21:22] Ep:7 [000/391] Time 0.34 (0.34) Data 0.18 (0.18) Base [Loss 0.442 (0.442)  Prec@1 82.81 (82.81) Prec@5 100.00 (100.00)]
*Train* [2022-11-03 08:21:46] Ep:7 [200/391] Time 0.09 (0.12) Data 0.00 (0.00) Base [Loss 0.422 (0.438)  Prec@1 85.94 (85.16) Prec@5 100.00 (99.57)]
*Train* [2022-11-03 08:22:10] Ep:7 [390/391] Time 0.14 (0.12) Data 0.00 (0.00) Base [Loss 0.404 (0.456)  Prec@1 85.00 (84.50) Prec@5 100.00 (99.49)]
Ep:7 ends : loss=0.46, accuracy@1=84.50%, accuracy@5=99.49%
*Train* [2022-11-03 08:22:10] Ep:8 [000/391] Time 0.30 (0.30) Data 0.19 (0.19) Base [Loss 0.515 (0.515)  Prec@1 85.94 (85.94) Prec@5 100.00 (100.00)]
*Train* [2022-11-03 08:22:36] Ep:8 [200/391] Time 0.10 (0.13) Data 0.00 (0.00) Base [Loss 0.577 (0.465)  Prec@1 81.25 (83.85) Prec@5 

*Train* [2022-11-03 08:34:42] Ep:2 [000/391] Time 0.28 (0.28) Data 0.18 (0.18) Base [Loss 0.507 (0.507)  Prec@1 82.81 (82.81) Prec@5 100.00 (100.00)]
*Train* [2022-11-03 08:35:08] Ep:2 [200/391] Time 0.10 (0.13) Data 0.00 (0.00) Base [Loss 0.527 (0.449)  Prec@1 79.69 (84.42) Prec@5 100.00 (99.37)]
*Train* [2022-11-03 08:35:33] Ep:2 [390/391] Time 0.13 (0.13) Data 0.00 (0.00) Base [Loss 0.550 (0.448)  Prec@1 87.50 (84.60) Prec@5 92.50 (99.38)]
Ep:2 ends : loss=0.45, accuracy@1=84.60%, accuracy@5=99.38%
*Train* [2022-11-03 08:35:34] Ep:3 [000/391] Time 0.39 (0.39) Data 0.21 (0.21) Base [Loss 2.788 (2.788)  Prec@1 15.62 (15.62) Prec@5 56.25 (56.25)]
*Train* [2022-11-03 08:36:00] Ep:3 [200/391] Time 0.09 (0.13) Data 0.00 (0.00) Base [Loss 0.367 (0.495)  Prec@1 84.38 (83.24) Prec@5 98.44 (99.07)]
*Train* [2022-11-03 08:36:25] Ep:3 [390/391] Time 0.09 (0.13) Data 0.00 (0.00) Base [Loss 0.527 (0.479)  Prec@1 85.00 (83.45) Prec@5 97.50 (99.14)]
Ep:3 ends : loss=0.48, accuracy@1=83.45%, accurac

In [7]:
# import matplotlib.pyplot as plt

# plt.scatter(rank_confidences,rank_accs)
# plt.show()

# plt.scatter(rank_sensitivities,rank_accs)
# plt.show()

# plt.scatter(rank_robustnesses,rank_accs)
# plt.show()

# plt.scatter(rank_step_sims,rank_accs)
# plt.show()

# Train a found model

In [8]:
# Reload the result written at the end of the search cell
# (a dict with the keys "model" and "best_archs").
trained_output = torch.load(os.path.join(xargs.save_dir, "output.pth"))
print(args)
# Point the save directory at a "train" sub-folder for the training phase.
# NOTE(review): `xargs` and `args` are the same Namespace object (`xargs=args` in
# the argument cell), so this also changes `xargs.save_dir`. Re-running this cell
# would therefore look for output.pth inside ".../train" and append "train" again.
args.save_dir = os.path.join(xargs.save_dir, "train")
print(args)

Namespace(arch_nas_dataset=None, channel=16, config_path='./MY.config', data_path='../cifar.python', dataset='cifar10', max_nodes=4, num_cells=5, print_freq=200, rand_seed=92767, save_dir='./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric', search_space_name='nas-bench-201', select_num=100, track_running_stats=0, workers=4)
Namespace(arch_nas_dataset=None, channel=16, config_path='./MY.config', data_path='../cifar.python', dataset='cifar10', max_nodes=4, num_cells=5, print_freq=200, rand_seed=92767, save_dir='./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train', search_space_name='nas-bench-201', select_num=100, track_running_stats=0, workers=4)


In [9]:
# Show the configuration currently bound to `config`; when the cells are run in
# order this is still the search-phase one loaded from xargs.config_path.
print(config)

Configure(scheduler='cos', LR=0.025, eta_min=0.001, epochs=50, warmup=0, optim='SGD', decay=0.0005, momentum=0.9, nesterov=True, criterion='Softmax', batch_size=64, test_batch_size=512, class_num=10, xshape=(1, 3, 32, 32))


In [10]:
# Final-training setup: rebuild the network from scratch, assign the searched
# architecture to every cell, and create fresh loaders / optimizer / scheduler.

# `args.save_dir` was redirected to the "train" sub-directory in an earlier cell,
# so this logger writes there.
logger = prepare_logger(args)

# Training uses the benchmark CIFAR config rather than the search config ("./MY.config").
cifar_train_config_path = "../configs/nas-benchmark/CIFAR.config"
###
train_data, test_data, xshape, class_num = get_datasets(xargs.dataset, xargs.data_path, -1)
config = load_config(cifar_train_config_path, {"class_num": class_num, "xshape": xshape}, logger)

train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=xargs.workers,
    pin_memory=True,
)

# NOTE(review): shuffling the test set is kept exactly as before; it looks
# unnecessary for evaluation — confirm before switching it off.
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=xargs.workers,
    pin_memory=True,
)

logger.log("||||||| {:10s} ||||||| Train-Loader-Num={:}, Test-Loader-Num={:}, batch size={:}".format(
            xargs.dataset, len(train_loader), len(test_loader), config.batch_size))
logger.log("||||||| {:10s} ||||||| Config={:}".format(xargs.dataset, config))

search_space = get_search_spaces("cell", xargs.search_space_name)
model_config = dict2config(
    {
        "name": "RANDOM",
        "C": xargs.channel,
        "N": xargs.num_cells,
        "max_nodes": xargs.max_nodes,
        "num_classes": class_num,
        "space": search_space,
        "affine": False,
        "track_running_stats": True, # true for eval
    },
    None,
)
search_model = get_cell_based_tiny_net(model_config)

### load
# Only the searched architectures are reused; the weights stored under "model" in
# the search output are not loaded, so the new network keeps its fresh initialisation.
best_archs = trained_output['best_archs']
train_cells = [m for m in search_model.modules() if isinstance(m, SearchCell)]
if len(train_cells) != len(best_archs):
    raise ValueError(
        "The rebuilt model has {} search cells but {} architectures were saved".format(
            len(train_cells), len(best_archs)
        )
    )
for cell, cell_arch in zip(train_cells, best_archs):
    cell.arch_cache = cell_arch
# Verify the assignment on the freshly built model. (At this point `network` is
# still the search-phase model, so it must not be used for this check.)
for cell in train_cells:
    print(cell.arch_cache)
###

w_optimizer, w_scheduler, criterion = get_optim_scheduler(search_model.parameters(), config)

logger.log("w-optimizer : {:}".format(w_optimizer))
logger.log("w-scheduler : {:}".format(w_scheduler))
logger.log("criterion   : {:}".format(criterion))

# From here on `network` refers to the new training model.
network, criterion = torch.nn.DataParallel(search_model).cuda(), criterion.cuda()

last_info, model_base_path, model_best_path = (
    logger.path("info"),
    logger.path("model"),
    logger.path("best"),
)

# Training always starts from epoch 0 here; no checkpoint resume in this phase.
start_epoch, valid_accuracies, genotypes = 0, {"best": -1}, {}

Main Function with logger : Logger(dir=cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train, use-tf=False, writer=None)
Arguments : -------------------------------
arch_nas_dataset : None
channel          : 16
config_path      : ./MY.config
data_path        : ../cifar.python
dataset          : cifar10
max_nodes        : 4
num_cells        : 5
print_freq       : 200
rand_seed        : 92767
save_dir         : ./cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train
search_space_name : nas-bench-201
select_num       : 100
track_running_stats : 0
workers          : 4
Python  Version  : 3.7.13 (default, Mar 29 2022, 02:18:16)  [GCC 7.5.0]
Pillow  Version  : 9.0.1
PyTorch Version  : 1.12.0
cuDNN   Version  : 8302
CUDA available   : True
CUDA GPU numbers : 2
CUDA_VISIBLE_DEVICES : None
Files already downloaded and verified
Files already downloaded and verified
../configs/nas-benchmark/CIFAR.config
Configure(scheduler='cos', eta_min=0.0, epochs=20

In [11]:
# def search_func_one_arch(xloader, network, criterion, scheduler, w_optimizer, epoch_str, print_freq, logger):
#     data_time, batch_time = AverageMeter(), AverageMeter()
#     base_losses, base_top1, base_top5 = AverageMeter(), AverageMeter(), AverageMeter()
#     network.train()
#     end = time.time()
#     for step, (base_inputs, base_targets, arch_inputs, arch_targets) in enumerate(
#         xloader
#     ):
#         scheduler.update(None, 1.0 * step / len(xloader))
#         base_targets = base_targets.cuda(non_blocking=True)
#         arch_targets = arch_targets.cuda(non_blocking=True)
#         # measure data loading time
#         data_time.update(time.time() - end)

#         w_optimizer.zero_grad()
#         _, logits = network(base_inputs)
#         base_loss = criterion(logits, base_targets)
#         base_loss.backward()
#         nn.utils.clip_grad_norm_(network.parameters(), 5)
#         w_optimizer.step()
#         # record
#         base_prec1, base_prec5 = obtain_accuracy(
#             logits.data, base_targets.data, topk=(1, 5)
#         )
#         base_losses.update(base_loss.item(), base_inputs.size(0))
#         base_top1.update(base_prec1.item(), base_inputs.size(0))
#         base_top5.update(base_prec5.item(), base_inputs.size(0))

#         # measure elapsed time
#         batch_time.update(time.time() - end)
#         end = time.time()

#         if step % print_freq == 0 or step + 1 == len(xloader):
#             Sstr = (
#                 "*SEARCH* "
#                 + time_string()
#                 + " [{:}][{:03d}/{:03d}]".format(epoch_str, step, len(xloader))
#             )
#             Tstr = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
#                 batch_time=batch_time, data_time=data_time
#             )
#             Wstr = "Base [Loss {loss.val:.3f} ({loss.avg:.3f})  Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})]".format(
#                 loss=base_losses, top1=base_top1, top5=base_top5
#             )
#             logger.log(Sstr + " " + Tstr + " " + Wstr)
#     return base_losses.avg, base_top1.avg, base_top5.avg

def train_func_one_arch(xloader, network, criterion, scheduler, w_optimizer, epoch_str, print_freq, logger):
    """Run one training pass over ``xloader`` and update the network weights.

    For every batch the scheduler is advanced by the fractional position in the
    epoch, the loss is back-propagated, gradients are clipped to a norm of 5 and
    the optimizer takes a step. Progress is logged every ``print_freq`` steps
    and on the last step.

    Args:
        xloader: iterable yielding ``(inputs, targets)`` batches; must support ``len``.
        network: model whose forward returns a pair, the second item being logits.
        criterion: loss callable applied to ``(logits, targets)``.
        scheduler: object exposing ``update(epoch, fraction)``.
        w_optimizer: optimizer for the network weights.
        epoch_str: label inserted into the log lines.
        print_freq: logging interval in steps.
        logger: object exposing ``log(str)``.

    Returns:
        Tuple ``(loss.avg, top1.avg, top5.avg)`` taken from the running meters.
    """
    load_meter, step_meter = AverageMeter(), AverageMeter()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    total_steps = len(xloader)
    network.train()
    tic = time.time()

    for step, batch in enumerate(xloader):
        base_inputs, base_targets = batch
        scheduler.update(None, 1.0 * step / total_steps)
        base_targets = base_targets.cuda(non_blocking=True)
        # time spent waiting for this batch
        load_meter.update(time.time() - tic)

        # forward / backward / clipped update
        w_optimizer.zero_grad()
        _, logits = network(base_inputs)
        base_loss = criterion(logits, base_targets)
        base_loss.backward()
        nn.utils.clip_grad_norm_(network.parameters(), 5)
        w_optimizer.step()

        # bookkeeping, each value recorded with the batch size as its count
        prec1, prec5 = obtain_accuracy(logits.data, base_targets.data, topk=(1, 5))
        n_samples = base_inputs.size(0)
        loss_meter.update(base_loss.item(), n_samples)
        top1_meter.update(prec1.item(), n_samples)
        top5_meter.update(prec5.item(), n_samples)

        # wall-clock time for the whole step
        step_meter.update(time.time() - tic)
        tic = time.time()

        is_last_step = (step + 1 == total_steps)
        if step % print_freq == 0 or is_last_step:
            header = "*SEARCH* " + time_string() + " [{:}][{:03d}/{:03d}]".format(
                epoch_str, step, total_steps
            )
            timing = "Time {batch_time.val:.2f} ({batch_time.avg:.2f}) Data {data_time.val:.2f} ({data_time.avg:.2f})".format(
                batch_time=step_meter, data_time=load_meter
            )
            stats_msg = "Base [Loss {loss.val:.3f} ({loss.avg:.3f})  Prec@1 {top1.val:.2f} ({top1.avg:.2f}) Prec@5 {top5.val:.2f} ({top5.avg:.2f})]".format(
                loss=loss_meter, top1=top1_meter, top5=top5_meter
            )
            logger.log(" ".join([header, timing, stats_msg]))

    return loss_meter.avg, top1_meter.avg, top5_meter.avg

def valid_func_one_arch(xloader, network, criterion):
    """Evaluate ``network`` on every batch of ``xloader`` without gradients.

    The network is switched to eval mode and each batch is scored with
    ``criterion`` and top-1 / top-5 accuracy.

    Args:
        xloader: iterable yielding ``(inputs, targets)`` batches.
        network: model whose forward returns a pair, the second item being logits.
        criterion: loss callable applied to ``(logits, targets)``.

    Returns:
        Tuple ``(loss.avg, top1.avg, top5.avg)`` taken from the running meters.
    """
    load_meter, step_meter = AverageMeter(), AverageMeter()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    network.eval()
    tic = time.time()
    with torch.no_grad():
        for batch in xloader:
            arch_inputs, arch_targets = batch
            arch_targets = arch_targets.cuda(non_blocking=True)
            # time spent waiting for this batch
            load_meter.update(time.time() - tic)

            # forward pass only
            # (disabled) network.module.random_genotype_per_cell(True)
            _, logits = network(arch_inputs)
            arch_loss = criterion(logits, arch_targets)

            # bookkeeping, each value recorded with the batch size as its count
            prec1, prec5 = obtain_accuracy(logits.data, arch_targets.data, topk=(1, 5))
            n_samples = arch_inputs.size(0)
            loss_meter.update(arch_loss.item(), n_samples)
            top1_meter.update(prec1.item(), n_samples)
            top5_meter.update(prec5.item(), n_samples)

            # wall-clock time for the whole step
            step_meter.update(time.time() - tic)
            tic = time.time()

    return loss_meter.avg, top1_meter.avg, top5_meter.avg

In [None]:
# Main loop: train the fixed per-cell architecture for `config.epochs +
# config.warmup` epochs, evaluating on `test_loader` and checkpointing after
# every epoch.
start_time, search_time, epoch_time, total_epoch = (
    time.time(),
    AverageMeter(),
    AverageMeter(),
    config.epochs + config.warmup,
)
for epoch in range(0, total_epoch):
    w_scheduler.update(epoch, 0.0)
    # Remaining-time estimate extrapolated from the last epoch's duration.
    need_time = "Time Left: {:}".format(
        convert_secs2time(epoch_time.val * (total_epoch - epoch), True)
    )
    epoch_str = "{:03d}-{:03d}".format(epoch, total_epoch)
    logger.log(
        "\n[Search the {:}-th epoch] {:}, LR={:}".format(
            epoch_str, need_time, min(w_scheduler.get_lr())
        )
    )

    # selected_arch = search_find_best(valid_loader, network, criterion, xargs.select_num)
    search_w_loss, search_w_top1, search_w_top5 = train_func_one_arch(
        train_loader,
        network,
        criterion,
        w_scheduler,
        w_optimizer,
        epoch_str,
        xargs.print_freq,
        logger,
    )
    search_time.update(time.time() - start_time)
    logger.log(
        "[{:}] searching : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%, time-cost={:.1f} s".format(
            epoch_str, search_w_loss, search_w_top1, search_w_top5, search_time.sum
        )
    )
    valid_a_loss, valid_a_top1, valid_a_top5 = valid_func_one_arch(
        test_loader, network, criterion
    )
    logger.log(
        "[{:}] evaluate  : loss={:.2f}, accuracy@1={:.2f}%, accuracy@5={:.2f}%".format(
            epoch_str, valid_a_loss, valid_a_top1, valid_a_top5
        )
    )

    # check the best accuracy
    valid_accuracies[epoch] = valid_a_top1
    find_best = valid_a_top1 > valid_accuracies["best"]
    if find_best:
        valid_accuracies["best"] = valid_a_top1

    # save checkpoint
    save_path = save_checkpoint(
        {
            "epoch": epoch + 1,
            "args": deepcopy(xargs),
            "search_model": search_model.state_dict(),
            "w_optimizer": w_optimizer.state_dict(),
            "w_scheduler": w_scheduler.state_dict(),
            "genotypes": genotypes,
            "valid_accuracies": valid_accuracies,
        },
        model_base_path,
        logger,
    )
    last_info = save_checkpoint(
        {
            "epoch": epoch + 1,
            "args": deepcopy(args),
            "last_checkpoint": save_path,
        },
        logger.path("info"),
        logger,
    )
    if find_best:
        logger.log(
            "<<<--->>> The {:}-th epoch : find the highest validation accuracy : {:.2f}%.".format(
                epoch_str, valid_a_top1
            )
        )
        copy_checkpoint(model_base_path, model_best_path, logger)
    # `genotypes` is never filled inside this loop, so only query the benchmark
    # API when an entry for this epoch actually exists (avoids a KeyError when
    # `api` is set).
    if api is not None and epoch in genotypes:
        logger.log("{:}".format(api.query_by_arch(genotypes[epoch], "200")))
    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()

logger.close()


[Search the 000-200-th epoch] Time Left: [00:00:00], LR=0.1
*SEARCH* [2022-11-03 08:42:05] [000-200][000/196] Time 0.37 (0.37) Data 0.19 (0.19) Base [Loss 2.340 (2.340)  Prec@1 8.20 (8.20) Prec@5 44.53 (44.53)]
*SEARCH* [2022-11-03 08:42:35] [000-200][195/196] Time 0.23 (0.15) Data 0.00 (0.00) Base [Loss 1.205 (1.615)  Prec@1 61.25 (40.08) Prec@5 91.25 (88.60)]
[000-200] searching : loss=1.61, accuracy@1=40.08%, accuracy@5=88.60%, time-cost=30.0 s
[000-200] evaluate  : loss=1.63, accuracy@1=44.31%, accuracy@5=91.27%
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/seed-92767-last-info.pth
<<<--->>> The 000-200-th epoch : find the highest validation accuracy : 44.31%.
copy the file from cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth into cell_level-

*SEARCH* [2022-11-03 08:45:58] [006-200][195/196] Time 0.14 (0.16) Data 0.00 (0.00) Base [Loss 0.668 (0.647)  Prec@1 77.50 (77.71) Prec@5 100.00 (98.72)]
[006-200] searching : loss=0.65, accuracy@1=77.71%, accuracy@5=98.72%, time-cost=209.1 s
[006-200] evaluate  : loss=1.03, accuracy@1=69.18%, accuracy@5=97.16%
Find cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth exist, delete is at first before saving
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth
Find cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/seed-92767-last-info.pth exist, delete is at first before saving
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/seed-92767-last-info.pth

[Search the 007-200-th epoch] Time Left: [01:53:54], LR=0.099698047772759
*SEARCH* [2022-11-03 08:46:03] [007-200][000/196] Time 0.3

*SEARCH* [2022-11-03 08:49:24] [013-200][000/196] Time 0.42 (0.42) Data 0.26 (0.26) Base [Loss 0.690 (0.690)  Prec@1 76.56 (76.56) Prec@5 97.66 (97.66)]
*SEARCH* [2022-11-03 08:49:56] [013-200][195/196] Time 0.14 (0.16) Data 0.00 (0.00) Base [Loss 0.799 (0.507)  Prec@1 73.75 (82.42) Prec@5 95.00 (99.16)]
[013-200] searching : loss=0.51, accuracy@1=82.42%, accuracy@5=99.16%, time-cost=417.6 s
[013-200] evaluate  : loss=0.84, accuracy@1=72.78%, accuracy@5=97.99%
Find cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth exist, delete is at first before saving
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/checkpoint/seed-92767-basic.pth
Find cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/seed-92767-last-info.pth exist, delete is at first before saving
save checkpoint into cell_level-arch_loop-reset_cell_params-loop1_ep10_sample200-acc_metric/train/seed-9

In [None]:
# Display `best_archs` as the cell output.
best_archs