In [1]:
import os
import sys
import time
import glob
import numpy as np
import torch
import utils
import logging
import argparse
import torch.nn as nn
import torch.utils
import torch.nn.functional as F
import torchvision.datasets as dset
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms

from torch.autograd import Variable
from model_search_imagenet import Network
from architect import Architect
from copy import deepcopy

In [2]:
# Experiment configuration. The options are declared with argparse but parsed
# from an empty list below, so every value is its default unless edited here.
parser = argparse.ArgumentParser("imagenet")
parser.add_argument('--workers', type=int, default=4, help='number of workers to load dataset')
parser.add_argument('--data', type=str, default='/tmp/cache/', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=1, help='batch size')
parser.add_argument('--learning_rate', type=float, default=0.5, help='init learning rate')
parser.add_argument('--learning_rate_min', type=float, default=0.0, help='min learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay')
# NOTE(review): report_freq is declared as float although the default is an integer count -- confirm intended type.
parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
parser.add_argument('--init_channels', type=int, default=48, help='num of init channels')
parser.add_argument('--layers', type=int, default=14, help='total number of layers')
parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.4, help='drop path probability')
# The --save default is never used: args.save is overwritten right after parsing.
parser.add_argument('--save', type=str, default='/tmp/checkpoints/', help='experiment name')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument('--unrolled', action='store_true', default=False, help='use one-step unrolled validation loss')
parser.add_argument('--arch_learning_rate', type=float, default=6e-3, help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
# NOTE(review): the help text duplicates --batch_size and looks like a copy-paste; --begin is not read in the visible cells -- confirm its meaning.
parser.add_argument('--begin', type=int, default=35, help='batch size')

# NOTE(review): machine-specific absolute path as default -- consider making it configurable.
parser.add_argument('--tmp_data_dir', type=str, default='/home/mzhang3/Data/', help='temp data dir')
parser.add_argument('--note', type=str, default='try', help='note for this run')

# Parse an empty argument list so the notebook kernel's own argv is ignored.
args = parser.parse_args([])

# Experiment directory name: fixed prefix + seed + launch timestamp.
args.save = 'FreeDARTS_SynFlow_ImageNet_oneshot-exp-seed-{}-{}'.format(args.seed, time.strftime("%Y%m%d-%H%M%S"))
# presumably creates args.save and copies the *.py scripts into it -- verify in utils.create_exp_dir
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
log_format = '%(asctime)s %(message)s'

# Log to stdout and, through an extra handler, to <args.save>/log.txt.
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


# Dataset root and number of output classes passed to the network constructor.
data_dir = os.path.join(args.tmp_data_dir, 'imagenet')   
CLASSES = 1000

Experiment dir : FreeDARTS_SynFlow_ImageNet_oneshot-exp-seed-0-20211118-062122


In [3]:
def pruning_func(model, a_optimizer, criterion, optimizer, lr, input_shape=(3, 224, 224)):
    """Score the architecture parameters with one SynFlow-style forward/backward pass.

    Every tensor in ``model.state_dict()`` is replaced in place by its absolute
    value, a single all-ones sample is pushed through the model in training
    mode, and the sum of the outputs is back-propagated. Each architecture
    parameter is then scored as ``|alpha * d(loss)/d(alpha)|``.

    Args:
        model: network exposing ``_arch_parameters``; entries 0 and 1 are the
            tensors that get scored. The model is mutated in place (weights
            made non-negative, switched to train mode, gradients populated).
        a_optimizer: optimizer whose ``zero_grad()`` is called before the
            backward pass. It is never stepped here.
        criterion: unused; kept so existing callers keep working.
        optimizer: unused; kept so existing callers keep working.
        lr: unused; kept so existing callers keep working.
        input_shape: per-sample shape of the all-ones probe input, without the
            batch dimension. Defaults to ``(3, 224, 224)``.

    Returns:
        Tuple ``(norm_arch_pruned, reduce_arch_pruned)``: the scores for
        ``model._arch_parameters[0]`` and ``model._arch_parameters[1]``.
    """
    # Make every entry of the state dict non-negative, in place.
    for tensor in model.state_dict().values():
        tensor.abs_()

    model.train()
    a_optimizer.zero_grad()

    # Build the probe on the same device as the model instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    inputs = torch.ones([1] + list(input_shape), dtype=torch.float, device=device)
    logits = model(inputs)

    arch_loss = torch.sum(logits)
    arch_loss.backward()

    # The optimizer is deliberately not stepped: only the gradients are needed.
    print(arch_loss)

    norm_alpha = model._arch_parameters[0]
    reduce_alpha = model._arch_parameters[1]
    norm_arch_pruned = torch.abs(norm_alpha.data * norm_alpha.grad.data)
    reduce_arch_pruned = torch.abs(reduce_alpha.data * reduce_alpha.grad.data)

    return norm_arch_pruned, reduce_arch_pruned

import torch
import torch.nn as nn
import torch.nn.functional as F
from operations import *
from torch.autograd import Variable
from genotypes import PRIMITIVES
from genotypes import Genotype


def synflow_genotype(norm_arch_synflow,redu_arch_synflow):
    """Decode a ``Genotype`` from per-edge, per-operation score tensors.

    Both arguments are tensors that are moved to the CPU and converted to
    numpy. Rows are consumed in consecutive groups of 2, 3, 4 and 5 (one
    group per node, one row per incoming edge); columns are indexed like
    ``PRIMITIVES``.

    For every node the two incoming edges whose best non-'none' score is
    highest are kept, and each kept edge is assigned its best-scoring
    non-'none' operation.

    Returns:
        A ``Genotype`` whose ``normal`` / ``reduce`` fields are lists of
        ``(operation_name, input_index)`` pairs and whose concat fields are
        ``range(2, 6)``.
    """
    def _parse(weights):
        none_idx = PRIMITIVES.index('none')
        selected = []
        offset = 0
        for node in range(4):
            fan_in = node + 2
            node_scores = weights[offset:offset + fan_in].copy()
            offset += fan_in

            def edge_strength(src):
                # Best score over all operations on this edge, ignoring 'none'.
                row = node_scores[src]
                return max(row[op] for op in range(len(row)) if op != none_idx)

            # Stable sort on the negated strength: strongest edges first,
            # ties resolved in favour of the lower input index.
            ranked = sorted(range(fan_in), key=lambda src: -edge_strength(src))
            for src in ranked[:2]:
                row = node_scores[src]
                best_op = None
                for op in range(len(row)):
                    if op == none_idx:
                        continue
                    if best_op is None or row[op] > row[best_op]:
                        best_op = op
                selected.append((PRIMITIVES[best_op], src))
        return selected

    normal_cell = _parse(norm_arch_synflow.cpu().numpy())
    reduce_cell = _parse(redu_arch_synflow.cpu().numpy())

    concat = range(2, 6)
    return Genotype(
        normal=normal_cell, normal_concat=concat,
        reduce=reduce_cell, reduce_concat=concat,
    )


In [4]:
# Setup cell: require a GPU, seed the RNGs, build the search network and the
# optimizers / LR scheduler used by the cells below.
# NOTE(review): sys.exit inside a notebook cell stops the run by raising SystemExit -- confirm this is the desired failure mode.
if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

# Seed numpy and torch (CPU and all CUDA devices) from args.seed.
# NOTE(review): cudnn.benchmark = True may pick non-deterministic kernels despite the seeding -- confirm whether exact reproducibility is required.
np.random.seed(args.seed)
#torch.cuda.set_device(args.gpu)
cudnn.benchmark = True
torch.manual_seed(args.seed)
cudnn.enabled=True
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
#logging.info('gpu device = %d' % args.gpu)
logging.info("args = %s", args)
#dataset_dir = '/cache/'
#pre.split_dataset(dataset_dir)
#sys.exit(1)
# dataset prepare
# traindir, valdir and normalize are defined here but not used in the visible cells.
traindir = os.path.join(data_dir, 'train')
valdir = os.path.join(data_dir,  'val')

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225])
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()
#dataset split     

# Search network built from the configured width, class count and depth, moved to the GPU.
model = Network(args.init_channels, CLASSES, args.layers, criterion)
##model = torch.nn.DataParallel(model)
model = model.cuda()
logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

# SGD over model.parameters() and Adam over model.arch_parameters().
optimizer = torch.optim.SGD(
    model.parameters(),
    args.learning_rate,
    momentum=args.momentum,
    weight_decay=args.weight_decay)
optimizer_a = torch.optim.Adam(model.arch_parameters(),
           lr=args.arch_learning_rate, betas=(0.5, 0.999), 
           weight_decay=args.arch_weight_decay)

# Cosine annealing of the SGD learning rate over args.epochs, down to learning_rate_min.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    #architect = Architect(model, args)
lr=args.learning_rate

11/18 06:21:23 AM args = Namespace(arch_learning_rate=0.006, arch_weight_decay=0.001, batch_size=1, begin=35, cutout=False, cutout_length=16, data='/tmp/cache/', drop_path_prob=0.4, epochs=50, grad_clip=5, init_channels=48, layers=14, learning_rate=0.5, learning_rate_min=0.0, model_path='saved_models', momentum=0.9, note='try', report_freq=50, save='FreeDARTS_SynFlow_ImageNet_oneshot-exp-seed-0-20211118-062122', seed=0, tmp_data_dir='/home/mzhang3/Data/', unrolled=False, weight_decay=0.0003, workers=4)
11/18 06:21:25 AM param size = 25.512016MB


In [5]:
# One-shot search: score the architecture parameters once, time that pass,
# then decode and log the resulting genotype.
# NOTE(review): epoch and current_lr are not read in the visible cells, and the
# scheduler is stepped before any optimizer.step() -- confirm whether these
# lines are still needed.
epoch=0

scheduler.step()
current_lr = scheduler.get_lr()[0]

# Wall-clock timing covers only the scoring pass.
start_time= time.time()

norm_arch_pruned, reduce_arch_pruned = pruning_func(model, optimizer_a, criterion, optimizer, lr)


search_time = time.time() - start_time

logging.info('Pruning cost {:.1f} s.'.format(search_time))


# Convert the two score tensors into a Genotype.
genotype=synflow_genotype(norm_arch_pruned,reduce_arch_pruned)

logging.info('Searched architecture------------------')
logging.info(genotype)
logging.info('---------------------------------------')



tensor(17.6584, device='cuda:0', grad_fn=<SumBackward0>)
11/18 06:21:25 AM Pruning cost 0.7 s.
11/18 06:21:25 AM Searched architecture------------------
11/18 06:21:25 AM Genotype(normal=[('dil_conv_5x5', 0), ('dil_conv_5x5', 1), ('dil_conv_3x3', 1), ('dil_conv_3x3', 0), ('sep_conv_3x3', 2), ('dil_conv_5x5', 1), ('sep_conv_5x5', 0), ('sep_conv_3x3', 2)], normal_concat=range(2, 6), reduce=[('dil_conv_3x3', 0), ('max_pool_3x3', 1), ('dil_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_5x5', 3), ('dil_conv_5x5', 0), ('sep_conv_5x5', 2), ('avg_pool_3x3', 0)], reduce_concat=range(2, 6))
11/18 06:21:25 AM ---------------------------------------
