In [30]:
import argparse
import os
import time
from logging import getLogger
#import warnings

import numpy as np
#from tqdm import tqdm
import yaml

import torch
import torch.nn as nn
#import torch.nn.parallel
#import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data as data
import torch.distributed as dist
import torch.autograd as autograd

In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs so edits to the project sources are picked up without a restart.
%load_ext autoreload
%autoreload 2
import sys
# Put the project's source directory (relative to the notebook's working directory)
# at the front of sys.path so that project modules located there can be imported.
module_path = os.path.abspath(os.path.join('../src')) # or the path to your source code
sys.path.insert(0, module_path)

In [12]:
from utils import (
	bool_flag,
	initialize_exp,
	restart_from_checkpoint,
	fix_random_seeds,
	AverageMeter,
	init_distributed_mode,
	accuracy,
	add_slurm_params,
	get_dataloader,
	optimizer_config,
)

from models import get_model, get_classifier, modelfusion
from datasets import get_dataset

In [14]:
def train(model, reglog, optimizer, loader, epoch, args):
	"""
	Train the classifier ``reglog`` for one epoch on features produced by ``model``.

	``model`` is switched to eval mode and its forward pass runs under
	``torch.no_grad()``, so no gradients flow into it; ``reglog`` is switched to
	train mode and is the only module in the autograd graph of the loss.

	Args:
		model: feature extractor, called as ``model(inp)``.
		reglog: classifier head, called on the output of ``model``.
		optimizer: optimizer that is zeroed and stepped once per batch.
		loader: iterable of batches, each either ``(inp, target)`` or
			``(inp, target, meta)``; ``meta`` is ignored.
		epoch: epoch index, used for logging and returned unchanged.
		args: namespace providing ``rank`` (only rank 0 logs progress).

	Returns:
		Tuple ``(epoch, average loss, average top-1, average top-5)``.
		NOTE(review): the top-k averages are obtained with ``.item()``, which
		presumes ``accuracy`` returns tensors -- confirm against ``utils.accuracy``.

	Raises:
		ValueError: if a batch does not have 2 or 3 elements.
	"""
	# running statistics
	batch_time = AverageMeter()
	data_time = AverageMeter()

	# training statistics
	top1 = AverageMeter()
	top5 = AverageMeter()
	losses = AverageMeter()
	end = time.perf_counter()

	model.eval()
	reglog.train()

	# CrossEntropyLoss built without class weights holds no tensors, so it needs no
	# device placement; it follows the device of the tensors it is called with.
	criterion = nn.CrossEntropyLoss()

	for iter_epoch, record in enumerate(loader):
		# measure data loading time
		data_time.update(time.perf_counter() - end)

		# Reject unexpected batch layouts explicitly; otherwise ``inp``/``target``
		# from the previous iteration would be silently reused.
		if len(record) not in (2, 3):
			raise ValueError(
				"expected a batch of (inp, target) or (inp, target, meta), "
				"got {} elements".format(len(record))
			)
		inp, target = record[0], record[1]

		# Tensors are left on the device the loader yields them on (CPU for now).
		# For GPU runs, move them here:
		#inp = inp.cuda(non_blocking=True)
		#target = target.cuda(non_blocking=True)

		# forward: frozen backbone without autograd, then the trainable head
		with torch.no_grad():
			output = model(inp)

		output = reglog(output)
		loss = criterion(output, target)

		# compute the gradients
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		# update stats
		acc1, acc5 = accuracy(output, target, topk=(1, 5))
		losses.update(loss.item(), inp.size(0))
		top1.update(acc1[0], inp.size(0))
		top5.update(acc5[0], inp.size(0))

		batch_time.update(time.perf_counter() - end)
		end = time.perf_counter()

		# verbose
		if args.rank == 0 and iter_epoch % 50 == 0:
			# ``logger`` is the notebook-level logger, not a parameter of this function.
			logger.info(
				"Epoch[{0}] - Iter: [{1}/{2}]\t"
				"Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
				"Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
				"Loss {loss.val:.4f} ({loss.avg:.4f})\t"
				"Prec {top1.val:.3f} ({top1.avg:.3f})\t"
				"LR {lr}".format(
					epoch,
					iter_epoch,
					len(loader),
					batch_time=batch_time,
					data_time=data_time,
					loss=losses,
					top1=top1,
					lr=optimizer.param_groups[0]["lr"],
				)
			)

	return epoch, losses.avg, top1.avg.item(), top5.avg.item()

In [29]:
def validate_network(val_loader, model, classifier, args, indices=None):
	"""
	Evaluate ``classifier(model(inp))`` on ``val_loader`` without gradients.

	Both modules are switched to eval mode. Per-batch loss and top-1/top-5
	accuracy are accumulated; when a ``torch.distributed`` process group has been
	initialised, the sums and counts are summed across processes before averaging.
	Without a process group the local averages are returned.

	Args:
		val_loader: iterable of batches, each either ``(inp, target)`` or
			``(inp, target, meta)``; ``meta`` is ignored.
		model: feature extractor, called as ``model(inp)``.
		classifier: head applied to the output of ``model``.
		args: namespace providing ``rank`` (only rank 0 logs progress).
		indices: optional index selecting a subset of output columns
			(``output[:, indices]``) before loss and accuracy are computed.

	Returns:
		Tuple ``(loss, top1, top5)`` of Python floats: each is the accumulated sum
		divided by the accumulated count (NaN if no samples were seen).

	Raises:
		ValueError: if a batch does not have 2 or 3 elements.
	"""
	def _scalar(value):
		# Meter sums are tensors once updated with tensor accuracies, but may still
		# be plain numbers if the loader was empty; accept both.
		return value.item() if hasattr(value, "item") else float(value)

	batch_time = AverageMeter()
	losses = AverageMeter()
	top1 = AverageMeter()
	top5 = AverageMeter()

	# switch to evaluate mode
	model.eval()
	classifier.eval()

	# CrossEntropyLoss built without class weights holds no tensors, so it needs no
	# device placement.
	criterion = nn.CrossEntropyLoss()

	# Device on which the statistics tensor is built; follows the targets so the
	# reduction happens where the data lives. Stays on CPU for an empty loader.
	stats_device = torch.device("cpu")

	with torch.no_grad():
		end = time.perf_counter()
		for i, record in enumerate(val_loader):
			# Reject unexpected batch layouts explicitly; otherwise ``inp``/``target``
			# from the previous iteration would be silently reused.
			if len(record) not in (2, 3):
				raise ValueError(
					"expected a batch of (inp, target) or (inp, target, meta), "
					"got {} elements".format(len(record))
				)
			inp, target = record[0], record[1]
			stats_device = target.device

			# Tensors are left on the device the loader yields them on (CPU for now).
			# For GPU runs, move them here:
			#inp = inp.cuda(non_blocking=True)
			#target = target.cuda(non_blocking=True)

			# compute output
			output = classifier(model(inp))

			if indices is not None:
				output = output[:, indices]

			loss = criterion(output, target)

			acc1, acc5 = accuracy(output, target, topk=(1, 5))

			losses.update(loss.item(), inp.size(0))
			top1.update(acc1[0], inp.size(0))
			top5.update(acc5[0], inp.size(0))

			# measure elapsed time
			batch_time.update(time.perf_counter() - end)
			end = time.perf_counter()
			if args.rank == 0 and i % 50 == 0:
				# ``logger`` is the notebook-level logger, not a parameter of this function.
				logger.info(
				"Epoch[{0}] - Iter: [{1}/{2}]\t"
				"Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
				"Loss {loss.val:.4f} ({loss.avg:.4f})\t"
				"Prec {top1.val:.3f} ({top1.avg:.3f})\t".format(
					0,
					i,
					len(val_loader),
					batch_time=batch_time,
					loss=losses,
					top1=top1,
				)
				)

	# Layout: [loss sum, top1 sum, top5 sum, loss count, top1 count, top5 count].
	# ``target.device`` is used rather than ``target.get_device()``, which returns -1
	# for CPU tensors and is not a valid argument to ``.to()``.
	scores_val = torch.tensor(
		[
			_scalar(losses.sum), _scalar(top1.sum), _scalar(top5.sum),
			_scalar(losses.count), _scalar(top1.count), _scalar(top5.count),
		],
		dtype=torch.float32,
		device=stats_device,
	)

	# Only reduce across processes when a process group actually exists; calling
	# all_reduce without one raises.
	if dist.is_available() and dist.is_initialized():
		dist.all_reduce(scores_val, op=dist.ReduceOp.SUM)

	scores_val = tuple((scores_val[:3] / scores_val[3:]).detach().cpu().numpy().tolist())
	losses, top1, top5 = scores_val
	return losses, top1, top5

In [24]:
def main(args):
	"""
	Train a classifier on top of pretrained backbone(s) and evaluate it periodically.

	Steps, in order:
	  1. Overlay onto ``args`` the settings stored under ``args.tag`` in
	     ``./configs/pretrained_checkpoints.yaml`` (if the tag is present).
	  2. Seed the RNGs, initialise the experiment logger/stats, build datasets
	     and dataloaders.
	  3. Build one backbone per entry of ``args.arch``, build the classifier and
	     fuse them with ``modelfusion``; everything is placed on the CPU.
	  4. Configure optimizer and LR scheduler, then restore from
	     ``<dump_path>/checkpoint.pth.tar`` (classifier only when ``'save'`` is in
	     ``args.mode``; classifier, optimizer and scheduler otherwise).
	  5. Run the epoch loop: train, step the scheduler, and every
	     ``args.eval_freq`` epochs validate, log and write checkpoints.

	Side effects: sets the module-level ``best_acc``, mutates ``args``, sets cuDNN
	flags, and writes checkpoint files under ``args.dump_path``.

	Args:
		args: namespace with the experiment configuration read throughout this
			function (``tag``, ``seed``, ``arch``, ``pretrained``, ``scheduler_type``,
			``epochs``, ``dump_path``, ``rank``, ...).

	Raises:
		ValueError: if ``args.scheduler_type`` is neither ``"step"`` nor ``"cosine"``.
	"""
	global best_acc

	# NOTE(review): yaml.FullLoader can construct more than plain data types. This is
	# a local project config; consider yaml.safe_load if it only holds plain mappings.
	with open('./configs/pretrained_checkpoints.yaml') as tag_file:
		# An empty YAML file loads as None; treat that as "no tags".
		tags = yaml.load(tag_file, Loader=yaml.FullLoader) or {}

	if args.tag is not None and args.tag in tags:
		for key, value in tags[args.tag].items():
			print(key)
			setattr(args, key, value)

	# distributed training environments and seeds
	#init_distributed_mode(args)
	fix_random_seeds(args.seed)

	# amd gpu cards environment variables
	# os.environ['MIOPEN_USER_DB_PATH']=os.path.join(args.dump_path, 'amd/rank_%d' % args.rank)
	# os.environ['MIOPEN_FIND_MODE']='2'

	# initialize logger ...
	logger, training_stats = initialize_exp( args, "epoch", "loss", "prec1", "prec5", "loss_val", "prec1_val", "prec5_val")

	# build data
	train_dataset, val_dataset, datamsg = get_dataset(args.data_name, args.tf_name, args)

	# build dataloaders
	train_loader, val_loader, additional_loaders = get_dataloader(train_dataset, val_dataset, datamsg, args)
	logger.info("Building data done")

	## build trunk and load weights
	feat_dims, models = [], []

	for i in range(len(args.arch)):
		# An empty ``args.pretrained`` means "no pretrained weights for any backbone".
		pretrain_path = None if len(args.pretrained) == 0 else args.pretrained[i]
		per_model, msg, feat_dim = get_model(args.arch[i], skip_pool=args.skip_pool, \
		pretrain_path = pretrain_path, img_dim=datamsg['img_dim'])
		logger.info("Load pretrained model with msg: {}".format(msg))

		feat_dims.append(feat_dim)
		models.append(per_model)

	#build classifier
	classifier = get_classifier(args.classifier, datamsg['nclass'], feat_dims, logger, args)
	logger.info('classifier {}'.format(classifier))

	# Using cpu for now. For GPU runs:
	# device = torch.device("cuda:" + str(args.gpu_to_work_on))
	device = torch.device("cpu")

	# model is either Identity or backbone. only classifier is trainable
	model, classifier = modelfusion(args.richway, models, classifier, args)
	model, classifier = model.to(device), classifier.to(device)

	# For distributed usage in future
	# classifier = nn.parallel.DistributedDataParallel(
	# 	classifier,
	# 	device_ids=[args.gpu_to_work_on],
	# )

	# Predicate handed to optimizer_config as ``head_reg``. It must be chosen here:
	# written as a single ``lambda x: True if ... else lambda x: ...`` expression, the
	# conditional becomes the lambda's body and the else-branch returns a (truthy)
	# function object for every input.
	if args.exp_mode in ['lineareval', 'biaslineareval']:
		head_reg = lambda x: True
	else:
		head_reg = lambda x: 'classifier' in x

	optimizer = optimizer_config(classifier, args, logger, head_reg=head_reg)
	logger.info('optimizer {}'.format(optimizer))

	# set scheduler
	if args.scheduler_type == "step":
		scheduler = torch.optim.lr_scheduler.MultiStepLR(
			optimizer, args.decay_epochs, gamma=args.gamma
		)
	elif args.scheduler_type == "cosine":
		scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
			optimizer, args.epochs, eta_min=args.final_lr
		)
	else:
		# Fail with a clear message instead of an unbound ``scheduler`` below.
		raise ValueError(
			"unsupported scheduler_type {!r}; expected 'step' or 'cosine'".format(args.scheduler_type)
		)
	logger.info('lr scheduler {}'.format(scheduler))

	# Optionally resume from a checkpoint
	to_restore = {"epoch": 0, "best_acc": 0.}

	# In 'save' mode only the classifier weights are restored; otherwise the optimizer
	# and scheduler states are restored as well.
	restore_targets = {"state_dict": classifier}
	if 'save' not in args.mode:
		restore_targets["optimizer"] = optimizer
		restore_targets["scheduler"] = scheduler

	restart_from_checkpoint(
		os.path.join(args.dump_path, "checkpoint.pth.tar"),
		run_variables=to_restore,
		**restore_targets
	)
	start_epoch = to_restore["epoch"]
	best_acc = to_restore["best_acc"]

	torch.backends.cudnn.benchmark = True

	if args.cuda_deterministic:
		logger.info("cuda deterministic")
		torch.backends.cudnn.deterministic = True

	for epoch in range(start_epoch, args.epochs):

		if epoch == 0 and args.save_init:
			save_dict = {
					"epoch": 0,
					"state_dict": classifier.state_dict(),
					"optimizer": optimizer.state_dict(),
					"scheduler": scheduler.state_dict(),
					"best_acc": 0,
				}
			torch.save(save_dict, os.path.join(args.dump_path, "checkpoint_init.pth.tar"))
			logger.info('saved weight initialization')

		# train the network for one epoch
		logger.info("============ Starting epoch %i ... ============" % epoch)

		# set samplers; only samplers that expose set_epoch need (or accept) this call
		if hasattr(train_loader.sampler, "set_epoch"):
			train_loader.sampler.set_epoch(epoch)

		tr_epoch, tr_loss, tr_top1, tr_top5 = train(model, classifier, optimizer, train_loader, epoch, args)
		scheduler.step()

		if (epoch+1) % args.eval_freq == 0:
			loss, top1, top5 = validate_network(val_loader, model, classifier, args)

			if args.custom_eval_func is not None:
				# Imported only when custom evaluation is requested.
				from src import custom_eval
				for custom_eval_name in args.custom_eval_func:
					custom_eval_func = getattr(custom_eval, custom_eval_name)
					custom_eval_results = custom_eval_func(val_loader, model, classifier, args)
					logger.info(f'{custom_eval_name}: ' + ','.join(['%.4f' % val for val in custom_eval_results]))

			# additional validation sets
			additional_msg = {}
			for key in additional_loaders:
				loader = additional_loaders[key]
				ad_loss, ad_top1, ad_top5 = validate_network(loader, model, classifier, args)
				additional_msg[key] = [ad_loss, ad_top1, ad_top5]

			training_stats.update([tr_epoch, tr_loss, tr_top1, tr_top5] + [loss, top1, top5])

			# log best acc
			is_best = False
			if top1 > best_acc:
				best_acc = top1
				is_best = True

			if args.rank == 0:
				logger.info(
					"Test:\t"
					"Loss {loss:.4f}\t"
					"Acc@1 {top1:.3f}\t"
					"Best Acc@1 so far {acc:.1f}".format(loss=loss, top1=top1, acc=best_acc))

				# Distinct names keep the main validation ``loss``/``top1`` intact.
				for key in additional_msg:
					ad_loss, ad_top1, _ = additional_msg[key]
					logger.info(
						"additional Test {key}:\t"
						"Loss {loss:.4f}\t"
						"Acc@1 {top1:.3f}".format(key=key, loss=ad_loss, top1=ad_top1))

			# save checkpoint
			if args.rank == 0:

				save_dict = {
					"epoch": epoch + 1,
					"state_dict": classifier.state_dict(),
					"optimizer": optimizer.state_dict(),
					"scheduler": scheduler.state_dict(),
					"best_acc": best_acc,
				}
				torch.save(save_dict, os.path.join(args.dump_path, "checkpoint.pth.tar"))

				if (epoch+1) % args.save_freq == 0:
					torch.save(save_dict, os.path.join(args.dump_path, f"checkpoint_epoch{epoch+1}.pth.tar"))

				if is_best:
					torch.save(save_dict, os.path.join(args.dump_path, "best.pth.tar"))

	logger.info("Training of the supervised linear classifier on frozen features completed.\n"
				"Top-1 test accuracy: {acc:.1f}".format(acc=best_acc))


In [13]:
# Root logger, bound at notebook scope: `train` and `validate_network` look up
# `logger` as a global, whereas `main` binds its own local `logger`.
logger = getLogger()

In [32]:
def test_args(args):
	print(args.tags, args.epochs)


In [40]:
class Args(argparse.Namespace):
    """Namespace with class-level default values, usable in place of parsed CLI arguments."""

    # data location and model type
    data, model = './data/penn', 'LSTM'
    # both sizes share the same default
    emsize = nhid = 200
    tags = 'some'
    epochs = 1
    

# Smoke-check: build the default namespace and print its tags/epochs.
args = Args()
test_args(args)

some 1
