In [12]:
from comet_ml import Experiment
import argparse
from tqdm import tqdm
import os, sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import yaml
from head.metrics import CosFace
from loss.focal import FocalLoss
from utils.utils import separate_resnet_bn_paras, warm_up_lr, load_checkpoint, load_checkpoints_all, \
    schedule_lr, AverageMeter, accuracy
from utils.fairness_utils import evaluate
from utils.data_utils_balanced import prepare_data
from utils.utils_train import Network
import numpy as np
import pandas as pd
import random
import timm
from utils.utils import save_output_from_dict
from utils.utils_train import Network, get_head
from utils.fairness_utils import evaluate, add_column_to_file
from timm.optim import create_optimizer_v2, optimizer_kwargs
from timm.scheduler import create_scheduler
from timm.utils.model_ema import ModelEmaV2
sys.path.append('/cmlscratch/sdooley1/merge_timm/FR-NAS/')
from dpn107 import DPN
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = 'cpu'
#device_ids=range(torch.cuda.device_count())
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


parser = argparse.ArgumentParser()

parser.add_argument('--config_path', default='/cmlscratch/sdooley1/merge_timm/FR-NAS/configs/dpn107/config_dpn107_CosFace_sgd.yaml', type=str, required=False)
parser.add_argument('--seed', default=666, type=int, required=False)

args = parser.parse_args(['--config_path', 
                          '/cmlscratch/sdooley1/merge_timm/FR-NAS/configs/dpn107/config_dpn107_CosFace_sgd.yaml'
                         ])

seed = args.seed
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)

with open(args.config_path, "r") as ymlfile:
    options = yaml.load(ymlfile, Loader=yaml.FullLoader)
    print(options)
for key, value in options.items():
    setattr(args, key, value)

config={
'edge1': 3,
'edge2': 0,
'edge3': 1,
'head': 'CosFace',
'lr': 0.13828312564892567,
'optimizer': 'SGD',
}
args.opt = config["optimizer"]
args.head = config["head"]
args.lr = config["lr"]
print(args)

checkpoints_suffix = ''
if seed != 666:
    checkpoints_suffix = '_'+str(seed)
args.checkpoints_root = f'Checkpoints/msceleb_train{checkpoints_suffix}/'
args.msceleb = '/fs/cml-datasets/MS_Celeb_aligned_112/imgs/'
args.default_train_root = '/cmlscratch/sdooley1/data/vggface2/val/'
args.default_test_root = '/cmlscratch/sdooley1/data/vggface2/val/'
args.demographics_file = '/cmlscratch/sdooley1/data/vggface2/vggface2_demographics.txt'
args.RFW_checkpoints_root = f'Checkpoints/msceleb_train{checkpoints_suffix}/'
args.dataset = 'vggface2'
p_images = {
    args.groups_to_modify[i]: args.p_images[i]
    for i in range(len(args.groups_to_modify))
}
p_identities = {
    args.groups_to_modify[i]: args.p_identities[i]
    for i in range(len(args.groups_to_modify))
}
args.p_images = p_images
args.p_identities = p_identities

print("P identities: {}".format(args.p_identities))
print("P images: {}".format(args.p_images))


{'backbone': 'dpn107', 'batch_size': 64, 'bn_eps': None, 'bn_momentum': None, 'checkpoints_root': 'Checkpoints/timm_explore_few_epochs/', 'clip_grad': None, 'clip_mode': 'norm', 'comet_api_key': 'KKiKMVZI9RCYowoKDZDS5Y2km', 'comet_workspace': 'rsukthanker', 'cooldown_epochs': 10, 'dataset': 'CelebA', 'decay_epochs': 100, 'decay_rate': 0.1, 'default_test_root': '/work/dlclarge2/sukthank-ZCP_Competition/FairNAS/FR-NAS/data/CelebA/Img/img_align_celeba_splits/val/', 'default_train_root': '/work/dlclarge2/sukthank-ZCP_Competition/FairNAS/FR-NAS/data/CelebA/Img/img_align_celeba_splits/train/', 'demographics_file': 'CelebA/CelebA_demographics.txt', 'dist_bn': 'reduce', 'drop': 0.0, 'drop_block': None, 'drop_connect': None, 'drop_path': None, 'epoch_repeats': 0.0, 'epochs': 100, 'file_name': 'timm_from-scratch.csv', 'file_name_ema': 'timm_from-scratch_ema.csv', 'gp': None, 'groups_to_modify': ['male', 'female'], 'head': 'CosFace', 'input_size': 224, 'layer_decay': None, 'lr': 0.001, 'lr_cycle_

In [13]:
# run_name = "Checkpoints_Edges_{}_LR_{}_Head_{}_Optimizer_{}".format(str(config["edge1"])+str(config["edge2"])+str(config["edge3"]), config["lr"], config["head"],config["optimizer"])
# output_dir = os.path.join('/cmlscratch/sdooley1/merge_timm/FR-NAS',args.checkpoints_root, run_name)
# args.checkpoints_root = output_dir
# if not os.path.isdir(output_dir):
#     os.mkdir(output_dir)
# output_dir = os.path.join('/cmlscratch/sdooley1/merge_timm/FR-NAS',args.RFW_checkpoints_root, run_name)
# args.RFW_checkpoints_root = output_dir
# if not os.path.isdir(output_dir):
#     os.mkdir(output_dir)

# dataloaders, num_class, demographic_to_labels_train, demographic_to_labels_test = prepare_data(
#     args)

train_transform = transforms.Compose([
    # refer to https://pytorch.org/docs/stable/torchvision/transforms.html for more build-in online data augmentation
    # smaller side resized
    transforms.Resize([int(128 * args.input_size / 112),
                      int(128 * args.input_size / 112)]),
    transforms.RandomCrop([args.input_size, args.input_size]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=args.mean,
                         std=args.std),
])

dataset_train = datasets.ImageFolder(args.msceleb, train_transform)

train_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=args.batch_size, pin_memory=True,
    num_workers=args.num_workers, drop_last=True, shuffle=True
)
dataloaders['train'] = train_loader


In [14]:
args.num_class = 7058
config={
'edge1': 3,
'edge2': 0,
'edge3': 1,
'head': 'CosFace',
'lr': 0.13828312564892567,
'optimizer': 'SGD',
}
edges=[config['edge1'],config['edge2'],config['edge3']]
# Build model
backbone = DPN(edges,num_init_features=128, k_r=200, groups=50, k_sec=(1,1,1,1), inc_sec=(20, 64, 64, 128))

config = timm.data.resolve_data_config({}, model=backbone)
model_input_size = args.input_size

# get model's embedding size
meta = pd.read_csv('../'+args.metadata_file)
embedding_size = 1000
args.embedding_size= embedding_size

head = get_head(args)
train_criterion = FocalLoss(elementwise=True)
head,backbone= head.to(device), backbone.to(device)
backbone = nn.DataParallel(backbone).to(device)
####################################################################################################################
# ======= argsimizer =======#
model = Network(backbone, head)

print(args.lr)
print(args.opt)
args.decay_milestones = [30, 60]
optimizer = create_optimizer_v2(model, **optimizer_kwargs(cfg=args))
scheduler, num_epochs = create_scheduler(args, optimizer)

model_ema = None
if args.model_ema:
    # Important to create EMA model after cuda(), DP wrapper, and AMP but before DDP wrapper
    model_ema = ModelEmaV2(
        model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else None)
model, model_ema, optimizer, epoch, batch, checkpoints_model_root = load_checkpoint(
    args, model, model_ema, optimizer, dataloaders["test"], p_identities,
    p_images)
#model = nn.DataParallel(model)
model = model.to(device)


316
704
1216
2432
0.13828312564892567
SGD
Checkpoint_Head_CosFace_Backbone_dpn107_Opt_SGD_Dataset_CelebA_Epoch_
Found checkpoints for this model: []
No Checkpoints Found at 'Checkpoints/msceleb_train/'. Please Have a Check or Continue to Train from Scratch


In [16]:
print('Start training')
while epoch <= args.epochs:

    model.train()  # set to training mode
    model = model.to(device)
    meters = {}
    meters["loss"] = AverageMeter()
    meters["top5"] = AverageMeter()

    #             if epoch in args.stages:  # adjust LR for each training stage after warm up, you can also choose to adjust LR manually (with slight modification) once plaueau observed
    #                 schedule_lr(argsimizer)

    for inputs, labels  in tqdm(iter(
            dataloaders["train"])):

        #                 if batch + 1 <= num_batch_warm_up:  # adjust LR for each training batch during warm up
        #                     warm_up_lr(batch + 1, num_batch_warm_up, args.lr, argsimizer)

        inputs, labels = inputs.to(device), labels.to(device).long()
        outputs, reg_loss = model(inputs, labels)
        loss = train_criterion(outputs, labels) + reg_loss
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step(epoch + 1, meters["top5"])
        if model_ema is not None:
            model_ema.update(model)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5))
        meters["loss"].update(loss.data.item(), inputs.size(0))
        meters["top5"].update(prec5.data.item(), inputs.size(0))

        batch += 1


Start training


  0%|          | 0/90978 [00:00<?, ?it/s]/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [32,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [33,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [34,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/ScatterGatherKernel.cu:365: operator(): block: [0,0,0], thread: [36,0,0] Assertion `idx_dim >= 0 && idx_dim < index_size && "index out of bounds"` failed.
/opt/conda/conda-bld/pytorch_1670525539683/

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [17]:
labels

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [21]:
import os
for x in os.walk('/fs/cml-datasets/MS_Celeb_aligned_112/imgs/'):
    print(x)
    break

('/fs/cml-datasets/MS_Celeb_aligned_112/imgs/', ['65447', '10076', '599', '75024', '64593', '1864', '48717', '55618', '43427', '56699', '80060', '4902', '21516', '47899', '59652', '9906', '61423', '70516', '41838', '22635', '18502', '216', '29974', '26144', '51604', '11904', '49505', '1070', '62428', '35979', '66277', '45208', '74247', '1684', '55215', '18536', '47961', '66948', '27227', '41832', '55558', '73337', '34016', '23582', '63892', '72766', '12303', '27271', '43534', '29912', '13976', '25901', '84484', '65938', '11597', '75780', '24630', '77418', '79777', '34690', '22806', '38151', '71624', '1909', '58140', '83903', '38573', '47661', '5186', '35945', '26325', '33892', '15601', '20561', '79070', '71073', '43966', '70291', '3769', '17011', '74813', '7570', '42373', '65956', '11092', '47833', '71453', '67421', '33822', '33985', '79592', '28912', '11967', '7240', '37977', '1170', '80573', '85052', '81020', '63126', '35747', '80746', '82817', '25866', '40511', '4321', '13319', '647

In [25]:
len(x[1])

85742

In [20]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
labels

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [18]:
p

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [17]:
for p in model.parameters():
    p

In [9]:
model = model.to('cpu')
inputs = inputs.to('cpu')
features = model.backbone(inputs)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [32]:
model.head(features, labels)

RuntimeError: chunk expects `chunks` to be greater than 0, got: 0

In [37]:
device

device(type='cpu')

In [36]:
model.head = model.head.to(device)
len(model.head.device_id)

0