In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os
# Path from this notebook's directory to the project root. It is appended to sys.path so
# the project-local imports further down (config, lib.*, dataloaders.*) can be resolved
# -- presumably those packages live directly under this directory; verify if imports fail.
codebase = '../../'
sys.path.append(codebase)

In [2]:
import torch
# Display the installed PyTorch version so it is recorded in the cell output.
torch.__version__

'1.8.2'

In [3]:
# Experiment name; it is interpolated into the TensorBoard and checkpoint directories
# passed to ModelConfig.
exp_name = 'exp_045_rep'
# Expose only GPU 0 to this process.
# NOTE(review): this has to be set before CUDA is first initialised -- confirm nothing touches the GPU earlier.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [4]:
import os
from time import time as time_time
import numpy as np
# from torch import optim
from apex import amp
import torch
import pandas as pd
from tqdm import tqdm
write = tqdm.write

from torch.optim.lr_scheduler import ReduceLROnPlateau

from config import ModelConfig, BOX_SCALE, IM_SCALE
from torch.nn import functional as F
from lib.pytorch_misc import optimistic_restore, de_chunkize, clip_grad_norm
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry
from lib.pytorch_misc import print_para
from dataloaders.visual_genome import VGDataLoader, VG

from lib.my_model_24 import KERN


In [5]:
# Build the run configuration from a command-line style flag string.
# ModelConfig echoes the resulting hyperparameters in the cell output.
# The log and checkpoint directories are derived from exp_name.
conf = ModelConfig(f'''
-m predcls -p 1000 -clip 5 
-tb_log_dir summaries/kern_predcls/{exp_name} 
-save_dir checkpoints/kern_predcls/{exp_name}
-ckpt checkpoints/vgdet/vgrel-12.tar 
-val_size 5000 
-adam 
-b 3
-ngpu 1
-lr 1e-4 
''')

~~~~~~~~ Hyperparameters used: ~~~~~~~
ckpt : checkpoints/vgdet/vgrel-12.tar
save_dir : checkpoints/kern_predcls/exp_045_rep
num_gpus : 1
num_workers : 1
lr : 0.0001
batch_size : 3
val_size : 5000
l2 : 0.0001
adamwd : 0.0
clip : 5.0
print_interval : 1000
mode : predcls
cache : 
adam : True
test : False
num_epochs : 50
use_resnet : False
use_proposals : False
pooling_dim : 4096
use_ggnn_obj : False
ggnn_obj_time_step_num : 3
ggnn_obj_hidden_dim : 512
ggnn_obj_output_dim : 512
use_obj_knowledge : False
obj_knowledge : 
use_ggnn_rel : False
ggnn_rel_time_step_num : 3
ggnn_rel_hidden_dim : 512
ggnn_rel_output_dim : 512
use_rel_knowledge : False
rel_knowledge : 
tb_log_dir : summaries/kern_predcls/exp_045_rep
save_rel_recall : 


In [6]:
# Extra model options set directly on the config object, which is handed to KERN
# through `config=conf`.
# NOTE(review): the precise effect of each flag is defined in the model code, not in this notebook.
conf.MODEL.CONF_MAT_FREQ_TRAIN = '../../../vgmeta/conf_mat_freq_train.npy'
conf.MODEL.LRGA.USE_LRGA = False
conf.MODEL.USE_ONTOLOGICAL_ADJUSTMENT = True
conf.MODEL.NORMALIZE_EOA = True
# Overrides the num_workers value parsed by ModelConfig (reported as 1 in its
# hyperparameter dump); the data loaders read this attribute.
conf.num_workers = 9
# conf.MODEL.LRGA.K = 50
# conf.MODEL.LRGA.DROPOUT = 0.5
# conf.MODEL.GN.NUM_GROUPS = 1024//8

In [7]:
# Show the working directory; the relative paths in this notebook
# (codebase, checkpoints/, summaries/) are resolved against it.
os.getcwd()

'/home/zhanwen/kangaroo/eoa_preds_2_norm_bpl_no_sa_20220326_legion/ipynb/train_predcls'

In [8]:
# Build the Visual Genome dataset splits. The third value returned by VG.splits is
# discarded. Non-overlapping pairs are filtered only when running in 'sgdet' mode.
train, val, _ = VG.splits(
    num_val_im=conf.val_size,
    filter_duplicate_rels=True,
    use_proposals=conf.use_proposals,
    filter_non_overlap=(conf.mode == 'sgdet'),
    with_clean_classifier=True,
    get_state=False,
)


Dataloader using BPL
Dataloader using BPL
Dataloader using BPL


In [9]:
# Predicate vocabulary of the training split; val_epoch iterates over it to build one
# evaluator per predicate.
ind_to_predicates = train.ind_to_predicates # ind_to_predicates[0] means no relationship

In [10]:
# Wrap the train/val datasets in relationship-mode ('rel') data loaders, using the
# batch size, worker count and GPU count from the config.
train_loader, val_loader = VGDataLoader.splits(
    train,
    val,
    mode='rel',
    batch_size=conf.batch_size,
    num_workers=conf.num_workers,
    num_gpus=conf.num_gpus,
    pin_memory=True,
)


In [11]:
# Instantiate the KERN relationship model over the Visual Genome class and predicate
# vocabularies of the training split.
#
# graph_path and emb_path are absolute, machine-specific locations, so they are passed
# as-is: joining an absolute path onto `codebase` with os.path.join has no effect,
# because the join discards everything before an absolute component.
# rel_counts_path is relative and is therefore resolved against `codebase`.
# NOTE(review): consider moving the two pickles under the repository (or a configurable
# data directory) so the notebook runs on other machines.
detector = KERN(
    classes=train.ind_to_classes,
    rel_classes=train.ind_to_predicates,
    num_gpus=conf.num_gpus,
    mode=conf.mode,
    require_overlap_det=True,
    use_resnet=conf.use_resnet,
    use_proposals=conf.use_proposals,
    pooling_dim=conf.pooling_dim,
    ggnn_rel_time_step_num=3,
    ggnn_rel_hidden_dim=1024,
    ggnn_rel_output_dim=None,
    graph_path='/home/zhanwen/gbnet/graphs/005/edge_dict_all_plus_wikidata_177_20220208.pkl',
    emb_path='/home/zhanwen/gbnet/graphs/001/emb_mtx_wiki_51.pkl',
    rel_counts_path=os.path.join(codebase, 'graphs/001/pred_counts.pkl'),
    use_knowledge=True,
    use_embedding=True,
    refine_obj_cls=False,
    class_volume=1.0,
    with_clean_classifier=True,
    with_transfer=True,
    sa=True,
    config=conf,
)

my_ggnn_10: using use_ontological_adjustment
EOA-N: Used adj_normalize
!!!!!!!!!With Confusion Matrix Channel!!!!!
SA: Used adj_normalize


In [12]:
# Freeze the detector sub-module: none of its parameters should receive gradients,
# so get_optim (which filters on requires_grad) leaves them out of the optimizer.
for _param_name, frozen_param in detector.detector.named_parameters():
    frozen_param.requires_grad_(False)

In [13]:
# Print the parameter summary built by print_para: total parameter count, then one line
# per parameter with its shape, element count and whether it receives gradients.
print(print_para(detector), flush=True)


 449.4M total parameters 
 ----- 
 
detector.roi_fmap.0.weight                        : [4096,25088]    (102760448) (    )
roi_fmap.1.0.weight                               : [4096,25088]    (102760448) (grad)
roi_fmap_obj.0.weight                             : [4096,25088]    (102760448) (grad)
detector.roi_fmap.3.weight                        : [4096,4096]     (16777216) (    )
roi_fmap.1.3.weight                               : [4096,4096]     (16777216) (grad)
roi_fmap_obj.3.weight                             : [4096,4096]     (16777216) (grad)
ggnn_rel_reason.ggnn.fc_mp_receive_ont_ent.model.0.linear.weight: [3328,3328]     (11075584) (grad)
ggnn_rel_reason.obj_proj.weight                   : [1024,4096]     ( 4194304) (grad)
ggnn_rel_reason.rel_proj.weight                   : [1024,4096]     ( 4194304) (grad)
ggnn_rel_reason.ggnn.fc_mp_receive_ont_pred.model.0.linear.weight: [2048,2048]     ( 4194304) (grad)
ggnn_rel_reason.ggnn.fc_mp_receive_ont_ent.model.2.linear.weight: [1024

In [14]:
from apex.optimizers import FusedAdam, FusedSGD
def get_optim(lr):
    """
    Build the apex fused optimizer over the detector's trainable parameters.

    Trainable parameters are split into two groups:
      * names starting with 'roi_fmap' or containing 'clean' are trained at lr / 10
        (a hack that helps stabilise the models);
      * all remaining trainable parameters use the base rate lr.

    conf.adam selects FusedAdam (weight decay conf.adamwd, eps 1e-3); otherwise
    FusedSGD (weight decay conf.l2, momentum 0.9) is used.

    :param lr: base learning rate
    :return: the optimizer; no learning-rate scheduler is created
    """
    reduced_lr_params, base_lr_params = [], []
    for name, weight in detector.named_parameters():
        if not weight.requires_grad:
            # Frozen parameters are never handed to the optimizer
            continue
        gets_reduced_lr = name.startswith('roi_fmap') or 'clean' in name
        (reduced_lr_params if gets_reduced_lr else base_lr_params).append(weight)

    param_groups = [
        {'params': reduced_lr_params, 'lr': lr / 10.0},
        {'params': base_lr_params},
    ]

    if not conf.adam:
        return FusedSGD(param_groups, weight_decay=conf.l2, lr=lr, momentum=0.9)
    return FusedAdam(param_groups, weight_decay=conf.adamwd, lr=lr, eps=1e-3)



In [15]:
# Load the checkpoint onto the CPU. Without map_location, torch.load restores each tensor
# to the device it was saved from; checkpoints written by this notebook's training loop
# hold CUDA tensors, so the whole state dict would occupy GPU memory for as long as `ckpt`
# stays referenced. The restore step only copies values into the model, which works from
# CPU tensors as well.
ckpt = torch.load(conf.ckpt, map_location='cpu')


In [16]:
# Restore weights from `ckpt`. The checkpoint kind is read from the file name: take the
# text before the final '-' and keep what follows its last '/'
# (e.g. 'checkpoints/vgdet/vgrel-12.tar' -> 'vgrel').
if conf.ckpt.split('-')[-2].split('/')[-1] != 'vgrel':
    # Not a full relationship-model checkpoint: restore only the detector sub-module,
    # then initialise both ROI feature heads from the checkpoint's roi_fmap layers 0 and 3.
    start_epoch = -1
    optimistic_restore(detector.detector, ckpt['state_dict'])

    for fmap_head in (detector.roi_fmap[1], detector.roi_fmap_obj):
        for tensor_kind in ('weight', 'bias'):
            for layer_idx in (0, 3):
                destination = getattr(fmap_head[layer_idx], tensor_kind)
                destination.data.copy_(
                    ckpt['state_dict']['roi_fmap.%d.%s' % (layer_idx, tensor_kind)])
else:
    print("Loading EVERYTHING")
    start_epoch = ckpt['epoch']

    # A falsy return from optimistic_restore resets the epoch counter to -1
    if not optimistic_restore(detector, ckpt['state_dict']):
        start_epoch = -1


Loading EVERYTHING
We couldn't find ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.2.linear.bias,ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.2.linear.weight,ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.0.linear.bias,ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.0.linear.weight,ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.0.linear.bias,ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.2.linear.bias,ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.2.linear.weight,ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.0.linear.weight


In [17]:
# Move the model to the GPU; the trailing semicolon suppresses the module repr in the cell output.
detector.cuda();


In [18]:
from time import time as time_time
def train_epoch(epoch_num):
    """
    Train the detector for one pass over train_loader.

    Every conf.print_interval batches (except at batch 0) the mean of the losses over
    the last conf.print_interval batches is written out together with timing figures.

    :param epoch_num: epoch index, used only in the progress messages
    :return: pandas DataFrame with one row per batch, holding the loss dict
             returned by train_batch
    """
    detector.train()
    tr = []
    # The same batch count drives both the progress bar and the printed report
    num_batches = len(train_loader)
    start = time_time()
    prog_bar = tqdm(enumerate(train_loader), total=num_batches)
    for b, batch in prog_bar:
        # verbose is forwarded to clip_grad_norm by train_batch; it is switched on
        # only once every 10 report intervals
        _, loss_dict = train_batch(batch, verbose=b % (conf.print_interval*10) == 0)
        tr.append(loss_dict)

        if b % conf.print_interval == 0 and b >= conf.print_interval:
            mn = pd.DataFrame(tr[-conf.print_interval:]).mean(axis=0)
            time_per_batch = (time_time() - start) / conf.print_interval
            write("\ne{:2d}b{:5d}/{:5d} {:.3f}s/batch, {:.1f}m/epoch".format(
                epoch_num, b, num_batches, time_per_batch, num_batches * time_per_batch / 60))
            write(mn.to_string())
            write('-----------')
            # Restart the timer so the next report covers only its own interval
            start = time_time()
    return pd.DataFrame(tr)


In [19]:
from torch.cuda.amp import autocast
def train_batch(b, verbose=False):
    """
    Run one optimisation step on a single batch.

    The batch is handed to the model unchanged via ``detector[b]``; the forward pass
    and both loss terms are computed under autocast, the backward pass goes through
    apex's ``amp.scale_loss``, gradients are clipped to ``conf.clip`` and the
    module-level ``optimizer`` is stepped.

    :param b: one batch as yielded by train_loader
              NOTE(review): earlier notes described it as holding images, anchors,
              image sizes, GT boxes and GT classes -- confirm against the data loader.
    :param verbose: forwarded to clip_grad_norm
    :return: (result, losses) where result is the model output for the batch and
             losses is a dict of Python floats keyed 'loss_class', 'loss_rel'
             and 'loss_total'
    """
    optimizer.zero_grad()

    with autocast():
        result = detector[b]
        class_term = detector.obj_loss(result)
        rel_term = detector.rel_loss(result)
        combined = class_term + rel_term

    with amp.scale_loss(combined, optimizer) as scaled_loss:
        scaled_loss.backward()

    # Only parameters that actually received a gradient take part in clipping
    params_with_grad = [(n, p) for n, p in detector.named_parameters() if p.grad is not None]
    clip_grad_norm(params_with_grad, max_norm=conf.clip, verbose=verbose, clip=True)

    optimizer.step()

    losses = {
        'loss_class': float(class_term),
        'loss_rel': float(rel_term),
        'loss_total': float(combined),
    }
    return result, losses


In [20]:
from torch import no_grad as torch_no_grad
from tqdm import tqdm

def val_epoch():
    """
    Evaluate the detector on val_loader.

    Two overall evaluators (default and multiple_preds=True) collect recall, and one
    pair of evaluators per predicate (index 0, "no relationship", is skipped) feeds
    the mean-recall computation. The model is put back into training mode before
    returning.

    :return: (recall, recall_mp, mean_recall, mean_recall_mp)
    """
    detector.eval()

    # Per-predicate evaluators: entries are (predicate index, predicate name, evaluators)
    per_pred_evals = []
    per_pred_evals_mp = []
    for index, name in enumerate(ind_to_predicates):
        if index != 0:
            per_pred_evals.append((index, name, BasicSceneGraphEvaluator.all_modes()))
            per_pred_evals_mp.append(
                (index, name, BasicSceneGraphEvaluator.all_modes(multiple_preds=True)))

    # Evaluators for overall recall
    overall_eval = BasicSceneGraphEvaluator.all_modes()
    overall_eval_mp = BasicSceneGraphEvaluator.all_modes(multiple_preds=True)

    expected_batches = int(len(val)/val_loader.batch_size)
    with torch_no_grad():
        for val_b, batch in tqdm(enumerate(val_loader), total=expected_batches):
            # The first argument is the dataset index of the batch's first image
            val_batch(conf.num_gpus * val_b, batch, overall_eval, overall_eval_mp,
                      per_pred_evals, per_pred_evals_mp)

    recall = overall_eval[conf.mode].print_stats()
    recall_mp = overall_eval_mp[conf.mode].print_stats()

    mean_recall = calculate_mR_from_evaluator_list(per_pred_evals, conf.mode)
    mean_recall_mp = calculate_mR_from_evaluator_list(
        per_pred_evals_mp, conf.mode, multiple_preds=True)

    detector.train()
    return recall, recall_mp, mean_recall, mean_recall_mp


In [21]:
def val_batch(batch_num, b, evaluator, evaluator_multiple_preds, evaluator_list, evaluator_multiple_preds_list):
    """
    Run the detector on one validation batch and feed every image's prediction,
    paired with its ground truth, to the evaluators.

    :param batch_num: index into the validation set of the first image in this batch
    :param b: the batch, passed to the model via ``detector[b]``
    :param evaluator: overall evaluators
    :param evaluator_multiple_preds: overall evaluators built with multiple_preds=True
    :param evaluator_list: per-predicate evaluators
    :param evaluator_multiple_preds_list: per-predicate evaluators built with multiple_preds=True
    :return: None; results are accumulated through eval_entry
    """
    with autocast():
        outputs = detector[b]
    # With a single GPU the model output is one per-image tuple, so wrap it in a list
    per_image = [outputs] if conf.num_gpus == 1 else outputs

    for offset, (boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i) in enumerate(per_image):
        image_idx = batch_num + offset
        gt_entry = dict(
            gt_classes=val.gt_classes[image_idx].copy(),
            gt_relations=val.relationships[image_idx].copy(),
            gt_boxes=val.gt_boxes[image_idx].copy(),
        )
        # Both objects referenced by every predicted relation must have a class index above 0
        assert np.all(objs_i[rels_i[:, 0]] > 0) and np.all(objs_i[rels_i[:, 1]] > 0)

        pred_entry = dict(
            pred_boxes=boxes_i * BOX_SCALE/IM_SCALE,
            pred_classes=objs_i,
            pred_rel_inds=rels_i,
            obj_scores=obj_scores_i,
            rel_scores=pred_scores_i,  # hack for now.
        )

        eval_entry(conf.mode, gt_entry, pred_entry, evaluator, evaluator_multiple_preds,
                   evaluator_list, evaluator_multiple_preds_list)



In [22]:
# Enable TensorBoard logging only when a log directory is configured. A truthiness test
# treats an empty string like None; an empty path could not be created as a directory.
if conf.tb_log_dir:
    from tensorboardX import SummaryWriter
    # exist_ok avoids the check-then-create race and is a no-op if the directory exists
    os.makedirs(conf.tb_log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=conf.tb_log_dir)
    use_tb = True
else:
    use_tb = False


In [23]:
from warnings import warn

# The base learning rate is scaled by the number of GPUs and the per-GPU batch size.
optimizer = get_optim(conf.lr * conf.num_gpus * conf.batch_size)
# Wrap model and optimizer for apex mixed precision (O1, dynamic loss scaling).
detector, optimizer = amp.initialize(detector, optimizer, opt_level="O1")

# Make sure the checkpoint directory exists before the first torch.save.
if conf.save_dir:
    os.makedirs(conf.save_dir, exist_ok=True)

# NOTE(review): this resets the epoch counter to 0, overriding the start_epoch derived
# from the checkpoint in the restore cell, and end_epoch is hard-coded rather than taken
# from conf.num_epochs -- confirm both are intentional.
start_epoch = 0
end_epoch = 60
for epoch in range(start_epoch, end_epoch):
    write(f'epoch = {epoch}')
    # Step decay: divide every group's learning rate by 10 at epochs 10, 20, 30, ...
    if epoch != 0 and epoch % 10 == 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] /= 10

    rez = train_epoch(epoch)
    losses_mean_epoch = rez.mean(axis=0)
    losses_mean_epoch_class = losses_mean_epoch['loss_class']
    losses_mean_epoch_rel = losses_mean_epoch['loss_rel']
    losses_mean_epoch_total = losses_mean_epoch['loss_total']
    write("overall{:2d}: ({:.3f})\n{}".format(epoch, losses_mean_epoch_total, losses_mean_epoch))

    if use_tb:
        writer.add_scalar('loss/rel_loss', losses_mean_epoch_rel, epoch)
        writer.add_scalar('loss/class_loss', losses_mean_epoch_class, epoch)
        writer.add_scalar('loss/total', losses_mean_epoch_total, epoch)

    # Checkpoint after every epoch; the optimizer state is not saved.
    if conf.save_dir is not None:
        torch.save({
            'epoch': epoch,
            'state_dict': detector.state_dict(),
        }, os.path.join(conf.save_dir, '{}-{}.tar'.format('vgrel', epoch)))

    recall, recall_mp, mean_recall, mean_recall_mp = val_epoch()
    if use_tb:
        for key, value in recall.items():
            writer.add_scalar('eval_' + conf.mode + '_with_constraint/' + key, value, epoch)
        for key, value in recall_mp.items():
            writer.add_scalar('eval_' + conf.mode + '_without_constraint/' + key, value, epoch)
        for key, value in mean_recall.items():
            writer.add_scalar('eval_' + conf.mode + '_with_constraint/mean ' + key, value, epoch)
        for key, value in mean_recall_mp.items():
            writer.add_scalar('eval_' + conf.mode + '_without_constraint/mean ' + key, value, epoch)

        # These tags carry the epoch's mean *training* losses (tag names kept unchanged so
        # existing TensorBoard runs stay comparable). Logging is best-effort, but only
        # ordinary errors are swallowed: a bare except would also trap KeyboardInterrupt
        # and SystemExit and keep training after the user tried to stop it.
        try:
            writer.add_scalar('eval_' + conf.mode + 'loss_class', losses_mean_epoch_class, epoch)
            writer.add_scalar('eval_' + conf.mode + 'loss_rel', losses_mean_epoch_rel, epoch)
            writer.add_scalar('eval_' + conf.mode + 'loss_total', losses_mean_epoch_total, epoch)
        except Exception:
            warn('Cannot add loss to writer')

    # Switch the training dataset's cache on once the first epoch has completed.
    if epoch == 0:
        train_loader.dataset.set_use_cache(True)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
epoch=0


  0%|                                       | 1/6755 [00:00<1:30:15,  1.25it/s]

---Total norm 7.256 clip coef 0.689-----------------
ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.2.linear.weight: 3.247, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.2.linear.weight: 3.123, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.0.linear.weight: 3.111, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.0.linear.weight: 2.323, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_eq5_w_ont_pred.weight     : 1.781, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_eq5_w_img_pred.weight     : 1.347, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.2.linear.bias: 1.322, (torch.Size([1024]))
ggnn_rel_reason.ggnn.fc_eq5_u_ont_pred.weight     : 1.268, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.0.linear.bias: 1.161, (torch.Size([1024]))
ggnn_rel_reason.ggnn.fc_eq5_w_ont_pred.bias       : 1.031

 15%|█████▋                                | 1002/6755 [02:56<16:54,  5.67it/s]


e 0b 1000/ 6755 0.177s/batch, 19.9m/epoch
loss_class    0.000000
loss_rel      0.102519
loss_total    0.102519
-----------


 30%|███████████▎                          | 2002/6755 [05:51<14:16,  5.55it/s]


e 0b 2000/ 6755 0.174s/batch, 19.6m/epoch
loss_class    0.000000
loss_rel      0.083669
loss_total    0.083669
-----------


 44%|████████████████▉                     | 3002/6755 [08:45<10:33,  5.92it/s]


e 0b 3000/ 6755 0.175s/batch, 19.7m/epoch
loss_class    0.000000
loss_rel      0.080781
loss_total    0.080781
-----------


 59%|██████████████████████▌               | 4002/6755 [11:40<08:09,  5.62it/s]


e 0b 4000/ 6755 0.174s/batch, 19.6m/epoch
loss_class    0.000000
loss_rel      0.077882
loss_total    0.077882
-----------


 74%|████████████████████████████▏         | 5002/6755 [14:32<04:48,  6.08it/s]


e 0b 5000/ 6755 0.172s/batch, 19.4m/epoch
loss_class    0.000000
loss_rel      0.077006
loss_total    0.077006
-----------


 80%|██████████████████████████████▏       | 5373/6755 [15:33<03:38,  6.33it/s]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 131072.0


 89%|█████████████████████████████████▊    | 6002/6755 [17:16<02:01,  6.20it/s]


e 0b 6000/ 6755 0.164s/batch, 18.5m/epoch
loss_class    0.000000
loss_rel      0.076003
loss_total    0.076003
-----------


100%|██████████████████████████████████████| 6755/6755 [19:20<00:00,  5.82it/s]


overall 0: (0.083)
loss_class    0.000000
loss_rel      0.082767
loss_total    0.082767
dtype: float64


100%|██████████████████████████████████████| 5000/5000 [03:24<00:00, 24.51it/s]

R@20: 0.342099
R@50: 0.401877
R@100: 0.415798
R@20: 0.486882
R@50: 0.682797
R@100: 0.798635


mR@20:  0.2863187368554035
mR@50:  0.33131011335257154
mR@100:  0.34514792088112095


mR@20:  0.34885342444021317
mR@50:  0.5188120486796873
mR@100:  0.6200192716502937
epoch=1



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  0%|                                         | 1/6755 [00:00<35:36,  3.16it/s]

---Total norm 0.287 clip coef 17.432-----------------
roi_fmap.1.0.weight                               : 0.168, (torch.Size([4096, 25088]))
roi_fmap_obj.0.weight                             : 0.111, (torch.Size([4096, 25088]))
roi_fmap_obj.3.weight                             : 0.096, (torch.Size([4096, 4096]))
ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.0.linear.weight: 0.067, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_img_pred_clean.model.2.linear.weight: 0.061, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.2.linear.weight: 0.060, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_output_proj_ont_pred_clean.model.0.linear.weight: 0.059, (torch.Size([1024, 1024]))
roi_fmap.1.3.weight                               : 0.053, (torch.Size([4096, 4096]))
ggnn_rel_reason.ggnn.fc_eq5_w_img_pred.weight     : 0.048, (torch.Size([1024, 1024]))
ggnn_rel_reason.ggnn.fc_mp_receive_img_pred.model.0.linear.weight: 0.041, (torch.S

 15%|█████▋                                | 1002/6755 [02:44<16:02,  5.98it/s]


e 1b 1000/ 6755 0.164s/batch, 18.5m/epoch
loss_class    0.000000
loss_rel      0.072656
loss_total    0.072656
-----------


 24%|█████████▏                            | 1643/6755 [04:29<13:30,  6.31it/s]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 131072.0


 30%|███████████▎                          | 2002/6755 [05:28<12:52,  6.15it/s]


e 1b 2000/ 6755 0.164s/batch, 18.4m/epoch
loss_class    0.000000
loss_rel      0.073051
loss_total    0.073051
-----------


 39%|██████████████▋                       | 2613/6755 [07:09<11:20,  6.09it/s]


KeyboardInterrupt: 

In [None]:
import os
# Show the working directory again (this repeats an earlier cell; never executed in this run).
os.getcwd()