In [1]:
import os
import sys
# Put the sibling Video-Swin-Transformer checkout on the import path so that
# modules inside it can be imported by later cells. The membership check keeps
# sys.path free of duplicate entries when this cell is run more than once in
# the same kernel.
swin_repo_dir = os.path.abspath("../Video-Swin-Transformer")
if swin_repo_dir not in sys.path:
    sys.path.append(swin_repo_dir)

In [2]:
# Change the working directory to the Video-Swin-Transformer checkout; the
# relative paths used later in this notebook (e.g. '../configs/bsl_config.py')
# are resolved from there.
os.chdir("../Video-Swin-Transformer")

In [3]:
import argparse
import copy
import os.path as osp
import time
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash

from mmaction import __version__
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.utils import collect_env, get_root_logger, register_module_hooks

In [4]:
# Install the Weights & Biases client. `%pip` installs into the environment of
# the running kernel, whereas a bare `!pip3` uses whichever pip3 is first on
# the shell PATH.
%pip install wandb -qqq

In [5]:
import wandb
# Log in to your Weights & Biases (W&B) account; the training run started by
# wandb.init() further down is recorded under this account.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33maswin_thiru[0m (use `wandb login --relogin` to force relogin)


True

In [6]:
# Name of the W&B project; passed to wandb.init() further down.
wandb_project_name = 'bsl'

In [7]:
# TODO import test functions from mmcv and delete them from mmaction2
# Prefer the test loops exported by mmcv.engine. When that import is not
# available, emit a warning and fall back to the copies in mmaction.apis.
try:
    from mmcv.engine import multi_gpu_test, single_gpu_test
except ImportError:  # ModuleNotFoundError is a subclass, so it is covered too
    warnings.warn('DeprecationWarning: single_gpu_test, multi_gpu_test, '
                  'collect_results_cpu, collect_results_gpu from mmaction2 will be '
                  'deprecated. Please install mmcv through master branch.')
    from mmaction.apis import multi_gpu_test, single_gpu_test

In [8]:
def parse_args(parse_options=None):
    """Build the training argument parser and parse ``parse_options``.

    Args:
        parse_options (list[str] | None): Argument strings to parse, for
            example ``[config_path, '--validate', '--seed', '12345']``.
            When None, argparse reads the process command line instead.

    Returns:
        argparse.Namespace: The parsed arguments. ``--cfg-options`` values
        are collected by ``DictAction`` and default to an empty dict.

    Side effects:
        Sets ``os.environ['LOCAL_RANK']`` to ``str(args.local_rank)`` when
        that variable is not already present in the environment.
    """
    parser = argparse.ArgumentParser(description='Train a recognizer')
    add_arg = parser.add_argument

    # Positional config path plus output / checkpoint locations
    add_arg('config', help='train config file path')
    add_arg('--work-dir', help='the dir to save logs and models')
    add_arg('--resume-from', help='the checkpoint file to resume from')
    add_arg('--load-from', help='the checkpoint file to load from')

    # Boolean switches for validation and post-training testing
    store_true_flags = (
        ('--validate',
         'whether to evaluate the checkpoint during training'),
        ('--test-last',
         'whether to test the checkpoint after training'),
        ('--test-best',
         ('whether to test the best checkpoint (if applicable) after '
          'training')),
    )
    for flag, flag_help in store_true_flags:
        add_arg(flag, action='store_true', help=flag_help)

    # --gpus and --gpu-ids are mutually exclusive
    gpu_group = parser.add_mutually_exclusive_group()
    gpu_group.add_argument(
        '--gpus', type=int,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    gpu_group.add_argument(
        '--gpu-ids', type=int, nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')

    # Reproducibility options
    add_arg('--seed', type=int, default=None, help='random seed')
    add_arg('--deterministic', action='store_true',
            help='whether to set deterministic options for CUDNN backend.')

    # Free-form key=value overrides merged into the config by the caller
    add_arg('--cfg-options', nargs='+', action=DictAction, default={},
            help='override some settings in the used config, the key-value pair '
            'in xxx=yyy format will be merged into config file. For example, '
            "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")

    # Launcher-related options
    add_arg('--launcher', choices=['none', 'pytorch', 'slurm', 'mpi'],
            default='none', help='job launcher')
    add_arg('--local_rank', type=int, default=0)

    # argparse treats args=None as "use sys.argv[1:]", so a single call covers
    # both the explicit-list case and the command-line case.
    args = parser.parse_args(parse_options)

    # Publish the local rank through the environment unless it is already set
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    return args

In [9]:
# Set up the configuration file and the checkpoint file
config_file = '../configs/bsl_config.py'
check_point_file = '../configs/swin_tiny_patch244_window877_kinetics400_1k.pth'

# Argument list handed to parse_args() in place of a real command line.
# Commented-out alternatives from the original cell, kept for reference:
#   - the same list without "--validate"
#   - an extra cfg-option "model.backbone.pretrained=" + check_point_file
cmd_options = [
    config_file,
    "--cfg-options", "model.backbone.use_checkpoint=True",
    "--load-from", check_point_file,
    "--validate",
    "--seed", "12345",
]

In [10]:
# Single-process training: selects the non-distributed branches in later cells
# (MMDataParallel, EvalHook, and no DistSamplerSeedHook).
distributed = False

In [11]:
# Create a configuration object that describes the training and testing
args = parse_args(cmd_options)
cfg = Config.fromfile(args.config)
cfg.merge_from_dict(args.cfg_options)

# Customization for training the BSL data set
# https://mmcv.readthedocs.io/en/latest/_modules/mmcv/runner/epoch_based_runner.html
# Two workflow stages: a training pass followed by a validation pass.
cfg.workflow = [('train', 1), ('val', 1)]
# cfg.workflow = [('train', 1), ]
cfg.model.cls_head.num_classes = 5

# Checkpoint to resume training from / checkpoint to load weights from
cfg.resume_from = args.resume_from
cfg.load_from = args.load_from

# One GPU
cfg.gpu_ids = range(1)

# The flag is used to determine whether it is omnisource training
# Omnisource reference: https://arxiv.org/abs/2003.13042
cfg.setdefault('omnisource', False)

# The flag is used to register module's hooks
cfg.setdefault('module_hooks', [])

# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

# init logger before other steps
# `timestamp` is reused later so the runner's log files share this name
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
            dash_line)
meta['env_info'] = env_info

# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config: {cfg.pretty_text}')

# Set seed for training.
# --seed defaults to None (see parse_args), so only seed when a value was
# actually supplied; this mirrors the guard in mmaction2's tools/train.py and
# avoids handing None to set_random_seed when "--seed" is dropped from
# cmd_options.
if args.seed is not None:
    logger.info(f'Set random seed to {args.seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(args.seed, deterministic=args.deterministic)

# Record the seed (possibly None) and run identifiers for logging/checkpoints
cfg.seed = args.seed
meta['seed'] = args.seed
meta['config_name'] = osp.basename(args.config)
meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

../Video-Swin-Transformer/configs/_base_/models/swin/swin_tiny.py
../Video-Swin-Transformer/configs/_base_/default_runtime.py


2022-03-23 22:44:10,796 - mmaction - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:59:51) [GCC 9.4.0]
CUDA available: True
GPU 0: Tesla T4
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.6.r11.6/compiler.30794723_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.11.0a0+17540c5
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.3.3 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode

In [12]:
# Create the training dataset
datasets = [build_dataset(cfg.data.train)]

# The runner is given one data loader per workflow stage, so a validation
# dataset is only added when the workflow has a second ('val') stage. Keying
# this off cfg.workflow (instead of a hard-coded `if 1:`) keeps `datasets` and
# the workflow the same length if the train-only workflow is selected.
# The periodic evaluation hook does not use this entry; it gets its own
# dataset, built in a later cell with test_mode=True.
val_dataset = copy.deepcopy(cfg.data.val)
if len(cfg.workflow) == 2:
    # Create the validation dataset for the ('val', 1) workflow stage
    datasets.append(build_dataset(val_dataset))

In [13]:
# NOTE(review): at this point `val_dataset` is the deep-copied config
# (cfg.data.val) assigned in the previous cell, not a built dataset, so this
# loop only binds `batch` to the first item produced by iterating that config
# object (presumably its first key - confirm). Looks like leftover debugging.
for batch in val_dataset:
    break

In [14]:
# Which checkpoint to test after training (last and/or best), taken from the
# --test-last / --test-best flags.
test_option = {
    'test_last': args.test_last,
    'test_best': args.test_best,
}

In [15]:
# Build the model from cfg.model, passing along the optional train/test
# settings (None when the config does not define them).
model = build_model(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'),
)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


# Train the model

In [16]:
import apex
from mmaction.core import DistEvalHook, EvalHook
from mmaction.datasets import build_dataloader, build_dataset
from mmcv_custom.runner import EpochBasedRunnerAmp
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, build_optimizer, get_dist_info

In [17]:
# Get the root logger
logger = get_root_logger(log_level=cfg.log_level)

# Loader settings for the workflow datasets. The per-GPU batch size is the
# configured videos_per_gpu integer-divided by the optimizer's update_interval
# (1 when unset). Keys in cfg.data.train_dataloader, if any, override these
# defaults.
dataloader_setting = dict(
    dict(
        videos_per_gpu=(cfg.data.get('videos_per_gpu', 1)
                        // cfg.optimizer_config.get('update_interval', 1)),
        workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed),
    **cfg.data.get('train_dataloader', {}))

# One data loader per entry in `datasets`
data_loaders = [
    build_dataloader(ds, **dataloader_setting)
    for ds in datasets
]

# Separate validation dataset (test_mode=True) and loader for the evaluation
# hooks registered later. Uses the full videos_per_gpu and no shuffling; keys
# in cfg.data.val_dataloader, if any, override these defaults.
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
dataloader_setting = dict(
    dict(
        videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
        workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
        # cfg.gpus will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        shuffle=False),
    **cfg.data.get('val_dataloader', {}))
val_dataloader = build_dataloader(val_dataset, **dataloader_setting)

In [18]:
# Build the optimizer from cfg.optimizer, then hand the CUDA model and the
# optimizer to apex for mixed precision at optimization level O1.
optimizer = build_optimizer(model, cfg.optimizer)
model, optimizer = apex.amp.initialize(
    model.cuda(),
    optimizer,
    opt_level="O1",
)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [19]:
# Switch on fp16 for every sub-module that exposes an `fp16_enabled` flag
for module in model.modules():
    if not hasattr(module, "fp16_enabled"):
        continue
    module.fp16_enabled = True

In [20]:
# Put the model on GPU's for training
if not distributed:
    # Single-process path: move the model to the first configured GPU and
    # wrap it for data-parallel execution over cfg.gpu_ids.
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]),
        device_ids=cfg.gpu_ids)
else:
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)

In [21]:
# Create the runner object that drives the training loop
Runner = EpochBasedRunnerAmp
runner = Runner(
    model,
    optimizer=optimizer,
    work_dir=cfg.work_dir,
    logger=logger,
    meta=meta)

# an ugly workaround to make .log and .log.json filenames the same
runner.timestamp = timestamp

# Optimizer hook settings, taken from the config
optimizer_config = cfg.optimizer_config

# register hooks (momentum_config is optional and passed as None when absent)
runner.register_training_hooks(
    cfg.lr_config,
    optimizer_config,
    cfg.checkpoint_config,
    cfg.log_config,
    cfg.get('momentum_config', None))

# Only needed for distributed runs
if distributed:
    runner.register_hook(DistSamplerSeedHook())

In [22]:
# Register the evaluation hook on the validation loader. Its keyword
# arguments come from cfg.evaluation (empty when the config has no such
# section); the hook class depends on whether training is distributed.
eval_cfg = cfg.get('evaluation', {})
if distributed:
    eval_hook = DistEvalHook
else:
    eval_hook = EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

In [23]:
# Restore state before training starts. Precedence:
#   1. an explicit checkpoint to resume from (cfg.resume_from),
#   2. auto-resume from <work_dir>/latest.pth when cfg.auto_resume is set and
#      that file exists,
#   3. plain checkpoint loading from cfg.load_from.
#
# `use_amp` has to be defined here: nothing earlier in the notebook assigns
# it, so the resume branch would otherwise fail with a NameError. The model
# and optimizer are passed through apex.amp.initialize() unconditionally in an
# earlier cell, hence AMP state is requested on resume.
use_amp = True
if cfg.resume_from:
    runner.resume(cfg.resume_from, resume_amp=use_amp)
elif cfg.get("auto_resume", False) and osp.exists(osp.join(runner.work_dir, 'latest.pth')):
    runner.auto_resume()
elif cfg.load_from:
    runner.load_checkpoint(cfg.load_from)

2022-03-23 22:44:13,310 - mmaction - INFO - load checkpoint from local path: ../configs/swin_tiny_patch244_window877_kinetics400_1k.pth

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 768]) from checkpoint, the shape in current model is torch.Size([5, 768]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([5]).


## Dashboarding using wandb

In [24]:
# Start a W&B run in the project named by `wandb_project_name` and pass the
# training config object along as the run's config.
wandb.init(project=wandb_project_name, config=cfg)

In [25]:
from mmcv.runner import Hook
from torch.utils.data import DataLoader
from mmaction.apis import single_gpu_test


class WandBHook(Hook):  # noqa: F811
    """Hook that reports the learning rate and evaluation metrics to W&B.

    Before every training epoch, the learning rate of the optimizer's first
    parameter group is logged under the key ``"lr"``. After every validation
    epoch, the model is run over ``dataloader`` with ``single_gpu_test`` and
    the result of the dataset's ``evaluate`` method is logged.

    Because it relies on ``single_gpu_test``, this hook is meant for
    non-distributed training.

    Args:
        dataloader (DataLoader): A PyTorch dataloader whose ``dataset``
            provides an ``evaluate(results, logger=..., **eval_kwargs)``
            method.
        wandb_obj: Object exposing ``log(dict)``, e.g. the ``wandb`` module.
        optimizer_obj: Optimizer whose ``param_groups[0]['lr']`` is reported.
        **eval_kwargs: Evaluation arguments fed into the evaluate function
            of the dataset.

    Raises:
        TypeError: If ``dataloader`` is not a PyTorch ``DataLoader``.
    """

    def __init__(self,
                 dataloader,
                 wandb_obj,
                 optimizer_obj,
                 **eval_kwargs):

        if not isinstance(dataloader, DataLoader):
            raise TypeError(f'dataloader must be a pytorch DataLoader, '
                            f'but got {type(dataloader)}')

        self.dataloader = dataloader
        # Keep the objects that were passed in instead of reaching for the
        # notebook-level `wandb` / `optimizer` globals, so the hook reports on
        # exactly what the caller handed over.
        self.wandb = wandb_obj
        self.optimizer = optimizer_obj
        self.eval_kwargs = eval_kwargs

    def before_train_epoch(self, runner):
        """Called before every train epoch to log the current learning rate."""
        self.wandb.log({"lr": self.optimizer.param_groups[0]['lr']})

    def after_val_epoch(self, runner):
        """Called after every validation epoch to evaluate the results."""
        self._do_evaluate(runner)

    def _do_evaluate(self, runner):
        """Run the model over the dataloader and log the evaluation result."""
        results = single_gpu_test(runner.model, self.dataloader)
        eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, **self.eval_kwargs)
        self.wandb.log(eval_res)

In [26]:
# Attach the W&B hook, handing it the validation loader, the wandb module and
# the optimizer.
runner.register_hook(WandBHook(val_dataloader, wandb, optimizer))

In [27]:
# Start training: run cfg.workflow over the data loaders up to
# cfg.total_epochs. No extra keyword arguments are passed to the runner.
runner_kwargs = {}
runner.run(
    data_loaders,
    cfg.workflow,
    cfg.total_epochs,
    **runner_kwargs)

2022-03-23 22:44:15,147 - mmaction - INFO - Start running, host: root@ip-10-0-0-32, work_dir: /workspace/Video-Swin-Transformer/work_dirs/k400_swin_tiny_patch244_window877.py
2022-03-23 22:44:15,148 - mmaction - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(ABOVE_NORMAL) DistOptimizerHook                  
(NORMAL      ) CheckpointHook                     
(NORMAL      ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(NORMAL      ) EvalHook                           
(NORMAL      ) WandBHook                          
(LOW         ) IterTimerHook                      
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_iter:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(NORMAL      ) EvalHook                           
(LOW         ) IterTimerH

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 36s, ETA:     0s

2022-03-23 22:46:24,816 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:46:24,818 - mmaction - INFO - 
top1_acc	0.3390
top5_acc	1.0000
2022-03-23 22:46:24,820 - mmaction - INFO - Epoch(val) [1][30]	top1_acc: 0.3390, top5_acc: 1.0000, loss_cls: 1.5369, loss: 1.5369
2022-03-23 22:46:41,535 - mmaction - INFO - Epoch [2][20/109]	lr: 5.213e-05, eta: 0:35:36, time: 0.836, data_time: 0.174, memory: 2832, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.4515, loss: 1.4515


Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


2022-03-23 22:46:54,721 - mmaction - INFO - Epoch [2][40/109]	lr: 5.872e-05, eta: 0:35:14, time: 0.659, data_time: 0.001, memory: 2833, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.3718, loss: 1.3718
2022-03-23 22:47:07,965 - mmaction - INFO - Epoch [2][60/109]	lr: 6.531e-05, eta: 0:34:54, time: 0.662, data_time: 0.001, memory: 2833, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.3100, loss: 1.3100
2022-03-23 22:47:21,255 - mmaction - INFO - Epoch [2][80/109]	lr: 7.189e-05, eta: 0:34:37, time: 0.664, data_time: 0.001, memory: 2833, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.2520, loss: 1.2520
2022-03-23 22:47:34,569 - mmaction - INFO - Epoch [2][100/109]	lr: 7.848e-05, eta: 0:34:21, time: 0.666, data_time: 0.001, memory: 2833, top1_acc: 0.4500, top5_acc: 1.0000, loss_cls: 1.2875, loss: 1.2875
2022-03-23 22:47:40,322 - mmaction - INFO - Saving checkpoint at 2 epochs


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:48:35,115 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:48:35,116 - mmaction - INFO - 
top1_acc	0.6949
top5_acc	1.0000
2022-03-23 22:48:35,118 - mmaction - INFO - Epoch(val) [2][30]	top1_acc: 0.6949, top5_acc: 1.0000, loss_cls: 0.8413, loss: 0.8413
2022-03-23 22:48:52,277 - mmaction - INFO - Epoch [3][20/109]	lr: 8.731e-05, eta: 0:33:31, time: 0.858, data_time: 0.193, memory: 2833, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.8277, loss: 0.8277
2022-03-23 22:49:05,539 - mmaction - INFO - Epoch [3][40/109]	lr: 9.384e-05, eta: 0:33:18, time: 0.663, data_time: 0.001, memory: 2835, top1_acc: 0.6250, top5_acc: 1.0000, loss_cls: 1.0233, loss: 1.0233
2022-03-23 22:49:18,825 - mmaction - INFO - Epoch [3][60/109]	lr: 9.874e-05, eta: 0:33:05, time: 0.664, data_time: 0.001, memory: 2835, top1_acc: 0.6750, top5_acc: 1.0000, loss_cls: 0.8775, loss: 0.8775
2022-03-23 22:49:32,121 - mmaction - INFO - Epoch [3][80/109]	lr: 9.874e-05, eta: 0:32:52, time: 0.665, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:50:44,624 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:50:44,625 - mmaction - INFO - 
top1_acc	0.5424
top5_acc	1.0000
2022-03-23 22:50:44,627 - mmaction - INFO - Epoch(val) [3][30]	top1_acc: 0.5424, top5_acc: 1.0000, loss_cls: 0.9605, loss: 0.9605
2022-03-23 22:51:02,111 - mmaction - INFO - Epoch [4][20/109]	lr: 9.755e-05, eta: 0:32:05, time: 0.874, data_time: 0.211, memory: 2835, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.0704, loss: 1.0704
2022-03-23 22:51:15,372 - mmaction - INFO - Epoch [4][40/109]	lr: 9.755e-05, eta: 0:31:52, time: 0.663, data_time: 0.000, memory: 2835, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.9004, loss: 0.9004
2022-03-23 22:51:28,662 - mmaction - INFO - Epoch [4][60/109]	lr: 9.755e-05, eta: 0:31:40, time: 0.664, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.6199, loss: 0.6199
2022-03-23 22:51:41,971 - mmaction - INFO - Epoch [4][80/109]	lr: 9.755e-05, eta: 0:31:28, time: 0.665, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:52:53,914 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:52:53,915 - mmaction - INFO - 
top1_acc	0.7627
top5_acc	1.0000
2022-03-23 22:52:53,917 - mmaction - INFO - Epoch(val) [4][30]	top1_acc: 0.7627, top5_acc: 1.0000, loss_cls: 0.4756, loss: 0.4756
2022-03-23 22:53:11,701 - mmaction - INFO - Epoch [5][20/109]	lr: 9.568e-05, eta: 0:30:48, time: 0.889, data_time: 0.225, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.6702, loss: 0.6702
2022-03-23 22:53:24,977 - mmaction - INFO - Epoch [5][40/109]	lr: 9.568e-05, eta: 0:30:36, time: 0.664, data_time: 0.001, memory: 2835, top1_acc: 0.6750, top5_acc: 1.0000, loss_cls: 0.9032, loss: 0.9032
2022-03-23 22:53:38,273 - mmaction - INFO - Epoch [5][60/109]	lr: 9.568e-05, eta: 0:30:24, time: 0.665, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.3957, loss: 0.3957
2022-03-23 22:53:51,594 - mmaction - INFO - Epoch [5][80/109]	lr: 9.568e-05, eta: 0:30:11, time: 0.666, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:54:49,059 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:54:49,061 - mmaction - INFO - 
top1_acc	0.8305
top5_acc	1.0000
2022-03-23 22:54:49,061 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-03-23 22:54:49,062 - mmaction - INFO - 
mean_acc	0.8462
2022-03-23 22:54:50,079 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2022-03-23 22:54:50,080 - mmaction - INFO - Best top1_acc is 0.8305 at 5 epoch.
2022-03-23 22:54:50,080 - mmaction - INFO - Epoch(val) [5][59]	top1_acc: 0.8305, top5_acc: 1.0000, mean_class_accuracy: 0.8462


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:55:41,899 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:55:41,900 - mmaction - INFO - 
top1_acc	0.8305
top5_acc	1.0000
2022-03-23 22:55:41,903 - mmaction - INFO - Epoch(val) [5][30]	top1_acc: 0.8305, top5_acc: 1.0000, loss_cls: 0.4029, loss: 0.4029
2022-03-23 22:55:58,564 - mmaction - INFO - Epoch [6][20/109]	lr: 9.330e-05, eta: 0:29:28, time: 0.833, data_time: 0.170, memory: 2835, top1_acc: 0.7250, top5_acc: 1.0000, loss_cls: 0.7197, loss: 0.7197
2022-03-23 22:56:11,798 - mmaction - INFO - Epoch [6][40/109]	lr: 9.330e-05, eta: 0:29:16, time: 0.662, data_time: 0.000, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.5926, loss: 0.5926
2022-03-23 22:56:25,073 - mmaction - INFO - Epoch [6][60/109]	lr: 9.330e-05, eta: 0:29:04, time: 0.664, data_time: 0.000, memory: 2835, top1_acc: 0.7750, top5_acc: 1.0000, loss_cls: 0.5243, loss: 0.5243
2022-03-23 22:56:38,374 - mmaction - INFO - Epoch [6][80/109]	lr: 9.330e-05, eta: 0:28:52, time: 0.665, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 22:57:50,300 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 22:57:50,301 - mmaction - INFO - 
top1_acc	0.8136
top5_acc	1.0000
2022-03-23 22:57:50,303 - mmaction - INFO - Epoch(val) [6][30]	top1_acc: 0.8136, top5_acc: 1.0000, loss_cls: 0.4516, loss: 0.4516
2022-03-23 22:58:07,628 - mmaction - INFO - Epoch [7][20/109]	lr: 9.045e-05, eta: 0:28:14, time: 0.866, data_time: 0.203, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.4370, loss: 0.4370
2022-03-23 22:58:20,905 - mmaction - INFO - Epoch [7][40/109]	lr: 9.045e-05, eta: 0:28:02, time: 0.664, data_time: 0.001, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2992, loss: 0.2992
2022-03-23 22:58:34,196 - mmaction - INFO - Epoch [7][60/109]	lr: 9.045e-05, eta: 0:27:49, time: 0.665, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3754, loss: 0.3754
2022-03-23 22:58:47,497 - mmaction - INFO - Epoch [7][80/109]	lr: 9.045e-05, eta: 0:27:37, time: 0.665, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 38s, ETA:     0s

2022-03-23 23:00:00,707 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:00:00,708 - mmaction - INFO - 
top1_acc	0.8983
top5_acc	1.0000
2022-03-23 23:00:00,711 - mmaction - INFO - Epoch(val) [7][30]	top1_acc: 0.8983, top5_acc: 1.0000, loss_cls: 0.3461, loss: 0.3461
2022-03-23 23:00:18,143 - mmaction - INFO - Epoch [8][20/109]	lr: 8.716e-05, eta: 0:27:01, time: 0.871, data_time: 0.206, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4950, loss: 0.4950
2022-03-23 23:00:31,414 - mmaction - INFO - Epoch [8][40/109]	lr: 8.716e-05, eta: 0:26:49, time: 0.664, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.4405, loss: 0.4405
2022-03-23 23:00:44,718 - mmaction - INFO - Epoch [8][60/109]	lr: 8.716e-05, eta: 0:26:36, time: 0.665, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3251, loss: 0.3251
2022-03-23 23:00:58,028 - mmaction - INFO - Epoch [8][80/109]	lr: 8.716e-05, eta: 0:26:24, time: 0.665, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 23:02:12,102 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:02:12,103 - mmaction - INFO - 
top1_acc	0.8136
top5_acc	1.0000
2022-03-23 23:02:12,105 - mmaction - INFO - Epoch(val) [8][30]	top1_acc: 0.8136, top5_acc: 1.0000, loss_cls: 0.5509, loss: 0.5509
2022-03-23 23:02:28,822 - mmaction - INFO - Epoch [9][20/109]	lr: 8.346e-05, eta: 0:25:47, time: 0.836, data_time: 0.172, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3813, loss: 0.3813
2022-03-23 23:02:42,080 - mmaction - INFO - Epoch [9][40/109]	lr: 8.346e-05, eta: 0:25:35, time: 0.663, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3377, loss: 0.3377
2022-03-23 23:02:55,379 - mmaction - INFO - Epoch [9][60/109]	lr: 8.346e-05, eta: 0:25:22, time: 0.665, data_time: 0.000, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.6669, loss: 0.6669
2022-03-23 23:03:08,695 - mmaction - INFO - Epoch [9][80/109]	lr: 8.346e-05, eta: 0:25:10, time: 0.666, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 23:04:22,058 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:04:22,059 - mmaction - INFO - 
top1_acc	0.7797
top5_acc	1.0000
2022-03-23 23:04:22,061 - mmaction - INFO - Epoch(val) [9][30]	top1_acc: 0.7797, top5_acc: 1.0000, loss_cls: 0.6307, loss: 0.6307


Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


2022-03-23 23:04:38,796 - mmaction - INFO - Epoch [10][20/109]	lr: 7.939e-05, eta: 0:24:34, time: 0.837, data_time: 0.174, memory: 2835, top1_acc: 0.7750, top5_acc: 1.0000, loss_cls: 0.5887, loss: 0.5887
2022-03-23 23:04:52,058 - mmaction - INFO - Epoch [10][40/109]	lr: 7.939e-05, eta: 0:24:21, time: 0.663, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2796, loss: 0.2796
2022-03-23 23:05:05,339 - mmaction - INFO - Epoch [10][60/109]	lr: 7.939e-05, eta: 0:24:09, time: 0.664, data_time: 0.000, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.3674, loss: 0.3674
2022-03-23 23:05:18,647 - mmaction - INFO - Epoch [10][80/109]	lr: 7.939e-05, eta: 0:23:57, time: 0.665, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.4119, loss: 0.4119
2022-03-23 23:05:31,971 - mmaction - INFO - Epoch [10][100/109]	lr: 7.939e-05, eta: 0:23:44, time: 0.666, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 23:06:15,751 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:06:15,752 - mmaction - INFO - 
top1_acc	0.8136
top5_acc	1.0000
2022-03-23 23:06:15,753 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-03-23 23:06:15,754 - mmaction - INFO - 
mean_acc	0.8244
2022-03-23 23:06:15,755 - mmaction - INFO - Epoch(val) [10][59]	top1_acc: 0.8136, top5_acc: 1.0000, mean_class_accuracy: 0.8244


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 37s, ETA:     0s

2022-03-23 23:07:07,418 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:07:07,419 - mmaction - INFO - 
top1_acc	0.8136
top5_acc	1.0000
2022-03-23 23:07:07,421 - mmaction - INFO - Epoch(val) [10][30]	top1_acc: 0.8136, top5_acc: 1.0000, loss_cls: 0.5244, loss: 0.5244
2022-03-23 23:07:23,562 - mmaction - INFO - Epoch [11][20/109]	lr: 7.500e-05, eta: 0:23:20, time: 0.807, data_time: 0.143, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.4174, loss: 0.4174
2022-03-23 23:07:36,796 - mmaction - INFO - Epoch [11][40/109]	lr: 7.500e-05, eta: 0:23:08, time: 0.662, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2417, loss: 0.2417
2022-03-23 23:07:50,051 - mmaction - INFO - Epoch [11][60/109]	lr: 7.500e-05, eta: 0:22:55, time: 0.663, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.3538, loss: 0.3538
2022-03-23 23:08:03,318 - mmaction - INFO - Epoch [11][80/109]	lr: 7.500e-05, eta: 0:22:43, time: 0.663, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.6 task/s, elapsed: 36s, ETA:     0s

2022-03-23 23:09:15,595 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:09:15,596 - mmaction - INFO - 
top1_acc	0.7627
top5_acc	1.0000
2022-03-23 23:09:15,598 - mmaction - INFO - Epoch(val) [11][30]	top1_acc: 0.7627, top5_acc: 1.0000, loss_cls: 0.7005, loss: 0.7005
2022-03-23 23:09:32,601 - mmaction - INFO - Epoch [12][20/109]	lr: 7.034e-05, eta: 0:22:08, time: 0.850, data_time: 0.186, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4679, loss: 0.4679
2022-03-23 23:09:45,884 - mmaction - INFO - Epoch [12][40/109]	lr: 7.034e-05, eta: 0:21:56, time: 0.664, data_time: 0.000, memory: 2835, top1_acc: 0.9500, top5_acc: 1.0000, loss_cls: 0.2493, loss: 0.2493
2022-03-23 23:09:59,231 - mmaction - INFO - Epoch [12][60/109]	lr: 7.034e-05, eta: 0:21:44, time: 0.667, data_time: 0.000, memory: 2835, top1_acc: 0.7250, top5_acc: 1.0000, loss_cls: 0.4796, loss: 0.4796
2022-03-23 23:10:12,622 - mmaction - INFO - Epoch [12][80/109]	lr: 7.034e-05, eta: 0:21:31, time: 0.670, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.7 task/s, elapsed: 34s, ETA:     0s

2022-03-23 23:11:21,451 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-23 23:11:21,452 - mmaction - INFO - 
top1_acc	0.7797
top5_acc	1.0000
2022-03-23 23:11:21,454 - mmaction - INFO - Epoch(val) [12][30]	top1_acc: 0.7797, top5_acc: 1.0000, loss_cls: 0.5825, loss: 0.5825
2022-03-23 23:11:38,229 - mmaction - INFO - Epoch [13][20/109]	lr: 6.545e-05, eta: 0:20:57, time: 0.838, data_time: 0.161, memory: 2835, top1_acc: 0.7750, top5_acc: 1.0000, loss_cls: 0.4970, loss: 0.4970
2022-03-23 23:11:51,777 - mmaction - INFO - Epoch [13][40/109]	lr: 6.545e-05, eta: 0:20:45, time: 0.677, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.3825, loss: 0.3825
2022-03-23 23:12:05,406 - mmaction - INFO - Epoch [13][60/109]	lr: 6.545e-05, eta: 0:20:33, time: 0.681, data_time: 0.000, memory: 2835, top1_acc: 0.9750, top5_acc: 1.0000, loss_cls: 0.1108, loss: 0.1108
2022-03-23 23:12:19,111 - mmaction - INFO - Epoch [13][80/109]	lr: 6.545e-05, eta: 0:20:21, time: 0.685, d

[>>>>>                           ] 11/59, 1.0 task/s, elapsed: 11s, ETA:    50s

KeyboardInterrupt: 

In [None]:
# Close the W&B run that was started by wandb.init() earlier in the notebook
wandb.finish()

## Unused

In [None]:
# # init distributed env first, since logger depends on the dist info.
# if args.launcher == 'none':
#     distributed = False
# else:
#     distributed = True
#     init_dist(args.launcher, **cfg.dist_params)
#     _, world_size = get_dist_info()
#     cfg.gpu_ids = range(world_size)


