In [1]:
import os
import sys
sys.path.append(os.path.abspath("../Video-Swin-Transformer"))

In [2]:
# Change the working directory to the Video-Swin-Transformer checkout, which the code expects to run from
os.chdir("../Video-Swin-Transformer")

In [3]:
import argparse
import copy
import os.path as osp
import time
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash

from mmaction import __version__
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.utils import collect_env, get_root_logger, register_module_hooks

In [4]:
!pip3 install wandb -qqq

In [5]:
import wandb
# Log in to your W&B account
wandb.login()

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:
wandb_project_name = 'lamba_bsl'

In [7]:
# TODO import test functions from mmcv and delete them from mmaction2
try:
    from mmcv.engine import multi_gpu_test, single_gpu_test
except (ImportError, ModuleNotFoundError):
    warnings.warn(
        'DeprecationWarning: single_gpu_test, multi_gpu_test, '
        'collect_results_cpu, collect_results_gpu from mmaction2 will be '
        'deprecated. Please install mmcv through master branch.')
    from mmaction.apis import multi_gpu_test, single_gpu_test

In [8]:
def parse_args(parse_options=None):
    """Build the training argument parser and parse ``parse_options``.

    Args:
        parse_options (list[str] | None): Argument strings to parse. When
            ``None``, argparse reads the process command line instead.

    Returns:
        argparse.Namespace: The parsed arguments.

    Side effects:
        Sets ``os.environ['LOCAL_RANK']`` to ``--local_rank`` when that
        variable is not already present in the environment.
    """
    parser = argparse.ArgumentParser(description='Train a recognizer')

    # Positional config path, then the plain string-valued path options.
    parser.add_argument('config', help='train config file path')
    for flag, description in (
            ('--work-dir', 'the dir to save logs and models'),
            ('--resume-from', 'the checkpoint file to resume from'),
            ('--load-from', 'the checkpoint file to load from')):
        parser.add_argument(flag, help=description)

    # Boolean switches controlling evaluation during/after training.
    for flag, description in (
            ('--validate',
             'whether to evaluate the checkpoint during training'),
            ('--test-last',
             'whether to test the checkpoint after training'),
            ('--test-best',
             'whether to test the best checkpoint (if applicable) after '
             'training')):
        parser.add_argument(flag, action='store_true', help=description)

    # --gpus and --gpu-ids cannot be given together.
    gpu_scope = '(only applicable to non-distributed training)'
    gpu_selection = parser.add_mutually_exclusive_group()
    gpu_selection.add_argument(
        '--gpus', type=int, help='number of gpus to use ' + gpu_scope)
    gpu_selection.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use ' + gpu_scope)

    # Reproducibility options.
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')

    # key=value overrides that are merged into the loaded config.
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help=cfg_options_help)

    # Launcher / distributed options.
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)

    # argparse treats ``None`` as "use sys.argv[1:]", so a single call
    # covers both the explicit-list and the command-line case.
    args = parser.parse_args(parse_options)

    # Only fill in LOCAL_RANK when the environment has not set it already.
    os.environ.setdefault('LOCAL_RANK', str(args.local_rank))

    return args

In [9]:
# Set up the configuration file and checkpoint file paths
config_file = '../configs/bsl_config.py'
check_point_file = '../configs/swin_tiny_patch244_window877_kinetics400_1k.pth'
# , "model.backbone.pretrained="+check_point_file
# cmd_options = [config_file, "--cfg-options", "model.backbone.use_checkpoint=True", "--load-from", check_point_file,
#                "--seed", "12345"]
cmd_options = [config_file, "--cfg-options", "model.backbone.use_checkpoint=True", "--load-from", check_point_file,
              "--validate", "--seed", "12345"]

In [10]:
distributed = False

In [11]:
# Create a configuration object that describes the training and testing
args = parse_args(cmd_options)
cfg = Config.fromfile(args.config)
cfg.merge_from_dict(args.cfg_options)

# Customization for training the BSL data set
# https://mmcv.readthedocs.io/en/latest/_modules/mmcv/runner/epoch_based_runner.html
# Each epoch runs one training pass followed by one validation pass.
cfg.workflow = [('train', 1), ('val', 1)]
# cfg.workflow = [('train', 1), ]
# The classification head predicts 5 classes for this data set
cfg.model.cls_head.num_classes = 5

# Checkpoint to resume training from / to load weights from (either may be None)
cfg.resume_from = args.resume_from
cfg.load_from = args.load_from

# One GPU
cfg.gpu_ids = range(1)

# The flag is used to determine whether it is omnisource training
# Omnisource reference: https://arxiv.org/abs/2003.13042
cfg.setdefault('omnisource', False)

# The flag is used to register module's hooks
cfg.setdefault('module_hooks', [])

# Resolve the work directory: an explicit --work-dir wins, then the value from
# the config file, and finally a default derived from the config file name.
if args.work_dir is not None:
    cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
    cfg.work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])

# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

# init logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
            dash_line)
meta['env_info'] = env_info

# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config: {cfg.pretty_text}')

# Set seed for training. Skip seeding when no --seed was given, because the
# seeding helpers cannot be called with None.
if args.seed is not None:
    logger.info(f'Set random seed to {args.seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(args.seed, deterministic=args.deterministic)

cfg.seed = args.seed
meta['seed'] = args.seed
meta['config_name'] = osp.basename(args.config)
meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

../Video-Swin-Transformer/configs/_base_/models/swin/swin_tiny.py
../Video-Swin-Transformer/configs/_base_/default_runtime.py


2022-04-10 21:08:25,106 - mmaction - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:59:51) [GCC 9.4.0]
CUDA available: True
GPU 0,1: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.6.r11.6/compiler.30794723_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.11.0a0+17540c5
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.3.3 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,co

In [12]:
# Create the training dataset
datasets = [build_dataset(cfg.data.train)]

# The runner needs one dataset (and later one data loader) per workflow stage.
# Only add the validation dataset when the workflow has a second ('val') stage,
# so switching cfg.workflow to train-only does not leave an unmatched dataset.
if len(cfg.workflow) == 2:
    # Create the validation dataset from a copy of its config
    val_dataset = copy.deepcopy(cfg.data.val)
    datasets.append(build_dataset(val_dataset))

In [17]:
# for num, batch in enumerate(datasets[0]):
#     print(num, batch['imgs'].shape)

0 torch.Size([1, 3, 32, 224, 224])
1 torch.Size([1, 3, 32, 224, 224])
2 torch.Size([1, 3, 32, 224, 224])
3 torch.Size([1, 3, 32, 224, 224])
4 torch.Size([1, 3, 32, 224, 224])
5 torch.Size([1, 3, 32, 224, 224])
6 torch.Size([1, 3, 32, 224, 224])
7 torch.Size([1, 3, 32, 224, 224])
8 torch.Size([1, 3, 32, 224, 224])
9 torch.Size([1, 3, 32, 224, 224])
10 torch.Size([1, 3, 32, 224, 224])
11 torch.Size([1, 3, 32, 224, 224])
12 torch.Size([1, 3, 32, 224, 224])
13 torch.Size([1, 3, 32, 224, 224])
14 torch.Size([1, 3, 32, 224, 224])
15 torch.Size([1, 3, 32, 224, 224])
16 torch.Size([1, 3, 32, 224, 224])
17 torch.Size([1, 3, 32, 224, 224])
18 torch.Size([1, 3, 32, 224, 224])
19 torch.Size([1, 3, 32, 224, 224])
20 torch.Size([1, 3, 32, 224, 224])
21 torch.Size([1, 3, 32, 224, 224])
22 torch.Size([1, 3, 32, 224, 224])
23 torch.Size([1, 3, 32, 224, 224])
24 torch.Size([1, 3, 32, 224, 224])
25 torch.Size([1, 3, 32, 224, 224])
26 torch.Size([1, 3, 32, 224, 224])
27 torch.Size([1, 3, 32, 224, 224])
28

In [24]:
# Which model to test after training, best or last?
test_option = dict(test_last=args.test_last, test_best=args.test_best)

In [25]:
# Build the model from cfg.model, passing the train/test settings when the config defines them
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


# Train the model

In [26]:
import apex
from mmaction.core import DistEvalHook, EvalHook
from mmaction.datasets import build_dataloader, build_dataset
from mmcv_custom.runner import EpochBasedRunnerAmp
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, build_optimizer, get_dist_info

In [27]:
# Get the root logger
logger = get_root_logger(log_level=cfg.log_level)


# Default loader settings for the workflow datasets. The per-GPU batch size is
# divided by optimizer_config.update_interval (1 when that key is absent).
dataloader_setting = dict(
    videos_per_gpu=cfg.data.get('videos_per_gpu', 1) // cfg.optimizer_config.get('update_interval', 1),
    workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
    num_gpus=len(cfg.gpu_ids),
    dist=distributed,
    seed=cfg.seed)

# Apply any overrides from cfg.data.train_dataloader, then build one loader per
# entry in `datasets` (every entry shares these settings).
dataloader_setting = dict(dataloader_setting, **cfg.data.get('train_dataloader', {}))
data_loaders = [build_dataloader(ds, **dataloader_setting) for ds in datasets]

# Build a separate validation dataset in test mode, plus the settings for an
# unshuffled loader over it. Note that `val_dataset` and `dataloader_setting`
# are rebound here.
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
dataloader_setting = dict(
    videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
    workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
    # cfg.gpus will be ignored if distributed
    num_gpus=len(cfg.gpu_ids),
    dist=distributed,
    shuffle=False)

# Apply any overrides from cfg.data.val_dataloader and build the loader.
dataloader_setting = dict(dataloader_setting, **cfg.data.get('val_dataloader', {}))
val_dataloader = build_dataloader(val_dataset, **dataloader_setting)

In [28]:
# build optimizer
optimizer = build_optimizer(model, cfg.optimizer)
model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level="O1")

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [29]:
# Switch on the `fp16_enabled` flag of every submodule that exposes one.
for m in model.modules():
    if hasattr(m, "fp16_enabled"):
        m.fp16_enabled = True

In [30]:
# Put the model on GPU's for training
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

In [31]:
# Create the runner object that drives the train/val workflow
Runner = EpochBasedRunnerAmp
runner = Runner(model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta)

# an ugly workaround to make .log and .log.json filenames the same
runner.timestamp = timestamp

# Optimizer hook settings are taken straight from the config
optimizer_config = cfg.optimizer_config

# register hooks
runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None))

# The sampler seed hook is only registered for distributed runs
if distributed:
    runner.register_hook(DistSamplerSeedHook())

In [32]:
# 
# eval_cfg = cfg.get('evaluation', {})
# eval_hook = DistEvalHook if distributed else EvalHook
# runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

In [33]:
# Restore state before training. Priority: an explicit resume checkpoint, then
# auto-resume from work_dir/latest.pth (when enabled in the config), and
# finally loading weights only from cfg.load_from.
if cfg.resume_from:
    # apex AMP is always initialised earlier in this notebook, so ask the
    # runner to restore the AMP state from the checkpoint as well.
    runner.resume(cfg.resume_from, resume_amp=True)
elif cfg.get("auto_resume", False) and osp.exists(osp.join(runner.work_dir, 'latest.pth')):
    runner.auto_resume()
elif cfg.load_from:
    runner.load_checkpoint(cfg.load_from)

2022-04-10 21:14:07,696 - mmaction - INFO - load checkpoint from local path: ../configs/swin_tiny_patch244_window877_kinetics400_1k.pth

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 768]) from checkpoint, the shape in current model is torch.Size([5, 768]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([5]).


## Dashboarding using wandb

In [34]:
wandb.init(project=wandb_project_name, config=cfg)

[34m[1mwandb[0m: Currently logged in as: [33maswin_thiru[0m (use `wandb login --relogin` to force relogin)


In [35]:
from mmcv.runner import Hook
from torch.utils.data import DataLoader
from mmaction.apis import single_gpu_test


class WandBHook(Hook):  # noqa: F811
    """Non-distributed hook that logs learning rate and eval metrics.

    Before every training epoch the learning rate of the optimizer's first
    parameter group is logged. After every validation epoch the model is run
    over ``dataloader`` with ``single_gpu_test`` and the result of the
    dataset's ``evaluate`` method is logged.

    Args:
        dataloader (DataLoader): A PyTorch dataloader.
        wandb_obj: Object used for logging; its ``log(dict)`` method is
            called (e.g. the ``wandb`` module after ``wandb.init``).
        optimizer_obj: Optimizer whose ``param_groups[0]['lr']`` is logged.
        **eval_kwargs: Evaluation arguments fed into the evaluate function
            of the dataset.

    Raises:
        TypeError: If ``dataloader`` is not a ``torch.utils.data.DataLoader``.
    """

    def __init__(self,
                 dataloader,
                 wandb_obj,
                 optimizer_obj,
                 **eval_kwargs):

        if not isinstance(dataloader, DataLoader):
            raise TypeError(f'dataloader must be a pytorch DataLoader, '
                            f'but got {type(dataloader)}')

        self.dataloader = dataloader
        # Keep the injected objects instead of reaching for notebook globals,
        # so the hook works with whatever the caller passes in.
        self.wandb = wandb_obj
        self.optimizer = optimizer_obj
        self.eval_kwargs = eval_kwargs

    def before_train_epoch(self, runner):
        """Called before every train epoch to log the learning rate."""
        self.wandb.log({"lr": self.optimizer.param_groups[0]['lr']})

    def after_val_epoch(self, runner):
        """Called after every validation epoch to evaluate the results."""
        self._do_evaluate(runner)

    def _do_evaluate(self, runner):
        """Run the model over the dataloader and log the dataset's metrics."""
        results = single_gpu_test(runner.model, self.dataloader)
        eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, **self.eval_kwargs)
        self.wandb.log(eval_res)

In [36]:
runner.register_hook(WandBHook(val_dataloader, wandb, optimizer))

In [37]:
runner_kwargs = dict()
runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)

2022-04-10 21:14:19,344 - mmaction - INFO - Start running, host: root@lambda-dual, work_dir: /workspace/Video-Swin-Transformer/work_dirs/k400_swin_tiny_patch244_window877.py
2022-04-10 21:14:19,346 - mmaction - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(ABOVE_NORMAL) DistOptimizerHook                  
(NORMAL      ) CheckpointHook                     
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(NORMAL      ) WandBHook                          
(LOW         ) IterTimerHook                      
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_iter:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(LOW         ) IterTimerHook                      
 -------------------- 
after_train_iter:
(ABOVE_NORMAL) DistOptimizerHook                  
(NORMAL      ) CheckpointHook       

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:15:16,948 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:15:16,950 - mmaction - INFO - 
top1_acc	0.4000
top5_acc	1.0000
2022-04-10 21:15:16,952 - mmaction - INFO - Epoch(val) [1][28]	top1_acc: 0.4000, top5_acc: 1.0000, loss_cls: 1.5185, loss: 1.5185
2022-04-10 21:15:24,246 - mmaction - INFO - Epoch [2][20/111]	lr: 5.159e-05, eta: 0:06:50, time: 0.365, data_time: 0.157, memory: 2833, top1_acc: 0.4000, top5_acc: 1.0000, loss_cls: 1.5182, loss: 1.5182
2022-04-10 21:15:28,388 - mmaction - INFO - Epoch [2][40/111]	lr: 5.801e-05, eta: 0:06:33, time: 0.207, data_time: 0.001, memory: 2833, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.4292, loss: 1.4292
2022-04-10 21:15:32,551 - mmaction - INFO - Epoch [2][60/111]	lr: 6.442e-05, eta: 0:06:19, time: 0.208, data_time: 0.001, memory: 2835, top1_acc: 0.5000, top5_acc: 1.0000, loss_cls: 1.3811, loss: 1.3811
2022-04-10 21:15:36,670 - mmaction - INFO - Epoch [2][80/111]	lr: 7.084e-05, eta: 0:06:06, time: 0.206, data_t

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


2022-04-10 21:15:40,775 - mmaction - INFO - Epoch [2][100/111]	lr: 7.725e-05, eta: 0:05:55, time: 0.205, data_time: 0.001, memory: 2835, top1_acc: 0.5000, top5_acc: 1.0000, loss_cls: 1.2883, loss: 1.2883
2022-04-10 21:15:43,041 - mmaction - INFO - Saving checkpoint at 2 epochs


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 3.1 task/s, elapsed: 18s, ETA:     0s

2022-04-10 21:16:09,291 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:16:09,294 - mmaction - INFO - 
top1_acc	0.5455
top5_acc	1.0000
2022-04-10 21:16:09,297 - mmaction - INFO - Epoch(val) [2][28]	top1_acc: 0.5455, top5_acc: 1.0000, loss_cls: 1.3260, loss: 1.3260
2022-04-10 21:16:16,295 - mmaction - INFO - Epoch [3][20/111]	lr: 8.435e-05, eta: 0:05:44, time: 0.350, data_time: 0.142, memory: 2835, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 1.2847, loss: 1.2847
2022-04-10 21:16:20,451 - mmaction - INFO - Epoch [3][40/111]	lr: 9.056e-05, eta: 0:05:36, time: 0.208, data_time: 0.001, memory: 2835, top1_acc: 0.5750, top5_acc: 1.0000, loss_cls: 1.1319, loss: 1.1319
2022-04-10 21:16:24,636 - mmaction - INFO - Epoch [3][60/111]	lr: 9.552e-05, eta: 0:05:28, time: 0.209, data_time: 0.000, memory: 2835, top1_acc: 0.6750, top5_acc: 1.0000, loss_cls: 1.0240, loss: 1.0240
2022-04-10 21:16:28,727 - mmaction - INFO - Epoch [3][80/111]	lr: 9.552e-05, eta: 0:05:20, time: 0.205, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 3.1 task/s, elapsed: 18s, ETA:     0s

2022-04-10 21:17:01,517 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:17:01,518 - mmaction - INFO - 
top1_acc	0.7818
top5_acc	1.0000
2022-04-10 21:17:01,520 - mmaction - INFO - Epoch(val) [3][28]	top1_acc: 0.7818, top5_acc: 1.0000, loss_cls: 0.5227, loss: 0.5227
2022-04-10 21:17:08,505 - mmaction - INFO - Epoch [4][20/111]	lr: 9.045e-05, eta: 0:05:05, time: 0.349, data_time: 0.138, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.4880, loss: 0.4880
2022-04-10 21:17:12,659 - mmaction - INFO - Epoch [4][40/111]	lr: 9.045e-05, eta: 0:04:59, time: 0.208, data_time: 0.000, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.5759, loss: 0.5759
2022-04-10 21:17:16,803 - mmaction - INFO - Epoch [4][60/111]	lr: 9.045e-05, eta: 0:04:52, time: 0.207, data_time: 0.000, memory: 2835, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.7580, loss: 0.7580
2022-04-10 21:17:20,978 - mmaction - INFO - Epoch [4][80/111]	lr: 9.045e-05, eta: 0:04:46, time: 0.209, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:17:54,757 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:17:54,759 - mmaction - INFO - 
top1_acc	0.7818
top5_acc	1.0000
2022-04-10 21:17:54,763 - mmaction - INFO - Epoch(val) [4][28]	top1_acc: 0.7818, top5_acc: 1.0000, loss_cls: 0.6644, loss: 0.6644
2022-04-10 21:18:02,096 - mmaction - INFO - Epoch [5][20/111]	lr: 8.346e-05, eta: 0:04:34, time: 0.366, data_time: 0.159, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.6170, loss: 0.6170
2022-04-10 21:18:06,292 - mmaction - INFO - Epoch [5][40/111]	lr: 8.346e-05, eta: 0:04:29, time: 0.210, data_time: 0.000, memory: 2835, top1_acc: 0.7250, top5_acc: 1.0000, loss_cls: 0.7637, loss: 0.7637
2022-04-10 21:18:10,438 - mmaction - INFO - Epoch [5][60/111]	lr: 8.346e-05, eta: 0:04:23, time: 0.207, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.5043, loss: 0.5043
2022-04-10 21:18:14,568 - mmaction - INFO - Epoch [5][80/111]	lr: 8.346e-05, eta: 0:04:18, time: 0.206, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:18:48,449 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:18:48,452 - mmaction - INFO - 
top1_acc	0.8727
top5_acc	1.0000
2022-04-10 21:18:48,455 - mmaction - INFO - Epoch(val) [5][28]	top1_acc: 0.8727, top5_acc: 1.0000, loss_cls: 0.2745, loss: 0.2745
2022-04-10 21:18:55,501 - mmaction - INFO - Epoch [6][20/111]	lr: 7.500e-05, eta: 0:04:06, time: 0.352, data_time: 0.145, memory: 2835, top1_acc: 0.6750, top5_acc: 1.0000, loss_cls: 0.6146, loss: 0.6146
2022-04-10 21:18:59,745 - mmaction - INFO - Epoch [6][40/111]	lr: 7.500e-05, eta: 0:04:01, time: 0.212, data_time: 0.002, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4923, loss: 0.4923
2022-04-10 21:19:03,896 - mmaction - INFO - Epoch [6][60/111]	lr: 7.500e-05, eta: 0:03:56, time: 0.208, data_time: 0.000, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4045, loss: 0.4045
2022-04-10 21:19:08,067 - mmaction - INFO - Epoch [6][80/111]	lr: 7.500e-05, eta: 0:03:50, time: 0.209, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 3.0 task/s, elapsed: 18s, ETA:     0s

2022-04-10 21:19:40,900 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:19:40,902 - mmaction - INFO - 
top1_acc	0.9273
top5_acc	1.0000
2022-04-10 21:19:40,905 - mmaction - INFO - Epoch(val) [6][28]	top1_acc: 0.9273, top5_acc: 1.0000, loss_cls: 0.2441, loss: 0.2441
2022-04-10 21:19:47,819 - mmaction - INFO - Epoch [7][20/111]	lr: 6.545e-05, eta: 0:03:38, time: 0.345, data_time: 0.138, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.5269, loss: 0.5269
2022-04-10 21:19:52,176 - mmaction - INFO - Epoch [7][40/111]	lr: 6.545e-05, eta: 0:03:34, time: 0.218, data_time: 0.010, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.4552, loss: 0.4552
2022-04-10 21:19:56,323 - mmaction - INFO - Epoch [7][60/111]	lr: 6.545e-05, eta: 0:03:29, time: 0.207, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.4320, loss: 0.4320
2022-04-10 21:20:00,457 - mmaction - INFO - Epoch [7][80/111]	lr: 6.545e-05, eta: 0:03:24, time: 0.207, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:20:34,567 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:20:34,569 - mmaction - INFO - 
top1_acc	0.8909
top5_acc	1.0000
2022-04-10 21:20:34,572 - mmaction - INFO - Epoch(val) [7][28]	top1_acc: 0.8909, top5_acc: 1.0000, loss_cls: 0.5118, loss: 0.5118
2022-04-10 21:20:41,860 - mmaction - INFO - Epoch [8][20/111]	lr: 5.523e-05, eta: 0:03:13, time: 0.364, data_time: 0.160, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5017, loss: 0.5017
2022-04-10 21:20:45,983 - mmaction - INFO - Epoch [8][40/111]	lr: 5.523e-05, eta: 0:03:08, time: 0.206, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3195, loss: 0.3195
2022-04-10 21:20:50,127 - mmaction - INFO - Epoch [8][60/111]	lr: 5.523e-05, eta: 0:03:03, time: 0.207, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2970, loss: 0.2970
2022-04-10 21:20:54,304 - mmaction - INFO - Epoch [8][80/111]	lr: 5.523e-05, eta: 0:02:59, time: 0.209, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:21:28,438 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:21:28,440 - mmaction - INFO - 
top1_acc	0.9273
top5_acc	1.0000
2022-04-10 21:21:28,442 - mmaction - INFO - Epoch(val) [8][28]	top1_acc: 0.9273, top5_acc: 1.0000, loss_cls: 0.3802, loss: 0.3802
2022-04-10 21:21:35,719 - mmaction - INFO - Epoch [9][20/111]	lr: 4.477e-05, eta: 0:02:47, time: 0.364, data_time: 0.157, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.4321, loss: 0.4321
2022-04-10 21:21:39,864 - mmaction - INFO - Epoch [9][40/111]	lr: 4.477e-05, eta: 0:02:43, time: 0.207, data_time: 0.000, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4293, loss: 0.4293
2022-04-10 21:21:43,994 - mmaction - INFO - Epoch [9][60/111]	lr: 4.477e-05, eta: 0:02:38, time: 0.206, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.4032, loss: 0.4032
2022-04-10 21:21:48,159 - mmaction - INFO - Epoch [9][80/111]	lr: 4.477e-05, eta: 0:02:33, time: 0.208, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:22:21,819 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:22:21,821 - mmaction - INFO - 
top1_acc	0.9091
top5_acc	1.0000
2022-04-10 21:22:21,824 - mmaction - INFO - Epoch(val) [9][28]	top1_acc: 0.9091, top5_acc: 1.0000, loss_cls: 0.3821, loss: 0.3821
2022-04-10 21:22:29,179 - mmaction - INFO - Epoch [10][20/111]	lr: 3.455e-05, eta: 0:02:22, time: 0.367, data_time: 0.161, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.3768, loss: 0.3768
2022-04-10 21:22:33,291 - mmaction - INFO - Epoch [10][40/111]	lr: 3.455e-05, eta: 0:02:18, time: 0.206, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.3701, loss: 0.3701
2022-04-10 21:22:37,447 - mmaction - INFO - Epoch [10][60/111]	lr: 3.455e-05, eta: 0:02:13, time: 0.208, data_time: 0.001, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.1833, loss: 0.1833
2022-04-10 21:22:41,615 - mmaction - INFO - Epoch [10][80/111]	lr: 3.455e-05, eta: 0:02:09, time: 0.208, da

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:23:15,101 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:23:15,103 - mmaction - INFO - 
top1_acc	0.8909
top5_acc	1.0000
2022-04-10 21:23:15,106 - mmaction - INFO - Epoch(val) [10][28]	top1_acc: 0.8909, top5_acc: 1.0000, loss_cls: 0.4802, loss: 0.4802
2022-04-10 21:23:22,398 - mmaction - INFO - Epoch [11][20/111]	lr: 2.500e-05, eta: 0:01:57, time: 0.364, data_time: 0.156, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.6193, loss: 0.6193
2022-04-10 21:23:26,564 - mmaction - INFO - Epoch [11][40/111]	lr: 2.500e-05, eta: 0:01:53, time: 0.208, data_time: 0.000, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.3305, loss: 0.3305
2022-04-10 21:23:30,761 - mmaction - INFO - Epoch [11][60/111]	lr: 2.500e-05, eta: 0:01:48, time: 0.210, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2126, loss: 0.2126
2022-04-10 21:23:34,914 - mmaction - INFO - Epoch [11][80/111]	lr: 2.500e-05, eta: 0:01:44, time: 0.208, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:24:09,021 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:24:09,023 - mmaction - INFO - 
top1_acc	0.9273
top5_acc	1.0000
2022-04-10 21:24:09,026 - mmaction - INFO - Epoch(val) [11][28]	top1_acc: 0.9273, top5_acc: 1.0000, loss_cls: 0.3737, loss: 0.3737
2022-04-10 21:24:16,112 - mmaction - INFO - Epoch [12][20/111]	lr: 1.654e-05, eta: 0:01:33, time: 0.354, data_time: 0.146, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.6251, loss: 0.6251
2022-04-10 21:24:20,297 - mmaction - INFO - Epoch [12][40/111]	lr: 1.654e-05, eta: 0:01:28, time: 0.209, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.3684, loss: 0.3684
2022-04-10 21:24:24,414 - mmaction - INFO - Epoch [12][60/111]	lr: 1.654e-05, eta: 0:01:24, time: 0.206, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3217, loss: 0.3217
2022-04-10 21:24:28,602 - mmaction - INFO - Epoch [12][80/111]	lr: 1.654e-05, eta: 0:01:19, time: 0.209, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:25:01,977 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:25:01,979 - mmaction - INFO - 
top1_acc	0.9455
top5_acc	1.0000
2022-04-10 21:25:01,982 - mmaction - INFO - Epoch(val) [12][28]	top1_acc: 0.9455, top5_acc: 1.0000, loss_cls: 0.3105, loss: 0.3105
2022-04-10 21:25:08,957 - mmaction - INFO - Epoch [13][20/111]	lr: 9.549e-06, eta: 0:01:08, time: 0.348, data_time: 0.140, memory: 2835, top1_acc: 0.9750, top5_acc: 1.0000, loss_cls: 0.1378, loss: 0.1378
2022-04-10 21:25:13,076 - mmaction - INFO - Epoch [13][40/111]	lr: 9.549e-06, eta: 0:01:04, time: 0.206, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.4209, loss: 0.4209
2022-04-10 21:25:17,261 - mmaction - INFO - Epoch [13][60/111]	lr: 9.549e-06, eta: 0:00:59, time: 0.209, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3000, loss: 0.3000
2022-04-10 21:25:21,417 - mmaction - INFO - Epoch [13][80/111]	lr: 9.549e-06, eta: 0:00:55, time: 0.208, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 3.0 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:25:54,944 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:25:54,946 - mmaction - INFO - 
top1_acc	0.9455
top5_acc	1.0000
2022-04-10 21:25:54,950 - mmaction - INFO - Epoch(val) [13][28]	top1_acc: 0.9455, top5_acc: 1.0000, loss_cls: 0.4076, loss: 0.4076
2022-04-10 21:26:02,156 - mmaction - INFO - Epoch [14][20/111]	lr: 4.323e-06, eta: 0:00:44, time: 0.360, data_time: 0.153, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2476, loss: 0.2476
2022-04-10 21:26:06,427 - mmaction - INFO - Epoch [14][40/111]	lr: 4.323e-06, eta: 0:00:39, time: 0.214, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2249, loss: 0.2249
2022-04-10 21:26:10,579 - mmaction - INFO - Epoch [14][60/111]	lr: 4.323e-06, eta: 0:00:35, time: 0.208, data_time: 0.000, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2535, loss: 0.2535
2022-04-10 21:26:14,693 - mmaction - INFO - Epoch [14][80/111]	lr: 4.323e-06, eta: 0:00:31, time: 0.206, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:26:48,264 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:26:48,267 - mmaction - INFO - 
top1_acc	0.9455
top5_acc	1.0000
2022-04-10 21:26:48,270 - mmaction - INFO - Epoch(val) [14][28]	top1_acc: 0.9455, top5_acc: 1.0000, loss_cls: 0.3429, loss: 0.3429
2022-04-10 21:26:55,502 - mmaction - INFO - Epoch [15][20/111]	lr: 1.093e-06, eta: 0:00:19, time: 0.361, data_time: 0.156, memory: 2835, top1_acc: 0.9500, top5_acc: 1.0000, loss_cls: 0.1869, loss: 0.1869
2022-04-10 21:26:59,838 - mmaction - INFO - Epoch [15][40/111]	lr: 1.093e-06, eta: 0:00:15, time: 0.217, data_time: 0.008, memory: 2835, top1_acc: 0.9500, top5_acc: 1.0000, loss_cls: 0.1922, loss: 0.1922
2022-04-10 21:27:04,046 - mmaction - INFO - Epoch [15][60/111]	lr: 1.093e-06, eta: 0:00:11, time: 0.210, data_time: 0.000, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2450, loss: 0.2450
2022-04-10 21:27:08,156 - mmaction - INFO - Epoch [15][80/111]	lr: 1.093e-06, eta: 0:00:06, time: 0.206, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 55/55, 2.9 task/s, elapsed: 19s, ETA:     0s

2022-04-10 21:27:41,854 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-04-10 21:27:41,856 - mmaction - INFO - 
top1_acc	0.9455
top5_acc	1.0000
2022-04-10 21:27:41,860 - mmaction - INFO - Epoch(val) [15][28]	top1_acc: 0.9455, top5_acc: 1.0000, loss_cls: 0.3426, loss: 0.3426


In [38]:
wandb.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
lr,███▇▇▆▆▅▄▃▃▂▂▁▁
top1_acc,▁▃▆▆▇█▇██▇█████
top5_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
lr,0.0
top1_acc,0.94545
top5_acc,1.0


## Unused

In [29]:
# # init distributed env first, since logger depends on the dist info.
# if args.launcher == 'none':
#     distributed = False
# else:
#     distributed = True
#     init_dist(args.launcher, **cfg.dist_params)
#     _, world_size = get_dist_info()
#     cfg.gpu_ids = range(world_size)