In [1]:
import os
import sys
# Make the Video-Swin-Transformer directory (relative to the current working
# directory) importable. The membership check keeps sys.path free of duplicate
# entries when this cell is run more than once.
_swin_repo_dir = os.path.abspath("../Video-Swin-Transformer")
if _swin_repo_dir not in sys.path:
    sys.path.append(_swin_repo_dir)

In [2]:
# Change the working directory to the Video-Swin-Transformer checkout; every
# relative path used after this cell is resolved from there.
os.chdir("../Video-Swin-Transformer")

In [3]:
import argparse
import copy
import os.path as osp
import time
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash

from mmaction import __version__
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.utils import collect_env, get_root_logger, register_module_hooks

In [4]:
!pip3 install wandb -qqq

In [5]:
import wandb
# Log in to your Weights & Biases (W&B) account before any run is created
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33maswin_thiru[0m (use `wandb login --relogin` to force relogin)


True

In [6]:
# W&B project name; passed to wandb.init() later in this notebook
wandb_project_name = 'bsl'

In [7]:
# TODO import test functions from mmcv and delete them from mmaction2
# Prefer the test helpers from mmcv.engine; if that import fails, warn and
# fall back to the copies exposed by mmaction.apis.
try:
    from mmcv.engine import single_gpu_test, multi_gpu_test
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    _mmcv_fallback_msg = (
        'DeprecationWarning: single_gpu_test, multi_gpu_test, '
        'collect_results_cpu, collect_results_gpu from mmaction2 will be '
        'deprecated. Please install mmcv through master branch.')
    warnings.warn(_mmcv_fallback_msg)
    from mmaction.apis import single_gpu_test, multi_gpu_test

In [8]:
def parse_args(parse_options=None):
    """Parse the training command-line options.

    Args:
        parse_options (list[str] | None): Argument strings to parse in place
            of the process command line. With ``None`` argparse reads
            ``sys.argv``.

    Returns:
        argparse.Namespace: The parsed options.

    Side effect:
        ``LOCAL_RANK`` is written to ``os.environ`` (taken from
        ``--local_rank``) unless that variable is already set.
    """
    cli = argparse.ArgumentParser(description='Train a recognizer')

    # Config path plus output / checkpoint locations.
    cli.add_argument('config', help='train config file path')
    cli.add_argument('--work-dir', help='the dir to save logs and models')
    cli.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    cli.add_argument('--load-from', help='the checkpoint file to load from')

    # Evaluation / testing switches; all of them are plain store_true flags.
    eval_switches = (
        ('--validate', 'whether to evaluate the checkpoint during training'),
        ('--test-last', 'whether to test the checkpoint after training'),
        ('--test-best',
         'whether to test the best checkpoint (if applicable) after '
         'training'),
    )
    for flag, flag_help in eval_switches:
        cli.add_argument(flag, action='store_true', help=flag_help)

    # --gpus and --gpu-ids are mutually exclusive.
    gpu_choice = cli.add_mutually_exclusive_group()
    gpu_choice.add_argument(
        '--gpus',
        type=int,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    gpu_choice.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')

    # Reproducibility options.
    cli.add_argument('--seed', type=int, default=None, help='random seed')
    cli.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')

    # Free-form key=value overrides for the loaded config.
    cli.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")

    # Launcher selection and the rank passed in by the launcher.
    cli.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    cli.add_argument('--local_rank', type=int, default=0)

    # argparse treats None as "read sys.argv", so a single call covers both
    # the notebook (explicit list) and the command-line use.
    parsed = cli.parse_args(parse_options)

    # Only fill in LOCAL_RANK when the environment does not provide it.
    os.environ.setdefault('LOCAL_RANK', str(parsed.local_rank))

    return parsed

In [9]:
# Set up the configuration file and the pretrained checkpoint file
config_file = '../configs/bsl_config.py'
check_point_file = '../configs/swin_tiny_patch244_window877_kinetics400_1k.pth'
# Disabled alternative: an extra --cfg-options entry
# , "model.backbone.pretrained="+check_point_file
# Disabled alternative: the same options without "--validate"
# cmd_options = [config_file, "--cfg-options", "model.backbone.use_checkpoint=True", "--load-from", check_point_file,
#                "--seed", "12345"]
# Command-line style options that are handed to parse_args() in place of sys.argv
cmd_options = [config_file, "--cfg-options", "model.backbone.use_checkpoint=True", "--load-from", check_point_file,
              "--validate", "--seed", "12345"]

In [10]:
# Run in a single process; later cells branch on this flag to pick the
# non-distributed wrappers and hooks.
distributed = False

In [11]:
# Create a configuration object that describes the training and testing
args = parse_args(cmd_options)
cfg = Config.fromfile(args.config)
cfg.merge_from_dict(args.cfg_options)

# Customization for training the BSL data set
# Workflow semantics are described in the runner documentation:
# https://mmcv.readthedocs.io/en/latest/_modules/mmcv/runner/epoch_based_runner.html
cfg.workflow = [('train', 1), ('val', 1)]
# cfg.workflow = [('train', 1), ]
cfg.model.cls_head.num_classes = 5

# work_dir is determined in this priority:
# --work-dir option > config file > default derived from the config file name.
# (Previously --work-dir was parsed but never used, and a config without a
# work_dir entry failed further down.)
if args.work_dir is not None:
    cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
    cfg.work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])

# Checkpoint to resume from / load from, as given in the options
cfg.resume_from = args.resume_from
cfg.load_from = args.load_from

# One GPU
cfg.gpu_ids = range(1)

# The flag is used to determine whether it is omnisource training
# Omnisource reference: https://arxiv.org/abs/2003.13042
cfg.setdefault('omnisource', False)

# The flag is used to register module's hooks
cfg.setdefault('module_hooks', [])

# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))

# init logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
            dash_line)
meta['env_info'] = env_info

# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config: {cfg.pretty_text}')

# Set seed for training. --seed defaults to None, in which case nothing is
# seeded instead of handing None to set_random_seed().
if args.seed is not None:
    logger.info(f'Set random seed to {args.seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(args.seed, deterministic=args.deterministic)

# Record the seed (possibly None) and run identifiers for later cells
cfg.seed = args.seed
meta['seed'] = args.seed
meta['config_name'] = osp.basename(args.config)
meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

../Video-Swin-Transformer/configs/_base_/models/swin/swin_tiny.py
../Video-Swin-Transformer/configs/_base_/default_runtime.py


2022-03-24 23:05:15,281 - mmaction - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:59:51) [GCC 9.4.0]
CUDA available: True
GPU 0: Tesla T4
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.6.r11.6/compiler.30794723_0
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
PyTorch: 1.11.0a0+17540c5
PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.3.3 (Git Hash N/A)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX512
  - CUDA Runtime 11.6
  - NVCC architecture flags: -gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode

In [12]:
# Create the training dataset; it is always the first entry
datasets = [build_dataset(cfg.data.train)]

# Copy of the validation dataset config. It is defined unconditionally
# because later cells refer to this name.
val_dataset = copy.deepcopy(cfg.data.val)

# The runner pairs data loaders with workflow stages, so the validation
# dataset is only added when the workflow has a second ('val') stage.
# (Previously this was an unconditional `if 1:`.)
if len(cfg.workflow) == 2:
    datasets.append(build_dataset(val_dataset))

In [13]:
# Debug helper: keep the first item produced by iterating val_dataset.
# NOTE(review): at this point val_dataset is the deep-copied cfg.data.val
# config, not the built dataset, so `batch` is presumably a config key rather
# than a data sample - confirm the intent or remove this cell.
for batch in val_dataset:
    break

In [14]:
# Record the --test-last / --test-best flags: which checkpoint(s) to test
# after training
test_option = {'test_last': args.test_last, 'test_best': args.test_best}

In [15]:
# Build the model described by cfg.model; train_cfg / test_cfg are passed
# through when the config defines them (None otherwise)
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


# Train the model

In [16]:
import apex
from mmaction.core import DistEvalHook, EvalHook
from mmaction.datasets import build_dataloader, build_dataset
from mmcv_custom.runner import EpochBasedRunnerAmp
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, build_optimizer, get_dist_info

In [17]:
# Get the root logger
logger = get_root_logger(log_level=cfg.log_level)


# Settings shared by the training data loaders. The per-GPU batch size is
# divided by optimizer_config's update_interval (1 when not configured).
dataloader_setting = dict(
    videos_per_gpu=cfg.data.get('videos_per_gpu', 1) // cfg.optimizer_config.get('update_interval', 1),
    workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
    num_gpus=len(cfg.gpu_ids),
    dist=distributed,
    seed=cfg.seed)

# Entries under cfg.data.train_dataloader override the settings above;
# one loader is built per dataset in `datasets`
dataloader_setting = dict(dataloader_setting, **cfg.data.get('train_dataloader', {}))
data_loaders = [build_dataloader(ds, **dataloader_setting) for ds in datasets]

# Build the validation dataset again with test_mode=True (this rebinds
# val_dataset) and describe its loader: full per-GPU batch size, no shuffling
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
dataloader_setting = dict(
    videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
    workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
    # cfg.gpus will be ignored if distributed
    num_gpus=len(cfg.gpu_ids),
    dist=distributed,
    shuffle=False)

# Entries under cfg.data.val_dataloader override the validation settings
dataloader_setting = dict(dataloader_setting, **cfg.data.get('val_dataloader', {}))
val_dataloader = build_dataloader(val_dataset, **dataloader_setting)

In [18]:
# build optimizer
optimizer = build_optimizer(model, cfg.optimizer)
# Move the model to the GPU and initialise apex AMP for the model/optimizer
# pair at optimization level O1
model, optimizer = apex.amp.initialize(model.cuda(), optimizer, opt_level="O1")

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [19]:
# Switch on the fp16_enabled flag of every sub-module that has one
for m in model.modules():
    if hasattr(m, "fp16_enabled"):
        m.fp16_enabled = True

In [20]:
# Put the model on the GPU(s) for training, wrapped in the parallel container
# that matches the `distributed` flag
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    # Non-distributed: place the model on the first configured GPU
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

In [21]:
# Create the runner that drives training; the Amp variant imported from
# mmcv_custom is used
Runner = EpochBasedRunnerAmp
runner = Runner(model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta)

# an ugly workaround to make .log and .log.json filenames the same
runner.timestamp = timestamp

# Optimizer hook settings are taken from the config as-is
optimizer_config = cfg.optimizer_config

# register hooks (lr schedule, optimizer, checkpoint, logging, optional momentum)
runner.register_training_hooks(cfg.lr_config, optimizer_config, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None))

# Only distributed runs get the sampler seed hook
if distributed:
    runner.register_hook(DistSamplerSeedHook())

In [22]:
# Register the evaluation hook on the validation loader. cfg.evaluation (when
# present) supplies its keyword arguments; the distributed hook class is used
# only when `distributed` is set.
eval_cfg = cfg.get('evaluation', {})
eval_hook = DistEvalHook if distributed else EvalHook
runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

In [23]:
# Restore state in priority order: explicit resume checkpoint, then
# auto-resume from <work_dir>/latest.pth, then the load_from checkpoint.
if cfg.resume_from:
    # apex AMP is always initialised earlier in this notebook, so the AMP
    # state is resumed too. The previous code passed an undefined `use_amp`
    # name here, which raised NameError whenever resume_from was set.
    runner.resume(cfg.resume_from, resume_amp=True)
elif cfg.get("auto_resume", False) and osp.exists(osp.join(runner.work_dir, 'latest.pth')):
    runner.auto_resume()
elif cfg.load_from:
    runner.load_checkpoint(cfg.load_from)

2022-03-24 23:05:17,712 - mmaction - INFO - load checkpoint from local path: ../configs/swin_tiny_patch244_window877_kinetics400_1k.pth

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 768]) from checkpoint, the shape in current model is torch.Size([5, 768]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([5]).


## Dashboarding using wandb

In [24]:
# Start a W&B run in the configured project and attach the training config
wandb.init(project=wandb_project_name, config=cfg)

In [25]:
from mmcv.runner import Hook
from torch.utils.data import DataLoader
from mmaction.apis import single_gpu_test


class WandBHook(Hook):  # noqa: F811
    """Runner hook that logs the learning rate and validation metrics to W&B.

    Before every training epoch the learning rate of the optimizer's first
    parameter group is logged under the key ``"lr"``. After every validation
    epoch the runner's model is run over ``dataloader`` with
    ``single_gpu_test`` and the result of the dataset's ``evaluate`` method
    is logged.

    Args:
        dataloader (DataLoader): A PyTorch dataloader used for evaluation.
        wandb_obj: Object providing ``log(dict)``, e.g. the ``wandb`` module.
        optimizer_obj: Optimizer whose ``param_groups[0]['lr']`` is logged.
        **eval_kwargs: Evaluation arguments fed into the evaluate function
            of the dataset.

    Raises:
        TypeError: If ``dataloader`` is not a PyTorch ``DataLoader``.
    """

    def __init__(self,
                 dataloader,
                 wandb_obj,
                 optimizer_obj,
                 **eval_kwargs):

        if not isinstance(dataloader, DataLoader):
            raise TypeError(f'dataloader must be a pytorch DataLoader, '
                            f'but got {type(dataloader)}')

        self.dataloader = dataloader
        # Keep the injected objects. The constructor arguments used to be
        # ignored in favour of the notebook-level `wandb` / `optimizer` names.
        self.wandb = wandb_obj
        self.optimizer = optimizer_obj
        self.eval_kwargs = eval_kwargs

    def before_train_epoch(self, runner):
        """Called before every train epoch to log the current learning rate."""
        self.wandb.log({"lr": self.optimizer.param_groups[0]['lr']})

    def after_val_epoch(self, runner):
        """Called after every validation epoch to evaluate the results."""
        self._do_evaluate(runner)

    def _do_evaluate(self, runner):
        """Run the model over the dataloader and log the evaluation result."""
        results = single_gpu_test(runner.model, self.dataloader)
        eval_res = self.dataloader.dataset.evaluate(
            results, logger=runner.logger, **self.eval_kwargs)
        self.wandb.log(eval_res)

In [26]:
# Attach the W&B logging hook; it evaluates on the validation loader and
# logs the optimizer's learning rate
runner.register_hook(WandBHook(val_dataloader, wandb, optimizer))

In [27]:
# Start the run: the data loaders are paired with the workflow stages and
# cfg.total_epochs is passed as the epoch limit. No extra runner keyword
# arguments are used.
runner_kwargs = {}
runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)

2022-03-24 23:05:19,465 - mmaction - INFO - Start running, host: root@ip-10-0-0-32, work_dir: /workspace/Video-Swin-Transformer/work_dirs/k400_swin_tiny_patch244_window877.py
2022-03-24 23:05:19,466 - mmaction - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(ABOVE_NORMAL) DistOptimizerHook                  
(NORMAL      ) CheckpointHook                     
(NORMAL      ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(NORMAL      ) EvalHook                           
(NORMAL      ) WandBHook                          
(LOW         ) IterTimerHook                      
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_iter:
(VERY_HIGH   ) CosineAnnealingLrUpdaterHook       
(NORMAL      ) EvalHook                           
(LOW         ) IterTimerH

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:07:24,752 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:07:24,754 - mmaction - INFO - 
top1_acc	0.4068
top5_acc	1.0000
2022-03-24 23:07:24,755 - mmaction - INFO - Epoch(val) [1][30]	top1_acc: 0.4068, top5_acc: 1.0000, loss_cls: 1.5151, loss: 1.5151
2022-03-24 23:07:41,002 - mmaction - INFO - Epoch [2][20/109]	lr: 5.170e-05, eta: 0:16:55, time: 0.812, data_time: 0.146, memory: 2832, top1_acc: 0.5000, top5_acc: 1.0000, loss_cls: 1.4916, loss: 1.4916
2022-03-24 23:07:54,330 - mmaction - INFO - Epoch [2][40/109]	lr: 5.824e-05, eta: 0:16:40, time: 0.666, data_time: 0.001, memory: 2833, top1_acc: 0.6000, top5_acc: 1.0000, loss_cls: 1.3391, loss: 1.3391
2022-03-24 23:08:07,727 - mmaction - INFO - Epoch [2][60/109]	lr: 6.477e-05, eta: 0:16:26, time: 0.670, data_time: 0.000, memory: 2833, top1_acc: 0.4500, top5_acc: 1.0000, loss_cls: 1.3423, loss: 1.3423
2022-03-24 23:08:21,178 - mmaction - INFO - Epoch [2][80/109]	lr: 7.130e-05, eta: 0:16:13, time: 0.673, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:09:29,306 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:09:29,307 - mmaction - INFO - 
top1_acc	0.5254
top5_acc	1.0000
2022-03-24 23:09:29,309 - mmaction - INFO - Epoch(val) [2][30]	top1_acc: 0.5254, top5_acc: 1.0000, loss_cls: 1.1110, loss: 1.1110
2022-03-24 23:09:46,318 - mmaction - INFO - Epoch [3][20/109]	lr: 8.446e-05, eta: 0:15:25, time: 0.850, data_time: 0.173, memory: 2833, top1_acc: 0.6500, top5_acc: 1.0000, loss_cls: 1.0206, loss: 1.0206
2022-03-24 23:09:59,872 - mmaction - INFO - Epoch [3][40/109]	lr: 9.078e-05, eta: 0:15:14, time: 0.678, data_time: 0.001, memory: 2835, top1_acc: 0.5750, top5_acc: 1.0000, loss_cls: 1.0899, loss: 1.0899
2022-03-24 23:10:13,474 - mmaction - INFO - Epoch [3][60/109]	lr: 9.552e-05, eta: 0:15:02, time: 0.680, data_time: 0.000, memory: 2835, top1_acc: 0.5250, top5_acc: 1.0000, loss_cls: 0.8492, loss: 0.8492
2022-03-24 23:10:27,127 - mmaction - INFO - Epoch [3][80/109]	lr: 9.552e-05, eta: 0:14:50, time: 0.683, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:11:35,208 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:11:35,210 - mmaction - INFO - 
top1_acc	0.7966
top5_acc	1.0000
2022-03-24 23:11:35,212 - mmaction - INFO - Epoch(val) [3][30]	top1_acc: 0.7966, top5_acc: 1.0000, loss_cls: 0.5799, loss: 0.5799
2022-03-24 23:11:53,022 - mmaction - INFO - Epoch [4][20/109]	lr: 9.045e-05, eta: 0:14:13, time: 0.890, data_time: 0.210, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.7455, loss: 0.7455
2022-03-24 23:12:06,661 - mmaction - INFO - Epoch [4][40/109]	lr: 9.045e-05, eta: 0:14:01, time: 0.682, data_time: 0.000, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5584, loss: 0.5584
2022-03-24 23:12:20,363 - mmaction - INFO - Epoch [4][60/109]	lr: 9.045e-05, eta: 0:13:49, time: 0.685, data_time: 0.000, memory: 2835, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.7011, loss: 0.7011
2022-03-24 23:12:34,107 - mmaction - INFO - Epoch [4][80/109]	lr: 9.045e-05, eta: 0:13:37, time: 0.687, data_t

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:13:41,947 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:13:41,949 - mmaction - INFO - 
top1_acc	0.7797
top5_acc	1.0000
2022-03-24 23:13:41,950 - mmaction - INFO - Epoch(val) [4][30]	top1_acc: 0.7797, top5_acc: 1.0000, loss_cls: 0.6674, loss: 0.6674
2022-03-24 23:13:59,894 - mmaction - INFO - Epoch [5][20/109]	lr: 8.346e-05, eta: 0:13:03, time: 0.897, data_time: 0.212, memory: 2835, top1_acc: 0.5500, top5_acc: 1.0000, loss_cls: 0.8672, loss: 0.8672
2022-03-24 23:14:13,620 - mmaction - INFO - Epoch [5][40/109]	lr: 8.346e-05, eta: 0:12:50, time: 0.686, data_time: 0.000, memory: 2835, top1_acc: 0.6250, top5_acc: 1.0000, loss_cls: 0.7534, loss: 0.7534
2022-03-24 23:14:27,434 - mmaction - INFO - Epoch [5][60/109]	lr: 8.346e-05, eta: 0:12:38, time: 0.691, data_time: 0.000, memory: 2835, top1_acc: 0.7000, top5_acc: 1.0000, loss_cls: 0.8704, loss: 0.8704
2022-03-24 23:14:41,288 - mmaction - INFO - Epoch [5][80/109]	lr: 8.346e-05, eta: 0:12:26, time: 0.693, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:15:35,613 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:15:35,615 - mmaction - INFO - 
top1_acc	0.8475
top5_acc	1.0000
2022-03-24 23:15:35,615 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-03-24 23:15:35,616 - mmaction - INFO - 
mean_acc	0.8312
2022-03-24 23:15:36,533 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2022-03-24 23:15:36,534 - mmaction - INFO - Best top1_acc is 0.8475 at 5 epoch.
2022-03-24 23:15:36,534 - mmaction - INFO - Epoch(val) [5][59]	top1_acc: 0.8475, top5_acc: 1.0000, mean_class_accuracy: 0.8312


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:16:23,660 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:16:23,661 - mmaction - INFO - 
top1_acc	0.8475
top5_acc	1.0000
2022-03-24 23:16:23,663 - mmaction - INFO - Epoch(val) [5][30]	top1_acc: 0.8475, top5_acc: 1.0000, loss_cls: 0.4813, loss: 0.4813
2022-03-24 23:16:40,909 - mmaction - INFO - Epoch [6][20/109]	lr: 7.500e-05, eta: 0:11:50, time: 0.862, data_time: 0.182, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.5844, loss: 0.5844
2022-03-24 23:16:54,520 - mmaction - INFO - Epoch [6][40/109]	lr: 7.500e-05, eta: 0:11:38, time: 0.681, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.5667, loss: 0.5667
2022-03-24 23:17:08,206 - mmaction - INFO - Epoch [6][60/109]	lr: 7.500e-05, eta: 0:11:25, time: 0.684, data_time: 0.000, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.5947, loss: 0.5947
2022-03-24 23:17:21,963 - mmaction - INFO - Epoch [6][80/109]	lr: 7.500e-05, eta: 0:11:12, time: 0.688, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:18:30,684 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:18:30,686 - mmaction - INFO - 
top1_acc	0.9153
top5_acc	1.0000
2022-03-24 23:18:30,688 - mmaction - INFO - Epoch(val) [6][30]	top1_acc: 0.9153, top5_acc: 1.0000, loss_cls: 0.1971, loss: 0.1971
2022-03-24 23:18:48,611 - mmaction - INFO - Epoch [7][20/109]	lr: 6.545e-05, eta: 0:10:39, time: 0.896, data_time: 0.210, memory: 2835, top1_acc: 0.7250, top5_acc: 1.0000, loss_cls: 0.6347, loss: 0.6347
2022-03-24 23:19:02,340 - mmaction - INFO - Epoch [7][40/109]	lr: 6.545e-05, eta: 0:10:26, time: 0.686, data_time: 0.000, memory: 2835, top1_acc: 0.7500, top5_acc: 1.0000, loss_cls: 0.5885, loss: 0.5885
2022-03-24 23:19:16,114 - mmaction - INFO - Epoch [7][60/109]	lr: 6.545e-05, eta: 0:10:13, time: 0.689, data_time: 0.000, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5423, loss: 0.5423
2022-03-24 23:19:29,947 - mmaction - INFO - Epoch [7][80/109]	lr: 6.545e-05, eta: 0:10:00, time: 0.692, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.7 task/s, elapsed: 34s, ETA:     0s

2022-03-24 23:20:38,222 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:20:38,223 - mmaction - INFO - 
top1_acc	0.8983
top5_acc	1.0000
2022-03-24 23:20:38,225 - mmaction - INFO - Epoch(val) [7][30]	top1_acc: 0.8983, top5_acc: 1.0000, loss_cls: 0.2036, loss: 0.2036
2022-03-24 23:20:55,912 - mmaction - INFO - Epoch [8][20/109]	lr: 5.523e-05, eta: 0:09:27, time: 0.884, data_time: 0.200, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.3315, loss: 0.3315
2022-03-24 23:21:09,611 - mmaction - INFO - Epoch [8][40/109]	lr: 5.523e-05, eta: 0:09:14, time: 0.685, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3004, loss: 0.3004
2022-03-24 23:21:23,389 - mmaction - INFO - Epoch [8][60/109]	lr: 5.523e-05, eta: 0:09:01, time: 0.689, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.4280, loss: 0.4280
2022-03-24 23:21:37,199 - mmaction - INFO - Epoch [8][80/109]	lr: 5.523e-05, eta: 0:08:48, time: 0.690, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 34s, ETA:     0s

2022-03-24 23:22:46,776 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:22:46,778 - mmaction - INFO - 
top1_acc	0.9153
top5_acc	1.0000
2022-03-24 23:22:46,780 - mmaction - INFO - Epoch(val) [8][30]	top1_acc: 0.9153, top5_acc: 1.0000, loss_cls: 0.1768, loss: 0.1768
2022-03-24 23:23:03,873 - mmaction - INFO - Epoch [9][20/109]	lr: 4.477e-05, eta: 0:08:14, time: 0.854, data_time: 0.166, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.3692, loss: 0.3692
2022-03-24 23:23:17,622 - mmaction - INFO - Epoch [9][40/109]	lr: 4.477e-05, eta: 0:08:01, time: 0.687, data_time: 0.001, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4583, loss: 0.4583
2022-03-24 23:23:31,429 - mmaction - INFO - Epoch [9][60/109]	lr: 4.477e-05, eta: 0:07:48, time: 0.690, data_time: 0.001, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2753, loss: 0.2753
2022-03-24 23:23:45,287 - mmaction - INFO - Epoch [9][80/109]	lr: 4.477e-05, eta: 0:07:35, time: 0.693, data_t

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.7 task/s, elapsed: 34s, ETA:     0s

2022-03-24 23:24:54,799 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:24:54,801 - mmaction - INFO - 
top1_acc	0.8983
top5_acc	1.0000
2022-03-24 23:24:54,803 - mmaction - INFO - Epoch(val) [9][30]	top1_acc: 0.8983, top5_acc: 1.0000, loss_cls: 0.1821, loss: 0.1821
2022-03-24 23:25:11,826 - mmaction - INFO - Epoch [10][20/109]	lr: 3.455e-05, eta: 0:07:01, time: 0.851, data_time: 0.167, memory: 2835, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.4397, loss: 0.4397
2022-03-24 23:25:25,534 - mmaction - INFO - Epoch [10][40/109]	lr: 3.455e-05, eta: 0:06:48, time: 0.685, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2523, loss: 0.2523
2022-03-24 23:25:39,311 - mmaction - INFO - Epoch [10][60/109]	lr: 3.455e-05, eta: 0:06:35, time: 0.689, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.3913, loss: 0.3913
2022-03-24 23:25:53,138 - mmaction - INFO - Epoch [10][80/109]	lr: 3.455e-05, eta: 0:06:22, time: 0.691, da

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 34s, ETA:     0s

2022-03-24 23:26:47,479 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:26:47,481 - mmaction - INFO - 
top1_acc	0.9322
top5_acc	1.0000
2022-03-24 23:26:47,481 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-03-24 23:26:47,482 - mmaction - INFO - 
mean_acc	0.9288
2022-03-24 23:26:47,519 - mmaction - INFO - The previous best checkpoint /workspace/Video-Swin-Transformer/work_dirs/k400_swin_tiny_patch244_window877.py/best_top1_acc_epoch_5.pth was removed
2022-03-24 23:26:48,431 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_10.pth.
2022-03-24 23:26:48,432 - mmaction - INFO - Best top1_acc is 0.9322 at 10 epoch.
2022-03-24 23:26:48,433 - mmaction - INFO - Epoch(val) [10][59]	top1_acc: 0.9322, top5_acc: 1.0000, mean_class_accuracy: 0.9288


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:27:35,887 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:27:35,888 - mmaction - INFO - 
top1_acc	0.9322
top5_acc	1.0000
2022-03-24 23:27:35,891 - mmaction - INFO - Epoch(val) [10][30]	top1_acc: 0.9322, top5_acc: 1.0000, loss_cls: 0.1326, loss: 0.1326
2022-03-24 23:27:52,762 - mmaction - INFO - Epoch [11][20/109]	lr: 2.500e-05, eta: 0:05:48, time: 0.843, data_time: 0.162, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2796, loss: 0.2796
2022-03-24 23:28:06,448 - mmaction - INFO - Epoch [11][40/109]	lr: 2.500e-05, eta: 0:05:35, time: 0.684, data_time: 0.000, memory: 2835, top1_acc: 0.8250, top5_acc: 1.0000, loss_cls: 0.3131, loss: 0.3131
2022-03-24 23:28:20,198 - mmaction - INFO - Epoch [11][60/109]	lr: 2.500e-05, eta: 0:05:22, time: 0.687, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2693, loss: 0.2693
2022-03-24 23:28:34,025 - mmaction - INFO - Epoch [11][80/109]	lr: 2.500e-05, eta: 0:05:09, time: 0.691, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:29:42,949 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:29:42,950 - mmaction - INFO - 
top1_acc	0.9661
top5_acc	1.0000
2022-03-24 23:29:42,952 - mmaction - INFO - Epoch(val) [11][30]	top1_acc: 0.9661, top5_acc: 1.0000, loss_cls: 0.0889, loss: 0.0889
2022-03-24 23:30:00,463 - mmaction - INFO - Epoch [12][20/109]	lr: 1.654e-05, eta: 0:04:36, time: 0.875, data_time: 0.189, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2698, loss: 0.2698
2022-03-24 23:30:14,227 - mmaction - INFO - Epoch [12][40/109]	lr: 1.654e-05, eta: 0:04:23, time: 0.688, data_time: 0.000, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.3326, loss: 0.3326
2022-03-24 23:30:28,051 - mmaction - INFO - Epoch [12][60/109]	lr: 1.654e-05, eta: 0:04:10, time: 0.691, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2399, loss: 0.2399
2022-03-24 23:30:41,925 - mmaction - INFO - Epoch [12][80/109]	lr: 1.654e-05, eta: 0:03:57, time: 0.694, d

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


2022-03-24 23:30:55,734 - mmaction - INFO - Epoch [12][100/109]	lr: 1.654e-05, eta: 0:03:43, time: 0.690, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2185, loss: 0.2185
2022-03-24 23:31:01,660 - mmaction - INFO - Saving checkpoint at 12 epochs


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 34s, ETA:     0s

2022-03-24 23:31:50,432 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:31:50,433 - mmaction - INFO - 
top1_acc	0.9492
top5_acc	1.0000
2022-03-24 23:31:50,435 - mmaction - INFO - Epoch(val) [12][30]	top1_acc: 0.9492, top5_acc: 1.0000, loss_cls: 0.1345, loss: 0.1345
2022-03-24 23:32:07,462 - mmaction - INFO - Epoch [13][20/109]	lr: 9.549e-06, eta: 0:03:24, time: 0.851, data_time: 0.168, memory: 2835, top1_acc: 0.8500, top5_acc: 1.0000, loss_cls: 0.4177, loss: 0.4177
2022-03-24 23:32:21,170 - mmaction - INFO - Epoch [13][40/109]	lr: 9.549e-06, eta: 0:03:10, time: 0.685, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2952, loss: 0.2952
2022-03-24 23:32:34,957 - mmaction - INFO - Epoch [13][60/109]	lr: 9.549e-06, eta: 0:02:57, time: 0.689, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2410, loss: 0.2410
2022-03-24 23:32:48,776 - mmaction - INFO - Epoch [13][80/109]	lr: 9.549e-06, eta: 0:02:44, time: 0.691, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:33:56,526 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:33:56,527 - mmaction - INFO - 
top1_acc	0.9322
top5_acc	1.0000
2022-03-24 23:33:56,529 - mmaction - INFO - Epoch(val) [13][30]	top1_acc: 0.9322, top5_acc: 1.0000, loss_cls: 0.1433, loss: 0.1433
2022-03-24 23:34:13,961 - mmaction - INFO - Epoch [14][20/109]	lr: 4.323e-06, eta: 0:02:11, time: 0.871, data_time: 0.187, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.3640, loss: 0.3640
2022-03-24 23:34:27,651 - mmaction - INFO - Epoch [14][40/109]	lr: 4.323e-06, eta: 0:01:58, time: 0.684, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2117, loss: 0.2117
2022-03-24 23:34:41,424 - mmaction - INFO - Epoch [14][60/109]	lr: 4.323e-06, eta: 0:01:45, time: 0.689, data_time: 0.000, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.1856, loss: 0.1856
2022-03-24 23:34:55,274 - mmaction - INFO - Epoch [14][80/109]	lr: 4.323e-06, eta: 0:01:31, time: 0.692, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:36:03,627 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:36:03,628 - mmaction - INFO - 
top1_acc	0.9153
top5_acc	1.0000
2022-03-24 23:36:03,630 - mmaction - INFO - Epoch(val) [14][30]	top1_acc: 0.9153, top5_acc: 1.0000, loss_cls: 0.1310, loss: 0.1310
2022-03-24 23:36:20,457 - mmaction - INFO - Epoch [15][20/109]	lr: 1.093e-06, eta: 0:00:59, time: 0.841, data_time: 0.158, memory: 2835, top1_acc: 0.9000, top5_acc: 1.0000, loss_cls: 0.2719, loss: 0.2719
2022-03-24 23:36:34,146 - mmaction - INFO - Epoch [15][40/109]	lr: 1.093e-06, eta: 0:00:45, time: 0.684, data_time: 0.000, memory: 2835, top1_acc: 0.8750, top5_acc: 1.0000, loss_cls: 0.2793, loss: 0.2793
2022-03-24 23:36:47,940 - mmaction - INFO - Epoch [15][60/109]	lr: 1.093e-06, eta: 0:00:32, time: 0.690, data_time: 0.001, memory: 2835, top1_acc: 0.9250, top5_acc: 1.0000, loss_cls: 0.2623, loss: 0.2623
2022-03-24 23:37:01,786 - mmaction - INFO - Epoch [15][80/109]	lr: 1.093e-06, eta: 0:00:19, time: 0.692, d

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:37:55,510 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:37:55,511 - mmaction - INFO - 
top1_acc	0.9322
top5_acc	1.0000
2022-03-24 23:37:55,512 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-03-24 23:37:55,513 - mmaction - INFO - 
mean_acc	0.9255
2022-03-24 23:37:55,514 - mmaction - INFO - Epoch(val) [15][59]	top1_acc: 0.9322, top5_acc: 1.0000, mean_class_accuracy: 0.9255


[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 59/59, 1.8 task/s, elapsed: 33s, ETA:     0s

2022-03-24 23:38:43,033 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-03-24 23:38:43,034 - mmaction - INFO - 
top1_acc	0.9322
top5_acc	1.0000
2022-03-24 23:38:43,036 - mmaction - INFO - Epoch(val) [15][30]	top1_acc: 0.9322, top5_acc: 1.0000, loss_cls: 0.1321, loss: 0.1321


In [28]:
# Mark the W&B run as finished
wandb.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
lr,███▇▇▆▆▅▄▃▃▂▂▁▁
top1_acc,▁▂▆▆▇▇▇▇▇████▇█
top5_acc,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
lr,0.0
top1_acc,0.9322
top5_acc,1.0


## Unused

In [29]:
# # init distributed env first, since logger depends on the dist info.
# if args.launcher == 'none':
#     distributed = False
# else:
#     distributed = True
#     init_dist(args.launcher, **cfg.dist_params)
#     _, world_size = get_dist_info()
#     cfg.gpu_ids = range(world_size)