In [10]:
# Check Pytorch installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMAction2 installation
import mmaction
print(mmaction.__version__)

# Check MMCV installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(get_compiling_cuda_version())
print(get_compiler_version())

1.7.0 True
0.14.0
11.2
GCC 7.5


### CONFIG MMACTION2
1. Option (RECOMMENDED): create a customconfig.py file 
2. Option: modify other config and train from script (see folder MMACTION2/TESTS/mmaction2_tutorial)

In [11]:
# mmaction2 dir
mmadir="/home/administrator/Z/Algorithms/mmaction2/"

from mmcv import Config

root= mmadir+"data/" #default: "data/"
dataset="kinetics400_tiny_v2"
dataset_type = 'VideoDataset'
if dataset_type=='RawframeDataset': 
    dtype_name="rawframes"
    cfg = Config.fromfile(mmadir+'/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
if dataset_type=='VideoDataset': 
    dtype_name="videos"
    cfg = Config.fromfile(mmadir+'/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py')

In [12]:
# Modify dataset type and path
cfg.dataset_type = dataset_type
cfg.data_root =  root+dataset+'/'+dtype_name+'_train'
cfg.data_root_val = root+dataset+'/'+dtype_name+'_val'
cfg.ann_file_train =  root+dataset+'/'+dataset+'_train_'+dtype_name+'.txt'
cfg.ann_file_val = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.ann_file_test = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'

cfg.data.test.type = dataset_type
cfg.data.test.ann_file = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.data.test.data_prefix = cfg.data_root_val 

cfg.data.train.type = dataset_type
cfg.data.train.ann_file = root+dataset+'/'+dataset+'_train_'+dtype_name+'.txt'
cfg.data.train.data_prefix = cfg.data_root

cfg.data.val.type = dataset_type
cfg.data.val.ann_file = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.data.val.data_prefix = cfg.data_root_val 

# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)
# Modify num classes of the model in cls_head
cfg.model.cls_head.num_classes = 2
# We can use the pre-trained TSN model
cfg.load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
work_dir_root= '/home/administrator/Z/Work/EASYRIDE/P19/NC/mmaction2/TESTS/train/' #defualt: './'
cfg.work_dir = work_dir_root+'work_dirs/tsn_kinetics_pretrained_r50_1x1x3_50e_k400tiny_rgb/'

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
# we also only use 2 videos per gpu
cfg.data.videos_per_gpu = cfg.data.videos_per_gpu // 16
cfg.optimizer.lr = cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 30

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log print interval to reduce the the times of printing log
cfg.log_config.interval = 5

# Set seed thus the results are more reproducible
cfg.seed = 0
from mmcv.runner import set_random_seed
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=2,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips=None))
optimizer = dict(type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 30
checkpoint_config = dict(interval=10)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_2020

### Train a new recognizer

Finally, lets initialize the dataset and recognizer, then train a new recognizer!

In [13]:
import os.path as osp

from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.apis import train_model

import mmcv

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_model(model, datasets, cfg, distributed=False, validate=True)

Use load_from_torchvision loader


2021-08-05 18:13:34,489 - mmaction - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}
2021-08-05 18:13:36,788 - mmaction - INFO - load checkpoint from https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth
2021-08-05 18:13:36,789 - mmaction - INFO - Use load_from_http loader

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).
2021-08-05 18:13:36,876 - mmaction - INFO - Start running, host: administrator@administrator, work_dir: /home/administrator/Z/Work/EASYRIDE/P19/NC/mmaction2/TESTS/train/work_dirs/tsn_kinetics_pretrained_r50_1x1x3_50e_k400tiny_rgb
2021-08-05 18:13:36

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 9.7 task/s, elapsed: 1s, ETA:     0s

2021-08-05 18:13:59,699 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-08-05 18:13:59,700 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2021-08-05 18:13:59,700 - mmaction - INFO - Evaluating mean_class_accuracy ...
2021-08-05 18:13:59,701 - mmaction - INFO - 
mean_acc	0.8000
2021-08-05 18:14:00,238 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2021-08-05 18:14:00,239 - mmaction - INFO - Best top1_acc is 0.8000 at 5 epoch.
2021-08-05 18:14:00,240 - mmaction - INFO - Epoch(val) [5][5]	top1_acc: 0.8000, top5_acc: 1.0000, mean_class_accuracy: 0.8000
2021-08-05 18:14:03,332 - mmaction - INFO - Epoch [6][5/15]	lr: 7.813e-05, eta: 0:01:52, time: 0.618, data_time: 0.506, memory: 1620, top1_acc: 0.8000, top5_acc: 1.0000, loss_cls: 0.5897, loss: 0.5897, grad_norm: 11.0825
2021-08-05 18:14:03,880 - mmaction - INFO - Epoch [6][10/15]	lr: 7.813e-05, eta: 0:01:46, time: 0.110, data_time: 0.002, memory: 1620, top1_acc: 0.6000, top5_acc: 1.0000, loss_cls

OSError: [Errno 95] Operation not supported: 'epoch_10.pth' -> '/home/administrator/Z/Work/EASYRIDE/P19/NC/mmaction2/TESTS/train/work_dirs/tsn_kinetics_pretrained_r50_1x1x3_50e_k400tiny_rgb/latest.pth'

## Test the trained recognizer

After finetuning the recognizer, let's check the prediction results!

In [None]:
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader
from mmcv.parallel import MMDataParallel

# Build a test dataloader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

eval_config = cfg.evaluation
eval_config.pop('interval')
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')