In [1]:
# Check Pytorch installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMAction2 installation
import mmaction
print(mmaction.__version__)

# Check MMCV installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(get_compiling_cuda_version())
print(get_compiler_version())

# check os platform
import platform
print('OS: {}'.format(platform.platform()))

1.7.0 True
0.15.0
11.3
MSVC 192930037
OS: Windows-10-10.0.22000-SP0


### CONFIG MMACTION2
1. Option (RECOMMENDED): create a customconfig.py file 
2. Option: modify other config and train from script (see folder MMACTION2/TESTS/mmaction2_tutorial)

#NOTE: VideoDataset working; RawframeDataset?

In [2]:
# mmaction2 dir
mmadir="C:/Users/nmc_costa/Desktop/Desk/mmaction2/" 
work_dir_root= mmadir
if str(platform.platform()).upper().find('linux'.upper())>-1: 
    mmadir="/home/administrator/Z/Algorithms/mmaction2/"
    work_dir_root= '/home/administrator/Z/Work/EASYRIDE/P19/NC/mmaction2/TESTS/train/' #defualt: './'


from mmcv import Config

root= mmadir+"data/" #default: "data/"
dataset="kinetics400_tiny_v2"
dataset_type = 'VideoDataset'
if dataset_type=='RawframeDataset': 
    dtype_name="rawframes"
    cfg = Config.fromfile(mmadir+'/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
if dataset_type=='VideoDataset': 
    dtype_name="videos"
    cfg = Config.fromfile(mmadir+'/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py')

In [3]:
# Modify dataset type and path
cfg.dataset_type = dataset_type
cfg.data_root =  root+dataset+'/'+dtype_name+'_train'
cfg.data_root_val = root+dataset+'/'+dtype_name+'_val'
cfg.ann_file_train =  root+dataset+'/'+dataset+'_train_'+dtype_name+'.txt'
cfg.ann_file_val = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.ann_file_test = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'

cfg.data.test.type = dataset_type
cfg.data.test.ann_file = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.data.test.data_prefix = cfg.data_root_val 

cfg.data.train.type = dataset_type
cfg.data.train.ann_file = root+dataset+'/'+dataset+'_train_'+dtype_name+'.txt'
cfg.data.train.data_prefix = cfg.data_root

cfg.data.val.type = dataset_type
cfg.data.val.ann_file = root+dataset+'/'+dataset+'_val_'+dtype_name+'.txt'
cfg.data.val.data_prefix = cfg.data_root_val 

# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)
# Modify num classes of the model in cls_head
cfg.model.cls_head.num_classes = 2
# We can use the pre-trained TSN model
cfg.load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
cfg.work_dir = work_dir_root+'work_dirs/tsn_kinetics_pretrained_r50_1x1x3_50e_k400tiny_rgb/'

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
# we also only use 2 videos per gpu
cfg.data.videos_per_gpu = cfg.data.videos_per_gpu // 16
cfg.optimizer.lr = cfg.optimizer.lr / 8 / 16
cfg.total_epochs = 30

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 10
# We can set the log print interval to reduce the the times of printing log
cfg.log_config.interval = 5

# Set seed thus the results are more reproducible
cfg.seed = 0
from mmcv.runner import set_random_seed
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=2,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips=None))
optimizer = dict(type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 30
checkpoint_config = dict(interval=10)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_2020

### Train a new recognizer

Finally, lets initialize the dataset and recognizer, then train a new recognizer!

In [4]:
import os.path as osp

from mmaction.datasets import build_dataset
from mmaction.models import build_model
from mmaction.apis import train_model

import mmcv

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the recognizer
model = build_model(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_model(model, datasets, cfg, distributed=False, validate=True)

Use load_from_torchvision loader


2021-09-20 16:19:47,945 - mmaction - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.weight', 'fc.bias'}
2021-09-20 16:19:51,620 - mmaction - INFO - load checkpoint from https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth
2021-09-20 16:19:51,621 - mmaction - INFO - Use load_from_http loader

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).
2021-09-20 16:19:51,772 - mmaction - INFO - Start running, host: nmc_costa@DESKTOP-C3GGF8U, work_dir: C:\Users\nmc_costa\Desktop\Desk\mmaction2\work_dirs\tsn_kinetics_pretrained_r50_1x1x3_50e_k400tiny_rgb
2021-09-20 16:19:51,772 - mmaction - INFO - 

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 1.7 task/s, elapsed: 6s, ETA:     0s

2021-09-20 16:20:51,939 - mmaction - INFO - Evaluating top_k_accuracy ...
2021-09-20 16:20:51,940 - mmaction - INFO - 
top1_acc	0.8000
top5_acc	1.0000
2021-09-20 16:20:51,941 - mmaction - INFO - Evaluating mean_class_accuracy ...


TypeError: y_real dtype must be np.int64, but got int32

## Test the trained recognizer

After finetuning the recognizer, let's check the prediction results!

In [5]:
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader
from mmcv.parallel import MMDataParallel

# Build a test dataloader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
model = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(model, data_loader)

eval_config = cfg.evaluation
eval_config.pop('interval')
eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 10/10, 1.3 task/s, elapsed: 8s, ETA:     0s
Evaluating top_k_accuracy ...

top1_acc	0.8000
top5_acc	1.0000

Evaluating mean_class_accuracy ...


TypeError: y_real dtype must be np.int64, but got int32