### Perform inference with an MMAction2 recognizer
### Train a new recognizer on a new dataset

In [None]:
# install dependencies: (use cu111 because colab has CUDA 11.1)
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html

# Install mmaction2
!rm -rf mmaction2
!git clone https://github.com/open-mmlab/mmaction2.git
%cd mmaction2

!pip install -e .

# Install some optional requirements
!pip install -r requirements/optional.txt

In [None]:
import torch, torchvision
import mmaction
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

In [None]:
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_1x1x8_100e_kinetics400_rgb_20200702-a77f4328.pth \
      -O checkpoints/tsm_r50_video_1x1x8_100e_kinetics400_rgb_20200702-a77f4328.pth

In [None]:
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmaction/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb_20200810-4a146a70.pth \
      -O checkpoints/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb_20200810-4a146a70.pth

In [None]:
from mmaction.apis import inference_recognizer, init_recognizer

# Build a recognizer from a stock config and the matching downloaded weights.
# Config file describing the network (paths are relative to the mmaction2 checkout).
config = 'configs/recognition/tin/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py'
# Weights file fetched into ./checkpoints by the download cell.
checkpoint = 'checkpoints/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb_20200810-4a146a70.pth'
# Alternative: a checkpoint written to the training work dir.
# checkpoint = 'tutorial_exps/epoch_10.pth'
# Instantiate the recognizer on the first GPU; `model` is reused by later cells.
model = init_recognizer(config, checkpoint, device='cuda:0') # pre-trained model

load checkpoint from local path: checkpoints/tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb_20200810-4a146a70.pth


In [None]:
from google.colab import drive
# Mount Google Drive; later cells read the dataset, config and checkpoints from
# ../drive/MyDrive/IEMS5910 (relative to the mmaction2 checkout).
drive.mount('/content/drive')
# e.g. drive/MyDrive/IEMS5910/rgbDataset

Mounted at /content/drive


In [None]:
!mkdir checkpoints
!cp ../drive/MyDrive/IEMS5910/tsm_r50_340x256_1x1x16_50e_kinetics400_rgb_20201011-2f27f229.pth checkpoints/

In [None]:
# List the grayDataset folder on Drive to check what it contains.
!dir ../drive/MyDrive/IEMS5910/grayDataset

train  trainVideo.txt  val  valVideo.txt


In [None]:
!rm -r rgbDataset

In [None]:
# Copy the grayEvent50 dataset from Drive into the working directory under the
# name `rgbDataset`, which is the path the config and inference cells use.
!cp -r ../drive/MyDrive/IEMS5910/grayEvent50 rgbDataset

In [None]:
# Display the recognizer object (its repr) to inspect the loaded network.
model

In [None]:
# Run the loaded recognizer on one training clip and translate the predicted
# class indices into Kinetics-400 class names.
video = 'rgbDataset/train/V08a4.avi' # fps 25
label = 'tools/data/kinetics/label_map_k400.txt'

# Each entry of the result is indexed below as (class index, score).
results = inference_recognizer(model, video) # model is Recognizer2D

# One class name per line; the context manager closes the file once it is read.
with open(label) as label_file:
    labels = [line.strip() for line in label_file]

# Replace every class index with its human-readable name.
results = [(labels[k[0]], k[1]) for k in results]

In [None]:
# Print every predicted class name followed by its score.
for class_name, score in results:
    print(f'{class_name}: ', score)

In [None]:
# Install the `tree` utility and print the directory layout of the local dataset.
!apt-get -q install tree
!tree rgbDataset

In [None]:
# Print the training annotation list (the file used as cfg.ann_file_train).
!cat rgbDataset/trainVideo.txt

In [None]:
!rm ./configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py
!cp ../drive/MyDrive/IEMS5910/tsm_r50_1x1x16_50e_kinetics400_rgb.py ./configs/recognition/tsm

In [None]:
# Load the TSM config file from the checkout into `cfg`.
# NOTE(review): the notes below read like a reminder that the train/val/test
# pipelines in that file should start with dict(type='DecordInit') — confirm.
# train_pipeline, test_pipeline, val_pipeline
# dict(type='DecordInit'),
# dict(type='DecordInit'),
from mmcv import Config
cfg = Config.fromfile('./configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py')

In [None]:
from mmcv import Config
from mmcv.runner import set_random_seed

# Start from a freshly loaded config so this cell can be re-run safely: the
# learning-rate scaling below is relative and would otherwise compound on
# every execution of the cell.
cfg = Config.fromfile('./configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py')

# Modify dataset type and path
cfg.dataset_type = 'VideoDataset'
cfg.data_root = 'rgbDataset/train'
cfg.data_root_val = 'rgbDataset/val'
cfg.ann_file_train = 'rgbDataset/trainVideo.txt'
cfg.ann_file_val = 'rgbDataset/valVideo.txt'
cfg.ann_file_test = 'rgbDataset/valVideo.txt'

# Per-split dataset settings, derived from the top-level fields above so each
# path is written only once. The test split reuses the validation videos.
cfg.data.test.type = cfg.dataset_type
cfg.data.test.ann_file = cfg.ann_file_test
cfg.data.test.data_prefix = cfg.data_root_val

cfg.data.train.type = cfg.dataset_type
cfg.data.train.ann_file = cfg.ann_file_train
cfg.data.train.data_prefix = cfg.data_root

cfg.data.val.type = cfg.dataset_type
cfg.data.val.ann_file = cfg.ann_file_val
cfg.data.val.data_prefix = cfg.data_root_val

# The flag is used to determine whether it is omnisource training
cfg.setdefault('omnisource', False)
# Modify num classes of the model in cls_head
cfg.model.cls_head.num_classes = 31
# Initialise from the Kinetics-400 pre-trained TSM weights
cfg.load_from = './checkpoints/tsm_r50_340x256_1x1x16_50e_kinetics400_rgb_20201011-2f27f229.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
# cfg.data.videos_per_gpu is left at the value from the config file.
# NOTE(review): the original notes mention CUDA out-of-memory errors tied to
# videos_per_gpu; lower it here if that happens.
cfg.optimizer.lr = cfg.optimizer.lr / 8
# NOTE(review): the printed config shows lr_config step=[20, 40]; with 20 epochs
# the first LR decay step is never reached — confirm this is intended.
cfg.total_epochs = 20

# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 5
# We can set the log print interval to reduce the number of log lines printed
cfg.log_config.interval = 5

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
cfg.gpu_ids = range(1)

# Save the best
cfg.evaluation.save_best = 'auto'


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=8,
        num_segments=16),
    cls_head=dict(
        type='TSMHead',
        num_classes=31,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=True,
        num_segments=16),
    train_cfg=None,
    test_cfg=dict(average_clips='prob'))
optimizer = dict(
    type='SGD',
    constructor='TSMOptimizerConstructor',
    paramwise_cfg=dict(fc_lr5=True),
    lr=0.0009375,
    momentum=0.9,
    weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
lr_config = dict(policy='step', step=[20, 40])
total_epochs = 20
checkpoint_config = dict(interval=5)
log_config = dict(interval=5, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='n

In [None]:
from mmaction.apis import inference_recognizer, init_recognizer
# Select which checkpoint to evaluate. The active path and the commented-out
# alternatives all point at runs stored on Drive, except the first, which is
# the local training work dir.
# checkpoint = 'tutorial_exps/best_top1_acc_epoch_15.pth'
checkpoint = '../drive/MyDrive/IEMS5910/logs_ouput/grayE50_tsm16/best_top1_acc_epoch_10.pth'
# checkpoint = '../drive/MyDrive/IEMS5910/logs_ouput/grayE_tsm16/best_top1_acc_epoch_10.pth'
# checkpoint = '../drive/MyDrive/IEMS5910/logs_ouput/gray_tsm16_10train3val/best_top1_acc_epoch_10.pth'
# checkpoint = '../drive/MyDrive/IEMS5910/logs_ouput/grayE10_tsm16/best_top1_acc_epoch_15.pth'
# Rebuild the recognizer from the customised `cfg` and load the selected weights.
model = init_recognizer(cfg, checkpoint, device='cuda:0') # recognizer with the selected checkpoint loaded

load checkpoint from local path: ../drive/MyDrive/IEMS5910/logs_ouput/grayE50_tsm16/best_top1_acc_epoch_10.pth


In [None]:
# Spot-check the recognizer on a single validation clip.
inference_recognizer(model, "rgbDataset/val/V02a30.avi")
# NOTE(review): the list below looks like action ids of interest; its meaning is
# not evident from this notebook.
# a04,a05,a19,a20,a21,a23,a26,a30

False

In [None]:
# Load the action-id table: tab-separated fields per line, first line skipped
# (presumably a header row — confirm against the file).
label = '../drive/MyDrive/IEMS5910/idAction.txt'
# The context manager closes the file as soon as it has been read.
with open(label) as label_file:
    labels = [line.strip().split('\t') for line in label_file][1:]

In [None]:
import os


def action_id_from_filename(filename):
    """Return the action id encoded in a video file name.

    File names look like ``V08a4.avi``; the id is the integer found between
    the first ``'a'`` and the first ``'.'`` of the name.

    Raises:
        ValueError: if that part of the name is not an integer.
    """
    return int(filename[filename.find('a') + 1:filename.find('.')])


# Directory whose .avi files are scored. Point this at "rgbDataset/train" to
# measure accuracy on the training split instead.
VIDEO_DIR = "rgbDataset/val"

top1 = 0
top5 = 0
total = 0
for dirpath, _dirnames, filenames in os.walk(VIDEO_DIR):
  for fn in filenames:
    if not fn.endswith('.avi'):
      continue
    total += 1
    # Predicted labels in ranked order; +1 because the ids parsed from the
    # file names are compared against label index + 1.
    ranked_ids = [pair[0] + 1
                  for pair in inference_recognizer(model, os.path.join(dirpath, fn))]
    groundtruth = action_id_from_filename(fn)
    # action = labels[groundtruth-1][1]
    if ranked_ids[0] == groundtruth:
      top1 += 1
    if groundtruth in ranked_ids:
      top5 += 1

# Report the result instead of relying on hand-toggled print statements, and
# avoid dividing by zero when the directory holds no videos.
if total:
  print(f'top1 accuracy in {VIDEO_DIR}: ', top1 / total)
  print(f'top5 accuracy in {VIDEO_DIR}: ', top5 / total)
else:
  print(f'No .avi files found under {VIDEO_DIR}')

# First we need to load correct dataset

# print('top1 accuracy in val event_tsm16: ',top1/total) # 0.4946
# print('top5 accuracy in val event_tsm16: ',top5/total) # 0.7957
# print('top1 accuracy in train event_tsm16: ',top1/total) # 0.6387
# print('top5 accuracy in train event_tsm16: ',top5/total) # 0.9354

# print('top1 accuracy in val gray_tsm16: ',top1/total) # 0.5269
# print('top5 accuracy in val gray_tsm16: ',top5/total) # 0.8817
# print('top1 accuracy in train gray_tsm16: ',top1/total) # 0.8742
# print('top5 accuracy in train gray_tsm16: ',top5/total) # 0.9935

# print('top1 accuracy in val grayE10_tsm16: ',top1/total) # 0.4409
# print('top5 accuracy in val grayE10_tsm16: ',top5/total) # 0.8925
# print('top1 accuracy in train grayE10_tsm16: ',top1/total) # 0.8839
# print('top5 accuracy in train grayE10_tsm16: ',top5/total) # 0.9903

# print('top1 accuracy in val grayE20_tsm16: ',top1/total) # 0.5484
# print('top5 accuracy in val grayE20_tsm16: ',top5/total) # 0.9032
# print('top1 accuracy in train grayE20_tsm16: ',top1/total) # 0.9194
# print('top5 accuracy in train grayE20_tsm16: ',top5/total) # 0.9935

# print('top1 accuracy in val grayE50_tsm16: ',top1/total) # 0.4946
# print('top5 accuracy in val grayE50_tsm16: ',top5/total) # 0.8817
# print('top1 accuracy in train grayE50_tsm16: ',top1/total) # 0.8484
# print('top5 accuracy in train grayE50_tsm16: ',top5/total) # 0.9903

# print('top1 accuracy in val grayE100_tsm16: ',top1/total) # 0.5699
# print('top5 accuracy in val grayE100_tsm16: ',top5/total) # 0.9032
# print('top1 accuracy in train grayE100_tsm16: ',top1/total) # 0.8194
# print('top5 accuracy in train grayE100_tsm16: ',top5/total) # 0.9871 

# top1 accuracy in val eflip_tsm16:  0.521505376344086
# top5 accuracy in val eflip_tsm16:  0.8387096774193549
# top1 accuracy in train eflip_tsm16:  0.7387096774193549
# top5 accuracy in train eflip_tsm16:  0.9725806451612903


top1 accuracy in train grayE50_tsm16:  0.8483870967741935
top5 accuracy in train grayE50_tsm16:  0.9903225806451613


In [None]:
# Reminder of how the video pipeline in the config file begins:
# train_pipeline = [
#     dict(type='DecordInit'),
#     dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
#     dict(type='DecordDecode'),
#     # dict(type='RawFrameDecode'),
#     dict(type='Resize', scale=(-1, 256)),
import os.path as osp

import mmcv
from mmaction.apis import train_model
from mmaction.datasets import build_dataset
from mmaction.models import build_model

# Training split, wrapped in a list because that is what train_model is given.
datasets = [build_dataset(cfg.data.train)]

# Fresh recognizer described by the customised config.
model = build_model(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'))

# Make sure the output directory exists before training writes to it.
work_dir = osp.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir)

# Single-process training with validation enabled.
train_model(model, datasets, cfg, distributed=False, validate=True)

2022-12-08 02:00:55,043 - mmaction - INFO - These parameters in pretrained checkpoint are not loaded: {'fc.bias', 'fc.weight'}
2022-12-08 02:00:55,097 - mmaction - INFO - load checkpoint from local path: ./checkpoints/tsm_r50_340x256_1x1x16_50e_kinetics400_rgb_20201011-2f27f229.pth


load checkpoint from torchvision path: torchvision://resnet50



size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([31, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([31]).
2022-12-08 02:00:55,201 - mmaction - INFO - Start running, host: root@a9ffd3a0dfeb, work_dir: /content/mmaction2/tutorial_exps
2022-12-08 02:00:55,204 - mmaction - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH   ) StepLrUpdaterHook                  
(NORMAL      ) CheckpointHook                     
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 -------------------- 
before_train_epoch:
(VERY_HIGH   ) StepLrUpdaterHook                  
(LOW         ) IterTimerHook                      
(LOW         ) EvalHook                           
(VERY_LOW    ) TextLoggerHook                     
 ---

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 93/93, 6.5 task/s, elapsed: 14s, ETA:     0s

2022-12-08 02:07:11,671 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-12-08 02:07:11,676 - mmaction - INFO - 
top1_acc	0.1505
top5_acc	0.5376
2022-12-08 02:07:11,677 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-12-08 02:07:11,682 - mmaction - INFO - 
mean_acc	0.1505
2022-12-08 02:07:12,538 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_5.pth.
2022-12-08 02:07:12,540 - mmaction - INFO - Best top1_acc is 0.1505 at 5 epoch.
2022-12-08 02:07:12,547 - mmaction - INFO - Epoch(val) [5][16]	top1_acc: 0.1505, top5_acc: 0.5376, mean_class_accuracy: 0.1505
2022-12-08 02:07:22,520 - mmaction - INFO - Epoch [6][5/52]	lr: 9.375e-04, eta: 0:17:31, time: 1.993, data_time: 0.665, memory: 10390, top1_acc: 0.2667, top5_acc: 0.5667, loss_cls: 2.4371, loss: 2.4371, grad_norm: 12.3360
2022-12-08 02:07:29,258 - mmaction - INFO - Epoch [6][10/52]	lr: 9.375e-04, eta: 0:17:24, time: 1.348, data_time: 0.009, memory: 10390, top1_acc: 0.4000, top5_acc: 0.7000, loss_

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 93/93, 7.1 task/s, elapsed: 13s, ETA:     0s

2022-12-08 02:13:31,383 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-12-08 02:13:31,390 - mmaction - INFO - 
top1_acc	0.2366
top5_acc	0.6559
2022-12-08 02:13:31,391 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-12-08 02:13:31,393 - mmaction - INFO - 
mean_acc	0.2366
2022-12-08 02:13:31,427 - mmaction - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_top1_acc_epoch_5.pth was removed
2022-12-08 02:13:32,277 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_10.pth.
2022-12-08 02:13:32,290 - mmaction - INFO - Best top1_acc is 0.2366 at 10 epoch.
2022-12-08 02:13:32,292 - mmaction - INFO - Epoch(val) [10][16]	top1_acc: 0.2366, top5_acc: 0.6559, mean_class_accuracy: 0.2366
2022-12-08 02:13:42,075 - mmaction - INFO - Epoch [11][5/52]	lr: 9.375e-04, eta: 0:11:38, time: 1.952, data_time: 0.628, memory: 10390, top1_acc: 0.3000, top5_acc: 0.8000, loss_cls: 1.8005, loss: 1.8005, grad_norm: 14.3935
2022-12-08 02:13:48,749 - mma

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 93/93, 7.5 task/s, elapsed: 12s, ETA:     0s

2022-12-08 02:19:49,771 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-12-08 02:19:49,776 - mmaction - INFO - 
top1_acc	0.3118
top5_acc	0.6989
2022-12-08 02:19:49,780 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-12-08 02:19:49,782 - mmaction - INFO - 
mean_acc	0.3118
2022-12-08 02:19:49,830 - mmaction - INFO - The previous best checkpoint /content/mmaction2/tutorial_exps/best_top1_acc_epoch_10.pth was removed
2022-12-08 02:19:50,845 - mmaction - INFO - Now best checkpoint is saved as best_top1_acc_epoch_15.pth.
2022-12-08 02:19:50,847 - mmaction - INFO - Best top1_acc is 0.3118 at 15 epoch.
2022-12-08 02:19:50,853 - mmaction - INFO - Epoch(val) [15][16]	top1_acc: 0.3118, top5_acc: 0.6989, mean_class_accuracy: 0.3118
2022-12-08 02:20:00,907 - mmaction - INFO - Epoch [16][5/52]	lr: 9.375e-04, eta: 0:05:45, time: 2.009, data_time: 0.686, memory: 10390, top1_acc: 0.4667, top5_acc: 0.8000, loss_cls: 1.6685, loss: 1.6685, grad_norm: 16.2900
2022-12-08 02:20:07,611 - mm

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 93/93, 8.6 task/s, elapsed: 11s, ETA:     0s

2022-12-08 02:26:06,871 - mmaction - INFO - Evaluating top_k_accuracy ...
2022-12-08 02:26:06,877 - mmaction - INFO - 
top1_acc	0.3011
top5_acc	0.6559
2022-12-08 02:26:06,879 - mmaction - INFO - Evaluating mean_class_accuracy ...
2022-12-08 02:26:06,886 - mmaction - INFO - 
mean_acc	0.3011
2022-12-08 02:26:06,888 - mmaction - INFO - Epoch(val) [20][16]	top1_acc: 0.3011, top5_acc: 0.6559, mean_class_accuracy: 0.3011


In [None]:
# Back up the training work dir (where the checkpoints are saved) to Drive.
!cp -r ./tutorial_exps ../drive/MyDrive/IEMS5910/logs_ouput/

In [None]:
from mmaction.apis import single_gpu_test
from mmaction.datasets import build_dataloader, build_dataset
from mmcv.parallel import MMDataParallel

# Build a test dataloader
dataset = build_dataset(cfg.data.test, dict(test_mode=True))
data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

# Wrap the recognizer for single-GPU inference and collect its outputs.
modelPL = MMDataParallel(model, device_ids=[0])
outputs = single_gpu_test(modelPL, data_loader)

# Work on a copy so cfg.evaluation stays intact and the cell can be re-run.
# 'interval' and 'save_best' are set on cfg.evaluation for training and are
# dropped before the remaining options are passed to dataset.evaluate.
eval_config = dict(cfg.evaluation)
for hook_only_key in ('interval', 'save_best'):
    eval_config.pop(hook_only_key, None)

eval_res = dataset.evaluate(outputs, **eval_config)
for name, val in eval_res.items():
    print(f'{name}: {val:.04f}')

In [None]:
import gc
# Run a garbage-collection pass, then clear PyTorch's CUDA cache.
# Relies on `torch` having been imported in an earlier cell.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# model.backbone.conv1.conv = torch.nn.Conv2d(1,64,kernel_size=(7,7), stride=(2,2), padding=(3,3),bias=False)