# Data & Environment Preparation

## Load Pre-processed data


In [None]:
!git clone https://github.com/VinceChin/group_project_data.git

Cloning into 'group_project_data'...
remote: Enumerating objects: 36672, done.[K
remote: Counting objects: 100% (3561/3561), done.[K
remote: Compressing objects: 100% (3558/3558), done.[K
remote: Total 36672 (delta 4), reused 3147 (delta 3), pack-reused 33111[K
Receiving objects: 100% (36672/36672), 1.86 GiB | 35.28 MiB/s, done.
Resolving deltas: 100% (15/15), done.
Updating files: 100% (32104/32104), done.


## Import Python Packages

In [None]:
#install dependencies: given that my colab has CUDA 11.8
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%pip install timm

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting timm
  Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.9.12


In [None]:
import random
import pandas as pd

## Import mmaction2 & other packages needed

In [None]:
# check nvcc version
!nvcc -V
# check GCC version
!gcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [None]:
# install MMEngine, MMCV and MMDetection using MIM

%pip install -U openmim
!mim install mmengine
!mim install "mmcv>=2.0.0"

# Install mmaction2
!rm -rf mmaction2
!git clone https://github.com/open-mmlab/mmaction2.git -b main
%cd mmaction2

!pip install -e .

# Install some optional requirements
!pip install -r requirements/optional.txt

Collecting openmim
  Downloading openmim-0.3.9-py2.py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 kB[0m [31m883.0 kB/s[0m eta [36m0:00:00[0m
Collecting colorama (from openmim)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting model-index (from openmim)
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting opendatalab (from openmim)
  Downloading opendatalab-0.0.10-py3-none-any.whl (29 kB)
Collecting ordered-set (from model-index->openmim)
  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Collecting pycryptodome (from opendatalab->openmim)
  Downloading pycryptodome-3.19.1-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting openxlab (from opendatalab->openmim)
  Downloading openxlab-0.0.33-py3-none-any.whl (299 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━

Looking in links: https://download.openmmlab.com/mmcv/dist/cu121/torch2.1.0/index.html
Collecting mmengine
  Downloading mmengine-0.10.2-py3-none-any.whl (450 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m450.4/450.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting addict (from mmengine)
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting yapf (from mmengine)
  Downloading yapf-0.40.2-py3-none-any.whl (254 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.7/254.7 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: addict, yapf, mmengine
Successfully installed addict-2.4.0 mmengine-0.10.2 yapf-0.40.2
Looking in links: https://download.openmmlab.com/mmcv/dist/cu121/torch2.1.0/index.html
Collecting mmcv>=2.0.0
  Downloading https://download.openmmlab.com/mmcv/dist/cu121/torch2.1.0/mmcv-2.1.0-cp310-cp310-manylinux1_x86_64.whl (94.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

## Check the result of Importing mmaction2

In [None]:
# Check Pytorch installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

# Check MMAction2 installation
import mmaction
print(mmaction.__version__)

# Check MMCV installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(get_compiling_cuda_version())
print(get_compiler_version())

# Check MMEngine installation
from mmengine.utils.dl_utils import collect_env
print(collect_env())

2.1.0+cu121 True
1.2.0
12.1
GCC 9.3
OrderedDict([('sys.platform', 'linux'), ('Python', '3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 12.2, V12.2.140'), ('GCC', 'x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0'), ('PyTorch', '2.1.0+cu121'), ('PyTorch compiling details', 'PyTorch built with:\n  - GCC 9.3\n  - C++ Version: 201703\n  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - LAPACK is enabled (usually provided by MKL)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 12.1\n  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute

## Download pre trained model

In [None]:
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
      -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth

--2024-01-10 10:58:59--  https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth
Resolving download.openmmlab.com (download.openmmlab.com)... 163.181.66.108, 163.181.66.111, 163.181.66.109, ...
Connecting to download.openmmlab.com (download.openmmlab.com)|163.181.66.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 97579339 (93M) [application/octet-stream]
Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’


2024-01-10 10:59:11 (8.55 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]



# Video Swin Transformer

## Self Custom Metric

In [None]:
from typing import Sequence, List

from mmengine.evaluator import BaseMetric
from mmengine.registry import METRICS
from sklearn.metrics import f1_score, recall_score, roc_auc_score, roc_curve

import numpy as np


@METRICS.register_module()  # registry
class MySelfCustomMetric(BaseMetric):
    default_prefix = 'SelfCustomMetric'  # set prefix showed in logs

    def process(self, data_batch: Sequence[dict], data_samples: Sequence[dict]):
        """Process one batch of data and predictions. The processed
        Results should be stored in `self.results`, which will be used
        to compute the metrics when all batches have been processed.

        Args:
            data_batch (Sequence[Tuple[Any, dict]]): A batch of data
                from the dataloader.
            data_samples (Sequence[dict]): A batch of outputs from
                the model.
        """
        data_samples = data_samples[0]

        result = {
            'pred': data_samples['pred_label'][0],
            'gt': data_samples['gt_label'][0],
            'pd_score': data_samples['pred_score'][1]
        }

        # save batch result in reuslt list
        self.results.append(result)

    def compute_metrics(self, results: List):
      print(results)

      preds = np.concatenate([np.expand_dims(res['pred'], axis=0) for res in results])
      gts = np.concatenate([np.expand_dims(res['gt'], axis=0) for res in results])
      pred_scores = np.concatenate([np.expand_dims(res['pd_score'], axis=0) for res in results])

      # accuracy
      acc = (preds == gts).sum() / preds.size

      # f1 score & recall
      f1 = f1_score(gts, preds, average='binary')
      recall = recall_score(gts, preds, average='binary')

      # auc & roc
      auc = roc_auc_score(gts, pred_scores)
      fpr, tpr, thresholds = roc_curve(gts, pred_scores)

      # return result
      return {
        'accuracy': acc,
        'f1_score': f1,
        'recall': recall,
        'auc': auc,
        'roc_curve': (fpr, tpr, thresholds)
      }

## Using Cropped Frames

In [None]:
from mmaction.apis import inference_recognizer, init_recognizer
from mmengine import Config
from mmengine.runner import set_random_seed

cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32_5x1x3-110e_kinetics400-flow.py')

dataset_type = 'RawframeDataset'

# Modify dataset type and path
cfg.data_root = '../group_project_data/output_yolo3'
cfg.data_root_val = '../group_project_data/output_yolo3'
cfg.ann_file_train = './Annotations/train.txt'
cfg.ann_file_val = './Annotations/val.txt'
cfg.ann_file_test = './Annotations/test.txt'

# Modify num classes of the model in cls_head
cfg.model.cls_head.num_classes = 2
cfg.clip_len = 30
#modify the clip_len and num_clip

# We can use the pre-trained TSN model
cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'

# modify the data_preprocessor
cfg.model.backbone.in_channels = 3
cfg.model.data_preprocessor.mean = [0.485, 0.456, 0.406]
cfg.model.data_preprocessor.std = [0.229, 0.224, 0.225]

#rewrite the pipline
train_pipeline = [
    dict(
        type='SampleFrames', clip_len=cfg.clip_len, frame_interval=1, num_clips=1),
    dict(type='RawFrameDecode', **cfg.file_client_args),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(64, 64), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=cfg.clip_len,
        frame_interval=1,
        test_mode=True),
    dict(type='RawFrameDecode', **cfg.file_client_args),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='CenterCrop', crop_size=64),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=cfg.clip_len,
        frame_interval=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='TenCrop', crop_size=64),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
cfg.test_evaluator = [dict(type='MySelfCustomMetric')]
cfg.train_pipeline = train_pipeline
cfg.val_pipeline = val_pipeline
cfg.test_pipeline = test_pipeline

frame_modality = 'RGB'
filename_template = 'img_{:05d}.jpg'

cfg.test_dataloader.dataset.ann_file = cfg.ann_file_test
cfg.test_dataloader.dataset.data_prefix.img = cfg.data_root
cfg.test_dataloader.dataset.modality = frame_modality
cfg.test_dataloader.dataset.filename_tmpl = filename_template
cfg.test_dataloader.dataset.pipeline=test_pipeline

cfg.train_dataloader.dataset.ann_file = cfg.ann_file_train
cfg.train_dataloader.dataset.data_prefix.img = cfg.data_root_val
cfg.train_dataloader.dataset.modality = frame_modality
cfg.train_dataloader.dataset.filename_tmpl = filename_template
cfg.train_dataloader.dataset.pipeline=train_pipeline

cfg.val_dataloader.dataset.ann_file = cfg.ann_file_val
cfg.val_dataloader.dataset.data_prefix.img  = cfg.data_root_val
cfg.val_dataloader.dataset.modality = frame_modality
cfg.val_dataloader.dataset.filename_tmpl = filename_template
cfg.val_dataloader.dataset.pipeline=val_pipeline

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 2
cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 2
cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 4
cfg.train_cfg.max_epochs = 10

cfg.train_dataloader.num_workers = 2
cfg.val_dataloader.num_workers = 2
cfg.test_dataloader.num_workers = 2

# We can initialize the logger for training and have a look
# at the final config used for training
# print(f'Config:\n{cfg.pretty_text}')

## Using Raw Frames

In [None]:
from mmaction.apis import inference_recognizer, init_recognizer
from mmengine import Config
from mmengine.runner import set_random_seed

cfg_raw = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32_5x1x3-110e_kinetics400-flow.py')

dataset_type = 'RawframeDataset'

# Modify dataset type and path
cfg_raw.data_root = '../group_project_data/output_frames'
cfg_raw.data_root_val = '../group_project_data/output_frames'
cfg_raw.ann_file_train = './Annotations/train.txt'
cfg_raw.ann_file_val = './Annotations/val.txt'
cfg_raw.ann_file_test = './Annotations/test.txt'

# Modify num classes of the model in cls_head
cfg_raw.model.cls_head.num_classes = 2
cfg_raw.clip_len = 30
#modify the clip_len and num_clip

# We can use the pre-trained TSN model
cfg_raw.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

# Set up working dir to save files and logs.
cfg_raw.work_dir = './tutorial_exps'

# modify the data_preprocessor
cfg_raw.model.backbone.in_channels = 3
cfg_raw.model.data_preprocessor.mean = [0.485, 0.456, 0.406]
cfg_raw.model.data_preprocessor.std = [0.229, 0.224, 0.225]

#rewrite the pipline
train_pipeline = [
    dict(
        type='SampleFrames', clip_len=cfg_raw.clip_len, frame_interval=1, num_clips=1),
    dict(type='RawFrameDecode', **cfg_raw.file_client_args),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(64, 64), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=cfg_raw.clip_len,
        frame_interval=1,
        test_mode=True),
    dict(type='RawFrameDecode', **cfg_raw.file_client_args),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='CenterCrop', crop_size=64),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=cfg_raw.clip_len,
        frame_interval=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 64)),
    dict(type='TenCrop', crop_size=64),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='PackActionInputs')
]
cfg_raw.train_pipeline = train_pipeline
cfg_raw.val_pipeline = val_pipeline
cfg_raw.test_pipeline = test_pipeline

cfg_raw.test_evaluator = [dict(type='MySelfCustomMetric')]

frame_modality = 'RGB'
filename_template = 'img_{:05d}.jpg'

cfg_raw.test_dataloader.dataset.ann_file = cfg_raw.ann_file_test
cfg_raw.test_dataloader.dataset.data_prefix.img = cfg_raw.data_root
cfg_raw.test_dataloader.dataset.modality = frame_modality
cfg_raw.test_dataloader.dataset.filename_tmpl = filename_template
cfg_raw.test_dataloader.dataset.pipeline=test_pipeline

cfg_raw.train_dataloader.dataset.ann_file = cfg_raw.ann_file_train
cfg_raw.train_dataloader.dataset.data_prefix.img = cfg_raw.data_root_val
cfg_raw.train_dataloader.dataset.modality = frame_modality
cfg_raw.train_dataloader.dataset.filename_tmpl = filename_template
cfg_raw.train_dataloader.dataset.pipeline=train_pipeline

cfg_raw.val_dataloader.dataset.ann_file = cfg_raw.ann_file_val
cfg_raw.val_dataloader.dataset.data_prefix.img  = cfg_raw.data_root_val
cfg_raw.val_dataloader.dataset.modality = frame_modality
cfg_raw.val_dataloader.dataset.filename_tmpl = filename_template
cfg_raw.val_dataloader.dataset.pipeline=val_pipeline

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
cfg_raw.train_dataloader.batch_size = cfg_raw.train_dataloader.batch_size // 2
cfg_raw.val_dataloader.batch_size = cfg_raw.val_dataloader.batch_size // 2
cfg_raw.optim_wrapper.optimizer.lr = cfg_raw.optim_wrapper.optimizer.lr / 8 / 4
cfg_raw.train_cfg.max_epochs = 10

cfg_raw.train_dataloader.num_workers = 2
cfg_raw.val_dataloader.num_workers = 2
cfg_raw.test_dataloader.num_workers = 2

# We can initialize the logger for training and have a look
# at the final config used for training
# print(f'Config:\n{cfg_raw.pretty_text}')


# Experiments

## Spilt the DataSet into Train, Validation and Test

In [None]:
from pickle import NONE
import shutil

def split_data_with_oversampling(videos_dict, do_oversampling=False):
  print(videos_dict)
  type_0_keys = [key for key, value in videos_dict.items() if value['video_type'] == 0]
  type_1_keys = [key for key, value in videos_dict.items() if value['video_type'] == 1]

  random.shuffle(type_0_keys)
  type_1_keys.extend(type_0_keys[2:])
  random.shuffle(type_1_keys)

  split_idx_1 = int(0.8 * len(type_1_keys))
  split_idx_2 = int(0.9 * len(type_1_keys))

  # at least each kind of dataset should have at least 1 type 0 videos
  train_keys = type_1_keys[:split_idx_1]
  train_keys.append(type_0_keys[0])
  valid_keys = type_1_keys[split_idx_1:split_idx_2]
  train_keys.append(type_0_keys[1])
  test_keys = type_1_keys[split_idx_2:]
  test_keys.append(type_0_keys[2])

  train_folders = [folder for key in train_keys for folder in videos_dict[key]['frame_folders']]
  valid_folders = [folder for key in valid_keys for folder in videos_dict[key]['frame_folders']]
  test_folders = [folder for key in test_keys for folder in videos_dict[key]['frame_folders']]

  if do_oversampling:
    type_0_frame_count = sum(len(videos_dict[key]['frame_folders']) for key in type_0_keys)
    type_1_frame_count = sum(len(videos_dict[key]['frame_folders']) for key in type_1_keys[:split_idx_1])

    if type_0_frame_count < type_1_frame_count:
      oversampling_count = type_1_frame_count - type_0_frame_count
      oversampled_folders = random.choices([folder for key in type_0_keys for folder in videos_dict[key]['frame_folders']], k=oversampling_count)
      train_folders.extend(oversampled_folders)

  random.shuffle(train_folders)
  return train_folders, valid_folders, test_folders

# 使用函数进行数据划分，选择是否进行 oversampling
import yaml
import os

def spilt_dataset_and_create_annotations(yaml_path, annotation_path, do_oversampling):
  video_dict = {}
  with open(yaml_path, 'r') as file:
    video_dict = yaml.safe_load(file)
  train_folders, valid_folders, test_folders = split_data_with_oversampling(video_dict, do_oversampling=True)

  # 创建 Annotations 文件夹（如果不存在）
  if os.path.exists(annotation_path):
    shutil.rmtree(annotation_path)

  annotations_dir = annotation_path
  os.makedirs(annotations_dir, exist_ok=True)

  # create annotations
  def write_annotations(filename, video_dict, frame_folders):
    with open(os.path.join(annotations_dir, filename), 'w') as file:
        for folder in frame_folders:
          parts = folder.split('_')
          video = '_'.join(parts[:-2])
          video_type = video_dict[video]['video_type']
          file.write(f"{folder} 30 {video_type}\n")

  write_annotations('train.txt', video_dict, train_folders)
  write_annotations('val.txt', video_dict, valid_folders)
  write_annotations('test.txt', video_dict, test_folders)




## Create & Set seed


In [None]:
# seeds
import random
random_seeds = [10, 42, 50, 66, 70]

result_dict = {'OverSampling+Cropped' : None,
               'Cropped': None,
               'Oversampling': None,
               'Baseline': None}


#set seeds
def set_seeds(seeds):
  torch.manual_seed(seeds)

  torch.cuda.manual_seed(seeds)
  torch.cuda.manual_seed_all(seeds)

  random.seed(seeds)
  np.random.seed(seeds)

  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False



## With Oversampling

In [None]:
import os.path as osp
import mmengine
from mmengine.runner import Runner
from mmengine.logging import print_log
import matplotlib.pyplot as plt

result_list = []

# split the datasets
for i in range(len(random_seeds)):
  # set seeds
  set_seeds(random_seeds[i])

  yaml_path = '../group_project_data/sample.yaml'
  annotation_path = './Annotations'
  spilt_dataset_and_create_annotations(yaml_path, annotation_path, do_oversampling=True)
  experiment_annotation_path = f'./experiment-annotations/Cropped_Oversampled/experiment_{i}'
  # save current annotations
  if osp.exists(experiment_annotation_path):
    shutil.rmtree(experiment_annotation_path)
  shutil.copytree(annotation_path, experiment_annotation_path)

  mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))
  # build the runner
  runner = Runner.from_cfg(cfg)
  # start training
  runner.train()
  result_list.append(runner.test())

result_dict['Oversampling+Cropped'] = result_list

{'1': {'frame_folders': ['1_1_0', '1_1_1', '1_1_2', '1_1_3', '1_2_0', '1_2_1', '1_3_0', '1_3_1', '1_3_2', '1_4_0', '1_4_1', '1_4_2', '1_4_3', '1_5_0', '1_5_1', '1_5_2', '1_5_3', '1_5_4', '1_6_0', '1_7_0', '1_7_1', '1_7_2'], 'video_type': 1}, '10': {'frame_folders': ['10_34_0', '10_34_1', '10_34_2', '10_34_3', '10_34_4', '10_35_0', '10_35_1', '10_35_2', '10_36_0', '10_36_1', '10_37_0', '10_37_1', '10_38_0', '10_38_1', '10_38_2', '10_39_0', '10_39_1', '10_39_2', '10_39_3', '10_39_4', '10_40_0', '10_40_1', '10_41_0', '10_41_1'], 'video_type': 1}, '11': {'frame_folders': ['11_42_0'], 'video_type': 1}, '12': {'frame_folders': ['12_43_0', '12_44_0', '12_44_1', '12_44_2', '12_44_3', '12_45_0', '12_45_1', '12_45_2', '12_45_3'], 'video_type': 0}, '13': {'frame_folders': ['13_46_0', '13_49_0', '13_49_1', '13_49_2', '13_49_3', '13_49_4', '13_50_0'], 'video_type': 1}, '14': {'frame_folders': ['14_51_0', '14_52_0', '14_52_1', '14_52_2', '14_52_3', '14_53_0', '14_53_1', '14_53_2', '14_53_3', '14_53_

KeyboardInterrupt: 

## Without oversampling

In [None]:
import os.path as osp
import mmengine
from mmengine.runner import Runner
import matplotlib.pyplot as plt

result_list = []
# split the datasets
for i in range(len(random_seeds)):
  set_seeds(random_seeds[i])
  yaml_path = '../group_project_data/sample.yaml'
  annotation_path = './Annotations'
  spilt_dataset_and_create_annotations(yaml_path, annotation_path, do_oversampling=False)

  # Create work_dir
  mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))

  # build the runner
  runner = Runner.from_cfg(cfg)

  # start training
  runner.train()

  result_list.append(runner.test())

result_dict['Cropped'] = result_list

{'1': {'frame_folders': ['1_1_0', '1_1_1', '1_1_2', '1_1_3', '1_2_0', '1_2_1', '1_3_0', '1_3_1', '1_3_2', '1_4_0', '1_4_1', '1_4_2', '1_4_3', '1_5_0', '1_5_1', '1_5_2', '1_5_3', '1_5_4', '1_6_0', '1_7_0', '1_7_1', '1_7_2'], 'video_type': 1}, '10': {'frame_folders': ['10_34_0', '10_34_1', '10_34_2', '10_34_3', '10_34_4', '10_35_0', '10_35_1', '10_35_2', '10_36_0', '10_36_1', '10_37_0', '10_37_1', '10_38_0', '10_38_1', '10_38_2', '10_39_0', '10_39_1', '10_39_2', '10_39_3', '10_39_4', '10_40_0', '10_40_1', '10_41_0', '10_41_1'], 'video_type': 1}, '11': {'frame_folders': ['11_42_0'], 'video_type': 1}, '12': {'frame_folders': ['12_43_0', '12_44_0', '12_44_1', '12_44_2', '12_44_3', '12_45_0', '12_45_1', '12_45_2', '12_45_3'], 'video_type': 0}, '13': {'frame_folders': ['13_46_0', '13_49_0', '13_49_1', '13_49_2', '13_49_3', '13_49_4', '13_50_0'], 'video_type': 1}, '14': {'frame_folders': ['14_51_0', '14_52_0', '14_52_1', '14_52_2', '14_52_3', '14_53_0', '14_53_1', '14_53_2', '14_53_3', '14_53_

## Using Initial Frames + Over Sampling

In [None]:
import os.path as osp
import mmengine
from mmengine.runner import Runner
import matplotlib.pyplot as plt

result_list = []
# split the datasets
for i in range(len(random_seeds)):
  set_seeds(random_seeds[i])
  yaml_path = '../group_project_data/sample.yaml'
  annotation_path = './Annotations'
  spilt_dataset_and_create_annotations(yaml_path, annotation_path, do_oversampling=True)

  # Create work_dir
  mmengine.mkdir_or_exist(osp.abspath(cfg_raw.work_dir))

  # build the runner
  runner = Runner.from_cfg(cfg_raw)

  # start training
  runner.train()

  result_list.append(runner.test())

result_dict['Oversampling'] = result_list

{'1': {'frame_folders': ['1_1_0', '1_1_1', '1_1_2', '1_1_3', '1_2_0', '1_2_1', '1_3_0', '1_3_1', '1_3_2', '1_4_0', '1_4_1', '1_4_2', '1_4_3', '1_5_0', '1_5_1', '1_5_2', '1_5_3', '1_5_4', '1_6_0', '1_7_0', '1_7_1', '1_7_2'], 'video_type': 1}, '10': {'frame_folders': ['10_34_0', '10_34_1', '10_34_2', '10_34_3', '10_34_4', '10_35_0', '10_35_1', '10_35_2', '10_36_0', '10_36_1', '10_37_0', '10_37_1', '10_38_0', '10_38_1', '10_38_2', '10_39_0', '10_39_1', '10_39_2', '10_39_3', '10_39_4', '10_40_0', '10_40_1', '10_41_0', '10_41_1'], 'video_type': 1}, '11': {'frame_folders': ['11_42_0'], 'video_type': 1}, '12': {'frame_folders': ['12_43_0', '12_44_0', '12_44_1', '12_44_2', '12_44_3', '12_45_0', '12_45_1', '12_45_2', '12_45_3'], 'video_type': 0}, '13': {'frame_folders': ['13_46_0', '13_49_0', '13_49_1', '13_49_2', '13_49_3', '13_49_4', '13_50_0'], 'video_type': 1}, '14': {'frame_folders': ['14_51_0', '14_52_0', '14_52_1', '14_52_2', '14_52_3', '14_53_0', '14_53_1', '14_53_2', '14_53_3', '14_53_

## Raw Frames without Oversampling

In [None]:
import os.path as osp
import mmengine
from mmengine.runner import Runner
import matplotlib.pyplot as plt

result_list = []
# split the datasets
for i in range(len(random_seeds)):
  set_seeds(random_seeds[i])
  yaml_path = '../group_project_data/sample.yaml'
  annotation_path = './Annotations'
  spilt_dataset_and_create_annotations(yaml_path, annotation_path, do_oversampling=False)

  # Create work_dir
  mmengine.mkdir_or_exist(osp.abspath(cfg_raw.work_dir))

  # build the runner
  runner = Runner.from_cfg(cfg_raw)

  # start training
  runner.train()

  result_list.append(runner.test())

result_dict['Baseline'] = result_list

{'1': {'frame_folders': ['1_1_0', '1_1_1', '1_1_2', '1_1_3', '1_2_0', '1_2_1', '1_3_0', '1_3_1', '1_3_2', '1_4_0', '1_4_1', '1_4_2', '1_4_3', '1_5_0', '1_5_1', '1_5_2', '1_5_3', '1_5_4', '1_6_0', '1_7_0', '1_7_1', '1_7_2'], 'video_type': 1}, '10': {'frame_folders': ['10_34_0', '10_34_1', '10_34_2', '10_34_3', '10_34_4', '10_35_0', '10_35_1', '10_35_2', '10_36_0', '10_36_1', '10_37_0', '10_37_1', '10_38_0', '10_38_1', '10_38_2', '10_39_0', '10_39_1', '10_39_2', '10_39_3', '10_39_4', '10_40_0', '10_40_1', '10_41_0', '10_41_1'], 'video_type': 1}, '11': {'frame_folders': ['11_42_0'], 'video_type': 1}, '12': {'frame_folders': ['12_43_0', '12_44_0', '12_44_1', '12_44_2', '12_44_3', '12_45_0', '12_45_1', '12_45_2', '12_45_3'], 'video_type': 0}, '13': {'frame_folders': ['13_46_0', '13_49_0', '13_49_1', '13_49_2', '13_49_3', '13_49_4', '13_50_0'], 'video_type': 1}, '14': {'frame_folders': ['14_51_0', '14_52_0', '14_52_1', '14_52_2', '14_52_3', '14_53_0', '14_53_1', '14_53_2', '14_53_3', '14_53_

# Data Analysis

## Model Complexity
- FLOPs
- Parameters

In [None]:
import yaml
from mmengine.analysis import get_model_complexity_info

with open('output.yaml', 'w') as file:
    yaml.dump(result_dict, file, default_flow_style=False)

runner = Runner.from_cfg(cfg)
input_shape = (1,3,64,64)
analysis_results = get_model_complexity_info(runner.model, input_shape)


01/10 15:39:30 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
    CUDA available: True
    numpy_random_seed: 1135159399
    GPU 0: Tesla T4
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 12.2, V12.2.140
    GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 2.1.0+cu121
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_

In [None]:
print("Model Flops:{}".format(analysis_results['flops_str']))
print("Model Parameters:{}".format(analysis_results['params_str']))

Model Flops:0.338G
Model Parameters:23.512M


## Model Performance

In [None]:
result_dict



{'OverSampling+Cropped': None,
 'Cropped': [{'SelfCustomMetric/accuracy': 0.981651376146789,
   'SelfCustomMetric/f1_score': 0.9743589743589743,
   'SelfCustomMetric/recall': 0.95,
   'SelfCustomMetric/auc': 1.0,
   'SelfCustomMetric/roc_curve': (array([0., 0., 0., 1.]),
    array([0.   , 0.025, 1.   , 1.   ]),
    array([1.9920324e+00, 9.9203241e-01, 4.4566023e-01, 8.2085689e-04],
          dtype=float32))},
  {'SelfCustomMetric/accuracy': 1.0,
   'SelfCustomMetric/f1_score': 1.0,
   'SelfCustomMetric/recall': 1.0,
   'SelfCustomMetric/auc': 1.0,
   'SelfCustomMetric/roc_curve': (array([0., 0., 0., 1.]),
    array([0.        , 0.02564103, 1.        , 1.        ]),
    array([1.9787033 , 0.9787033 , 0.7749559 , 0.00270668], dtype=float32))},
  {'SelfCustomMetric/accuracy': 0.9928571428571429,
   'SelfCustomMetric/f1_score': 0.9929078014184397,
   'SelfCustomMetric/recall': 0.9859154929577465,
   'SelfCustomMetric/auc': 1.0,
   'SelfCustomMetric/roc_curve': (array([0., 0., 0., 1.]),
   

In [None]:
import pandas as pd
# Calculate the average for each metric
results = {}

for experiment, metrics in result_dict.items():
    df = pd.DataFrame(metrics)
    averages = df.mean()
    variances = df.var()
    results[experiment] = pd.concat([averages, variances], keys=['Average', 'Variance'])

# Create a dataframe to display the results in tabular form
results_df = pd.DataFrame(results)
results_df.transpose()

  averages = df.mean()
  variances = df.var()
  averages = df.mean()
  variances = df.var()
  averages = df.mean()
  variances = df.var()
  averages = df.mean()
  variances = df.var()


Unnamed: 0_level_0,Average,Average,Average,Average,Variance,Variance,Variance,Variance
Unnamed: 0_level_1,SelfCustomMetric/accuracy,SelfCustomMetric/f1_score,SelfCustomMetric/recall,SelfCustomMetric/auc,SelfCustomMetric/accuracy,SelfCustomMetric/f1_score,SelfCustomMetric/recall,SelfCustomMetric/auc
OverSampling+Cropped,,,,,,,,
Cropped,0.984486,0.975453,0.952751,1.0,0.000324,0.000449,0.001631,0.0
Oversampling,0.980884,0.974859,0.971429,1.0,0.000893,0.001284,0.004082,0.0
Baseline,0.980884,0.974859,0.971429,1.0,0.000893,0.001284,0.004082,0.0
Oversampling+Cropped,0.984486,0.975453,0.952751,1.0,0.000324,0.000449,0.001631,0.0
