<a href="https://colab.research.google.com/github/itdusty/blood_cells_classification/blob/main/video_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Data preparations

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install lightning
!pip install pytorchvideo

Collecting lightning
  Downloading lightning-2.0.8-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting arrow<3.0,>=1.2.0 (from lightning)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting backoff<4.0,>=2.2.1 (from lightning)
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Collecting croniter<1.5.0,>=1.3.0 (from lightning)
  Downloading croniter-1.4.1-py2.py3-none-any.whl (19 kB)
Collecting dateutils<2.0 (from lightning)
  Downloading dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Collecting deepdiff<8.0,>=5.7.0 (from lightning)
  Downloading deepdiff-6.5.0-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.3/71.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi<2.0,>=0.92.0 (from 

In [3]:
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler, labeled_video_dataset
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    UniformTemporalSubsample,
    Permute
)
from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize
)
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo
)



In [4]:
video_transforms = Compose([
    ApplyTransformToKey(key='video',
    transform=Compose([
        UniformTemporalSubsample(20),
        Normalize((0.45, 0.45, 0.45),(0.225, 0.225, 0.225)),
        RandomShortSideScale(min_size=248, max_size=256),
        CenterCropVideo(224),
        RandomHorizontalFlip(p=0.5),
    ]),
    ),
])

In [5]:
import pandas as pd
import numpy as np
import os
import shutil

In [6]:
from torch.utils.data import DataLoader

In [7]:
dataset_path = "/content/drive/MyDrive/Video classification/youtube_action_naudio"

In [8]:
os.chdir(dataset_path)
folders = os.listdir()
files_list = {
    "file": [],
    "label": []
}

In [9]:
for i, folder in enumerate(folders):
    os.chdir(folder)
    file_names = os.listdir()
    files_list["file"] += file_names
    files_list["label"] += [i for _ in range(len(file_names))]
    os.chdir("..")
os.chdir("..")


In [10]:
df_for_split = pd.DataFrame(files_list)
df_for_split

Unnamed: 0,file,label
0,v_jumping_01_01.avi,0
1,v_jumping_01_02.avi,0
2,v_jumping_01_03.avi,0
3,v_jumping_02_01.avi,0
4,v_jumping_01_04.avi,0
...,...,...
1642,v_walk_dog_24_05.avi,10
1643,v_walk_dog_25_01.avi,10
1644,v_walk_dog_25_02.avi,10
1645,v_walk_dog_25_03.avi,10


In [11]:
df_for_split = df_for_split.sample(frac=1)
ratio = 0.8
train_size = int(df_for_split.shape[0] * ratio)
train_data = df_for_split[0:train_size]
test_data = df_for_split[train_size:]
train_data, test_data

(                      file  label
 58     v_jumping_12_05.avi      0
 603     v_riding_05_08.avi      4
 768   v_shooting_02_01.avi      5
 970      v_swing_12_02.avi      6
 164     v_biking_08_05.avi      1
 ...                    ...    ...
 19     v_jumping_05_01.avi      0
 1051     v_swing_03_04.mpg      6
 47     v_jumping_10_06.avi      0
 243     v_biking_22_01.avi      1
 134     v_biking_04_01.avi      1
 
 [1317 rows x 2 columns],
                       file  label
 1097    v_tennis_02_06.avi      7
 716     v_riding_21_06.avi      4
 45     v_jumping_10_05.avi      0
 52     v_jumping_11_05.avi      0
 1640  v_walk_dog_24_04.avi     10
 ...                    ...    ...
 188     v_biking_13_04.avi      1
 714     v_riding_21_05.avi      4
 847   v_shooting_18_03.avi      5
 348     v_juggle_15_03.avi      2
 301     v_juggle_07_06.avi      2
 
 [330 rows x 2 columns])

In [12]:
os.chdir("youtube_action_naudio")

if not os.path.isdir("train"):
    os.mkdir("train")
    os.chdir("train")
    for folder in folders:
        os.mkdir(folder)
    os.chdir("..")
if not os.path.isdir("test"):
    os.mkdir("test")
    os.chdir("test")
    for folder in folders:
        os.mkdir(folder)
    os.chdir("..")

inner_folder = ""
for filename in train_data.file:
    if "shooting" in filename:
        inner_folder = "basketball"
    elif "biking" in filename:
        inner_folder = "biking"
    elif "diving" in filename:
        inner_folder = "diving"
    elif "golf" in filename:
        inner_folder = "golf_swing"
    elif "riding" in filename:
        inner_folder = "horse_riding"
    elif "juggle" in filename:
        inner_folder = "soccer_juggling"
    elif "swing" in filename:
        inner_folder = "swing"
    elif "tennis" in filename:
        inner_folder = "tennis_swing"
    elif "jumping" in filename:
        inner_folder = "trampoline_jumping"
    elif "spiking" in filename:
        inner_folder = "volleyball_spiking"
    elif "walk_dog" in filename:
        inner_folder = "walking"
    os.chdir(inner_folder)
    shutil.copyfile(filename, f"../train/{inner_folder}/{filename}")
    os.chdir("..")

for filename in test_data.file:
    if "shooting" in filename:
        inner_folder = "basketball"
    elif "biking" in filename:
        inner_folder = "biking"
    elif "diving" in filename:
        inner_folder = "diving"
    elif "golf" in filename:
        inner_folder = "golf_swing"
    elif "riding" in filename:
        inner_folder = "horse_riding"
    elif "juggle" in filename:
        inner_folder = "soccer_juggling"
    elif "swing" in filename:
        inner_folder = "swing"
    elif "tennis" in filename:
        inner_folder = "tennis_swing"
    elif "jumping" in filename:
        inner_folder = "trampoline_jumping"
    elif "spiking" in filename:
        inner_folder = "volleyball_spiking"
    elif "walk_dog" in filename:
        inner_folder = "walking"
    os.chdir(inner_folder)
    shutil.copyfile(filename, f"../test/{inner_folder}/{filename}")
    os.chdir("..")



os.chdir("..")

In [None]:
train_dataset = labeled_video_dataset(f"{dataset_path}/train/",
                                      clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transforms, decode_audio=False)


In [None]:
train_dataset.num_videos

1281

### Model

In [13]:
import torch
import torch.nn as nn
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics

In [23]:
class TestModel(LightningModule):
    def __init__(self, num_classes = 2):
        super(TestModel, self).__init__()
        # model architecture
        self.video_model = torch.hub.load("facebookresearch/pytorchvideo", "efficient_x3d_xs", pretrained=True)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(400, num_classes)

        self.lr = 1e-3
        self.batch_size = 8
        self.numworkers = 0
        # evaluation metric
        self.metric = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
        # loss function
        self.criterion = nn.CrossEntropyLoss()
        # helpers
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.testing_step_outputs = []

    def forward(self, x):
        x = self.video_model(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        opt = torch.optim.AdamW(params=self.parameters(), lr=self.lr)
        scheduler = CosineAnnealingLR(opt, T_max=10, eta_min=1e-6, last_epoch=-1)
        return {'optimizer': opt, 'lr_scheduler': scheduler}

    def train_dataloader(self):
        dataset = labeled_video_dataset(f"{dataset_path}/train/",
                                      clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transforms, decode_audio=False)
        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)
        return loader

    def training_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self.forward(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        self.training_step_outputs.append({'loss': loss, 'metric': metric})
        return {'loss': loss, 'metric': metric}

    def on_train_epoch_end(self):
        outputs = self.training_step_outputs
        loss = torch.stack([x['loss'] for x in outputs]).mean().cpu().detach().numpy().round(2)
        metric = torch.stack([x['metric'] for x in outputs]).mean().cpu().detach().numpy().round(2)
        self.log('train_loss', loss)
        self.log('train_metric', metric)

    def val_dataloader(self):
        dataset = labeled_video_dataset(f"{dataset_path}/test/",
                                      clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transforms, decode_audio=False)
        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)
        return loader

    def validation_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self.forward(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        self.validation_step_outputs.append({'loss': loss, 'metric': metric})
        return {'loss': loss, 'metric': metric}

    def on_validation_epoch_end(self):
        outputs = self.validation_step_outputs
        loss = torch.stack([x['loss'] for x in outputs]).mean().cpu().detach().numpy().round(2)
        metric = torch.stack([x['metric'] for x in outputs]).mean().cpu().detach().numpy().round(2)
        self.log('val_loss', loss)
        self.log('val_metric', metric)

    def test_dataloader(self):
        dataset = labeled_video_dataset(f"{dataset_path}/test/",
                                      clip_sampler=make_clip_sampler('random', 2),
                                      transform=video_transforms, decode_audio=False)
        loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)
        return loader

    def test_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self.forward(video)
        self.testing_step_outputs.append({'label': label, 'pred': out})
        return {'label': label, 'pred': out}

    def on_test_epoch_end(self):
        outputs = self.testing_step_outputs
        label = torch.cat([x['label'] for x in outputs]).cpu().detach().numpy()
        pred = torch.cat([x['pred'].argmax(dim=1) for x in outputs]).cpu().detach().numpy()
        print(classification_report(label, pred))

In [24]:
checkpoint_callback = ModelCheckpoint(monitor="val_loss", dirpath="checkpoints", filename="file", save_last=True)
lr_monitor = LearningRateMonitor(logging_interval="epoch")

In [25]:
model = TestModel(num_classes=11)
seed_everything(0)
trainer = Trainer(max_epochs=1,
                  precision=16,
                  accumulate_grad_batches=2,
                  enable_progress_bar=True,
                  num_sanity_val_steps=0,
                  callbacks=[lr_monitor, checkpoint_callback])

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main
INFO:lightning_fabric.utilities.seed:Global seed set to 0
  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [26]:
trainer.fit(model)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type               | Params
---------------------------------------------------
0 | video_model | EfficientX3d       | 3.8 M 
1 | relu        | ReLU               | 0     
2 | linear      | Linear             | 4.4 K 
3 | metric      | MulticlassAccuracy | 0     
4 | criterion   | CrossEntropyLoss   | 0     
---------------------------------------------------
3.8 M     Trainable params
0         Non-trainable params
3.8 M     Total params
15.195    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.


In [27]:
trainer.validate(model)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

[{'val_loss': 0.20000000298023224, 'val_metric': 0.9399999976158142}]

In [28]:
trainer.test(model)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.97      0.94      0.96        34
           1       0.96      0.96      0.96        23
           2       1.00      1.00      1.00        31
           3       0.76      1.00      0.86        25
           4       0.93      0.96      0.95        27
           5       1.00      0.84      0.91        37
           6       0.89      0.96      0.93        26
           7       0.98      1.00      0.99        41
           8       0.95      0.90      0.93        21
           9       1.00      1.00      1.00        28
          10       1.00      0.85      0.92        26

    accuracy                           0.95       319
   macro avg       0.95      0.95      0.94       319
weighted avg       0.95      0.95      0.95       319



[{}]

In [29]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
%tensorboard --logdir lightning_logs

<IPython.core.display.Javascript object>