### Data preparation

In [None]:
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler, labeled_video_dataset
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    UniformTemporalSubsample,
    Permute
)
from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    Resize
)
from torchvision.transforms._transforms_video import (
    CenterCropVideo, 
    NormalizeVideo
)

In [None]:
# Preprocessing applied to the 'video' entry of every sample dict produced by the dataset.
video_transforms = Compose([
    ApplyTransformToKey(
        key='video',
        transform=Compose([
            # Keep 20 evenly spaced frames from each clip.
            UniformTemporalSubsample(20),
            # The mean/std below are expressed for pixel values in [0, 1]; the decoded
            # frames are on a 0-255 scale (per the pytorchvideo tutorial), so rescale first.
            Lambda(lambda x: x / 255.0),
            Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
            RandomShortSideScale(min_size=248, max_size=256),
            CenterCropVideo(224),
            RandomHorizontalFlip(p=0.5),
        ]),
    ),
])

In [None]:
import pandas as pd
import numpy as np
import os
import shutil

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Absolute location of the dataset root. A raw string keeps the Windows backslashes
# literal instead of relying on invalid escape sequences (\P, \M, \V, \y), which
# newer Python versions warn about and will eventually reject.
dataset_path = r"D:\Projects\ML\Video classification\youtube_action_naudio"

In [None]:
# Enter the dataset root; the next cell lists each class folder relative to it.
os.chdir("youtube_action_naudio")
# Keep only class directories. Stray files and the generated "train"/"test" split
# folders (present when the notebook is re-run) must not be treated as classes.
# Sorting makes the folder -> label index mapping deterministic across runs.
folders = sorted(
    name
    for name in os.listdir()
    if os.path.isdir(name) and name not in ("train", "test")
)
# Column-oriented accumulator: one file name and one integer label per video.
files_list = {
    "file": [],
    "label": [],
}

In [None]:
# Walk every class folder (relative to the current directory) and record each
# file it contains together with the folder's position in `folders` as its label.
for label_index, class_dir in enumerate(folders):
    names_in_class = os.listdir(class_dir)
    files_list["file"].extend(names_in_class)
    files_list["label"].extend([label_index] * len(names_in_class))
# Step back out of the dataset root to the directory the notebook started in.
os.chdir("..")


In [None]:
# Tabulate the collected file names and labels, then display the table.
df_for_split = pd.DataFrame(data=files_list, columns=["file", "label"])
df_for_split

In [None]:
# Shuffle with a fixed seed so the split is reproducible. An unseeded shuffle gives a
# different split on every run, and because earlier copies are never removed from the
# train/ and test/ folders, re-running would leak test videos into the training set.
df_for_split = df_for_split.sample(frac=1, random_state=0)
# Fraction of the rows assigned to the training set; the remainder is the test set.
ratio = 0.8
train_size = int(df_for_split.shape[0] * ratio)
# Positional slicing: first `train_size` shuffled rows for training, the rest for testing.
train_data = df_for_split.iloc[:train_size]
test_data = df_for_split.iloc[train_size:]
train_data, test_data

In [None]:
# Ordered (keyword, class folder) pairs used to recover a video's class folder from
# its file name. The first keyword found in the name wins, so the order matters
# (e.g. "golf" is checked before the more generic "swing").
_KEYWORD_TO_FOLDER = (
    ("shooting", "basketball"),
    ("biking", "biking"),
    ("diving", "diving"),
    ("golf", "golf_swing"),
    ("riding", "horse_riding"),
    ("juggle", "soccer_juggling"),
    ("swing", "swing"),
    ("tennis", "tennis_swing"),
    ("jumping", "trampoline_jumping"),
    ("spiking", "volleyball_spiking"),
    ("walk_dog", "walking"),
)


def _class_folder_for(filename):
    """Return the class folder whose keyword appears in *filename*.

    Raises:
        ValueError: if no known keyword occurs in the name. Failing loudly avoids
            silently filing the video under whichever class was matched last.
    """
    for keyword, class_folder in _KEYWORD_TO_FOLDER:
        if keyword in filename:
            return class_folder
    raise ValueError(f"Cannot determine class folder for file: {filename!r}")


def _copy_split(filenames, split_name):
    """Copy every file in *filenames* from its class folder into <split_name>/<class>/."""
    for filename in filenames:
        class_folder = _class_folder_for(filename)
        shutil.copyfile(
            os.path.join(class_folder, filename),
            os.path.join(split_name, class_folder, filename),
        )


# All paths below are relative to the dataset root.
os.chdir("youtube_action_naudio")
try:
    # Create train/<class> and test/<class> for every class folder. exist_ok makes the
    # cell safe to re-run and also repairs a split folder that is missing sub-folders.
    for split_name in ("train", "test"):
        for folder in folders:
            # On a re-run `folders` may already contain the split folders themselves.
            if folder in ("train", "test"):
                continue
            os.makedirs(os.path.join(split_name, folder), exist_ok=True)

    # NOTE(review): files copied by a previous run are not removed; if the split has
    # changed since then, clear train/ and test/ first to avoid overlapping sets.
    _copy_split(train_data.file, "train")
    _copy_split(test_data.file, "test")
finally:
    # Always return to the starting directory, even if a copy fails midway.
    os.chdir("..")

In [None]:
# Build the training dataset from the train/ split folder: random clips of
# duration 2 per video, the shared video transforms, and no audio decoding.
train_dataset = labeled_video_dataset(
    data_path=f"{dataset_path}/train/",
    clip_sampler=make_clip_sampler('random', 2),
    transform=video_transforms,
    decode_audio=False,
)


In [None]:
# Display how many videos the training dataset found (quick sanity check of the split).
train_dataset.num_videos

### Model

In [None]:
import torch
import torch.nn as nn
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics

In [None]:
class TestModel(LightningModule):
    """Video classifier: a pretrained efficient X3D-XS backbone plus an 11-class head.

    The backbone's 400 outputs are passed through a ReLU and a linear layer that
    produces one logit per class. The module also builds its own train / val / test
    dataloaders from the ``train`` and ``test`` folders under ``dataset_path``.

    Step outputs are buffered on the module and aggregated in the ``on_*_epoch_end``
    hooks, because Lightning calls those hooks without arguments.
    """

    def __init__(self):
        super().__init__()
        # model architecture
        self.video_model = torch.hub.load("facebookresearch/pytorchvideo", "efficient_x3d_xs", pretrained=True)
        self.relu = nn.ReLU()
        self.linear = nn.Linear(400, 11)

        self.lr = 1e-3
        self.batch_size = 8
        self.numworkers = 0
        # evaluation metric (only its per-batch forward value is used below)
        self.metric = torchmetrics.Accuracy(task='multiclass', num_classes=11)
        # loss function
        self.criterion = nn.CrossEntropyLoss()
        # Per-epoch buffers of detached step outputs, consumed by the epoch-end hooks.
        self._train_outputs = []
        self._val_outputs = []
        self._test_outputs = []

    def forward(self, x):
        """Return class logits for a batch of video clips."""
        x = self.video_model(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        """Use AdamW with a cosine-annealed learning rate."""
        opt = torch.optim.AdamW(params=self.parameters(), lr=self.lr)
        scheduler = CosineAnnealingLR(opt, T_max=10, eta_min=1e-6, last_epoch=-1)
        return {'optimizer': opt, 'lr_scheduler': scheduler}

    # ------------------------------------------------------------------ helpers

    def _make_loader(self, split):
        """Build a DataLoader over the ``<dataset_path>/<split>/`` folder."""
        # NOTE(review): validation and test reuse the random clip sampler and the
        # augmenting `video_transforms`, so evaluation is not deterministic.
        dataset = labeled_video_dataset(f"{dataset_path}/{split}/",
                                        clip_sampler=make_clip_sampler('random', 2),
                                        transform=video_transforms, decode_audio=False)
        return DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)

    def _loss_and_metric(self, batch):
        """Run the model on a batch and return ``(loss, batch_accuracy)``."""
        video, label = batch['video'], batch['label']
        out = self(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        return loss, metric

    @staticmethod
    def _round2(value):
        """Round a scalar tensor to two decimals while keeping it a tensor."""
        return torch.round(value * 100) / 100

    def _log_epoch_means(self, outputs, loss_name, metric_name):
        """Log the epoch mean of the buffered 'loss' and 'metric' values.

        Values are detached and cast to float32 first: training losses carry
        gradients, and under 16-bit precision they may be half tensors.
        """
        if not outputs:
            # Nothing was produced this epoch (e.g. empty dataloader); stacking would fail.
            return
        loss = torch.stack([x['loss'].detach().float() for x in outputs]).mean()
        metric = torch.stack([x['metric'].detach().float() for x in outputs]).mean()
        self.log(loss_name, self._round2(loss))
        self.log(metric_name, self._round2(metric))

    # ----------------------------------------------------------------- training

    def train_dataloader(self):
        return self._make_loader("train")

    def training_step(self, batch, batch_idx):
        loss, metric = self._loss_and_metric(batch)
        # Buffer detached copies so the autograd graph is not kept alive for the epoch.
        self._train_outputs.append({'loss': loss.detach(), 'metric': metric.detach()})
        return {'loss': loss, 'metric': metric}

    def on_train_epoch_end(self, outputs=None):
        """Log mean training loss/accuracy; `outputs` is optional for older callers."""
        collected = self._train_outputs if outputs is None else outputs
        self._log_epoch_means(collected, 'train_loss', 'train_metric')
        self._train_outputs.clear()

    # --------------------------------------------------------------- validation

    def val_dataloader(self):
        return self._make_loader("test")

    def validation_step(self, batch, batch_idx):
        loss, metric = self._loss_and_metric(batch)
        self._val_outputs.append({'loss': loss.detach(), 'metric': metric.detach()})
        return {'loss': loss, 'metric': metric}

    def on_validation_epoch_end(self, outputs=None):
        """Log mean validation loss/accuracy ('val_loss' is what the checkpoint monitors)."""
        collected = self._val_outputs if outputs is None else outputs
        self._log_epoch_means(collected, 'val_loss', 'val_metric')
        self._val_outputs.clear()

    # --------------------------------------------------------------------- test

    def test_dataloader(self):
        return self._make_loader("test")

    def test_step(self, batch, batch_idx):
        video, label = batch['video'], batch['label']
        out = self(video)
        self._test_outputs.append({'label': label.detach(), 'pred': out.detach()})
        return {'label': label, 'pred': out}

    def on_test_epoch_end(self, outputs=None):
        """Print a per-class classification report over the whole test epoch."""
        collected = self._test_outputs if outputs is None else outputs
        if collected:
            label = torch.cat([x['label'] for x in collected]).detach().cpu().numpy()
            # The model emits one logit per class; the report needs predicted class indices.
            pred = torch.cat([x['pred'] for x in collected]).detach().argmax(dim=1).cpu().numpy()
            print(classification_report(label, pred))
        self._test_outputs.clear()

In [None]:
# Checkpointing keyed on the logged "val_loss" value, written under checkpoints/
# with the base name "file"; the most recent state is additionally kept.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="checkpoints",
    filename="file",
    save_last=True,
)
# Record the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval="epoch")

In [None]:
# Seed every RNG *before* building the model: the new linear classification head is
# randomly initialised in TestModel.__init__, so seeding afterwards leaves it unseeded.
seed_everything(0)
model = TestModel()
trainer = Trainer(max_epochs=1,
                  precision=16,
                  accumulate_grad_batches=2,
                  enable_progress_bar=True,
                  # Skip the pre-training validation sanity check.
                  num_sanity_val_steps=0,
                  callbacks=[lr_monitor, checkpoint_callback])

In [None]:
# Start training; the model supplies its own train/validation dataloaders.
trainer.fit(model=model)