In [1]:
!pip install numpy
!pip install pandas
!pip install decord
# !pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
!pip install pytorch-lightning
!pip install pytorchvideo
!pip install scikit-learn
!pip install scikit-multilearn
!pip install segmentation-models-pytorch
!pip install transformers
!pip install einops
!pip install tqdm





In [2]:
#https://github.com/XuezheMax/apollo/blob/master/optim/apollo.py
import numpy as np
import torch
from torch.optim.optimizer import Optimizer


class Apollo(Optimizer):
    r"""Implements Atom algorithm.
        Arguments:
            params (iterable): iterable of parameters to optimize or dicts defining
                parameter groups
            lr (float): learning rate
            beta (float, optional): coefficient used for computing running averages of gradient (default: 0.9)
            eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-4)
            rebound (str, optional): recified bound for diagonal hessian:
                ``'constant'`` | ``'belief'`` (default: None)
            warmup (int, optional): number of warmup steps (default: 500)
            init_lr (float, optional): initial learning rate for warmup (default: lr/1000)
            weight_decay (float, optional): weight decay coefficient (default: 0)
            weight_decay_type (str, optional): type of weight decay:
                ``'L2'`` | ``'decoupled'`` | ``'stable'`` (default: 'L2')
        """

    def __init__(self, params, lr, beta=0.9, eps=1e-4, rebound='constant', warmup=500, init_lr=None, weight_decay=0, weight_decay_type=None):
        if not 0.0 < lr:
            raise ValueError("Invalid learning rate value: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= beta < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(beta))
        if rebound not in ['constant', 'belief']:
            raise ValueError("Invalid recitifed bound: {}".format(rebound))
        if not 0.0 <= warmup:
            raise ValueError("Invalid warmup updates: {}".format(warmup))
        if init_lr is None:
            init_lr = lr / 1000
        if not 0.0 <= init_lr <= lr:
            raise ValueError("Invalid initial learning rate: {}".format(init_lr))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if weight_decay_type is None:
            weight_decay_type = 'L2' if rebound == 'constant' else 'decoupled'
        if weight_decay_type not in ['L2', 'decoupled', 'stable']:
            raise ValueError("Invalid weight decay type: {}".format(weight_decay_type))

        defaults = dict(lr=lr, beta=beta, eps=eps, rebound=rebound,
                        warmup=warmup, init_lr=init_lr, base_lr=lr,
                        weight_decay=weight_decay, weight_decay_type=weight_decay_type)
        super(Apollo, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Apollo, self).__setstate__(state)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg_grad'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    # Exponential moving average of squared gradient values
                    state['approx_hessian'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    # Previous update direction
                    state['update'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                # Calculate current lr
                if state['step'] < group['warmup']:
                    curr_lr = (group['base_lr'] - group['init_lr']) * state['step'] / group['warmup'] + group['init_lr']
                else:
                    curr_lr = group['lr']

                # Perform optimization step
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Atom does not support sparse gradients.')

                # Perform step weight decay
                if group['weight_decay'] != 0 and group['weight_decay_type'] == 'L2':
                    grad = grad.add(p, alpha=group['weight_decay'])

                beta = group['beta']
                eps = group['eps']
                exp_avg_grad = state['exp_avg_grad']
                B = state['approx_hessian']
                d_p = state['update']

                state['step'] += 1
                bias_correction = 1 - beta ** state['step']
                alpha = (1 - beta) / bias_correction

                # calc the diff grad
                delta_grad = grad - exp_avg_grad
                if group['rebound'] == 'belief':
                    rebound = delta_grad.norm(p=np.inf)
                else:
                    rebound = 0.01
                    eps = eps / rebound

                # Update the running average grad
                exp_avg_grad.add_(delta_grad, alpha=alpha)

                denom = d_p.norm(p=4).add(eps)
                d_p.div_(denom)
                v_sq = d_p.mul(d_p)
                delta = delta_grad.div_(denom).mul_(d_p).sum().mul(-alpha) - B.mul(v_sq).sum()

                # Update B
                B.addcmul_(v_sq, delta)

                # calc direction of parameter updates
                if group['rebound'] == 'belief':
                    denom = torch.max(B.abs(), rebound).add_(eps / alpha)
                else:
                    denom = B.abs().clamp_(min=rebound)

                d_p.copy_(exp_avg_grad.div(denom))

                # Perform step weight decay
                if group['weight_decay'] != 0 and group['weight_decay_type'] != 'L2':
                    if group['weight_decay_type'] == 'stable':
                        weight_decay = group['weight_decay'] / denom.mean().item()
                    else:
                        weight_decay = group['weight_decay']
                    d_p.add_(p, alpha=weight_decay)

                p.add_(d_p, alpha=-curr_lr)

        return 

In [3]:
#https://github.com/issamemari/pytorch-multilabel-balanced-sampler/blob/master/sampler.py
import random
import numpy as np

from torch.utils.data.sampler import Sampler


class MultilabelBalancedRandomSampler(Sampler):
    """
    MultilabelBalancedRandomSampler: Given a multilabel dataset of length n_samples and
    number of classes n_classes, samples from the data with equal probability per class
    effectively oversampling minority classes and undersampling majority classes at the
    same time. Note that using this sampler does not guarantee that the distribution of
    classes in the output samples will be uniform, since the dataset is multilabel and
    sampling is based on a single class. This does however guarantee that all classes
    will have at least batch_size / n_classes samples as batch_size approaches infinity
    """

    def __init__(self, labels, indices=None, class_choice="least_sampled"):
        """
        Parameters:
        -----------
            labels: a multi-hot encoding numpy array of shape (n_samples, n_classes)
            indices: an arbitrary-length 1-dimensional numpy array representing a list
            of indices to sample only from
            class_choice: a string indicating how class will be selected for every
            sample:
                "least_sampled": class with the least number of sampled labels so far
                "random": class is chosen uniformly at random
                "cycle": the sampler cycles through the classes sequentially
        """
        self.labels = labels
        self.indices = indices
        if self.indices is None:
            self.indices = range(len(labels))

        self.num_classes = self.labels.shape[1]

        # List of lists of example indices per class
        self.class_indices = []
        for class_ in range(self.num_classes):
            lst = np.where(self.labels[:, class_] == 1)[0]
            lst = lst[np.isin(lst, self.indices)]
            self.class_indices.append(lst)

        self.counts = [0] * self.num_classes

        assert class_choice in ["least_sampled", "random", "cycle"]
        self.class_choice = class_choice
        self.current_class = 0

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count >= len(self.indices):
            raise StopIteration
        self.count += 1
        return self.sample()

    def sample(self):
        class_ = self.get_class()
        class_indices = self.class_indices[class_]
        chosen_index = np.random.choice(class_indices)
        if self.class_choice == "least_sampled":
            for class_, indicator in enumerate(self.labels[chosen_index]):
                if indicator == 1:
                    self.counts[class_] += 1
        return chosen_index

    def get_class(self):
        if self.class_choice == "random":
            class_ = random.randint(0, self.labels.shape[1] - 1)
        elif self.class_choice == "cycle":
            class_ = self.current_class
            self.current_class = (self.current_class + 1) % self.labels.shape[1]
        elif self.class_choice == "least_sampled":
            min_count = self.counts[0]
            min_classes = [0]
            for class_ in range(1, self.num_classes):
                if self.counts[class_] < min_count:
                    min_count = self.counts[class_]
                    min_classes = [class_]
                if self.counts[class_] == min_count:
                    min_classes.append(class_)
            class_ = np.random.choice(min_classes)
        return class_

    def __len__(self):
        return len(self.indices)

In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl

from einops import rearrange
from decord import VideoReader
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from segmentation_models_pytorch.losses import FocalLoss
from transformers import AutoModel, AutoImageProcessor, AutoConfig
from skmultilearn.model_selection import iterative_train_test_split
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorchvideo.transforms.transforms_factory import create_video_transform

In [5]:
config = {
    "seed":2023,
    "model_name":"facebook/timesformer-base-finetuned-k400",#MCG-NJU/videomae-base
    "batch_size":2,
    "learning_rate":1e-5,
    "data_dir":'./data',
    "checkpoint_dir":'./checkpoint',
    "submission_dir":'./submission',
    "n_classes":(2,3,4,3),
    "label_dict":{
        -1:[-1,-1,-1,-1],
        0:[0,0,0,0],
        1:[1,1,1,1],
        2:[1,1,1,2],
        3:[1,1,2,1],
        4:[1,1,2,2],
        5:[1,1,3,1],
        6:[1,1,3,2],
        7:[1,2,1,1],
        8:[1,2,1,2],
        9:[1,2,2,1],
        10:[1,2,2,2],
        11:[1,2,3,1],
        12:[1,2,3,2]
    },
    "label_reverse_dict":{
        (0,0,0,0):0,
        (1,1,1,1):1,
        (1,1,1,2):2,
        (1,1,2,1):3,
        (1,1,2,2):4,
        (1,1,3,1):5,
        (1,1,3,2):6,
        (1,2,1,1):7,
        (1,2,1,2):8,
        (1,2,2,1):9,
        (1,2,2,2):10,
        (1,2,3,1):11,
        (1,2,3,2):12,
    }
}

In [6]:
pl.seed_everything(config['seed'])

Global seed set to 2023


2023

In [7]:
train_df = pd.read_csv(f"{config['data_dir']}/train.csv")
test_df = pd.read_csv(f"{config['data_dir']}/test.csv")

In [8]:
train_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./train/TRAIN_0000.mp4,7
1,TRAIN_0001,./train/TRAIN_0001.mp4,7
2,TRAIN_0002,./train/TRAIN_0002.mp4,0
3,TRAIN_0003,./train/TRAIN_0003.mp4,0
4,TRAIN_0004,./train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0


In [9]:
train_df['sample_id'] = train_df['sample_id'].apply(lambda x: int(x.split('_')[1]))
test_df['sample_id'] = test_df['sample_id'].apply(lambda x: int(x.split('_')[1]))

In [10]:
train_df

Unnamed: 0,sample_id,video_path,label
0,0,./train/TRAIN_0000.mp4,7
1,1,./train/TRAIN_0001.mp4,7
2,2,./train/TRAIN_0002.mp4,0
3,3,./train/TRAIN_0003.mp4,0
4,4,./train/TRAIN_0004.mp4,1
...,...,...,...
2693,2693,./train/TRAIN_2693.mp4,3
2694,2694,./train/TRAIN_2694.mp4,5
2695,2695,./train/TRAIN_2695.mp4,0
2696,2696,./train/TRAIN_2696.mp4,0


In [11]:
train_df['video_path'] = train_df['video_path'].apply(lambda x: config['data_dir'] + x[1:])
test_df['video_path'] = test_df['video_path'].apply(lambda x: config['data_dir'] + x[1:])

In [12]:
train_df

Unnamed: 0,sample_id,video_path,label
0,0,./data/train/TRAIN_0000.mp4,7
1,1,./data/train/TRAIN_0001.mp4,7
2,2,./data/train/TRAIN_0002.mp4,0
3,3,./data/train/TRAIN_0003.mp4,0
4,4,./data/train/TRAIN_0004.mp4,1
...,...,...,...
2693,2693,./data/train/TRAIN_2693.mp4,3
2694,2694,./data/train/TRAIN_2694.mp4,5
2695,2695,./data/train/TRAIN_2695.mp4,0
2696,2696,./data/train/TRAIN_2696.mp4,0


In [13]:
test_df['label']=-1
test_df['label_split'] = test_df['label'].apply(config['label_dict'].get)

In [14]:
test_df

Unnamed: 0,sample_id,video_path,label,label_split
0,0,./data/test/TEST_0000.mp4,-1,"[-1, -1, -1, -1]"
1,1,./data/test/TEST_0001.mp4,-1,"[-1, -1, -1, -1]"
2,2,./data/test/TEST_0002.mp4,-1,"[-1, -1, -1, -1]"
3,3,./data/test/TEST_0003.mp4,-1,"[-1, -1, -1, -1]"
4,4,./data/test/TEST_0004.mp4,-1,"[-1, -1, -1, -1]"
...,...,...,...,...
1795,1795,./data/test/TEST_1795.mp4,-1,"[-1, -1, -1, -1]"
1796,1796,./data/test/TEST_1796.mp4,-1,"[-1, -1, -1, -1]"
1797,1797,./data/test/TEST_1797.mp4,-1,"[-1, -1, -1, -1]"
1798,1798,./data/test/TEST_1798.mp4,-1,"[-1, -1, -1, -1]"


In [15]:
train_df['label_split'] = train_df['label'].apply(config['label_dict'].get)
train_label_split = np.array(train_df['label_split'].tolist())

In [16]:
train_df

Unnamed: 0,sample_id,video_path,label,label_split
0,0,./data/train/TRAIN_0000.mp4,7,"[1, 2, 1, 1]"
1,1,./data/train/TRAIN_0001.mp4,7,"[1, 2, 1, 1]"
2,2,./data/train/TRAIN_0002.mp4,0,"[0, 0, 0, 0]"
3,3,./data/train/TRAIN_0003.mp4,0,"[0, 0, 0, 0]"
4,4,./data/train/TRAIN_0004.mp4,1,"[1, 1, 1, 1]"
...,...,...,...,...
2693,2693,./data/train/TRAIN_2693.mp4,3,"[1, 1, 2, 1]"
2694,2694,./data/train/TRAIN_2694.mp4,5,"[1, 1, 3, 1]"
2695,2695,./data/train/TRAIN_2695.mp4,0,"[0, 0, 0, 0]"
2696,2696,./data/train/TRAIN_2696.mp4,0,"[0, 0, 0, 0]"


In [17]:
train_label_multi_hot = np.hstack([np.eye(n_class, dtype=np.int32)[train_label_split[:,idx]] for idx, n_class in enumerate(config['n_classes'])])
train_df['label_multi_hot'] = train_label_multi_hot.tolist()

In [18]:
train_df

Unnamed: 0,sample_id,video_path,label,label_split,label_multi_hot
0,0,./data/train/TRAIN_0000.mp4,7,"[1, 2, 1, 1]","[0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0]"
1,1,./data/train/TRAIN_0001.mp4,7,"[1, 2, 1, 1]","[0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0]"
2,2,./data/train/TRAIN_0002.mp4,0,"[0, 0, 0, 0]","[1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]"
3,3,./data/train/TRAIN_0003.mp4,0,"[0, 0, 0, 0]","[1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]"
4,4,./data/train/TRAIN_0004.mp4,1,"[1, 1, 1, 1]","[0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]"
...,...,...,...,...,...
2693,2693,./data/train/TRAIN_2693.mp4,3,"[1, 1, 2, 1]","[0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0]"
2694,2694,./data/train/TRAIN_2694.mp4,5,"[1, 1, 3, 1]","[0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0]"
2695,2695,./data/train/TRAIN_2695.mp4,0,"[0, 0, 0, 0]","[1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]"
2696,2696,./data/train/TRAIN_2696.mp4,0,"[0, 0, 0, 0]","[1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]"


In [19]:
train_df_for_dataset, _ , val_df_for_dataset, _  = iterative_train_test_split(X=train_df.values, y=train_label_multi_hot, test_size=0.2)
test_df_for_dataset = test_df.values

In [20]:
train_df_for_dataset

array([[1, './data/train/TRAIN_0001.mp4', 7, list([1, 2, 1, 1]),
        list([0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0])],
       [3, './data/train/TRAIN_0003.mp4', 0, list([0, 0, 0, 0]),
        list([1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0])],
       [4, './data/train/TRAIN_0004.mp4', 1, list([1, 1, 1, 1]),
        list([0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0])],
       ...,
       [2695, './data/train/TRAIN_2695.mp4', 0, list([0, 0, 0, 0]),
        list([1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0])],
       [2696, './data/train/TRAIN_2696.mp4', 0, list([0, 0, 0, 0]),
        list([1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0])],
       [2697, './data/train/TRAIN_2697.mp4', 0, list([0, 0, 0, 0]),
        list([1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0])]], dtype=object)

In [21]:
train_multi_hot_for_sampler = np.array(train_df_for_dataset[:,4].tolist())

In [22]:
np.shape(train_multi_hot_for_sampler)

(2158, 12)

In [23]:
class VideoDataset(Dataset):
    def __init__(self, df_for_dataset, transform=None):
        self.sample_id = df_for_dataset[:,0]
        self.video_path = df_for_dataset[:,1]
        self.label = df_for_dataset[:,2]
        self.label_split = np.array(df_for_dataset[:,3].tolist())
        self.transform = transform

    def __len__(self):
        return len(self.sample_id)

    def __getitem__(self, idx):
        sample_id = self.sample_id[idx]
        video_path = self.video_path[idx]
        vr = VideoReader(video_path)
        video = torch.from_numpy(vr.get_batch(range(50)).asnumpy())
        video = rearrange(video, 't h w c -> c t h w')
        label = self.label[idx]
        label_split = self.label_split[idx]
        
        if self.transform:
            video = self.transform(video)
        video = rearrange(video, 'c t h w -> t c h w')

        sample = {
            'sample_id':sample_id,
            'video':video,
            'label':label,
            'label_split':label_split
        }
        
        return sample

In [24]:
model_config = AutoConfig.from_pretrained(config['model_name'])
image_processor_config = AutoImageProcessor.from_pretrained(config['model_name'])

In [25]:
train_transform = create_video_transform(
    mode='train',
    num_samples=model_config.num_frames,
    video_mean = tuple(image_processor_config.image_mean),
    video_std = tuple(image_processor_config.image_std),
    crop_size = tuple(image_processor_config.crop_size.values())
)

val_transform = create_video_transform(
    mode='val',
    num_samples=model_config.num_frames,
    video_mean = tuple(image_processor_config.image_mean),
    video_std = tuple(image_processor_config.image_std),
    crop_size = tuple(image_processor_config.crop_size.values())
)

In [26]:
train_dataset = VideoDataset(train_df_for_dataset, transform=train_transform)
val_dataset = VideoDataset(val_df_for_dataset, transform=val_transform)
test_dataset = VideoDataset(test_df_for_dataset, transform=val_transform)

In [27]:
train_sampler = MultilabelBalancedRandomSampler(train_multi_hot_for_sampler)
train_dataloader = DataLoader(train_dataset, batch_size= config['batch_size'], sampler=train_sampler)
val_dataloader = DataLoader(val_dataset, batch_size = config['batch_size']*2)
test_dataloader = DataLoader(test_dataset, batch_size = config['batch_size']*2)

In [33]:
class PLVideoModel(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.learning_rate = config['learning_rate']
        self.model = AutoModel.from_pretrained(config['model_name'])
        self.classifiers = nn.ModuleList([
            nn.LazyLinear(n_class) for n_class in config['n_classes']
        ])
        self.loss = FocalLoss('multiclass')

    def forward(self, x):
        print(x.size())
        x = self.model(x).last_hidden_state.mean(dim=1)
        print(x.size())
        x_out = [classifier(x) for classifier in self.classifiers]
        print(x_out)
        return x_out

    def training_step(self, batch, batch_idx):
        video, label, label_split = batch['video'], batch['label'], batch['label_split']
        y_hats = self.forward(batch["video"])
        loss = sum([self.loss(y_hats[i], batch["label_split"][:,i]) for i in range(len(self.config['n_classes']))])
        loss = loss/len(self.config['n_classes'])
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        video, label, label_split = batch['video'], batch['label'], batch['label_split']
        y_hats = self.forward(batch["video"])
        step_output = [*y_hats, label]
        return step_output
    
    def predict_step(self, batch, batch_idx):
        video, _, _ = batch['video'], batch['label'], batch['label_split']
        y_hats = self.forward(batch["video"])
        step_output = y_hats
        return step_output

    def validation_epoch_end(self, step_outputs):
        pred1, pred2, pred3, pred4, label = [], [], [], [], []
        for step_output in step_outputs:
            pred1.append(step_output[0])
            pred2.append(step_output[1])
            pred3.append(step_output[2])
            pred4.append(step_output[3])
            label.append(step_output[4])
            
        pred1 = torch.cat(pred1).argmax(1)
        pred2 = torch.cat(pred2).argmax(1)
        pred3 = torch.cat(pred3).argmax(1)
        pred4 = torch.cat(pred4).argmax(1)
        label = torch.cat(label).tolist()

        pred = torch.stack([pred1,pred2,pred3,pred4],dim=1).cpu().detach().numpy().tolist()
        pred = list(map(lambda x: self.config['label_reverse_dict'].get(tuple(x),0),pred))
        
        score = f1_score(label,pred, average='macro')
        print(score)
        self.log("val_score", score)
        return score
    
    def post_preproc(self, step_outputs):
        pred1, pred2, pred3, pred4 = [], [], [], []
        for step_output in step_outputs:
            pred1.append(step_output[0])
            pred2.append(step_output[1])
            pred3.append(step_output[2])
            pred4.append(step_output[3])
            
        pred1 = torch.cat(pred1).argmax(1)
        pred2 = torch.cat(pred2).argmax(1)
        pred3 = torch.cat(pred3).argmax(1)
        pred4 = torch.cat(pred4).argmax(1)

        pred = torch.stack([pred1,pred2,pred3,pred4],dim=1).cpu().detach().numpy().tolist()
        pred = list(map(lambda x: self.config['label_reverse_dict'].get(tuple(x),0),pred))

        return pred
            
    def configure_optimizers(self):
        optimizer = Apollo(self.parameters(), lr=self.learning_rate)
        return [optimizer]

In [34]:
checkpoint_callback = ModelCheckpoint(
    monitor='val_score',
    dirpath=config['checkpoint_dir'],
    filename=f'{config["model_name"]}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
    mode='max'
)
early_stop_callback = EarlyStopping(
    monitor="train_loss",
    patience=3,
    verbose=False,
    mode="min"
)

pl_video_model = PLVideoModel(config)

trainer = pl.Trainer(
    max_epochs=100,
    accelerator='auto', 
    precision=16,
    callbacks=[early_stop_callback, checkpoint_callback]
                    
)
trainer.fit(pl_video_model, train_dataloader, val_dataloader)

Some weights of the model checkpoint at facebook/timesformer-base-finetuned-k400 were not used when initializing TimesformerModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing TimesformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TimesformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using 16bit None Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | model       | Times

Sanity Checking: 0it [00:00, ?it/s]

torch.Size([4, 8, 3, 224, 224])
torch.Size([4, 768])
[tensor([[-0.3755,  0.1315],
        [ 0.1779,  0.1495],
        [-0.1920,  0.6040],
        [ 0.0194,  0.1460]], device='cuda:0', dtype=torch.float16), tensor([[ 0.0178, -0.0349,  0.1082],
        [ 0.8633, -0.3875,  0.4675],
        [ 0.5112, -0.0091,  0.2064],
        [ 0.5308, -0.0790,  0.5483]], device='cuda:0', dtype=torch.float16), tensor([[ 0.2771,  0.8252,  0.1499,  0.4822],
        [ 0.0164,  0.3091, -0.2158, -0.0652],
        [ 0.5723, -0.1686,  0.0879,  0.2129],
        [-0.1787,  0.5571, -0.1542,  0.3877]], device='cuda:0',
       dtype=torch.float16), tensor([[ 0.5464, -0.4514,  0.4651],
        [ 0.6831,  0.1516,  0.0208],
        [ 0.2886, -0.2388,  0.0267],
        [-0.0567, -0.4988,  0.3354]], device='cuda:0', dtype=torch.float16)]
torch.Size([4, 8, 3, 224, 224])
torch.Size([4, 768])
[tensor([[-0.0512,  0.3577],
        [ 0.0923, -0.4131],
        [ 0.1078, -0.1378],
        [ 0.4050, -0.8223]], device='cuda:0', dty

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

torch.Size([2, 8, 3, 224, 224])
torch.Size([2, 768])
[tensor([[ 0.3740, -0.0908],
        [ 0.1077,  0.2303]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddmmBackward0>), tensor([[-0.1600, -0.1077, -0.0716],
        [-0.1410, -0.1024,  0.1221]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddmmBackward0>), tensor([[-0.4299, -0.3625, -0.0904,  0.2900],
        [ 0.2208, -0.0435,  0.2352,  0.1519]], device='cuda:0',
       dtype=torch.float16, grad_fn=<AddmmBackward0>), tensor([[-0.3579, -0.1097, -0.0953],
        [-0.1508, -0.4819,  0.1792]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddmmBackward0>)]
torch.Size([2, 8, 3, 224, 224])
torch.Size([2, 768])
[tensor([[-0.0063,  0.3027],
        [ 0.3606, -0.1448]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddmmBackward0>), tensor([[ 0.3403, -0.1469,  0.0217],
        [ 0.5122,  0.2395,  0.1624]], device='cuda:0', dtype=torch.float16,
       grad_fn=<AddmmBackward0>), tensor([[ 0.0795,  0.0496,  

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [35]:
pl_video_model_pretrained = PLVideoModel.load_from_checkpoint(
    "./checkpoint/facebook/timesformer-base-finetuned-k400-epoch=08-train_loss=0.1318-val_score=0.5356.ckpt",
    config=config
)

trainer = pl.Trainer(accelerator='auto')
pred = trainer.predict(pl_video_model_pretrained, test_dataloader)

FileNotFoundError: [Errno 2] No such file or directory: '/home/server-003/workspace/competition/collision/checkpoint/facebook/timesformer-base-finetuned-k400-epoch=08-train_loss=0.1318-val_score=0.5356.ckpt'

In [None]:
pred_post_proc = pl_video_model_pretrained.post_preproc(pred)

In [None]:
submit = pd.read_csv(f"{config['data_dir']}/sample_submission.csv")

In [None]:
submit['label'] = pred_post_proc

In [None]:
submit.to_csv(f"{config['submission_dir']}/testsubmit.csv", index=False)