In [1]:
# !pip install torchsampler
#https://github.com/ufoym/imbalanced-dataset-sampler


# train_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     sampler=ImbalancedDatasetSampler(train_dataset),
#     batch_size=args.batch_size,
#     **kwargs
# )
#https://www.kaggle.com/competitions/dfl-bundesliga-data-shootout/discussion/360236
#model kinetic version
#label seperate
#video augmentation
#imbalance data
#focal loss
#https://github.com/HHTseng/video-classification
#https://huggingface.co/models?other=video-classification
#앙상블

In [2]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torchsampler import ImbalancedDatasetSampler
from sklearn.model_selection import KFold,StratifiedKFold
import albumentations as A
from datetime import datetime
# from einops import rearrange
# from decord import VideoReader
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from segmentation_models_pytorch.losses import FocalLoss
# from transformers import AutoModel, AutoImageProcessor, AutoConfig
# from skmultilearn.model_selection import iterative_train_test_split
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorchvideo.transforms.transforms_factory import create_video_transform
import random
import os
import cv2
from tqdm import tqdm

from transformers import AutoModel, AutoImageProcessor, AutoConfig



In [3]:
### https://github.com/davda54/sam

class SAM(torch.optim.Optimizer):
    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, **kwargs)
        super(SAM, self).__init__(params, defaults)

        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                e_w = p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"
                self.state[p]["e_w"] = e_w

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.sub_(self.state[p]["e_w"])  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        p.grad.norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [5]:
import pandas as pd
all_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')

In [6]:

CFG = {
    'VIDEO_LENGTH':10, 
    'IMG_SIZE':235,
    'EPOCHS':5,
    'LEARNING_RATE':1e-5,
    'BATCH_SIZE':2,
    'SEED':2023,
    'SPLIT':5,
    'ROOT':'./data',
    'MODEL':'MCG-NJU/videomae-base-finetuned-ssv2',
    'LOAD_WEIGHT':True
    
    
}

In [7]:
skf = StratifiedKFold(n_splits = CFG['SPLIT'])

In [8]:
all_df['video_path'] = all_df['video_path'].apply(lambda x:CFG['ROOT']+x[1:])
test_df['video_path'] = test_df['video_path'].apply(lambda x:CFG['ROOT']+x[1:])

In [9]:
all_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,7
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,7
2,TRAIN_0002,./data/train/TRAIN_0002.mp4,0
3,TRAIN_0003,./data/train/TRAIN_0003.mp4,0
4,TRAIN_0004,./data/train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./data/train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./data/train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./data/train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./data/train/TRAIN_2696.mp4,0


In [10]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [11]:
from transformers import VideoMAEConfig, VideoMAEModel
from transformers import AutoImageProcessor, VideoMAEForVideoClassification
from transformers import XCLIPVisionModel, XCLIPVisionConfig

configuration = VideoMAEConfig()
configuration = AutoConfig.from_pretrained(CFG['MODEL'])

configuration.num_frames = CFG['VIDEO_LENGTH']
configuration.num_frames=CFG['VIDEO_LENGTH']
configuration.image_size=CFG['IMG_SIZE']
configuration.id2label = {0:'no crash',1:'crash ego normal day',2:'crash ego normal night',3:'crash ego snow day',4:'crash ego snow night',
                         5:'crash ego rain day',6:'crash ego rain night',7:'crash other normal day',8:'crash other normal night',
                         9:'crash other snow day',10:'crash other snow night',11:'crash other rain day',12:'crash other rain night'}
configuration.label2id = {'no crash':0,'crash ego normal day':1,'crash ego normal night':2,'crash ego snow day':3,'crash ego snow night':4,
                         'crash ego rain day':5,'crash ego rain night':6,'crash other normal day':7,'crash other normal night':8,
                         'crash other snow day':9,'crash other snow night':10,'crash other rain day':11,'crash other rain night':12}

image_processor_config = AutoImageProcessor.from_pretrained(CFG['MODEL'])

mae_model = VideoMAEForVideoClassification.from_pretrained(CFG['MODEL'],config=configuration,ignore_mismatched_sizes=True)


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base-finetuned-ssv2 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([174, 768]) in the checkpoint and torch.Size([13, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([174]) in the checkpoint and torch.Size([13]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
Alb = A.Compose([
        A.Resize(width=CFG['IMG_SIZE'], height=CFG['IMG_SIZE']),
        A.HorizontalFlip(p=0.5),
        A.OneOf([
            A.Blur(blur_limit=3,p=0.3),
            A.GaussNoise(p=0.3,var_limit=(0, 26)),
            A.Downscale(p=0.3,scale_min=0.7, scale_max=0.99, interpolation=2),
#             A.RandomBrightness(p=0.2, limit=0.05),    
#             A.CoarseDropout(p=0.2, max_holes=10, max_height=8, max_width=8, min_holes=5, min_height=2, min_width=2),
        ], p=0.7),
        A.Normalize(mean=tuple(image_processor_config.image_mean)
                   ,std=tuple(image_processor_config.image_std))
#             A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),

    ], p=1)


def aug_video(vid, tfms):
    aug_vid = []
    for x in vid:
        aug_vid.append((tfms(image = np.asarray(x)))['image'])
    return torch.from_numpy(np.stack(aug_vid))

In [13]:

class VideoDataset(Dataset):
    def __init__(self, video_path_list, label_list,transform=None):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.Alb = transform
    
    def get_labels(self):   
        return self.label_list  
    
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for idx in range(50):
            if idx%5 == 3:
                _, img = cap.read()
                img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
                frames.append(img)
        if self.Alb is not None:
            frames = aug_video(frames, tfms=self.Alb)
        return torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2)


In [14]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = FocalLoss('multiclass')
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            optimizer.zero_grad() 
            output = model(videos)
            loss = criterion(output.logits, labels)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            date=datetime.today().strftime("%m_%d_%H_%M")
            torch.save(best_model.state_dict(), './'+'all_adam' +'_'+ date + '_best_model.pth')

    
    return best_model

In [15]:
def sam_train(skf_idx, model, optimizer, train_loader, val_loader, scheduler, device, cls_type):
    model.to(device)
    criterion = FocalLoss('multiclass')
    best_val_score = 0
    best_model = None
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
      
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            output = model(videos)
            loss = criterion(output.logits, labels)
            loss.backward()
#             optimizer.step()
            optimizer.first_step(zero_grad=True)

            criterion(model(videos).logits, labels).backward()
            optimizer.second_step(zero_grad=True)

            train_loss.append(loss.item())
            
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
       
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
    
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            date=datetime.today().strftime("%m_%d_%H_%M")
            torch.save(best_model.state_dict(), './'+cls_type + '_' + str(skf_idx) +'_'+ date + '_best_model.pth')
        skf_idx+=1
    return best_model,achieve,skf_idx

In [16]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            output = model(videos)
            loss = criterion(output.logits, labels)
            val_loss.append(loss.item())
            
            preds += output.logits.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [17]:


# cls_type = 'all'
# base_optimizer = torch.optim.SGD  # define an optimizer for the "sharpness-aware" update
# optimizer = SAM(mae_model.parameters(), base_optimizer, lr=CFG["LEARNING_RATE"], momentum=0.1)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3,threshold_mode='abs',min_lr=1e-12, verbose=True)
# skf_idx=1
# for train_idx,val_idx in skf.split(all_df['video_path'],all_df['label']):
#     train_dataset = VideoDataset(all_df['video_path'][train_idx].values, all_df['label'][train_idx].values,transform=Alb)
#     val_dataset = VideoDataset(all_df['video_path'][val_idx].values, all_df['label'][val_idx].values, transform=Alb)
#     train_loader = DataLoader(train_dataset,sampler=ImbalancedDatasetSampler(train_dataset),shuffle=False,batch_size = CFG['BATCH_SIZE'],  num_workers=4)
#     val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], num_workers=4)
#     _,achieve,skf_idx= sam_train(skf_idx,mae_model, optimizer, train_loader, val_loader, scheduler, device, cls_type)


In [18]:
optimizer = torch.optim.Adam(params = mae_model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-10, verbose=True)

# if CFG['LOAD_WEIGHT'] == True:
#     checkpoint = torch.load('./best_model.pth')
#     mae_model.load_state_dict(checkpoint)


for train_idx,val_idx in skf.split(all_df['video_path'],all_df['label']):
    train_dataset = VideoDataset(all_df['video_path'][train_idx].values, all_df['label'][train_idx].values,transform=Alb)
    val_dataset = VideoDataset(all_df['video_path'][val_idx].values, all_df['label'][val_idx].values, transform=Alb)
    train_loader = DataLoader(train_dataset,sampler=ImbalancedDatasetSampler(train_dataset),shuffle=False,batch_size = CFG['BATCH_SIZE'],  num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], num_workers=4)
    infer_model = train(mae_model, optimizer, train_loader, val_loader, scheduler, device)

100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:18<00:00,  5.44it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.63it/s]


Epoch [1], Train Loss : [0.57810] Val Loss : [0.41034] Val F1 : [0.33898]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:18<00:00,  5.44it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.71it/s]

Epoch [2], Train Loss : [0.21128] Val Loss : [0.33104] Val F1 : [0.30790]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.79it/s]

Epoch [3], Train Loss : [0.12210] Val Loss : [0.33924] Val F1 : [0.26983]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.76it/s]


Epoch [4], Train Loss : [0.07852] Val Loss : [0.25827] Val F1 : [0.34705]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.78it/s]


Epoch [5], Train Loss : [0.07304] Val Loss : [0.25743] Val F1 : [0.35562]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.47it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.73it/s]


Epoch [1], Train Loss : [0.13194] Val Loss : [0.12368] Val F1 : [0.91068]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.84it/s]


Epoch [2], Train Loss : [0.05741] Val Loss : [0.10082] Val F1 : [0.92803]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.76it/s]

Epoch [3], Train Loss : [0.04634] Val Loss : [0.10368] Val F1 : [0.90011]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.74it/s]

Epoch [4], Train Loss : [0.03076] Val Loss : [0.11501] Val F1 : [0.88973]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.72it/s]

Epoch [5], Train Loss : [0.03278] Val Loss : [0.15706] Val F1 : [0.85225]
Epoch    10: reducing learning rate of group 0 to 5.0000e-06.



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.77it/s]


Epoch [1], Train Loss : [0.03134] Val Loss : [0.02824] Val F1 : [0.99081]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.84it/s]

Epoch [2], Train Loss : [0.02126] Val Loss : [0.02521] Val F1 : [0.98999]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.75it/s]


Epoch [3], Train Loss : [0.01566] Val Loss : [0.02506] Val F1 : [0.99213]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.85it/s]

Epoch [4], Train Loss : [0.01018] Val Loss : [0.04799] Val F1 : [0.97183]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1079/1079 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.84it/s]

Epoch [5], Train Loss : [0.00934] Val Loss : [0.04009] Val F1 : [0.97492]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.80it/s]


Epoch [1], Train Loss : [0.02014] Val Loss : [0.01738] Val F1 : [0.98744]
Epoch    16: reducing learning rate of group 0 to 2.5000e-06.


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:16<00:00, 15.90it/s]


Epoch [2], Train Loss : [0.00926] Val Loss : [0.00739] Val F1 : [0.99735]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.83it/s]


Epoch [3], Train Loss : [0.00587] Val Loss : [0.00715] Val F1 : [0.99791]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.85it/s]


Epoch [4], Train Loss : [0.00497] Val Loss : [0.00590] Val F1 : [0.99868]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.86it/s]


Epoch [5], Train Loss : [0.00315] Val Loss : [0.00434] Val F1 : [1.00000]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.88it/s]


Epoch [1], Train Loss : [0.00204] Val Loss : [0.00185] Val F1 : [1.00000]


100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:16<00:00, 15.88it/s]

Epoch [2], Train Loss : [0.00130] Val Loss : [0.00162] Val F1 : [1.00000]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.88it/s]

Epoch [3], Train Loss : [0.00085] Val Loss : [0.00187] Val F1 : [1.00000]
Epoch    23: reducing learning rate of group 0 to 1.2500e-06.



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.86it/s]

Epoch [4], Train Loss : [0.00300] Val Loss : [0.00153] Val F1 : [1.00000]



100%|██████████████████████████████████████████████████████████████████████████████████████████| 1080/1080 [03:17<00:00,  5.46it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 270/270 [00:17<00:00, 15.88it/s]

Epoch [5], Train Loss : [0.00077] Val Loss : [0.00082] Val F1 : [1.00000]





In [19]:
test_dataset = VideoDataset(test_df['video_path'].values,label_list= None, transform=Alb)
test_loader = DataLoader(test_dataset, shuffle=False,batch_size = CFG['BATCH_SIZE']*2,  num_workers=4)
   

In [20]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            output = model(videos)
#             preds += output.logits.argmax(1).detach().cpu().numpy().tolist()
            preds += output.logits.detach().cpu().numpy().tolist()

    return preds

In [24]:
preds_list = []

preds=None


for idx in range(5):
    preds = inference(mae_model, test_loader, device)
    preds_list.append(preds)
    
pred_sum = np.sum(preds_list,axis=0)
pred_max = pred_sum.argmax(1).tolist()







100%|████████████████████████████████████████████████████████████████████████████████████████████| 450/450 [00:52<00:00,  8.62it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 450/450 [00:52<00:00,  8.51it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 450/450 [00:52<00:00,  8.52it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 450/450 [00:53<00:00,  8.49it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 450/450 [00:52<00:00,  8.50it/s]


In [28]:
submit = pd.read_csv('./data/sample_submission.csv')
submit['label'] = pred_max

In [29]:
submit['label'].value_counts()

0     1217
1      240
7      203
3       63
2       33
11      19
8       13
9        8
5        3
4        1
Name: label, dtype: int64

In [30]:
date=datetime.today().strftime("%Y_%m_%d_%H_%M_%S")
submit.to_csv('./'+date+'.csv', index=False)