# Import

In [None]:
import pandas as pd
import numpy as np
from glob import glob
from PIL import Image
import cv2
from tqdm.auto import tqdm
import os
import json
import gc
import warnings
import random
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers.optimization import AdamW, get_cosine_schedule_with_warmup
from pytorchvideo.models.hub.slowfast import slowfast_16x8_r101_50_50

warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')

# Seed

In [None]:
def seed_everything(seed: int = 42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and the current CUDA
    device), exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic
    mode with auto-tuning disabled.

    Args:
        seed: Value handed to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # All four seeders take the same integer, so apply them in one pass.
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    ):
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Fix the seed once, right after the helper is defined.
seed_everything(42)

# Config

In [None]:
# Use the first GPU when one is available; otherwise fall back to the CPU so
# that later `.to(device)` calls still work on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Settings shared by the cells below.
flags = {
    # NOTE(review): batch_size, learning_rate, epoch, save_folder and
    # warmup_ratio are not read by any cell visible here; presumably they
    # are left over from the training notebook - confirm before removing.
    'batch_size': 2,
    'num_worker': 0,             # DataLoader worker processes
    'learning_rate': 5e-5,
    'epoch': 100,
    'size': 256,                 # frames are resized to size x size
    'mean': [0.45, 0.45, 0.45],  # per-channel mean for A.Normalize
    'std': [0.225, 0.225, 0.225],  # per-channel std for A.Normalize
    'num_frames': 64,            # frames taken from each clip
    'sampling_rate': 1,          # step between consecutive sampled frames
    'slowfast_alpha': 4,         # slow pathway keeps num_frames // alpha frames
    'save_folder': './model_weights',
    'warmup_ratio': 0.1,
}

# Dataset

In [None]:
class PackPathway(torch.nn.Module):
    """Split a clip tensor into the ``[slow, fast]`` pair fed to SlowFast.

    The fast pathway is the input itself; the slow pathway keeps
    ``T // alpha`` frames spread evenly over dimension 1 of the input.

    Args:
        alpha: Temporal sub-sampling factor of the slow pathway. When
            ``None`` (the default) ``flags['slowfast_alpha']`` is read on
            every call, which is the original behaviour.
    """

    def __init__(self, alpha=None):
        super().__init__()
        self.alpha = alpha

    def forward(self, frames: torch.Tensor):
        """Return ``[slow_pathway, fast_pathway]`` for ``frames``.

        Args:
            frames: Tensor whose dimension 1 indexes time
                (the dataset passes ``(C, T, H, W)``).
        """
        alpha = flags['slowfast_alpha'] if self.alpha is None else self.alpha
        num_frames = frames.shape[1]

        # Compute the indices on the CPU (keeps the exact values of the
        # original code) and only then move them next to `frames`;
        # index_select requires the index on the same device as its input.
        slow_indices = torch.linspace(
            0, num_frames - 1, num_frames // alpha
        ).long().to(frames.device)

        slow_pathway = torch.index_select(frames, 1, slow_indices)
        fast_pathway = frames
        return [slow_pathway, fast_pathway]

In [None]:
# Everything two levels below the dataset root: frame folders next to the
# .mp4 / .json files.
DATA_GLOB = '/data/competition/Another/Busan/Competition_1/행위/행위/*/*'


def list_sample_folders(pattern):
    """Return the sorted matches of ``pattern`` that are not json/mp4 files.

    Args:
        pattern: Glob pattern to expand.

    Returns:
        Sorted list of matching paths whose names do not end in ``json`` or
        ``mp4``; in this dataset layout those are the frame folders.
    """
    return [
        path for path in sorted(glob(pattern))
        if not path.endswith(("json", "mp4"))
    ]


folder_path = list_sample_folders(DATA_GLOB)

# Best-effort clean-up: os.rmdir only succeeds on empty directories, so
# folders that still hold frames (or entries that are not directories at all)
# raise OSError and are deliberately left alone.
for candidate in folder_path:
    try:
        os.rmdir(candidate)
    except OSError:
        continue

print("빈 폴더 제거 전 :", len(folder_path))

# Re-scan so that the folders removed above disappear from the list.
folder_path = list_sample_folders(DATA_GLOB)

print("빈 폴더 제거 후 :", len(folder_path))

# One test sample per remaining folder, in sorted order.
test_paths = list(folder_path)

print('test개수:', len(test_paths))
df_test = pd.DataFrame({'file_path': test_paths})

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset yielding SlowFast inputs built from folders of PNG frames.

    Every row of ``df`` must have a ``file_path`` column naming a directory
    that contains the clip's frames as ``*.png`` files.

    Args:
        df: DataFrame with a ``file_path`` column.
        mode: ``'train'`` picks a random start frame and derives the label
            from the folder path; any other value starts at frame 0 and
            returns label 0.
        size: Side length the frames are resized to.
        num_frames: Number of frames per clip.
        sampling_rate: Step between consecutive sampled frames.
    """

    # (substring of the folder path, class index), checked in this order;
    # the first hit wins. The banner and flier actions share class 5.
    _LABEL_KEYWORDS = (
        ("smoking", 0),
        ("fishing", 1),
        ("trash_dump", 2),
        ("wall_over", 3),
        ("damage_to_facilities", 4),
        ("banner_action", 5),
        ("fliers_action", 5),
        ("tent_setup", 6),
    )

    def __init__(self,
                 df,
                 mode='test',
                 size=flags['size'],
                 num_frames=flags['num_frames'],
                 sampling_rate=flags['sampling_rate']):
        self.df = df
        self.mode = mode
        self.size = size
        self.num_frames = num_frames
        self.sampling_rate = sampling_rate
        self.slowfast_preproc = PackPathway()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the clip at row ``idx``.

        ``frames`` is the ``[slow, fast]`` list produced by ``PackPathway``
        from a ``(3, num_frames, size, size)`` tensor. Clips with fewer
        frames than requested are zero-padded at the end instead of raising
        ``IndexError``.
        """
        file_path = self.df['file_path'].iloc[idx]
        img_path = sorted(glob(file_path + '/*.png'))
        span = self.sampling_rate * self.num_frames

        start = 0
        if self.mode == 'train':
            # Only randomise the start when the clip is longer than the
            # sampling window; otherwise there is nothing to choose from.
            max_start = len(img_path) - span - 1
            if max_start > 0:
                start = random.choice(range(max_start))

        # Build the augmentation pipeline once per clip, not once per frame.
        transform = self.transform_func()

        # Pre-filled with zeros, so slots that receive no frame act as padding.
        imgs = torch.zeros(self.num_frames, 3, self.size, self.size)
        for slot, frame_idx in enumerate(
                range(start, start + span, self.sampling_rate)):
            if frame_idx >= len(img_path):
                break  # clip ran out of frames; the rest stays zero
            # Close the file promptly and force 3 channels so that RGBA or
            # grayscale PNGs match the 3-channel mean/std and tensor slot.
            with Image.open(img_path[frame_idx]) as frame:
                img = np.array(frame.convert('RGB'))
            imgs[slot] = transform(image=img)['image']

        imgs = self._pad(imgs)
        # (T, C, H, W) -> (C, T, H, W): PackPathway samples along dimension 1.
        frames = self.slowfast_preproc(imgs.permute(1, 0, 2, 3))

        label = self._label_from_path(file_path) if self.mode == 'train' else 0
        return frames, label

    def _label_from_path(self, file_path):
        """Return the class index of the first keyword found in ``file_path`` (0 if none)."""
        for keyword, class_index in self._LABEL_KEYWORDS:
            if keyword in file_path:
                return class_index
        return 0

    def transform_func(self):
        """Return the per-frame pipeline: resize, normalise, convert to tensor."""
        return A.Compose([
            # Use the instance's size so it always matches the `imgs` buffer.
            A.Resize(self.size, self.size),
            A.Normalize(mean=flags['mean'], std=flags['std']),
            ToTensorV2(p=1.0),
        ])

    def _pad(self, imgs):
        """Zero-pad or truncate ``imgs`` along dimension 0 to ``num_frames``."""
        if imgs.shape[0] < self.num_frames:
            T, C, H, W = imgs.shape
            pad = torch.zeros(self.num_frames - T, C, H, W)
            imgs = torch.cat([imgs, pad], dim=0)
        else:
            imgs = imgs[:self.num_frames]

        return imgs

In [None]:
# Test-mode dataset over the discovered frame folders.
test_dataset = CustomDataset(df_test)

# One clip per batch and no shuffling, so the i-th prediction belongs to the
# i-th row of df_test.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=flags['num_worker'],
)

# Model

In [None]:
class CustomModel(nn.Module):
    """SlowFast (16x8, R101) backbone with a replaced classification layer.

    Args:
        device: Device the whole module is moved to after construction.
        num_classes: Output size of the new projection layer (default 7).
        pretrained: Forwarded to ``slowfast_16x8_r101_50_50``. Pass ``False``
            to skip loading the hub weights, e.g. when a fine-tuned
            checkpoint is loaded immediately afterwards.
    """

    def __init__(self, device, num_classes=7, pretrained=True):
        super().__init__()
        self.model = slowfast_16x8_r101_50_50(pretrained=pretrained)

        # Swap the head's final projection for one with `num_classes` outputs,
        # keeping its input width.
        head = self.model.blocks[6]
        head.proj = nn.Linear(head.proj.in_features, num_classes, bias=True)

        self.to(device)

    def forward(self, x):
        """Return the wrapped network's output for ``x`` (the pathway list)."""
        return self.model(x)

# Inference

In [None]:
def inference_func(model, test_dataloader):
    """Run ``model`` over ``test_dataloader`` and return class probabilities.

    Args:
        model: Network called with the list of pathway tensors of a batch.
            The caller is responsible for putting it in eval mode.
        test_dataloader: Iterable of ``(frames, label)`` batches, where
            ``frames`` is the list of pathway tensors; labels are ignored.

    Returns:
        Tensor with the softmax probabilities of every batch concatenated
        along dimension 0, in loader order.
    """
    probabilities = []
    with torch.no_grad():
        # Iterate the loader directly (not through enumerate) so tqdm can
        # read its length and display a total.
        for batch in tqdm(test_dataloader):
            pathways = [pathway.to(device) for pathway in batch[0]]
            scores = model(pathways)
            probabilities.append(torch.softmax(scores, dim=-1))

    return torch.cat(probabilities)

In [None]:
# Ensemble member 1: rebuild the network, load its checkpoint and score the
# test set.
seed_everything(42)
custom_model = CustomModel(device=device)
# Read the checkpoint into CPU memory; load_state_dict copies it into the model.
checkpoint = torch.load('./Final_Model/Model1_best-acc_20.bin', map_location='cpu')
custom_model.load_state_dict(checkpoint)
custom_model.eval()  # inference behaviour for dropout / batch-norm layers
custom_model = custom_model.to(device)

# Score with the model built in this cell. The previous name `custom_model1`
# is never defined, so it either raised NameError or silently reused a stale
# kernel variable.
outputs1 = inference_func(custom_model, test_dataloader)

In [None]:
# Ensemble member 2: rebuild the network, load its checkpoint and score the
# test set.
seed_everything(42)
custom_model = CustomModel(device=device)
# Read the checkpoint into CPU memory; load_state_dict copies it into the model.
checkpoint = torch.load('./Final_Model/Model2_best-acc_20.bin', map_location='cpu')
custom_model.load_state_dict(checkpoint)
custom_model.eval()  # inference behaviour for dropout / batch-norm layers
custom_model = custom_model.to(device)

# Score with the model built in this cell. The previous name `custom_model1`
# is never defined, so it either raised NameError or silently reused a stale
# kernel variable.
outputs2 = inference_func(custom_model, test_dataloader)

In [None]:
# Ensemble member 3: rebuild the network, load its checkpoint and score the
# test set.
seed_everything(42)
custom_model = CustomModel(device=device)
# Read the checkpoint into CPU memory; load_state_dict copies it into the model.
checkpoint = torch.load('./Final_Model/Model3_best-acc_20.bin', map_location='cpu')
custom_model.load_state_dict(checkpoint)
custom_model.eval()  # inference behaviour for dropout / batch-norm layers
custom_model = custom_model.to(device)

# Score with the model built in this cell. The previous name `custom_model1`
# is never defined, so it either raised NameError or silently reused a stale
# kernel variable.
outputs3 = inference_func(custom_model, test_dataloader)

In [None]:
# Ensemble member 4: rebuild the network, load its checkpoint and score the
# test set.
seed_everything(42)
custom_model = CustomModel(device=device)
# Read the checkpoint into CPU memory; load_state_dict copies it into the model.
checkpoint = torch.load('./Final_Model/Model4_best-acc_20.bin', map_location='cpu')
custom_model.load_state_dict(checkpoint)
custom_model.eval()  # inference behaviour for dropout / batch-norm layers
custom_model = custom_model.to(device)

# Score with the model built in this cell. The previous name `custom_model1`
# is never defined, so it either raised NameError or silently reused a stale
# kernel variable.
outputs4 = inference_func(custom_model, test_dataloader)

In [None]:
# Soft-voting ensemble: average the four models' probabilities, then hand the
# result to NumPy for the matching step below.
ensemble_members = [outputs1, outputs2, outputs3, outputs4]
# Start the sum from the first member so the additions run in the same order.
outputs = sum(ensemble_members[1:], ensemble_members[0]) / len(ensemble_members)
outputs = outputs.cpu().numpy()

In [None]:
# From here on `test_paths` lists the source videos (one submission row each),
# replacing the frame-folder list of the same name built earlier.
video_glob = '/data/competition/Another/Busan/Competition_1/행위/행위/*/*.mp4'
test_paths = sorted(glob(video_glob))

In [None]:
def _scores_per_video(video_paths, folder_paths, scores):
    """Return, for every video, the score row of its frame folder.

    A video is matched to the folder whose path equals the video path without
    its extension; if there is none, the first folder whose path contains
    that stem is used (the original matching rule). Videos without any
    matching folder get an all-zero row, whose argmax is class 0.

    Args:
        video_paths: Iterable of video file paths.
        folder_paths: Folder paths, aligned row-by-row with ``scores``.
        scores: Array of per-folder class scores, one row per folder.

    Returns:
        Array with one score row per video, in ``video_paths`` order.
    """
    folder_paths = list(folder_paths)

    # Exact-match lookup; setdefault keeps the first row for duplicate paths.
    row_by_folder = {}
    for row, folder in enumerate(folder_paths):
        row_by_folder.setdefault(folder, row)

    fallback = np.zeros(scores.shape[1:], dtype=scores.dtype)
    matched = []
    for video in video_paths:
        # splitext strips only the extension; split(".")[0] would cut the
        # path at the first dot anywhere in it.
        stem = os.path.splitext(video)[0]
        row = row_by_folder.get(stem)
        if row is None:
            row = next(
                (r for r, folder in enumerate(folder_paths) if stem in folder),
                None,
            )
        matched.append(fallback if row is None else scores[row])
    return np.array(matched)


total_label = _scores_per_video(tqdm(test_paths), df_test["file_path"], outputs)

In [None]:
# Submission table: one row per video with its predicted class index.
labels = pd.DataFrame({"file_path": test_paths})

# Assign the whole column in one step. The previous per-row
# `labels["label"].iloc[i] = ...` is chained assignment, which pandas may
# apply to a temporary copy (and never applies under Copy-on-Write), leaving
# every label at 0.
labels["label"] = np.array(
    [int(np.argmax(scores)) for scores in total_label],
    dtype=np.int64,
)

In [None]:
# Write the predictions next to the notebook, without the DataFrame index.
submission_path = "./final_submission.csv"
labels.to_csv(submission_path, index=False)