## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import timm
from torchvision.models import video
import torchvision

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [3]:
#print(dir(models.video))

In [4]:
#model=getattr(models.video,CFG.model_name)(pretrained=True)

In [5]:
#model=getattr(video,CFG.model_name)(weights=video.R2Plus1D_18_Weights.DEFAULT)
#model

In [6]:
class CFG:
    model_name= "r3d_18"
    n_folds = 5
    n_classes = 13
    video_length=50
    img_size=128
    epochs=50
    lr=3e-4
    batch_size=16
    seed=41
    earlystop=3
        

## Fixed RandomSeed

In [7]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG.seed) # Seed 고정

## Data Load

In [8]:
df = pd.read_csv('./train_detail_classified.csv')
df

Unnamed: 0.1,Unnamed: 0,sample_id,video_path,label,crush,ego,weather,timing
0,0,TRAIN_0000,./train/TRAIN_0000.mp4,7,1,0,2,0
1,1,TRAIN_0001,./train/TRAIN_0001.mp4,7,1,0,2,0
2,2,TRAIN_0002,./train/TRAIN_0002.mp4,0,0,-1,-1,-1
3,3,TRAIN_0003,./train/TRAIN_0003.mp4,0,0,-1,-1,-1
4,4,TRAIN_0004,./train/TRAIN_0004.mp4,1,1,1,0,0
...,...,...,...,...,...,...,...,...
2693,2693,TRAIN_2693,./train/TRAIN_2693.mp4,3,1,1,1,0
2694,2694,TRAIN_2694,./train/TRAIN_2694.mp4,5,1,1,2,0
2695,2695,TRAIN_2695,./train/TRAIN_2695.mp4,0,0,-1,-1,-1
2696,2696,TRAIN_2696,./train/TRAIN_2696.mp4,0,0,-1,-1,-1


## 항목 별로 split -> 후에 stratified k fold 전략 사용

In [9]:
#-1 라벨인 데이터 버리기
#df_crush=df[df['crush']!=1]
df_ego=df[df['ego']!=-1]
df_weather=df[df['weather']!=-1]
df_timing=df[df['timing']!=-1]

In [10]:
train_crush, val_crush, _, _ = train_test_split(df, df['crush'], test_size=0.2, random_state=CFG.seed)
train_ego, val_ego, _,_=train_test_split(df_ego, df_ego['ego'], test_size=0.2, random_state=CFG.seed)
train_weather, val_weather, _,_=train_test_split(df_weather, df_weather['weather'], test_size=0.2, random_state=CFG.seed)
train_timing, val_timing,_,_=train_test_split(df_timing, df_timing['timing'], test_size=0.2, random_state=CFG.seed)

In [11]:
print(len(train_timing), len(val_timing))

732 183


## CustomDataset

In [12]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG.video_length):
            _, img = cap.read()
            img = cv2.resize(img, (CFG.img_size, CFG.img_size))
            img = img / 255.
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

In [13]:
train_crush_dataset = CustomDataset(train_crush['video_path'].values, train_crush['crush'].values)
train_crush_loader = DataLoader(train_crush_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=2)

val_crush_dataset = CustomDataset(val_crush['video_path'].values, val_crush['crush'].values)
val_crush_loader = DataLoader(val_crush_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=2)

In [14]:
train_ego_dataset = CustomDataset(train_ego['video_path'].values, train_ego['ego'].values)
train_ego_loader = DataLoader(train_ego_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=2)

val_ego_dataset = CustomDataset(val_ego['video_path'].values, val_ego['ego'].values)
val_ego_loader = DataLoader(val_ego_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=2)

In [15]:
train_weather_dataset = CustomDataset(train_weather['video_path'].values, train_weather['weather'].values)
train_weather_loader = DataLoader(train_weather_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=2)

val_weather_dataset = CustomDataset(val_weather['video_path'].values, val_weather['weather'].values)
val_weather_loader = DataLoader(val_weather_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=2)

In [16]:
train_timing_dataset = CustomDataset(train_timing['video_path'].values, train_timing['timing'].values)
train_timing_loader = DataLoader(train_timing_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=2)

val_timing_dataset = CustomDataset(val_timing['video_path'].values, val_timing['timing'].values)
val_timing_loader = DataLoader(val_timing_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=2)

In [17]:
train_loaders=[train_crush_loader,train_ego_loader,train_weather_loader,train_timing_loader]
val_loaders=[val_crush_loader,val_ego_loader,val_weather_loader,val_timing_loader]

In [18]:
#for tr_loader,val_loader in zip(train_loaders, val_loaders):
#    print(next(iter(tr_loader))[0].shape, next(iter(val_loader))[1])

In [19]:
#train_dataset[0][0].shape
#print(next(iter(train_crush_loader))[0].shape)
#print(next(iter(train_ego_loader))[0].shape)
#print(next(iter(train_weather_loader))[0].shape)
#print(next(iter(train_timing_loader))[0].shape)

## Model Define
- 동일 모델로 순서대로 4개 사용

In [20]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=CFG.n_classes, fc_type='shallow', binary=True):
        super(BaseModel, self).__init__()
        self.fc_type=fc_type
        self.num_classes=num_classes
        
        #get backbone
        #self.backbone = r2plus1d_18(pretrained=True)
        self.backbone=getattr(torchvision.models.video,CFG.model_name)(pretrained=True)
        self.backbone.fc=self.get_fc()
        self.binary=binary
        
    def get_fc(self):
        if self.fc_type == 'deep':
            fc = nn.Sequential(nn.Linear(self.backbone.fc.in_features, self.backbone.fc.in_features//2),
                                        nn.BatchNorm1d(self.backbone.fc.in_features//2,  momentum=0.1),
                                        nn.ReLU(),
                                        nn.Linear(self.backbone.fc.in_features//2, self.num_classes)
                                        )
    
        elif self.fc_type == 'shallow':
            fc = nn.Linear(self.backbone.fc.in_features, self.num_classes)
        else:
            raise ValueError(f"Wrong fc-type input {self.fc_type}")
        return fc
    
    def forward(self, x):
        x = self.backbone(x)
            
        return x

### check model in-out

In [21]:
#from torchsummary import summary
#model=BaseModel(fc_type='deep')
#model=model.to(device)
#summary(model, (3,50,720,1280))

## Train

In [22]:
def train(model, criterion, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    
    #criterion = nn.CrossEntropyLoss().to(device)
    criterion=criterion().to(device)
    best_val_score = 0
    best_model = None
    cnt=0
    for epoch in range(1, CFG.epochs+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            
            #print("target: ", labels)
            #print("output: ", output)
            #if model.module.binary==True: #because of DataParallel 
            #    labels=labels.float()
                
            #loss = criterion(output, labels.reshape(-1,1))
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            
            cnt=0
        else:
            print("early stopping count : {}".format(cnt))
            cnt+=1
        
        if cnt==CFG.earlystop:
            print("early stopping done")
            break
            
    return best_model

In [23]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            #if model.module.binary==True: #because of DataParallel 
            #    labels=labels.float()
                
            logit = model(videos)
            
            #loss = criterion(logit, labels.reshape(-1,1))
            loss = criterion(logit, labels)
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist() 

            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

## Run!!

In [24]:
model_crush = BaseModel(num_classes=2, fc_type='shallow')
model_ego = BaseModel(num_classes=2, fc_type='shallow')
model_weather = BaseModel(num_classes=3, fc_type='shallow', binary=False)
model_timing = BaseModel(num_classes=2, fc_type='shallow')
models=[model_crush, model_ego, model_weather, model_timing]
name=["crush", "ego", "weather", "timing"]
#i=0
for i in range(4):
    model=models[i]
    model.eval()
    optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=0.00001)
    if i == 2: #weather
        criterion=nn.CrossEntropyLoss
        print("mutli-classification")
    else:
        criterion=nn.CrossEntropyLoss #여기 튜닝 고민 해봐야 할듯
        print("binary-classification")
    infer_model = train(model, criterion, optimizer, train_loaders[i], val_loaders[i], scheduler, device)
    torch.save(infer_model.state_dict(), './weights/r3d_per_label_{}.pt'.format(name[i]))

    del infer_model

binary-classification
Let's use 2 GPUs!


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.19196] Val Loss : [0.05652] Val F1 : [0.99176]


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.09361] Val Loss : [0.13842] Val F1 : [0.95568]
early stopping count : 0


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.05819] Val Loss : [0.01929] Val F1 : [0.99383]


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.01558] Val Loss : [0.00753] Val F1 : [0.99590]


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.01562] Val Loss : [0.01916] Val F1 : [0.99383]
early stopping count : 0


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.05835] Val Loss : [0.08029] Val F1 : [0.97950]
early stopping count : 1


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.03728] Val Loss : [0.02001] Val F1 : [0.99589]
early stopping count : 2
early stopping done
binary-classification
Let's use 2 GPUs!


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.53393] Val Loss : [0.36284] Val F1 : [0.85501]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.29309] Val Loss : [0.22317] Val F1 : [0.91607]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.16848] Val Loss : [0.49185] Val F1 : [0.82488]
early stopping count : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.21336] Val Loss : [0.28506] Val F1 : [0.85758]
early stopping count : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.13929] Val Loss : [0.71310] Val F1 : [0.69123]
early stopping count : 2
early stopping done
mutli-classification
Let's use 2 GPUs!


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.81399] Val Loss : [0.62172] Val F1 : [0.70419]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.43851] Val Loss : [0.97030] Val F1 : [0.66454]
early stopping count : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.30715] Val Loss : [2.36891] Val F1 : [0.51417]
early stopping count : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.28685] Val Loss : [1.27621] Val F1 : [0.52039]
early stopping count : 2
early stopping done
binary-classification
Let's use 2 GPUs!


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.18099] Val Loss : [0.18411] Val F1 : [0.92541]


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.10550] Val Loss : [0.15271] Val F1 : [0.91311]
early stopping count : 0


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04727] Val Loss : [0.12847] Val F1 : [0.88598]
early stopping count : 1


  0%|          | 0/46 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.02318] Val Loss : [0.11730] Val F1 : [0.91456]
early stopping count : 2
early stopping done


## Inference

In [25]:
test = pd.read_csv('./test.csv')

In [26]:
test_dataset = CustomDataset(test['video_path'].values, None)
test_loader = DataLoader(test_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=0)

In [27]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [30]:
path='./weights/r3d_per_label_{}.pt'.format(name[0])
torch.load(path)
#torch.load('./weights/r3d_per_label_{}.pt')

OrderedDict([('module.backbone.stem.0.weight',
              tensor([[[[[-3.1073e-02, -4.3038e-02, -4.3884e-02,  ..., -5.4068e-02,
                          -4.1210e-02, -4.1756e-02],
                         [-2.1801e-02, -3.3765e-02, -3.4632e-02,  ..., -4.5462e-02,
                          -3.7791e-02, -3.6855e-02],
                         [-1.6407e-02, -2.8327e-02, -2.7515e-02,  ..., -3.8010e-02,
                          -3.2247e-02, -2.9739e-02],
                         ...,
                         [ 2.5978e-02,  2.1443e-02,  2.8154e-02,  ...,  2.8325e-02,
                           2.3443e-02,  1.9566e-02],
                         [ 3.4389e-02,  3.2103e-02,  4.2398e-02,  ...,  4.4478e-02,
                           3.7176e-02,  3.3994e-02],
                         [ 3.9814e-02,  3.5487e-02,  4.0729e-02,  ...,  4.8770e-02,
                           4.2822e-02,  4.3097e-02]],
              
                        [[-5.0850e-02, -7.1842e-02, -7.2626e-02,  ..., -8.0580e-02,
 

In [33]:
i=0
from collections import OrderedDict

model_crush = BaseModel(num_classes=2, fc_type='shallow')
model_ego = BaseModel(num_classes=2, fc_type='shallow')
model_weather = BaseModel(num_classes=3, fc_type='shallow')
model_timing = BaseModel(num_classes=2, fc_type='shallow')
models=[model_crush, model_ego, model_weather, model_timing]
pred_tot=[]

for model in models:
    path='./weights/r3d_per_label_{}.pt'.format(name[i])
    state_dict=torch.load(path)
    keys = state_dict.keys()

    values = state_dict.values()
    
    new_keys = []
    
    for key in keys:
        new_key = key[7:]    # remove the 'module.'
        new_keys.append(new_key)
    
    new_dict = OrderedDict(list(zip(new_keys, values)))
    
    model.load_state_dict(new_dict)
    preds = inference(model, test_loader, device)
    pred_tot.append(preds)
    i+=1
    del model

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

## Submission

In [34]:
submit = pd.read_csv('./sample_submission.csv')

In [42]:
submit['crush'] = pred_tot[0]
submit['ego'] = pred_tot[1]
submit['weather'] = pred_tot[2]
submit['timing'] = pred_tot[3]
submit['label']=-1
submit.head()

Unnamed: 0,sample_id,label,crush,ego,weather,timing
0,TEST_0000,-1,0,0,2,1
1,TEST_0001,-1,0,1,2,1
2,TEST_0002,-1,0,1,2,0
3,TEST_0003,-1,0,1,2,1
4,TEST_0004,-1,0,0,2,0


In [47]:
#label ensemble
for _, row in submit.iterrows():
    if row['crush']==0: # 0
        row['label']=0 
    else:
        if row['ego']==0: # 1~6
            if row['weather']==0: #1,2
                if row['timing']==0:
                    submit[row['label']]=1
                else:
                    submit[row['label']]=2
                    
            elif row['weather']==1:# 3,4
                if row['timing']==0:
                    row['label']=3
                else:
                    row['label']=4
                
                if row['timing']==0:# 5,6
                    submit[row['label']]=5
                else:
                    submit[row['label']]=6

        else: # 7~12
            if row['weather']==0: #7,8
                if row['timing']==0:
                    submit[row['label']]=7
                else:
                    submit[row['label']]=8
                    
            elif row['weather']==1:# 9,10
                if row['timing']==0:
                    submit[row['label']]=9
                else:
                    submit[row['label']]=10
                
                if row['timing']==0:# 11,12
                    submit[row['label']]=11
                else:
                    submit[row['label']]=12

#라벨 추가했으니 필요없는 열 이제 삭제
submit = submit.drop(['crush'],axis=1)
submit = submit.drop(['ego'],axis=1)
submit = submit.drop(['weather'],axis=1)
submit = submit.drop(['timing'],axis=1)

KeyError: 'crush'

In [45]:
submit.to_csv('./r3d_labelEnsemble_submit.csv', index=False)

In [46]:
submit

Unnamed: 0,sample_id,label
0,TEST_0000,-1
1,TEST_0001,-1
2,TEST_0002,-1
3,TEST_0003,-1
4,TEST_0004,-1
...,...,...
1795,TEST_1795,-1
1796,TEST_1796,-1
1797,TEST_1797,-1
1798,TEST_1798,-1
