## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from ipywidgets import interact
from matplotlib import pyplot as plt

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import sklearn
from sklearn.ensemble import VotingClassifier

import timm
from torchvision.models import video
import torchvision

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [3]:
class CFG:
    model_name= "r3d_18"
    n_folds = 5
    n_classes = 13
    video_length=50
    img_size=128
    epochs=50
    lr=3e-4
    batch_size=16
    seed=41
    earlystop=10
    fold=5
        

## Fixed RandomSeed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG.seed) # Seed 고정

## Data Load

In [5]:
df = pd.read_csv('./train_detail_classified.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,sample_id,video_path,label,crush,ego,weather,timing
0,0,TRAIN_0000,./train/TRAIN_0000.mp4,7,1,0,0,0
1,1,TRAIN_0001,./train/TRAIN_0001.mp4,7,1,0,0,0
2,2,TRAIN_0002,./train/TRAIN_0002.mp4,0,0,-1,-1,-1
3,3,TRAIN_0003,./train/TRAIN_0003.mp4,0,0,-1,-1,-1
4,4,TRAIN_0004,./train/TRAIN_0004.mp4,1,1,1,0,0


## stratified K fold

In [6]:
#-1 라벨인 데이터 버리기
#df_crush=df[df['crush']!=1].reset_index(drop=True)
df_ego=df[df['ego']!=-1].reset_index(drop=True)
df_weather=df[df['weather']!=-1].reset_index(drop=True)
df_timing=df[df['timing']!=-1].reset_index(drop=True)
dfs=[(df, df['crush']),(df_ego, df_ego['ego']), (df_weather,df_weather['weather']), (df_timing,df_timing['timing'])]

In [7]:
skf = sklearn.model_selection.StratifiedKFold(n_splits=CFG.fold, shuffle=True, random_state=CFG.seed)
#라벨 분포에 맞춰서 fold, val 정보 넣기
for data in dfs:
    data[0]["fold"]=-1
    for k, (train_idx, val_idx) in enumerate(skf.split(*data)):
        data[0].loc[val_idx, 'fold']=k

In [16]:
df_weather[df_weather['fold']==1]['weather'].value_counts(dropna=False).sort_index()

0    143
1     26
2     14
Name: weather, dtype: int64

In [17]:
df_weather[df_weather['fold']==0]['weather'].value_counts(dropna=False).sort_index()

0    144
1     25
2     14
Name: weather, dtype: int64

In [18]:
df_weather[df_weather['fold']==2]['weather'].value_counts(dropna=False).sort_index()

0    143
1     26
2     14
Name: weather, dtype: int64

In [19]:
df_weather[df_weather['fold']==3]['weather'].value_counts(dropna=False).sort_index()

0    143
1     26
2     14
Name: weather, dtype: int64

In [20]:
df_weather[df_weather['fold']==4]['weather'].value_counts(dropna=False).sort_index()

0    143
1     26
2     14
Name: weather, dtype: int64

## CustomDataset

In [9]:
#https://dacon.io/en/competitions/official/236064/codeshare/7572?page=1&dtype=recent#
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.label_list = label_list
        self.transforms = transforms
        self.img_path_list = img_path_list
        
    def __getitem__(self, index):        
        images = self.get_frames(self.img_path_list[index])
                        
        if self.transforms is not None:
            res = self.transforms(**images)
            #print(images)
            images = torch.zeros((len(images), 3, CFG.img_size, CFG.img_size))
            #print(images.shape)
            images[0, :, :, :] = torch.Tensor(res["image"])
            for i in range(1, len(images)):
                images[i, :, :, :] = res[f"image{i}"]
        
        images=images.permute(1,0,2,3)
        
        if self.label_list is not None:
            label = self.label_list[index]
        
            return images, label
        else:
            return images

    def __len__(self):
        return len(self.img_path_list) 
    
    def get_frames(self, path):
        cap = cv2.VideoCapture(path)
        frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        imgs = []        
        for fidx in range(frames):
            _, img = cap.read()            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            imgs.append(img)
        
        ret = {f"image{i}":imgs[i] for i in range(1, len(imgs))}
        ret['image'] = imgs[0]

        return ret

In [10]:
weather_transforms = A.Compose([
    #A.CenterCrop(480,854,p=1.0),
    A.Resize(height=CFG.img_size, width=CFG.img_size),
    A.Superpixels(p=0.3),
    #A.HorizontalFlip(0.5),
    A.Normalize(mean=0.0, std=1.0),
    ToTensorV2()
], p=1, additional_targets={f"image{i}":"image" for i in range(1, 50)})

other_transforms = A.Compose([
    #A.CenterCrop(480,854,p=1.0),
    A.Resize(height=CFG.img_size, width=CFG.img_size),
    #A.HorizontalFlip(0.5),
    A.Normalize(mean=0.0, std=1.0),
    ToTensorV2()
], p=1, additional_targets={f"image{i}":"image" for i in range(1, 50)})

test_transforms= A.Compose([
    #A.CenterCrop(480,854,p=1.0),
    A.Resize(height=CFG.img_size, width=CFG.img_size),
    #A.HorizontalFlip(0.5),
    A.Normalize(mean=0.0, std=1.0),
    ToTensorV2()
], p=1, additional_targets={f"image{i}":"image" for i in range(1, 50)})


In [11]:
#get data loader
def load_dataset(df,name,k,transforms= other_transforms):
    
    train_df = df[df["fold"]!=k]
    val_df = df[df["fold"]==k]
    
    train_dataset = CustomDataset(df['video_path'].values, df[name].values,transforms=other_transforms)
    train_loader = DataLoader(train_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=2)

    val_dataset = CustomDataset(df['video_path'].values, df[name].values,transforms=test_transforms)
    val_loader = DataLoader(val_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=2)
    
    return train_loader, val_loader

## Model Define
- 동일 모델로 순서대로 4개 사용

In [18]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=CFG.n_classes, fc_type='shallow', binary=True):
        super(BaseModel, self).__init__()
        self.fc_type=fc_type
        self.num_classes=num_classes
        
        #get backbone
        #self.backbone = r2plus1d_18(pretrained=True)
        self.backbone=getattr(torchvision.models.video,CFG.model_name)(pretrained=True)
        self.backbone.fc=self.get_fc()
        self.binary=binary
        
    def get_fc(self):
        if self.fc_type == 'deep':
            fc = nn.Sequential(nn.Linear(self.backbone.fc.in_features, self.backbone.fc.in_features//2),
                                        nn.BatchNorm1d(self.backbone.fc.in_features//2,  momentum=0.1),
                                        nn.ReLU(),
                                        nn.Linear(self.backbone.fc.in_features//2, self.num_classes)
                                        )
    
        elif self.fc_type == 'shallow':
            fc = nn.Linear(self.backbone.fc.in_features, self.num_classes)
        else:
            raise ValueError(f"Wrong fc-type input {self.fc_type}")
        return fc
    
    def forward(self, x):
        x = self.backbone(x)
            
        return x

### check model in-out

In [19]:
#from torchsummary import summary
#model=BaseModel(fc_type='deep')
#model=model.to(device)
#summary(model, (3,50,720,1280))

## Train

In [20]:
def train_one_epoch(model, criterion, optimizer, train_loader, val_loader, device):
    train_loss = []
    model.train()
    for videos, labels in tqdm(iter(train_loader)):
        videos = videos.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        output = model(videos)
        
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        
    return train_loss # score 변화없으면 잘못된거니까 바꾸기 -> 업데이트를 안하는거겠지 옵티마이저 반환안해줘서

def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            #if model.module.binary==True: #because of DataParallel 
            #    labels=labels.float()
                
            logit = model(videos)
            
            #loss = criterion(logit, labels.reshape(-1,1))
            loss = criterion(logit, labels)
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist() 

            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [21]:
def train(model, criterion, optimizer, train_loader, val_loader, scheduler, device, is_parallel=False):
    model.to(device)
    
    if torch.cuda.device_count() > 1:
        if is_parallel==True:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
    
    criterion=criterion().to(device)
    best_val_score = 0
    best_model = None
    cnt=0
    
    for epoch in range(1, CFG.epochs+1):
        train_loss=train_one_epoch(model, criterion, optimizer, train_loader, val_loader, device) #train 
        _val_loss, _val_score = validation(model, criterion, val_loader, device) #validation
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            
            cnt=0
        else:
            print("early stopping count : {}".format(cnt+1))
            cnt+=1
        
        if best_val_score>=0.999:
            print("already on best score")
            break
        
        if cnt==CFG.earlystop:
            print("early stopping done")
            break
        
            
    return best_model

## Run!!

In [22]:
def run(model,df, name:str, transforms, device):
    for k in range(CFG.fold):
        
        #data load for each fold
        train_loader, val_loader = load_dataset(df,name, k , transforms= transforms)
    
        model.eval()
        optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG.lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=0.00001)
        criterion=nn.CrossEntropyLoss # 추후 수정 예정
        
        print("{} model run".format(name))
        print("{}th model run".format(k+1))
        print("_"*100)
        
        infer_model = train(model, criterion, optimizer, train_loader, val_loader, scheduler, device)
        torch.save(infer_model.state_dict(), './weights/r3d_per_label_{}_Transform_{}fold.pt'.format(name, k))
    
        del infer_model

In [23]:
#total runnning -> too slow
model_crush = BaseModel(num_classes=2, fc_type='shallow')
model_ego = BaseModel(num_classes=2, fc_type='shallow')
model_weather = BaseModel(num_classes=3, fc_type='shallow', binary=False)
model_timing = BaseModel(num_classes=2, fc_type='shallow')

models=[model_crush, model_ego, model_weather, model_timing]
dfs=[(df, df['crush']),(df_ego, df_ego['ego']), (df_weather,df_weather['weather']), (df_timing,df_timing['timing'])]
names=["crush", "ego", "weather", "timing"]


run(models[2],dfs[2][0], names[2], transforms= weather_transforms, device=device)

binary-classification
weather model run
1th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.58279] Val Loss : [0.61725] Val F1 : [0.53057]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.34540] Val Loss : [0.34872] Val F1 : [0.59843]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.29044] Val Loss : [0.20530] Val F1 : [0.81755]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.13954] Val Loss : [0.08453] Val F1 : [0.95811]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.09492] Val Loss : [0.10118] Val F1 : [0.91607]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.06775] Val Loss : [0.04542] Val F1 : [0.97613]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.05102] Val Loss : [0.08973] Val F1 : [0.95148]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.06923] Val Loss : [0.05252] Val F1 : [0.97095]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.11745] Val Loss : [0.05042] Val F1 : [0.97282]
early stopping count : 3


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.08513] Val Loss : [0.05074] Val F1 : [0.95979]
early stopping count : 4


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.05946] Val Loss : [0.01804] Val F1 : [0.98343]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.01929] Val Loss : [0.00487] Val F1 : [0.99484]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.00538] Val Loss : [0.00098] Val F1 : [1.00000]
already on best score
binary-classification
weather model run
2th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.06044] Val Loss : [0.11564] Val F1 : [0.89261]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.09596] Val Loss : [0.08426] Val F1 : [0.94785]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04952] Val Loss : [0.04266] Val F1 : [0.98143]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.05594] Val Loss : [0.06686] Val F1 : [0.96421]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.04122] Val Loss : [0.03352] Val F1 : [0.97599]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.02418] Val Loss : [0.01407] Val F1 : [0.99009]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.00894] Val Loss : [0.00434] Val F1 : [0.99737]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.05159] Val Loss : [0.12671] Val F1 : [0.94134]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.20446] Val Loss : [0.45512] Val F1 : [0.77646]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.08220] Val Loss : [0.05600] Val F1 : [0.97579]
early stopping count : 3


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.01768] Val Loss : [0.00210] Val F1 : [1.00000]
already on best score
binary-classification
weather model run
3th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.02582] Val Loss : [0.29853] Val F1 : [0.83844]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.07343] Val Loss : [0.04301] Val F1 : [0.97278]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04846] Val Loss : [0.03676] Val F1 : [0.97118]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.05281] Val Loss : [0.15218] Val F1 : [0.89805]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.07648] Val Loss : [0.24677] Val F1 : [0.86838]
early stopping count : 3


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.04934] Val Loss : [0.00123] Val F1 : [1.00000]
already on best score
binary-classification
weather model run
4th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01922] Val Loss : [0.01450] Val F1 : [0.98854]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.00862] Val Loss : [0.00831] Val F1 : [0.99697]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04768] Val Loss : [0.13862] Val F1 : [0.92266]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.14793] Val Loss : [0.06622] Val F1 : [0.95331]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.10762] Val Loss : [0.03324] Val F1 : [0.97776]
early stopping count : 3


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.02045] Val Loss : [0.00211] Val F1 : [1.00000]
already on best score
binary-classification
weather model run
5th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01524] Val Loss : [0.00357] Val F1 : [1.00000]
already on best score


In [None]:
#run(models[0],dfs[0][0], names[0], transforms= other_transforms, device=device)

In [90]:
#ego
run(models[1],dfs[1][0], names[1], transforms= other_transforms, device=device)

binary-classification
ego model run
1th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.26928] Val Loss : [0.70064] Val F1 : [0.53770]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.09165] Val Loss : [0.02640] Val F1 : [0.99560]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.08250] Val Loss : [0.10256] Val F1 : [0.96170]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.10791] Val Loss : [0.05307] Val F1 : [0.98572]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.10451] Val Loss : [0.02664] Val F1 : [0.99890]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.01794] Val Loss : [0.00278] Val F1 : [0.99890]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.01417] Val Loss : [0.00302] Val F1 : [1.00000]
already on best score
binary-classification
ego model run
2th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.02568] Val Loss : [0.05253] Val F1 : [0.98355]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.07779] Val Loss : [0.03378] Val F1 : [0.98901]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04679] Val Loss : [0.01636] Val F1 : [0.99670]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.01589] Val Loss : [0.01018] Val F1 : [0.99670]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.01257] Val Loss : [0.00471] Val F1 : [0.99890]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.05034] Val Loss : [0.02858] Val F1 : [0.98680]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.05512] Val Loss : [0.00925] Val F1 : [0.99780]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.01970] Val Loss : [0.01088] Val F1 : [0.99780]
early stopping count : 3


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.12511] Val Loss : [0.39532] Val F1 : [0.89164]
early stopping count : 4


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.06711] Val Loss : [0.02387] Val F1 : [0.99450]
early stopping count : 5


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.01097] Val Loss : [0.00132] Val F1 : [1.00000]
already on best score
binary-classification
ego model run
3th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01227] Val Loss : [0.01475] Val F1 : [0.99451]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.05434] Val Loss : [0.09287] Val F1 : [0.97462]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.03928] Val Loss : [0.00307] Val F1 : [1.00000]
already on best score
binary-classification
ego model run
4th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.05911] Val Loss : [0.02194] Val F1 : [0.99340]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.03345] Val Loss : [0.05680] Val F1 : [0.98016]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.06377] Val Loss : [0.00181] Val F1 : [1.00000]
already on best score
binary-classification
ego model run
5th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01354] Val Loss : [0.00208] Val F1 : [0.99890]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.00288] Val Loss : [0.00175] Val F1 : [1.00000]
already on best score


In [91]:
#timing
run(models[3],dfs[3][0], names[3], transforms= other_transforms, device=device)

binary-classification
timing model run
1th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.11684] Val Loss : [0.57041] Val F1 : [0.62336]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.05125] Val Loss : [0.10456] Val F1 : [0.88543]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.07909] Val Loss : [0.05953] Val F1 : [0.96681]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.03121] Val Loss : [0.04803] Val F1 : [0.95771]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.01793] Val Loss : [0.04154] Val F1 : [0.96493]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.00554] Val Loss : [0.00071] Val F1 : [1.00000]
already on best score
binary-classification
timing model run
2th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.03299] Val Loss : [0.05299] Val F1 : [0.95557]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.02927] Val Loss : [0.00590] Val F1 : [0.99734]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.01860] Val Loss : [0.00238] Val F1 : [1.00000]
already on best score
binary-classification
timing model run
3th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.03063] Val Loss : [0.03776] Val F1 : [0.96948]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.03622] Val Loss : [0.02709] Val F1 : [0.98924]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.05973] Val Loss : [0.02593] Val F1 : [0.97866]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.03603] Val Loss : [0.02095] Val F1 : [0.98703]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.01590] Val Loss : [0.00321] Val F1 : [1.00000]
already on best score
binary-classification
timing model run
4th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.02927] Val Loss : [0.01966] Val F1 : [0.98399]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.04798] Val Loss : [0.01256] Val F1 : [0.99736]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.02753] Val Loss : [0.01300] Val F1 : [0.99736]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00465] Val Loss : [0.00067] Val F1 : [1.00000]
already on best score
binary-classification
timing model run
5th model run
____________________________________________________________________________________________________


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01000] Val Loss : [0.00428] Val F1 : [0.99734]


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.02946] Val Loss : [0.12183] Val F1 : [0.92278]
early stopping count : 1


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04365] Val Loss : [0.01196] Val F1 : [0.98933]
early stopping count : 2


  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00805] Val Loss : [0.00069] Val F1 : [1.00000]
already on best score


## Inference

In [24]:
test = pd.read_csv('./test.csv')

In [25]:
test_dataset = CustomDataset(test['video_path'].values, None, transforms=test_transforms)
test_loader = DataLoader(test_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=0)

In [26]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [100]:
#path: ./weights/r3d_per_label_{}_Transform
def run_infer(model, name:str, path:str, fold=True, is_parallel=False):
    preds_each=[]
    
    for k in range(CFG.fold):
        if fold==True:
            re_path=path[:-3]+'_{}fold.pt'.format(k) 
        state_dict=torch.load(re_path)
        
        if is_parallel==True:
            keys = state_dict.keys() 
            values = state_dict.values()
            
            new_keys = []
            
            for key in keys:  #병렬 처리 했을 경우에만
                new_key = key[7:]    # remove the 'module.'
                new_keys.append(new_key)
            
            new_dict = OrderedDict(list(zip(new_keys, values)))
            model.load_state_dict(new_dict)
            
        else:
            model.load_state_dict(state_dict)
            
        print(f"{re_path} key matching successfully")
        preds = inference(model, test_loader, device)
        preds_each.append(preds)
        
        if fold!=True:
            break
            
    return preds_each

In [87]:
preds_crush=run_infer(model=models[0], name=names[0], path='./weights/r3d_per_label_crush_Transform_6fold.pt', fold=False, is_parallel=True)

./weights/r3d_per_label_crush_Transform_6fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

In [101]:
preds_ego=run_infer(model=models[1], name=names[1], path='./weights/r3d_per_label_ego_Transform.pt', fold=True, is_parallel=False)

./weights/r3d_per_label_ego_Transform_0fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_ego_Transform_1fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_ego_Transform_2fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_ego_Transform_3fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_ego_Transform_4fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

In [102]:
preds_timing=run_infer(model=models[3], name=names[3], path='./weights/r3d_per_label_timing_Transform.pt', fold=True, is_parallel=False)

./weights/r3d_per_label_timing_Transform_0fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_timing_Transform_1fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_timing_Transform_2fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_timing_Transform_3fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

./weights/r3d_per_label_timing_Transform_4fold.pt key matching successfully


  0%|          | 0/113 [00:00<?, ?it/s]

In [105]:
np.array(preds_timing).shape

(5, 1800)

In [32]:
model=models[2]
preds_each=[]
for k in range(CFG.fold):
    path='./weights/r3d_per_label_{}_Transform_{}fold.pt'.format(names[2], k)
    state_dict=torch.load(path)
    keys = state_dict.keys()

    values = state_dict.values()
    
    #new_keys = []
    
    #for key in keys:  #병렬 처리 했을 경우에만
    #    new_key = key[7:]    # remove the 'module.'
    #    new_keys.append(new_key)
    #
    #new_dict = OrderedDict(list(zip(new_keys, values)))
    
    #model.load_state_dict(new_dict)
    model.load_state_dict(state_dict)
    
    
    preds = inference(model, test_loader, device) #순서대로 4종류씩 fold번 담김
    preds_each.append(preds)
    
pred_tot[names[i]]=(preds_each)
del model

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

  0%|          | 0/113 [00:00<?, ?it/s]

## Submission

In [None]:
sample = pd.read_csv('./sample_submission.csv')

In [50]:
#날씨만 voting
from collections import Counter
from glob import glob

preds=[]
#voting
for k in range(CFG.fold):
    sample = pd.read_csv('./sample_submission.csv')
    
    sample['weather'] = preds_each[k]
    sample.to_csv(f'./ensemble/weather_fold{k}.csv', index=False)


csvs = glob('./ensemble/weather_*.csv')
    
for csv in csvs:
    f = pd.read_csv(csv)
    label = f['weather'].tolist()
    preds.append(label)
out = []
cols = list(zip(*preds))
for c in cols:
    most = Counter(c).most_common()[0][0]
    out.append(most)
ss = pd.read_csv('./sample_submission.csv')
ss['weather'] = out
ss.to_csv('vote_{}.csv'.format('weather'), index=False) 

#추론하고 결과값을 리스트에 일자로 받아온다 -> kfold라면 이걸 왕창가져온다 (어쨌든 2차원배열에 넣음)
#가져온 리스트를 돌면서 csv에 저장하고 다시 가져왔었음 -> 개 말도안댐
#가져온 리스트를 그대로 이용하는게 좋아보임

In [72]:
def voting(name, preds:list, save_each=False, save_result=False):
    vote_preds=[]
    
    for k in range(CFG.fold):
        sample = pd.read_csv('./sample_submission.csv')
        
        sample[name] = preds[k]
        if save_each==True:
            sample.to_csv(f'./ensemble/weather_fold{k}.csv', index=False)
    
    cols = list(zip(*preds))
    for c in cols:
        most = Counter(c).most_common()[0][0]
        vote_preds.append(most)
        
    ss = pd.read_csv('./sample_submission.csv')
    ss[name] = vote_preds
    if save_result==True:
        ss.to_csv('vote_{}.csv'.format(name), index=False)    
    
    return vote_preds, ss

In [106]:
_,ss_ego=voting('ego', preds_ego, False, False)
#_,ss_crush=voting('crush', preds_crush, False, False)
_,ss_timing=voting('timing', preds_timing, False, False)


In [111]:
ss_ego

Unnamed: 0,sample_id,label,ego
0,TEST_0000,0,0
1,TEST_0001,0,0
2,TEST_0002,0,1
3,TEST_0003,0,0
4,TEST_0004,0,0
...,...,...,...
1795,TEST_1795,0,0
1796,TEST_1796,0,0
1797,TEST_1797,0,0
1798,TEST_1798,0,0


In [115]:
submit = pd.read_csv('./sample_submission.csv')
submit['weather']=ss['weather']

In [117]:
submit['ego']=ss_ego['ego']
submit['crush']=preds_crush[0]
submit['timing']=ss_timing['timing']

In [118]:
submit["crush"].value_counts(dropna=False).sort_index()

0    1219
1     581
Name: crush, dtype: int64

In [119]:
submit["ego"].value_counts(dropna=False).sort_index()

0    1394
1     406
Name: ego, dtype: int64

In [120]:
submit["weather"].value_counts(dropna=False).sort_index()

0    1619
1     107
2      74
Name: weather, dtype: int64

In [121]:
submit["timing"].value_counts(dropna=False).sort_index()

0    1175
1     625
Name: timing, dtype: int64

In [122]:
#label ensemble
def label_ensemble(submit):
    for i, row in submit.iterrows():
        #print(i)
        if row['crush']==0: # 0
            submit['label'][i]=0 
        else:
            if row['ego']==1: # 1~6
                if row['weather']==0: #1,2
                    if row['timing']==0:
                        submit['label'][i]=1
                    else:
                        submit['label'][i]=2
                        
                elif row['weather']==1:# 3,4
                    if row['timing']==0:
                        submit['label'][i]=3
                    else:
                        submit['label'][i]=4
                else:
                    if row['timing']==0:# 5,6
                        submit['label'][i]=5
                    else:
                        submit['label'][i]=6
    
            else: # 7~12
                if row['weather']==0: #7,8
                    if row['timing']==0:
                        submit['label'][i]=7
                    else:
                        submit['label'][i]=8
                        
                elif row['weather']==1:# 9,10
                    if row['timing']==0:
                        submit['label'][i]=9
                    else:
                        submit['label'][i]=10
                else:
                    if row['timing']==0:# 11,12
                        submit['label'][i]=11
                    else:
                        submit['label'][i]=12
        if submit['label'][i]==-1:
            print(row['crush'], row['ego'], row['weather'], row['timing'])
    #라벨 추가했으니 필요없는 열 이제 삭제
    submit = submit.drop(['crush'],axis=1)
    submit = submit.drop(['ego'],axis=1)
    submit = submit.drop(['weather'],axis=1)
    submit = submit.drop(['timing'],axis=1)
    
    return submit

submit=label_ensemble(submit)

In [124]:
submit.to_csv('./r3d_labelEnsemble_5fold_submit.csv', index=False)

In [125]:
submit["label"].value_counts(dropna=False).sort_index()

0     1219
1      205
2       30
3       48
4        2
5       10
6        1
7      218
8       26
9       28
11      13
Name: label, dtype: int64

In [None]:
#2 7 8 없다