In [7]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [8]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [9]:
def metric(y_pred, y_true):
    y_pred = y_pred > 0
    score = f1_score(y_true, y_pred, average='macro')
    return score

In [10]:
!mkdir checkpoints

mkdir: cannot create directory ‘checkpoints’: File exists


In [11]:
CFG = {
    'VIDEO_LENGTH':50, # 10프레임 * 5초
    'IMG_SIZE':128,
    'EPOCHS':10,
    'LEARNING_RATE':0.001,
    'BATCH_SIZE':2,
    'SEED':41
}

In [12]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [13]:
df = pd.read_csv('/kaggle/input/dacon-car-crash/train.csv')

In [14]:
df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./train/TRAIN_0000.mp4,7
1,TRAIN_0001,./train/TRAIN_0001.mp4,7
2,TRAIN_0002,./train/TRAIN_0002.mp4,0
3,TRAIN_0003,./train/TRAIN_0003.mp4,0
4,TRAIN_0004,./train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0


In [15]:
#1~6 ego 내차
#7~12 ego 남차
df['timing']=0

In [16]:
df = df[df['label'] != 0] #0 제거

In [17]:
df['timing'] = df['label'].apply(lambda x: 0 if x in [1,3,5,7,9,11] else 1)

In [18]:
df = df.drop('label',axis=1)

In [19]:
df = df.rename(columns={'timing': 'label'})

In [20]:
label_count = df['label'].value_counts()

print(label_count)

0    808
1    107
Name: label, dtype: int64


In [21]:
df['video_path'] = df['video_path'].str.replace('./train/', '/kaggle/input/dacon-car-crash/train/')


In [22]:
df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,/kaggle/input/dacon-car-crash/train/TRAIN_0000...,0
1,TRAIN_0001,/kaggle/input/dacon-car-crash/train/TRAIN_0001...,0
4,TRAIN_0004,/kaggle/input/dacon-car-crash/train/TRAIN_0004...,0
6,TRAIN_0006,/kaggle/input/dacon-car-crash/train/TRAIN_0006...,0
7,TRAIN_0007,/kaggle/input/dacon-car-crash/train/TRAIN_0007...,0
...,...,...,...
2685,TRAIN_2685,/kaggle/input/dacon-car-crash/train/TRAIN_2685...,1
2689,TRAIN_2689,/kaggle/input/dacon-car-crash/train/TRAIN_2689...,0
2692,TRAIN_2692,/kaggle/input/dacon-car-crash/train/TRAIN_2692...,0
2693,TRAIN_2693,/kaggle/input/dacon-car-crash/train/TRAIN_2693...,0


In [23]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.1, random_state=CFG['SEED'])

In [24]:
len(train),len(val)

(823, 92)

In [25]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list, tfms):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.tfms = tfms
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            frames.append(img)
        frames = aug_video(frames, tfms=self.tfms)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)
    
def aug_video(vid, tfms):
    seed = random.randint(0,99999)
    aug_vid = []
    for x in vid:
        random.seed(seed)
        aug_vid.append((tfms(image = np.asarray(x)))['image'])
    return torch.from_numpy(np.stack(aug_vid))

tfms = A.Compose([
            A.Resize(width=CFG['IMG_SIZE'], height=CFG['IMG_SIZE']),
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=30, p=0.5),
            A.Normalize()
            ], p=1)

In [26]:
train_dataset = CustomDataset(train['video_path'].values, train['label'].values,tfms=tfms)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['video_path'].values, val['label'].values, tfms=tfms)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [37]:
import torch.nn as nn
from torchvision.models.video import r3d_18

class ResNet3DModel(nn.Module):
    def __init__(self, num_classes=1):
        super(ResNet3DModel, self).__init__()
        self.feature_extract = r3d_18(pretrained=True)
        num_features = self.feature_extract.fc.in_features
        self.feature_extract.fc = nn.Identity()
        self.classifier = nn.Linear(num_features, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.feature_extract(x)
        x = x.view(batch_size, -1)
        x = self.classifier(x)
        return x

In [38]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.BCEWithLogitsLoss().to(device) # BCEWithLogitsLoss로 변경
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            labels = labels.reshape(-1, 1)
            labels = labels.to(device, dtype=torch.float)
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output.squeeze(), labels.squeeze()) # squeeze()를 이용하여 차원을 맞춰줍니다.
            loss = loss.mean()
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model


In [39]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    val_score = []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device).float() # float로 변경
            output = model(videos)
            
            loss = criterion(output.squeeze(), labels.squeeze()) # squeeze()를 이용하여 차원을 맞춰줍니다.
            
            val_loss.append(loss.item())
            score = metric(output.cpu().numpy(), labels.cpu().numpy())
            val_score.append(score)
    
    return np.mean(val_loss), np.mean(val_score)

In [None]:
model = ResNet3DModel()
model.to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=CFG["LEARNING_RATE"], momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.27641] Val Loss : [0.13342] Val F1 : [0.95652]


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.19450] Val Loss : [0.14942] Val F1 : [0.97101]


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.10921] Val Loss : [0.21330] Val F1 : [0.95652]


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.17076] Val Loss : [0.18305] Val F1 : [0.92754]


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.12437] Val Loss : [0.15782] Val F1 : [0.97101]
Epoch 00005: reducing learning rate of group 0 to 5.0000e-04.


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.11822] Val Loss : [0.16549] Val F1 : [0.97101]


  0%|          | 0/412 [00:00<?, ?it/s]

  0%|          | 0/46 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.08393] Val Loss : [0.18117] Val F1 : [0.94203]


  0%|          | 0/412 [00:00<?, ?it/s]

In [None]:
import torch

# 모델을 저장합니다.
torch.save(infer_model.state_dict(), '/kaggle/working/weather.pth')

In [None]:
import torch

# 모델을 정의합니다.
infer_model = ResNet3DModel()

# 저장된 모델의 가중치를 로드합니다.
infer_model.load_state_dict(torch.load('/kaggle/working/weather.pth'))