In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [18]:
CFG = {
    'VIDEO_LENGTH':50, # 10프레임 * 5초
    'IMG_SIZE':128,
    'EPOCHS':20,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':4,
    'SEED':41
}

In [19]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [20]:
df = pd.read_csv('/kaggle/input/weathersss/weather_datas_final.csv')

In [21]:
df["label"] = df["label"].map({1:0, 2:1, 3:2})

In [22]:
df

Unnamed: 0.1,Unnamed: 0,sample_id,video_path,label
0,0,TRAIN_0000,/kaggle/input/dacon-car-crash/train/TRAIN_0000...,0
1,1,TRAIN_0001,/kaggle/input/dacon-car-crash/train/TRAIN_0001...,0
2,2,TRAIN_0002,/kaggle/input/dacon-car-crash/train/TRAIN_0002...,0
3,3,TRAIN_0003,/kaggle/input/dacon-car-crash/train/TRAIN_0003...,0
4,4,TRAIN_0004,/kaggle/input/dacon-car-crash/train/TRAIN_0004...,0
...,...,...,...,...
2693,2693,TRAIN_2693,/kaggle/input/dacon-car-crash/train/TRAIN_2693...,1
2694,2694,TRAIN_2694,/kaggle/input/dacon-car-crash/train/TRAIN_2694...,2
2695,2695,TRAIN_2695,/kaggle/input/dacon-car-crash/train/TRAIN_2695...,0
2696,2696,TRAIN_2696,/kaggle/input/dacon-car-crash/train/TRAIN_2696...,2


In [23]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

In [24]:
len(train),len(val)

(2158, 540)

In [25]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255.
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

In [26]:
train_dataset = CustomDataset(train['video_path'].values, train['label'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['video_path'].values, val['label'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

array(['./train/TRAIN_1629.mp4', './train/TRAIN_0920.mp4',
       './train/TRAIN_0768.mp4', ..., './train/TRAIN_1105.mp4',
       './train/TRAIN_0932.mp4', './train/TRAIN_1986.mp4'], dtype=object)

In [27]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=3):
        super(BaseModel, self).__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(3, 8, (1, 3, 3)),
            nn.ReLU(),
            nn.BatchNorm3d(8),
            nn.MaxPool3d(2),
            nn.Conv3d(8, 32, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(2),
            nn.Conv3d(32, 64, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2),
            nn.Conv3d(64, 128, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(128),
            nn.MaxPool3d((3, 7, 7)),
        )
        self.classifier = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.feature_extract(x)
        x = x.view(batch_size, -1)
        x = self.classifier(x)
        return x

In [28]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [29]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [None]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.76861] Val Loss : [0.67770] Val F1 : [0.39789]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.63775] Val Loss : [0.73856] Val F1 : [0.35709]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.54848] Val Loss : [0.77695] Val F1 : [0.41790]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.48232] Val Loss : [0.57841] Val F1 : [0.54592]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.41628] Val Loss : [0.82207] Val F1 : [0.53534]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.36141] Val Loss : [0.69368] Val F1 : [0.47898]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.29626] Val Loss : [0.75934] Val F1 : [0.59257]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.25072] Val Loss : [0.64501] Val F1 : [0.53817]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.20684] Val Loss : [0.68800] Val F1 : [0.61065]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.20144] Val Loss : [0.81441] Val F1 : [0.50784]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.18873] Val Loss : [0.73229] Val F1 : [0.59104]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.15274] Val Loss : [0.91716] Val F1 : [0.49640]
Epoch 00012: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.06866] Val Loss : [0.78539] Val F1 : [0.58445]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.03932] Val Loss : [0.81718] Val F1 : [0.56599]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.03805] Val Loss : [0.81327] Val F1 : [0.61843]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.03776] Val Loss : [0.86563] Val F1 : [0.59458]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.03582] Val Loss : [0.88651] Val F1 : [0.57894]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.02746] Val Loss : [0.85848] Val F1 : [0.57643]
Epoch 00018: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.01791] Val Loss : [0.91268] Val F1 : [0.58671]


  0%|          | 0/540 [00:00<?, ?it/s]

In [None]:
def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

In [59]:
path = '/content/data/test/TEST_0000.mp4'
frames = []
cap = cv2.VideoCapture(path)
img = cv2.imread()
if img is not None and img.shape[0] != 0 and img.shape[1] != 0:
    img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
    img = img / 255.
else:
    print("Error: Failed to load or resize image.")

TypeError: ignored

In [92]:
import cv2

# 비디오 로드
cap = cv2.VideoCapture('/content/data/train/TRAIN_0000.mp4')

# 첫 프레임 가져오기
ret, frame = cap.read()

# 프레임 출력
cv2.imshow('First Frame', frame)
cv2.waitKey(0)
cv2.destroyAllWindows()

DisabledFunctionError: ignored

In [58]:
img