In [1]:
#앙상블
# pip uninstall opencv-python-headless==4.5.5.62
#pip install opencv-python-headless==4.5.2.52

In [2]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torchsampler import ImbalancedDatasetSampler
from sklearn.model_selection import KFold,StratifiedKFold
import albumentations as A

from datetime import datetime
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from segmentation_models_pytorch.losses import FocalLoss
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import random
import os
import cv2
from tqdm import tqdm
from transformers import AutoModel, AutoImageProcessor, AutoConfig

import warnings

warnings.filterwarnings("ignore")


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [4]:
import pandas as pd
all_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')

In [5]:
all_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./train/TRAIN_0000.mp4,7
1,TRAIN_0001,./train/TRAIN_0001.mp4,7
2,TRAIN_0002,./train/TRAIN_0002.mp4,0
3,TRAIN_0003,./train/TRAIN_0003.mp4,0
4,TRAIN_0004,./train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./train/TRAIN_2696.mp4,0


In [6]:

CFG = {
    'VIDEO_LENGTH':10, 
    'IMG_SIZE':240,
    'EPOCHS':2,
    'LEARNING_RATE':1e-5,
    'BATCH_SIZE':2,
    'SEED':2023,
    'SPLIT':5,
    'ROOT':'./data',
    'MODEL':'MCG-NJU/videomae-base-finetuned-ssv2',
    'LOAD_WEIGHT':False,
    'LOAD_WEIGHT_NAME' :'a',
    'VAL_SCORE_THRES' : 0.98,
    'NUM_ASB':1
}

In [7]:
skf = StratifiedKFold(n_splits = CFG['SPLIT'])

In [8]:
all_df['video_path'] = all_df['video_path'].apply(lambda x:CFG['ROOT']+x[1:])
test_df['video_path'] = test_df['video_path'].apply(lambda x:CFG['ROOT']+x[1:])

crash_df = all_df.copy()
crash_df['label'] = crash_df['label'].apply(lambda x: 1 if x != 0 else 0)


In [9]:
ego_df = all_df.copy()
idx = ego_df[ego_df['label']==0].index
ego_df.drop(idx,inplace=True)

ego_df['label'] = ego_df['label'].apply(lambda x: 1 if x < 7 else 0)
ego_df.reset_index(drop=True,inplace=True)

In [10]:
weather_df = all_df.copy()
idx = weather_df[weather_df['label']==0].index
weather_df.drop(idx,inplace=True)
#0:normal,1:snow,2:rain
weather_df['label'] = weather_df['label'].apply(lambda x: 0 if x==1 or x==2 or x==7 or x==8 else 1 if x==3 or x==4 or x==8 or x==9 else 2)
weather_df.reset_index(drop=True,inplace=True)

In [11]:
time_df = all_df.copy()
idx = time_df[time_df['label']==0].index
time_df.drop(idx,inplace=True)
#0:normal,1:snow,2:rain
time_df['label'] = time_df['label'].apply(lambda x: 0 if x%2==1 else 1)
time_df.reset_index(drop=True,inplace=True)

In [12]:
all_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,7
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,7
2,TRAIN_0002,./data/train/TRAIN_0002.mp4,0
3,TRAIN_0003,./data/train/TRAIN_0003.mp4,0
4,TRAIN_0004,./data/train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./data/train/TRAIN_2693.mp4,3
2694,TRAIN_2694,./data/train/TRAIN_2694.mp4,5
2695,TRAIN_2695,./data/train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./data/train/TRAIN_2696.mp4,0


In [13]:
ego_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,0
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,0
2,TRAIN_0004,./data/train/TRAIN_0004.mp4,1
3,TRAIN_0006,./data/train/TRAIN_0006.mp4,1
4,TRAIN_0007,./data/train/TRAIN_0007.mp4,0
...,...,...,...
910,TRAIN_2685,./data/train/TRAIN_2685.mp4,0
911,TRAIN_2689,./data/train/TRAIN_2689.mp4,1
912,TRAIN_2692,./data/train/TRAIN_2692.mp4,0
913,TRAIN_2693,./data/train/TRAIN_2693.mp4,1


In [14]:
crash_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,1
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,1
2,TRAIN_0002,./data/train/TRAIN_0002.mp4,0
3,TRAIN_0003,./data/train/TRAIN_0003.mp4,0
4,TRAIN_0004,./data/train/TRAIN_0004.mp4,1
...,...,...,...
2693,TRAIN_2693,./data/train/TRAIN_2693.mp4,1
2694,TRAIN_2694,./data/train/TRAIN_2694.mp4,1
2695,TRAIN_2695,./data/train/TRAIN_2695.mp4,0
2696,TRAIN_2696,./data/train/TRAIN_2696.mp4,0


In [15]:
weather_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,0
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,0
2,TRAIN_0004,./data/train/TRAIN_0004.mp4,0
3,TRAIN_0006,./data/train/TRAIN_0006.mp4,1
4,TRAIN_0007,./data/train/TRAIN_0007.mp4,0
...,...,...,...
910,TRAIN_2685,./data/train/TRAIN_2685.mp4,0
911,TRAIN_2689,./data/train/TRAIN_2689.mp4,0
912,TRAIN_2692,./data/train/TRAIN_2692.mp4,0
913,TRAIN_2693,./data/train/TRAIN_2693.mp4,1


In [16]:
time_df

Unnamed: 0,sample_id,video_path,label
0,TRAIN_0000,./data/train/TRAIN_0000.mp4,0
1,TRAIN_0001,./data/train/TRAIN_0001.mp4,0
2,TRAIN_0004,./data/train/TRAIN_0004.mp4,0
3,TRAIN_0006,./data/train/TRAIN_0006.mp4,0
4,TRAIN_0007,./data/train/TRAIN_0007.mp4,0
...,...,...,...
910,TRAIN_2685,./data/train/TRAIN_2685.mp4,1
911,TRAIN_2689,./data/train/TRAIN_2689.mp4,0
912,TRAIN_2692,./data/train/TRAIN_2692.mp4,0
913,TRAIN_2693,./data/train/TRAIN_2693.mp4,0


In [17]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [18]:
from transformers import VideoMAEConfig, VideoMAEModel
from transformers import AutoImageProcessor, VideoMAEForVideoClassification
from transformers import XCLIPVisionModel, XCLIPVisionConfig


crash_configuration = VideoMAEConfig()
crash_configuration = AutoConfig.from_pretrained(CFG['MODEL'])
crash_configuration.num_frames = CFG['VIDEO_LENGTH']
crash_configuration.num_frames=CFG['VIDEO_LENGTH']
crash_configuration.image_size=CFG['IMG_SIZE']
crash_configuration.id2label = {0:'no crash',1:'crash'}
crash_configuration.label2id = {'no crash':0,'crash':1}
crash_model = VideoMAEForVideoClassification.from_pretrained(CFG['MODEL'],config=crash_configuration,ignore_mismatched_sizes=True)

ego_configuration = VideoMAEConfig()
ego_configuration = AutoConfig.from_pretrained(CFG['MODEL'])
ego_configuration.num_frames = CFG['VIDEO_LENGTH']
ego_configuration.num_frames=CFG['VIDEO_LENGTH']
ego_configuration.image_size=CFG['IMG_SIZE']
ego_configuration.id2label = {0:'other',1:'ego'}
ego_configuration.label2id = {'other':0,'ego':1}
ego_model = VideoMAEForVideoClassification.from_pretrained(CFG['MODEL'],config=ego_configuration,ignore_mismatched_sizes=True)

weather_configuration = VideoMAEConfig()
weather_configuration = AutoConfig.from_pretrained(CFG['MODEL'])
weather_configuration.num_frames = CFG['VIDEO_LENGTH']
weather_configuration.num_frames=CFG['VIDEO_LENGTH']
weather_configuration.image_size=CFG['IMG_SIZE']
weather_configuration.id2label = {0:'normal',1:'snow',2:'rain'}
weather_configuration.label2id = {'normal':0,'snow':1,'rain':2}
weather_model = VideoMAEForVideoClassification.from_pretrained(CFG['MODEL'],config=weather_configuration,ignore_mismatched_sizes=True)

time_configuration = VideoMAEConfig()
time_configuration = AutoConfig.from_pretrained(CFG['MODEL'])
time_configuration.num_frames = CFG['VIDEO_LENGTH']
time_configuration.num_frames=CFG['VIDEO_LENGTH']
time_configuration.image_size=CFG['IMG_SIZE']
time_configuration.id2label = {0:'day',1:'night'}
time_configuration.label2id = {'normal':0,'snow':1}
time_model = VideoMAEForVideoClassification.from_pretrained(CFG['MODEL'],config=time_configuration,ignore_mismatched_sizes=True)


image_processor_config = AutoImageProcessor.from_pretrained(CFG['MODEL'])



Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base-finetuned-ssv2 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([174, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([174]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of VideoMAEForVideoClassification were not initialized from the model checkpoint at MCG-NJU/videomae-base-finetuned-ssv2 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([174, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([174]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN

In [21]:
Alb = A.Compose([
        A.Resize(width=CFG['IMG_SIZE'], height=CFG['IMG_SIZE']),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightness(p=0.1),
#         A.GaussNoise(p=0.2,var_limit=(0.0, 26.849998474121094)).
        A.Downscale(p=0.2,scale_min=0.699999988079071, scale_max=0.9900000095367432, interpolation=2),
        A.Normalize(mean=tuple(image_processor_config.image_mean)
                   ,std=tuple(image_processor_config.image_std))
    ], p=1)


def aug_video(vid, tfms):
    aug_vid = []
    for x in vid:
        aug_vid.append((tfms(image = np.asarray(x)))['image'])
    return torch.from_numpy(np.stack(aug_vid))

In [22]:
class VideoDataset(Dataset):
    def __init__(self, video_path_list, label_list,transform=None):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.Alb = transform
    
    def get_labels(self):   
        return self.label_list  
    
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for idx in range(50):
            if idx%5 == 3:
                _, img = cap.read()
                img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
                frames.append(img)
        if self.Alb is not None:
            frames = aug_video(frames, tfms=self.Alb)
        return torch.FloatTensor(np.array(frames)).permute(0, 3, 1, 2)


In [23]:
def train(skf_idx, model, optimizer, train_loader, val_loader, scheduler, device, cls_type):
    model.to(device)
    criterion = FocalLoss('multiclass')
    best_val_score = 0
    best_model = None
    achieve = False
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            output = model(videos)
            loss = criterion(output.logits, labels)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        if _val_score > CFG['VAL_SCORE_THRES']:
            achieve=True
            print("archieve score!!")
            break
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
            date=datetime.today().strftime("%Y%m%d%H%M%S")
            torch.save(best_model.state_dict(), './'+cls_type + '_' + str(skf_idx) +'_'+ date + '_best_model.pth')
        skf_idx+=1
    return best_model,achieve

In [24]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            output = model(videos)
            
            loss = criterion(output.logits, labels)
            
            val_loss.append(loss.item())
            
            preds += output.logits.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [25]:
dict_pl = []

crash = {
    'model' : crash_model,
    'cls_type' : 'crash',
    'df' : crash_df
}

ego = {
    'model' : ego_model,
    'cls_type' : 'ego',
    'df' : ego_df
}

weather = {
    'model' : weather_model,
    'cls_type' : 'weather',
    'df' : weather_df
}

time = {
    'model' : time_model,
    'cls_type' : 'time',
    'df' : time_df
}

dict_pl.append(crash)
dict_pl.append(ego)
dict_pl.append(weather)
dict_pl.append(time)

In [26]:

# if CFG['LOAD_WEIGHT'] == True:
checkpoint = './checkpoint/crash/crash_120230221015333_best_model.pth'


# for total_idx,dict_name in enumerate(dict_pl):
#     if total_idx==0:
model = dict_name['model']
crash_model.load_state_dict(checkpoint)
print('model : '+ dict_name['cls_type'])
apply_df = dict_name['df'].copy()
cls_type = dict_name['cls_type']
# optimizer = torch.optim.Adadelta(params = model.parameters(), lr = CFG["LEARNING_RATE"])
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1, lambda2], verbose=True)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3,threshold_mode='abs',min_lr=1e-12, verbose=True)
skf_idx=1
for train_idx,val_idx in skf.split(apply_df['video_path'],apply_df['label']):
    train_dataset = VideoDataset(apply_df['video_path'][train_idx].values, apply_df['label'][train_idx].values,transform=Alb)
    val_dataset = VideoDataset(apply_df['video_path'][val_idx].values, apply_df['label'][val_idx].values, transform=Alb)
    train_loader = DataLoader(train_dataset,sampler=ImbalancedDatasetSampler(train_dataset),shuffle=False,batch_size = CFG['BATCH_SIZE'],  num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], num_workers=4)
    dict_name['model'],achieve = train(skf_idx,model, optimizer, train_loader, val_loader, scheduler, device, cls_type)
#         skf_idx+=1
    if achieve == True:
        break

AttributeError: 'str' object has no attribute 'copy'

In [None]:
test_df

In [None]:
test_dataset = VideoDataset(test_df['video_path'].values,label_list= None, transform=Alb)
test_loader = DataLoader(test_dataset, shuffle=False,batch_size = CFG['BATCH_SIZE'],  num_workers=4)
   

In [None]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            output = model(videos)
            preds += output.logits.argmax(1).detach().cpu().numpy().tolist()
#             preds += output.logits.detach().cpu().numpy().tolist()

    return preds

In [None]:
checkpoint = torch.load('./checkpoint/crash/crash_120230221015333_best_model.pth')
crash_model.load_state_dict(checkpoint)

checkpoint = torch.load('./checkpoint/ego/ego_120230221021140_best_model.pth')
ego_model.load_state_dict(checkpoint)

checkpoint = torch.load('./checkpoint/weather/weather_2_20230221022225_best_model.pth')
weather_model.load_state_dict(checkpoint)

checkpoint = torch.load('./checkpoint/time/time_1_20230221093311_best_model.pth')
time_model.load_state_dict(checkpoint)

In [None]:
total_preds_list = []
crash_preds_list=[]
ego_preds_list=[]
weather_preds_list=[]
time_preds_list=[]
crash_preds=None
ego_preds=None
weather_preds=None
time_preds=None

for idx in range(CFG['NUM_ASB']):
    crash_preds = inference(crash_model, test_loader, device)
#     crash_preds_list.append(crash_preds)
    
#TODO :앙상블

for idx in range(CFG['NUM_ASB']):
    ego_preds = inference(ego_model, test_loader, device)
#     ego_preds_list.append(ego_preds)
    
for idx in range(CFG['NUM_ASB']):
    weather_preds = inference(weather_model, test_loader, device)
#     weather_preds_list.append(weather_preds)
    
for idx in range(CFG['NUM_ASB']):
    time_preds = inference(time_model, test_loader, device)
#     time_preds_list.append(time_preds)





In [None]:
# crash_preds_df = pd.DataFrame(crash_preds,columns=['crash'])
# ego_preds_df = pd.DataFrame(ego_preds,columns=['ego'])
# weather_preds_df = pd.DataFrame(weather_preds,columns=['weather'])
# time_preds_df = pd.DataFrame(time_preds,columns=['time'])

preds=[]

print(len(crash_preds))
print(len(ego_preds))
print(len(weather_preds))
print(len(time_preds))

for idx,crash in enumerate(crash_preds):
    ego = ego_preds[idx]
    weather = weather_preds[idx]
    time = time_preds[idx]
    if crash == 0:
        preds.append(0)
    else:
        if ego==0:
            if weather==0:
                if time == 0:
                    preds.append(7)
                else:
                    preds.append(8)
            elif weather==1:
                if time == 0:
                    preds.append(9)
                else:
                    preds.append(10)
            else:
                if time == 0:
                    preds.append(11)
                else:
                    preds.append(12)
                    
        else:
            if weather==0:
                if time == 0:
                    preds.append(1)
                else:
                    preds.append(2)
            elif weather==1:
                if time == 0:
                    preds.append(3)
                else:
                    preds.append(4)
            else:
                if time == 0:
                    preds.append(5)
                else:
                    preds.append(6)
                

In [None]:
submit = pd.read_csv('./data/sample_submission.csv')

In [None]:
submit['label'] = preds

In [None]:
submit['label'].value_counts()

In [None]:
date=datetime.today().strftime("%Y_%m_%d_%H_%M_%S")
submit.to_csv('./'+date+'.csv', index=False)