## Import

In [1]:
# Project star-imports stay first so the explicit imports below win any name clash.
from data import *
from utils import *
from model import *
from train import *
from infer import *
import random
import os
import numpy as np
import torch
import pandas as pd
import sklearn
from sklearn.model_selection import StratifiedKFold
from matplotlib import pyplot as plt

In [2]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Fix Random Seed

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), exports ``PYTHONHASHSEED``, and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # undermines the determinism requested above, so it has to be disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG.seed) # Fix the random seed (CFG is not defined in this notebook; presumably provided by a star import)

## Data Load

In [4]:
# Load the training metadata: one row per video, with the combined `label`
# and the per-task targets `crush`, `ego`, `weather` and `timing`.
# NOTE(review): the displayed frame contains 'Unnamed: 0.1' and 'Unnamed: 0'
# columns, which look like index columns saved by earlier to_csv calls — confirm
# whether they can be dropped.
df = pd.read_csv('./train_detail_classified.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,sample_id,video_path,label,crush,ego,weather,timing
0,0,TRAIN_0000,./train/TRAIN_0000.mp4,7,1,0,0,0
1,1,TRAIN_0001,./train/TRAIN_0001.mp4,7,1,0,0,0
2,2,TRAIN_0002,./train/TRAIN_0002.mp4,0,0,-1,-1,-1
3,3,TRAIN_0003,./train/TRAIN_0003.mp4,0,0,-1,-1,-1
4,4,TRAIN_0004,./train/TRAIN_0004.mp4,1,1,1,0,0


## Stratified K-Fold

In [5]:
# Drop the rows labelled -1 for each auxiliary task, giving every task its own
# frame with a fresh 0..n-1 index.
df_ego, df_weather, df_timing = (
    df[df[task] != -1].reset_index(drop=True)
    for task in ('ego', 'weather', 'timing')
)

# (frame, stratification target) pairs, in the order crush / ego / weather / timing.
dfs = [
    (df, df['crush']),
    (df_ego, df_ego['ego']),
    (df_weather, df_weather['weather']),
    (df_timing, df_timing['timing']),
]

In [6]:
# StratifiedKFold is imported explicitly: a bare `import sklearn` does not
# guarantee that the `sklearn.model_selection` submodule has been loaded.
skf = StratifiedKFold(n_splits=CFG.fold, shuffle=True, random_state=CFG.seed)

# Tag every row with the fold in which it serves as validation data,
# stratified by the label distribution of the corresponding task.
for frame, labels in dfs:
    frame["fold"] = -1  # -1 marks "not assigned yet"
    for k, (_, val_idx) in enumerate(skf.split(frame, labels)):
        # split() yields positional indices; translate them to index labels so
        # the assignment stays correct even if the frame's index is not 0..n-1.
        frame.loc[frame.index[val_idx], 'fold'] = k

## Run!!

In [7]:
# One classifier per sub-task. Only the weather model has three classes and is
# built with binary=False; the other three use two classes.
# NOTE(review): construction order is kept as-is because model initialisation
# may consume RNG state after seeding — confirm in BaseModel before reordering.
model_crush = BaseModel(num_classes=2, fc_type='shallow')
model_ego = BaseModel(num_classes=2, fc_type='shallow')
model_weather = BaseModel(num_classes=3, fc_type='shallow', binary=False)
model_timing = BaseModel(num_classes=2, fc_type='shallow')

# Parallel lists indexed 0..3 = crush, ego, weather, timing.
models=[model_crush, model_ego, model_weather, model_timing]
# Same (frame, target) pairs as built before the fold assignment; the frames now carry a `fold` column.
dfs=[(df, df['crush']),(df_ego, df_ego['ego']), (df_weather,df_weather['weather']), (df_timing,df_timing['timing'])]
names=["crush", "ego", "weather", "timing"]

In [None]:
# Train the crush model (is_fold=False; presumably a single split rather than all folds — confirm in `run`).
run(
    models[0],
    dfs[0][0],
    names[0],
    transforms=Transforms.other,
    device=device,
    save_dir='./crush',
    is_fold=False,
)

crush model run
1th model run
____________________________________________________________________________________________________


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.14109] Val Loss : [0.05125] Val F1 : [0.97692]


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.04855] Val Loss : [0.11550] Val F1 : [0.95950]
early stopping count : 1


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.03789] Val Loss : [0.01867] Val F1 : [0.99586]


  0%|          | 0/135 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.05342] Val Loss : [0.02785] Val F1 : [0.98757]
early stopping count : 1


  0%|          | 0/135 [00:00<?, ?it/s]

In [None]:
# Train the ego model on the rows that have an ego label (is_fold=True).
run(
    models[1],
    dfs[1][0],
    names[1],
    transforms=Transforms.other,
    device=device,
    save_dir='./ego',
    is_fold=True,
)

In [None]:
# Train the weather model with its own transform set (is_fold=True).
run(
    models[2],
    dfs[2][0],
    names[2],
    transforms=Transforms.weather,
    device=device,
    save_dir='./weather',
    is_fold=True,
)

In [None]:
# Train the timing model on the rows that have a timing label (is_fold=False).
run(
    models[3],
    dfs[3][0],
    names[3],
    transforms=Transforms.other,
    device=device,
    save_dir='./timing',
    is_fold=False,
)

## Inference

In [None]:
# Test-set metadata; its `video_path` column is used to build the test dataset.
test = pd.read_csv('./test.csv')

In [None]:
# Build the unlabeled test dataset (labels passed as None) and its loader.
# NOTE(review): `CustomDataset`, `DataLoader` and `test_transforms` are not
# defined in this notebook — presumably they come from the star imports; confirm.
# NOTE(review): `test_loader` is never passed to `run_infer` in this notebook —
# verify how the inference helper obtains its data.
test_dataset = CustomDataset(test['video_path'].values, None, transforms=test_transforms)
test_loader = DataLoader(test_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=0)

In [None]:
# Crush predictions from the single checkpoint (fold=False, matching training).
preds_crush = run_infer(
    model=models[0],
    name=names[0],
    path='./crush/crush.pt',
    fold=False,
    is_parallel=False,
)

In [None]:
# Ego predictions (fold=True, matching training).
preds_ego = run_infer(
    model=models[1],
    name=names[1],
    path='./ego/ego.pt',
    fold=True,
    is_parallel=False,
)

In [None]:
# Weather predictions (fold=True, matching training).
preds_weather = run_infer(
    model=models[2],
    name=names[2],
    path='./weather/weather.pt',
    fold=True,
    is_parallel=False,
)

In [None]:
# Timing predictions from the single checkpoint (fold=False, matching training).
preds_timing = run_infer(
    model=models[3],
    name=names[3],
    path='./timing/timing.pt',
    fold=False,
    is_parallel=False,
)

## Submission

In [None]:
# NOTE(review): `sample` is not used anywhere else in this notebook; the same
# file is read again into `submit` further down — this cell looks redundant.
sample = pd.read_csv('./sample_submission.csv')

In [None]:
# Aggregate the fold-wise predictions of the two tasks that were run with fold=True.
# Only the second return value of `voting` is used; the result is later indexed
# by task name, so it is presumably a DataFrame — confirm.
# NOTE(review): the meaning of the two positional False flags is not visible here — check `voting`.
_,ss_ego=voting('ego', preds_ego, False, False)
_,ss_weather=voting('weather', preds_weather, False, False)

In [None]:
# Start from the submission template and attach each sub-task's predictions.
submit = pd.read_csv('./sample_submission.csv')
submit['weather']=ss_weather['weather']
# Use the ego voting result `ss_ego`; the name `ss` is never defined in this notebook.
submit['ego']=ss_ego['ego']
submit['crush']=preds_crush
submit['timing']=preds_timing

In [None]:
# Sanity check: distribution of predicted crush classes (NaN included).
submit["crush"].value_counts(dropna=False).sort_index()

In [None]:
# Sanity check: distribution of predicted ego classes (NaN included).
submit["ego"].value_counts(dropna=False).sort_index()

In [None]:
# Sanity check: distribution of predicted weather classes (NaN included).
submit["weather"].value_counts(dropna=False).sort_index()

In [None]:
# Sanity check: distribution of predicted timing classes (NaN included).
submit["timing"].value_counts(dropna=False).sort_index()

In [None]:
#label ensemble
def label_ensemble(submit):
    """Combine the four sub-task predictions into the final 13-class label.

    Mapping (the same decision tree as the previous nested if/else):

    * ``crush == 0``            -> label 0
    * ``crush != 0, ego == 1``  -> labels 1..6
    * ``crush != 0, ego != 1``  -> labels 7..12

    Within each group of six, weather adds 0 (``weather == 0``),
    2 (``weather == 1``) or 4 (anything else), and timing adds
    0 (``timing == 0``) or 1 (anything else).

    Args:
        submit: DataFrame containing the columns ``crush``, ``ego``,
            ``weather`` and ``timing``. It is not modified.

    Returns:
        A new DataFrame with the ``label`` column filled in and the four
        sub-task columns removed.

    Raises:
        KeyError: If any of the four sub-task columns is missing.
    """
    crush = submit['crush'].to_numpy()
    ego = submit['ego'].to_numpy()
    weather = submit['weather'].to_numpy()
    timing = submit['timing'].to_numpy()

    # Vectorised on purpose: per-row chained assignment (frame['label'][i] = x)
    # triggers SettingWithCopyWarning and does not update the frame under
    # pandas Copy-on-Write.
    group_start = np.where(ego == 1, 1, 7)
    weather_offset = np.select([weather == 0, weather == 1], [0, 2], default=4)
    timing_offset = np.where(timing == 0, 0, 1)
    labels = np.where(crush == 0, 0, group_start + weather_offset + timing_offset)

    # The label is computed, so the sub-task columns are no longer needed.
    result = submit.drop(columns=['crush', 'ego', 'weather', 'timing'])
    result['label'] = labels
    return result

submit=label_ensemble(submit)

In [None]:
# Write the final submission file without the DataFrame index column.
submit.to_csv('./5fold_basic_submit.csv', index=False)

In [None]:
# Sanity check: distribution of the final combined labels (NaN included).
submit["label"].value_counts(dropna=False).sort_index()