<a href="https://colab.research.google.com/github/dbtjr1103/Dacon_practice/blob/main/Kaggle_2_5D_CNN_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
cd /content/drive/MyDrive/kaggle

/content/drive/MyDrive/kaggle


In [2]:
# Fetch the pytorch-image-models source into the current directory; the import
# cell appends this checkout to sys.path before importing timm.
!git clone https://github.com/huggingface/pytorch-image-models.git

Cloning into 'pytorch-image-models'...
remote: Enumerating objects: 13030, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 13030 (delta 90), reused 127 (delta 79), pack-reused 12882[K
Receiving objects: 100% (13030/13030), 23.40 MiB | 9.89 MiB/s, done.
Resolving deltas: 100% (9361/9361), done.
Updating files: 100% (516/516), done.


In [3]:
import os
import sys
sys.path.append('/content/drive/MyDrive/kaggle/pytorch-image-models')
import glob
import numpy as np
import pandas as pd
import random
import math
import gc
import cv2
from tqdm import tqdm
import time
from functools import lru_cache
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef

In [4]:
# When the model is swapped, the other hyperparameters should be retuned to match.
# Only 'seed' is consumed in the cells visible here (by seed_everything); the
# remaining keys are presumably read by later training/inference cells.
CFG = dict(
    seed=42,
    model='resnet50',
    img_size=256,
    epochs=10,
    train_bs=100,
    valid_bs=64,
    lr=1e-3,
    weight_decay=1e-6,
    num_workers=2,
)

In [5]:
# Pin every random seed for reproducibility; nothing here needs to be changed.
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed passed unchanged to every generator.

    Returns:
        None. Mutates global RNG state, ``os.environ`` and the cuDNN flags.
    """
    # Record the hash seed in the environment (stored as a string).
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for each generator: Python, NumPy, torch CPU, the current
    # CUDA device, and all CUDA devices.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    # Request deterministic cuDNN kernels and switch off its benchmark mode.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed all RNGs with the value configured in CFG.
seed_everything(CFG['seed'])
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
def expand_contact_id(df):
    """
    Split ``contact_id`` into four separate columns.

    A contact id has the form ``<game_play>_<step>_<player_1>_<player_2>``,
    where ``game_play`` itself contains an underscore (e.g. ``58168_003392``).
    The last three underscore-separated fields are peeled off from the right
    and whatever remains is taken as ``game_play``, so the result does not
    depend on ``game_play`` being exactly 12 characters long.

    Args:
        df: DataFrame with a string ``contact_id`` column. Modified in place.

    Returns:
        The same ``df`` object with the added columns ``game_play`` (str),
        ``step`` (int), ``nfl_player_id_1`` (str) and ``nfl_player_id_2``
        (str; may be a non-numeric token such as ``"G"``).
    """
    # One right-split gives [game_play, step, player_1, player_2] for each row.
    parts = df["contact_id"].str.rsplit("_", n=3)
    df["game_play"] = parts.str[0]
    df["step"] = parts.str[1].astype("int")
    df["nfl_player_id_1"] = parts.str[2]
    df["nfl_player_id_2"] = parts.str[3]
    return df

# Single place to change if the competition data is moved.
DATA_DIR = "/content/drive/MyDrive/kaggle/nfl-player-contact-detection"

# Sample submission, with contact_id expanded into game_play / step / player columns.
labels = expand_contact_id(pd.read_csv(f"{DATA_DIR}/sample_submission.csv"))

# Remaining test-set tables, loaded as-is.
test_tracking = pd.read_csv(f"{DATA_DIR}/test_player_tracking.csv")

test_helmets = pd.read_csv(f"{DATA_DIR}/test_baseline_helmets.csv")

test_video_metadata = pd.read_csv(f"{DATA_DIR}/test_video_metadata.csv")

In [8]:
# Show the sample-submission frame together with the columns derived from contact_id.
labels

Unnamed: 0,contact_id,contact,game_play,step,nfl_player_id_1,nfl_player_id_2
0,58168_003392_0_38590_43854,0,58168_003392,0,38590,43854
1,58168_003392_0_38590_41257,0,58168_003392,0,38590,41257
2,58168_003392_0_38590_41944,0,58168_003392,0,38590,41944
3,58168_003392_0_38590_42386,0,58168_003392,0,38590,42386
4,58168_003392_0_38590_47944,0,58168_003392,0,38590,47944
...,...,...,...,...,...,...
49583,58172_003247_125_40656_G,0,58172_003247,125,40656,G
49584,58172_003247_125_52521_G,0,58172_003247,125,52521,G
49585,58172_003247_125_52939_G,0,58172_003247,125,52939,G
49586,58172_003247_125_39008_G,0,58172_003247,125,39008,G


In [9]:
# Show the tracking table loaded from test_player_tracking.csv.
test_tracking

Unnamed: 0,game_play,game_key,play_id,nfl_player_id,datetime,step,team,position,jersey_number,x_position,y_position,speed,distance,direction,orientation,acceleration,sa
0,58172_003247,58172,3247,41937,2020-09-13T19:30:20.200Z,-272,home,MLB,57,64.28,11.29,4.82,0.46,20.74,12.43,3.13,3.00
1,58172_003247,58172,3247,45345,2020-09-13T19:30:20.200Z,-272,away,ILB,50,76.03,36.74,3.15,0.32,179.52,184.91,1.87,-1.81
2,58172_003247,58172,3247,46205,2020-09-13T19:30:20.200Z,-272,home,DE,98,65.64,15.74,1.80,0.20,6.73,339.85,0.69,-0.68
3,58172_003247,58172,3247,43406,2020-09-13T19:30:20.200Z,-272,home,WR,14,36.35,14.94,1.81,0.18,330.50,344.03,0.62,0.39
4,58172_003247,58172,3247,48233,2020-09-13T19:30:20.200Z,-272,away,RB,45,76.10,32.15,3.36,0.34,166.59,180.56,0.88,-0.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14867,58168_003392,58168,3392,43395,2020-09-11T03:01:57.300Z,92,away,FS,23,51.03,13.63,1.70,0.17,21.01,18.32,0.35,-0.20
14868,58168_003392,58168,3392,39947,2020-09-11T03:01:57.300Z,92,home,T,72,42.73,18.52,1.19,0.13,331.22,332.97,0.58,-0.56
14869,58168_003392,58168,3392,44822,2020-09-11T03:01:57.300Z,92,home,QB,15,35.01,18.68,1.48,0.15,165.22,162.66,0.38,-0.07
14870,58168_003392,58168,3392,38590,2020-09-11T03:01:57.300Z,92,home,G,70,43.72,18.24,1.54,0.16,341.08,336.00,0.41,-0.41


In [10]:
# Show the helmet table loaded from test_baseline_helmets.csv.
test_helmets

Unnamed: 0,game_play,game_key,play_id,view,video,frame,nfl_player_id,player_label,left,width,top,height
0,58168_003392,58168,3392,Endzone,58168_003392_Endzone.mp4,290,39947,H72,946,25,293,34
1,58168_003392,58168,3392,Endzone,58168_003392_Endzone.mp4,290,37211,H42,151,25,267,33
2,58168_003392,58168,3392,Endzone,58168_003392_Endzone.mp4,290,38590,H70,810,25,293,35
3,58168_003392,58168,3392,Endzone,58168_003392_Endzone.mp4,290,44822,H15,681,26,254,33
4,58168_003392,58168,3392,Endzone,58168_003392_Endzone.mp4,290,41944,V92,680,23,303,33
...,...,...,...,...,...,...,...,...,...,...,...,...
47325,58172_003247,58172,3247,Sideline,58172_003247_Sideline.mp4,1082,52852,H38,460,40,315,45
47326,58172_003247,58172,3247,Sideline,58172_003247_Sideline.mp4,1082,41937,H57,729,43,473,45
47327,58172_003247,58172,3247,Sideline,58172_003247_Sideline.mp4,1082,48233,V45,126,33,504,35
47328,58172_003247,58172,3247,Sideline,58172_003247_Sideline.mp4,1082,48241,V22,446,38,511,45


In [11]:
# Show the video metadata table loaded from test_video_metadata.csv.
test_video_metadata

Unnamed: 0,game_play,game_key,play_id,view,start_time,end_time,snap_time
0,58168_003392,58168,3392,Endzone,2020-09-11T03:01:43.134Z,2020-09-11T03:01:54.971Z,2020-09-11T03:01:48.134Z
1,58168_003392,58168,3392,Sideline,2020-09-11T03:01:43.134Z,2020-09-11T03:01:54.971Z,2020-09-11T03:01:48.134Z
2,58172_003247,58172,3247,Endzone,2020-09-13T19:30:42.414Z,2020-09-13T19:31:00.524Z,2020-09-13T19:30:47.414Z
3,58172_003247,58172,3247,Sideline,2020-09-13T19:30:42.414Z,2020-09-13T19:31:00.524Z,2020-09-13T19:30:47.414Z


In [27]:
'''
ffmpeg 라이브러리를 사용하여 주어진 경로에 있는 동영상 파일을 읽고, 프레임을 추출하여 이미지 파일로 저장 
코드 실행 시, test_helmets 데이터 프레임에서 고유한 비디오 파일의 리스트를 추출하고, 
해당 리스트에서 "Endzone2"라는 문자열이 포함되어 있지 않은 동영상 파일에 대해 프레임 추출 작업을 수행

추출된 프레임은 /work/frames 폴더에 비디오 파일명과 일련번호를 사용하여 jpg 형식으로 저장

추출된 프레임의 화질을 나타내는 -q:v 2 옵션을 사용

오류 및 진행상황 메시지를 숨기기 위해 -hide_banner 및 -loglevel error 옵션을 사용
'''
# !mkdir -p ../work/frames

for video in tqdm(test_helmets.video.unique()):
    if 'Endzone2' not in video:
        !ffmpeg -i /content/drive/MyDrive/kaggle/nfl-player-contact-detection/test/{video} -q:v 2 -f image2 /content/drive/MyDrive/kaggle/work/frames/{video}_%04d.jpg -hide_banner -loglevel error

100%|██████████| 4/4 [01:00<00:00, 15.16s/it]


In [24]:
pwd

'/content/drive/MyDrive/kaggle/work/frames'

In [25]:
cd ..

/content/drive/MyDrive/kaggle/work


In [26]:
rm -rf work

In [20]:
cd frames

/content/drive/MyDrive/kaggle/work/frames


In [21]:
ls

In [29]:
cd /content/drive/MyDrive/kaggle/work/frames

/content/drive/MyDrive/kaggle/work/frames


In [None]:
# Count the entries in the frames directory. A bare path inside len() is a
# SyntaxError; the directory has to be listed first.
len(os.listdir('/content/drive/MyDrive/kaggle/work/frames'))

In [32]:
# `os` is already imported in the main import cell; this repeat is redundant but harmless.
import os
# Names of all entries in the extracted-frames directory (read by the next cell).
tmp = os.listdir('/content/drive/MyDrive/kaggle/work/frames')

In [33]:
# Number of entries found in the frames directory.
len(tmp)

3590