Take frames, make clips with 10 frames each and save them in .mp4 files.

## Import libraries, mount GDrive, constants

In [None]:
import os
import numpy as np
import cv2
from tqdm import tqdm
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime);
# every path constant defined below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Project root on the mounted Google Drive; all other paths are built from it.
ROOT = '/content/drive/MyDrive/ITC_Bundesliga/'

# CSV with the event annotations (loaded with pd.read_csv further down).
TABULAR_FILENAME = os.path.join(ROOT, 'train.csv')

# Input: extracted frames. Output: the generated .mp4 clips.
CURR_FRAMES_FOLDER = os.path.join(ROOT, 'frames_ms2')
CLIPS_FOLDER = os.path.join(ROOT, 'clipsHD')

# CURR_FRAMES_FOLDER structure: CURR_FRAMES_FOLDER/[video_id]/[frame]
# CLIPS_FOLDER structure: CLIPS_FOLDER/[train, val, test]/[class_]/[clip]

In [None]:
# Integer class label -> event name; the position in the tuple is the label.
CLASS_IDS = dict(enumerate(('challenge', 'throwin', 'play')))

In [None]:
# Reverse lookup: event name -> integer class label.
CLASS_IDS_INV = dict(zip(CLASS_IDS.values(), CLASS_IDS.keys()))
CLASS_IDS_INV

{'challenge': 0, 'throwin': 1, 'play': 2}

In [None]:
# Dataset split by video_id: 8 videos for training, 2 for validation, 2 for test.
# The split is per video, so all events of one video land in the same set.

train_videos = [
    '1606b0e6_0',
    '1606b0e6_1',
    'cfbe2e94_0',
    'cfbe2e94_1',
    '35bd9041_0',
    '35bd9041_1',
    '3c993bd2_0',
    '3c993bd2_1',
]
val_videos = [
    '9a97dae4_1',
    'ecf251d4_0',
]
test_videos = [
    '4ffd5986_0',
    '407c5a9e_1',
]

In [None]:
# video_id -> split name ('train' / 'val' / 'test').
# Built from train_videos / val_videos / test_videos (defined in the cell above)
# instead of a second hand-written copy, so the two can never drift apart.
DATASET_SPLIT = {
    video_id: split_name
    for split_name, video_ids in (('train', train_videos),
                                  ('val', val_videos),
                                  ('test', test_videos))
    for video_id in video_ids
}

# A video listed in more than one split would silently collapse into a single
# dict entry (and leak data between sets), so fail loudly instead.
assert len(DATASET_SPLIT) == len(train_videos) + len(val_videos) + len(test_videos), \
    'a video_id appears in more than one split'

In [None]:
# Frame-index window taken from every event: range(INDEX_START, INDEX_END),
# i.e. INDEX_END itself is excluded.
INDEX_START = 17
INDEX_END = 27

# Frame size handed to cv2.VideoWriter as (width, height).
# TARGET_SIZE = (224, 224)
TARGET_SIZE = (1920, 1080)

# Frame rate of the written clips.
FPS = 25

# Number of frames a complete clip must contain. Derived from the index window
# so the completeness check in the export loop cannot get out of sync with it.
CLIP_LEN = INDEX_END - INDEX_START

Take tabular data on events, build DataFrame to iterate on it.

In [None]:
# Load the event annotations and add the event time as integer milliseconds.
df = pd.read_csv(TABULAR_FILENAME)
df['event_time_ms'] = (df['time'] * 1000).round(0).astype(int)

# Keep only the real events (the 'start' / 'end' rows are dropped). Work on a
# copy so the columns added below do not touch df.
df_events = df.loc[~df['event'].isin(['start', 'end'])].copy()

# event_id: 0-based running counter of the events inside each video.
df_events['event_id'] = df_events.groupby('video_id').cumcount()

# class_id: integer label of the event name (raises KeyError on an unknown name).
df_events['class_id'] = df_events['event'].map(lambda name: CLASS_IDS_INV[name])

In [None]:
# Preview the events table with the derived event_time_ms / event_id / class_id columns.
df_events

Unnamed: 0,video_id,time,event,event_attributes,event_time_ms,event_id,class_id
1,1606b0e6_0,201.150,challenge,['ball_action_forced'],201150,0,0
4,1606b0e6_0,210.870,challenge,['opponent_dispossessed'],210870,1,0
7,1606b0e6_0,219.230,throwin,['pass'],219230,2,1
10,1606b0e6_0,224.430,play,"['pass', 'openplay']",224430,3,2
13,1606b0e6_0,229.390,play,"['pass', 'openplay']",229390,4,2
...,...,...,...,...,...,...,...
11206,ecf251d4_0,3041.347,play,"['pass', 'openplay']",3041347,381,2
11209,ecf251d4_0,3050.347,play,"['pass', 'openplay']",3050347,382,2
11210,ecf251d4_0,3053.067,play,"['pass', 'openplay']",3053067,383,2
11213,ecf251d4_0,3056.587,challenge,['opponent_dispossessed'],3056587,384,0


In [None]:
# One list of frame filenames per event, in df_events row order.
# Filename pattern: [video_id]_[event_id]_[class_id]_[frame index].jpg, with the
# frame index running over range(INDEX_START, INDEX_END).
frames_filenames = [
    [f'{video_id}_{event_id}_{class_id}_{frame_idx}.jpg'
     for frame_idx in range(INDEX_START, INDEX_END)]
    for video_id, event_id, class_id in zip(df_events['video_id'],
                                            df_events['event_id'],
                                            df_events['class_id'])
]

In [None]:
# Peek at the frame filenames generated for the first three events.
frames_filenames[:3]

[['1606b0e6_0_0_0_17.jpg',
  '1606b0e6_0_0_0_18.jpg',
  '1606b0e6_0_0_0_19.jpg',
  '1606b0e6_0_0_0_20.jpg',
  '1606b0e6_0_0_0_21.jpg',
  '1606b0e6_0_0_0_22.jpg',
  '1606b0e6_0_0_0_23.jpg',
  '1606b0e6_0_0_0_24.jpg',
  '1606b0e6_0_0_0_25.jpg',
  '1606b0e6_0_0_0_26.jpg'],
 ['1606b0e6_0_1_0_17.jpg',
  '1606b0e6_0_1_0_18.jpg',
  '1606b0e6_0_1_0_19.jpg',
  '1606b0e6_0_1_0_20.jpg',
  '1606b0e6_0_1_0_21.jpg',
  '1606b0e6_0_1_0_22.jpg',
  '1606b0e6_0_1_0_23.jpg',
  '1606b0e6_0_1_0_24.jpg',
  '1606b0e6_0_1_0_25.jpg',
  '1606b0e6_0_1_0_26.jpg'],
 ['1606b0e6_0_2_1_17.jpg',
  '1606b0e6_0_2_1_18.jpg',
  '1606b0e6_0_2_1_19.jpg',
  '1606b0e6_0_2_1_20.jpg',
  '1606b0e6_0_2_1_21.jpg',
  '1606b0e6_0_2_1_22.jpg',
  '1606b0e6_0_2_1_23.jpg',
  '1606b0e6_0_2_1_24.jpg',
  '1606b0e6_0_2_1_25.jpg',
  '1606b0e6_0_2_1_26.jpg']]

In [None]:
# Attach each event's list of frame filenames as a new column
# (the list was built in df_events row order, so it lines up row by row).
df_events['frames_filenames'] = frames_filenames

In [None]:
# Preview the events table again, now including the frames_filenames column.
df_events

Unnamed: 0,video_id,time,event,event_attributes,event_time_ms,event_id,class_id,frames_filenames
1,1606b0e6_0,201.150,challenge,['ball_action_forced'],201150,0,0,"[1606b0e6_0_0_0_17.jpg, 1606b0e6_0_0_0_18.jpg,..."
4,1606b0e6_0,210.870,challenge,['opponent_dispossessed'],210870,1,0,"[1606b0e6_0_1_0_17.jpg, 1606b0e6_0_1_0_18.jpg,..."
7,1606b0e6_0,219.230,throwin,['pass'],219230,2,1,"[1606b0e6_0_2_1_17.jpg, 1606b0e6_0_2_1_18.jpg,..."
10,1606b0e6_0,224.430,play,"['pass', 'openplay']",224430,3,2,"[1606b0e6_0_3_2_17.jpg, 1606b0e6_0_3_2_18.jpg,..."
13,1606b0e6_0,229.390,play,"['pass', 'openplay']",229390,4,2,"[1606b0e6_0_4_2_17.jpg, 1606b0e6_0_4_2_18.jpg,..."
...,...,...,...,...,...,...,...,...
11206,ecf251d4_0,3041.347,play,"['pass', 'openplay']",3041347,381,2,"[ecf251d4_0_381_2_17.jpg, ecf251d4_0_381_2_18...."
11209,ecf251d4_0,3050.347,play,"['pass', 'openplay']",3050347,382,2,"[ecf251d4_0_382_2_17.jpg, ecf251d4_0_382_2_18...."
11210,ecf251d4_0,3053.067,play,"['pass', 'openplay']",3053067,383,2,"[ecf251d4_0_383_2_17.jpg, ecf251d4_0_383_2_18...."
11213,ecf251d4_0,3056.587,challenge,['opponent_dispossessed'],3056587,384,0,"[ecf251d4_0_384_0_17.jpg, ecf251d4_0_384_0_18...."


Make the relevant folders and subfolders (`CLIPS_FOLDER/[train, val, test]/[class]`).

In [None]:
# Change the working directory to the clips output folder (IPython expands {CLIPS_FOLDER}).
%cd {CLIPS_FOLDER}

/content/drive/MyDrive/ITC_Bundesliga/clipsHD


In [None]:
# List the clips folder before the split subfolders are created.
! ls

In [None]:
# One-off setup, kept commented out because the folders already exist
# (see the listing in the next cell). Uncomment on a fresh CLIPS_FOLDER.
# ! mkdir train
# ! mkdir val
# ! mkdir test

In [None]:
# Confirm that the train / val / test subfolders are in place.
! ls

test  train  val


In [None]:
%cd ../val
# ! mkdir challenge
# ! mkdir play
# ! mkdir throwin

/content/drive/MyDrive/ITC_Bundesliga/clipsHD/val


## Read frames, stack them, save .mp4 files.

In [None]:
# Export one .mp4 clip per event:
#   read the event's frames from CURR_FRAMES_FOLDER/[video_id]/,
#   write them to CLIPS_FOLDER/[set]/[class]/[video_id]_[event_id]_[class_id].mp4.
# Events that cannot be exported are collected in `errors`, saved to
# ROOT/errors.csv, and get NO output file, so the file count check further down
# really reflects the number of usable clips.

# Force a fresh mount of Google Drive before the long-running loop.
drive.mount('/content/drive', force_remount=True)

# Make sure every output folder exists; cv2.VideoWriter does not create missing
# directories, it just fails to open.
for split_name in sorted(set(DATASET_SPLIT.values())):
    for class_name in CLASS_IDS.values():
        os.makedirs(os.path.join(CLIPS_FOLDER, split_name, class_name), exist_ok=True)

# Loop-invariant codec tag, built once.
fourcc = cv2.VideoWriter_fourcc(*'MP4V')

errors = []  # (video_id, event_id) of every event that could not be exported
for row in tqdm(df_events.itertuples(index=False), total=len(df_events)):

    video_id = row.video_id
    event_id = str(row.event_id)
    class_id = str(row.class_id)

    set_ = DATASET_SPLIT[video_id]
    class_ = row.event

    # NOTE: deliberately not named `frames_filenames`, which would overwrite the
    # notebook-level list used to build the df_events column.
    event_frames = row.frames_filenames

    imgs = []
    for frame_filename in event_frames:
        img = cv2.imread(os.path.join(CURR_FRAMES_FOLDER, video_id, frame_filename))
        # img = cv2.resize(img, TARGET_SIZE)  # do not resize. Save original FHD frames.

        # cv2.imread reports a missing/unreadable file by returning None (it does
        # not raise), so test for None rather than calling a method on the result.
        if img is None or img.size == 0:
            print(f'error in {frame_filename}')
            break
        # Frames are written as-is, so they must already have the writer's
        # (width, height); img.shape is (height, width, channels).
        if (img.shape[1], img.shape[0]) != TARGET_SIZE:
            print(f'error in {frame_filename}: frame size '
                  f'{(img.shape[1], img.shape[0])} != {TARGET_SIZE}')
            break
        imgs.append(img)

    # Incomplete clip: record it and skip before any output file is created.
    if len(imgs) != CLIP_LEN:
        print(f'error in event {video_id} {event_id}')
        errors.append((video_id, event_id))
        continue

    video_filename = '_'.join([video_id, event_id, class_id]) + '.mp4'
    video_filepath = os.path.join(CLIPS_FOLDER, set_, class_, video_filename)
    out = cv2.VideoWriter(video_filepath, fourcc, FPS, TARGET_SIZE)
    try:
        if not out.isOpened():
            # e.g. codec unavailable or path not writable; write() would be a no-op
            print(f'error in event {video_id} {event_id}: cannot open {video_filepath}')
            errors.append((video_id, event_id))
        else:
            for img in imgs:
                out.write(img)
    finally:
        # always release the writer so the file is finalized/closed
        out.release()

if errors:
    pd.Series(errors).to_csv(os.path.join(ROOT, 'errors.csv'))

# Flush pending writes to Drive and unmount.
drive.flush_and_unmount()

Mounted at /content/drive


100%|██████████| 4382/4382 [4:12:53<00:00,  3.46s/it]


Check filecount of saved clips.

In [None]:
# (set, class, number of files) for every CLIPS_FOLDER/[set]/[class] folder,
# in the order os.listdir returns them.
filecount = [
    (split_dir, class_dir,
     len(os.listdir(os.path.join(CLIPS_FOLDER, split_dir, class_dir))))
    for split_dir in os.listdir(CLIPS_FOLDER)
    for class_dir in os.listdir(os.path.join(CLIPS_FOLDER, split_dir))
]

In [None]:
# Show the (set, class, file count) triples collected above.
filecount

[('train', 'challenge', 417),
 ('train', 'play', 2507),
 ('train', 'throwin', 129),
 ('val', 'challenge', 94),
 ('val', 'play', 561),
 ('val', 'throwin', 22),
 ('test', 'challenge', 113),
 ('test', 'play', 518),
 ('test', 'throwin', 21)]