Take long video recordings, then extract and save the relevant frames surrounding each event occurrence.

## Setup

In [None]:
# Load the Google Drive helper that ships with Colab.
from google.colab import drive

# Mount Drive at /content/drive. This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


### Import libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from timeit import default_timer as timer

import glob

In [None]:
# Display the installed OpenCV version.
cv2.__version__

'4.6.0'

### Set constants

In [None]:
# ROOT is the project folder on the mounted Drive; the data file train.csv is located in it.
# Every other path is derived from ROOT so that all of them share the same
# 'MyDrive' mount spelling and only one line changes if the project moves.
ROOT = "/content/drive/MyDrive/ITC_Bundesliga"
VIDEOS_PATH = f"{ROOT}/train"  # folder holding the source videos (<video_id>.mp4)
TABULAR_FILENAME = f"{ROOT}/train.csv"  # tabular event data
FRAMES_FOLDER = f"{ROOT}/frames_ms2/"  # parent folder of the extracted frames

In [None]:
# Tolerances are expressed in seconds: one ascending list of five values per event type.
TOLERANCES = dict(
    challenge=[0.3, 0.4, 0.5, 0.6, 0.7],
    play=[0.15, 0.20, 0.25, 0.30, 0.35],
    throwin=[0.15, 0.20, 0.25, 0.30, 0.35],
)

# Frame rate used to convert seconds into frames.
FPS = 25

# Which entry of each tolerance list is used; -1 selects the last (largest) one.
TOLERANCE_INDEX = -1

In [None]:
# Integer label of each event type, assigned by its position in this tuple.
CLASS_IDS = {name: idx for idx, name in enumerate(('challenge', 'throwin', 'play'))}

## Frames capture

In [None]:
# Convert every tolerance from seconds to a (possibly fractional) number of frames.
tolerances_in_frames = {
    event_type: [FPS * seconds for seconds in seconds_list]
    for event_type, seconds_list in TOLERANCES.items()
}

In [None]:
# Inspect the tolerances after conversion from seconds to frame counts.
tolerances_in_frames

{'challenge': [7.5, 10.0, 12.5, 15.0, 17.5],
 'play': [3.75, 5.0, 6.25, 7.5, 8.75],
 'throwin': [3.75, 5.0, 6.25, 7.5, 8.75]}

In [None]:
# For each event type, take the tolerance selected by TOLERANCE_INDEX and
# round it down to a whole number of frames.
effective_tolerances_in_frames = {
    event_type: np.floor(frame_counts[TOLERANCE_INDEX]).astype(int)
    for event_type, frame_counts in tolerances_in_frames.items()
}
effective_tolerances_in_frames

{'challenge': 17, 'play': 8, 'throwin': 8}

In [None]:
# Load the tabular data and express each timestamp in whole milliseconds.
df = pd.read_csv(TABULAR_FILENAME)
df['event_time_ms'] = (df['time'] * 1000).round(0).astype(int)

# Keep only real events (drop the 'start'/'end' rows), then number the events
# within each video and attach the integer class label.
df_events = (
    df.loc[~df['event'].isin(['start', 'end'])]
    .assign(
        event_id=lambda events: events.groupby('video_id').cumcount(),
        class_id=lambda events: events['event'].map(lambda name: CLASS_IDS[name]),
    )
)

In [None]:
# Distinct video ids, in order of first appearance in the table.
video_ids = df.loc[:, 'video_id'].unique()

In [None]:
# Preview the event table ('start'/'end' rows removed; event_time_ms, event_id and class_id added).
df_events

Unnamed: 0,video_id,time,event,event_attributes,event_time_ms,event_id,class_id
1,1606b0e6_0,201.150,challenge,['ball_action_forced'],201150,0,0
4,1606b0e6_0,210.870,challenge,['opponent_dispossessed'],210870,1,0
7,1606b0e6_0,219.230,throwin,['pass'],219230,2,1
10,1606b0e6_0,224.430,play,"['pass', 'openplay']",224430,3,2
13,1606b0e6_0,229.390,play,"['pass', 'openplay']",229390,4,2
...,...,...,...,...,...,...,...
11206,ecf251d4_0,3041.347,play,"['pass', 'openplay']",3041347,381,2
11209,ecf251d4_0,3050.347,play,"['pass', 'openplay']",3050347,382,2
11210,ecf251d4_0,3053.067,play,"['pass', 'openplay']",3053067,383,2
11213,ecf251d4_0,3056.587,challenge,['opponent_dispossessed'],3056587,384,0


In [None]:
# List the distinct video ids found in the tabular data.
video_ids

array(['1606b0e6_0', '1606b0e6_1', '35bd9041_0', '35bd9041_1',
       '3c993bd2_0', '3c993bd2_1', '407c5a9e_1', '4ffd5986_0',
       '9a97dae4_1', 'cfbe2e94_0', 'cfbe2e94_1', 'ecf251d4_0'],
      dtype=object)

Function to extract the frames for one `video_id`.  
By default, extraction starts 17 frames before each event and saves 35 consecutive frames (17 before the event, the event frame itself, and 17 after).

In [None]:
def get_frames(video_id, df_events, videos_path, output_path, offset_start=17, n_frames=35):
    """Save a window of consecutive frames around every event of one video.

    For each event of ``video_id`` the capture is moved to the event time,
    rewound by ``offset_start`` frames, and ``n_frames`` frames are read in
    sequence. Every frame that is read successfully is written to
    ``<output_path>/<video_id>_<event_id>_<class_id>_<j>.jpg``, where ``j`` is
    the position of the frame inside the window (``j == offset_start`` is the
    frame at the event position).

    Args:
        video_id: Id of the video; the file ``<video_id>.mp4`` is read from
            ``videos_path``.
        df_events: DataFrame with the columns ``video_id``, ``event_time_ms``,
            ``event_id`` and ``class_id``.
        videos_path: Folder containing the video files.
        output_path: Folder receiving the JPEG frames; created if missing.
        offset_start: Number of frames before the event at which the window
            starts.
        n_frames: Number of frames in the window.

    Raises:
        OSError: If the video file cannot be opened.
    """
    start = timer()

    # Select the rows of this video once instead of re-filtering inside the loop.
    events = df_events.loc[
        df_events['video_id'] == video_id,
        ['event_time_ms', 'event_id', 'class_id'],
    ]

    # cv2.imwrite does not create missing folders; make sure the target exists.
    os.makedirs(output_path, exist_ok=True)

    video_file = os.path.join(videos_path, ''.join([video_id, '.mp4']))
    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            raise OSError(f'Could not open video file: {video_file}')

        rows = zip(events['event_time_ms'], events['event_id'], events['class_id'])
        for event_time_ms, event_id, class_id in tqdm(rows, total=len(events)):
            # Seek by time first, then read back the frame index of the event.
            cap.set(cv2.CAP_PROP_POS_MSEC, event_time_ms)
            event_pos = cap.get(cv2.CAP_PROP_POS_FRAMES)

            # First frame of the window. If it falls before the beginning of
            # the video, start at frame 0 and skip the missing window slots so
            # that `j` keeps its meaning relative to the event.
            start_pos = event_pos - offset_start
            first_j = int(max(0, -start_pos))
            cap.set(cv2.CAP_PROP_POS_FRAMES, max(start_pos, 0))

            for j in range(first_j, n_frames):
                success, image = cap.read()
                if not success:
                    print(f'{j+1} of {n_frames}, video_id: {video_id}, time: {event_time_ms}, failed to read.')
                    # There is no image to save; writing None would raise.
                    continue
                cv2.imwrite(os.path.join(output_path, f'{video_id}_{event_id}_{class_id}_{j}.jpg'), image)
    finally:
        # Release the capture even when the extraction is interrupted.
        cap.release()

    end = timer()

    # Report the elapsed time, not the raw timer reading.
    print(f'Done in {end - start} seconds.')

Run each `video_id` separately because the operation is expensive. This also allows the work to be parallelized by running copies of this notebook, one per video.

In [None]:
# Video processed by this run of the notebook.
video_id = '3c993bd2_1'

# The frames of each video go into their own sub-folder of <ROOT>/frames_ms2.
output_path = os.path.join(ROOT, 'frames_ms2', video_id)

# Create the folder up front: changing into it fails, and cv2.imwrite silently
# writes nothing, when the destination directory does not exist.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Switch the notebook's working directory to the per-video output folder.
# NOTE(review): get_frames builds its file names from output_path, which is an absolute path here, so the extraction does not appear to depend on this step; confirm before removing it.
%cd {output_path}

/content/drive/MyDrive/ITC_Bundesliga/frames_ms2/3c993bd2_1


In [None]:
# Extract and save the frames of the selected video using the default window.
get_frames(
    video_id=video_id,
    df_events=df_events,
    videos_path=VIDEOS_PATH,
    output_path=output_path,
)