In [1]:
import pandas as pd
import yaml
import csv
import numpy as np
import cv2

In [2]:
def load_config(config_path):
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)
    return config

config = load_config('config.yaml')

In [3]:
csv_location = config['data_paths']['output_file_full_dataframe']
df = pd.read_csv(csv_location)

Behaviour of consecutive timestamps
Loop through the dataframe and note the timestamps of the next and previous event, identify rows where match_id or match_period changes and give NA value for previous/next timestamps in those rows.


In [4]:
df['video_timestamp'] = pd.to_numeric(df['video_timestamp'])
df['next_type'] = df['primary_type'].shift(-1)
df['next_timestamp'] = df['video_timestamp'].shift(-1)
df['next_match_id'] = df['match_id'].shift(-1)
df['next_match_period'] = df['match_period'].shift(-1)

# Shift the 'primary_type', 'video_timestamp', 'match_id', and 'match_period' columns for previous row values
df['prev_type'] = df['primary_type'].shift(1)
df['prev_timestamp'] = df['video_timestamp'].shift(1)
df['prev_match_id'] = df['match_id'].shift(1)
df['prev_match_period'] = df['match_period'].shift(1)

# Identify rows where match_id or match_period changes
df.loc[(df['match_id'] != df['next_match_id']) | (df['match_period'] != df['next_match_period']), 'next_timestamp'] = pd.NA
# Identify rows where match_id or match_period changes and set previous timestamp to NaN
df.loc[(df['match_id'] != df['prev_match_id']) | (df['match_period'] != df['prev_match_period']), 'prev_timestamp'] = pd.NA

For each primary type used in the classification process, calculate how long it takes to be followed up by every other primary type. 

In [5]:
df.loc[df['primary_type'] == 'shot', 'time_diff'] = df['next_timestamp'] - df['video_timestamp']
df.loc[df['primary_type'] == 'pass', 'time_diff'] = df['next_timestamp'] - df['video_timestamp']
df.loc[df['primary_type'] == 'duel', 'time_diff'] = df['next_timestamp'] - df['video_timestamp']
df.loc[df['primary_type'] == 'interception', 'time_diff'] = df['next_timestamp'] - df['video_timestamp']
df.loc[df['primary_type'] == 'touch', 'time_diff'] = df['next_timestamp'] - df['video_timestamp']

In [8]:
print(df['time_diff'].dtype)

object


In [10]:
df['time_diff'] = df['time_diff'].fillna(np.nan).astype(float)
df['time_diff'] = df['time_diff'].astype(float)

In [30]:
time_diff_stats_shot = df[df['primary_type'] == 'shot'].groupby('next_type')['time_diff'].agg(['mean', 'std', 'count'])
time_diff_stats_pass = df[df['primary_type'] == 'pass'].groupby('next_type')['time_diff'].agg(['mean', 'std', 'count'])
time_diff_stats_duel = df[df['primary_type'] == 'duel'].groupby('next_type')['time_diff'].agg(['mean', 'std', 'count'])
time_diff_stats_interception = df[df['primary_type'] == 'interception'].groupby('next_type')['time_diff'].agg(['mean', 'std', 'count'])
time_diff_stats_touch = df[df['primary_type'] == 'touch'].groupby('next_type')['time_diff'].agg(['mean', 'std', 'count'])

In [35]:
time_diff_stats_touch

Unnamed: 0_level_0,mean,std,count
next_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
acceleration,1.642806,0.864254,91
clearance,1.332881,0.790815,375
duel,2.322474,1.954782,6401
fairplay,5.720663,3.206438,25
game_interruption,4.545607,8.329891,1255
goalkeeper_exit,0.446576,0.367741,10
infraction,2.073065,5.684358,120
interception,1.100753,2.124454,570
offside,2.177592,1.45661,22
own_goal,0.714679,0.220049,5


In [13]:
df.loc[df['primary_type'] == 'shot', 'time_diff_prev'] = df['video_timestamp'] - df['prev_timestamp']
df.loc[df['primary_type'] == 'pass', 'time_diff_prev'] = df['video_timestamp'] - df['prev_timestamp']
df.loc[df['primary_type'] == 'duel', 'time_diff_prev'] = df['video_timestamp'] - df['prev_timestamp']
df.loc[df['primary_type'] == 'interception', 'time_diff_prev'] = df['video_timestamp'] - df['prev_timestamp']
df.loc[df['primary_type'] == 'touch', 'time_diff_prev'] = df['video_timestamp'] - df['prev_timestamp']

In [14]:
df['time_diff_prev'] = df['time_diff_prev'].fillna(np.nan).astype(float)
df['time_diff_prev'] = df['time_diff_prev'].astype(float)

In [27]:
time_diff_stats_shot_prev = df[df['primary_type'] == 'shot'].groupby('prev_type')['time_diff_prev'].agg(['mean', 'std', 'count'])
time_diff_stats_pass_prev = df[df['primary_type'] == 'pass'].groupby('prev_type')['time_diff_prev'].agg(['mean', 'std', 'count'])
time_diff_stats_duel_prev = df[df['primary_type'] == 'duel'].groupby('prev_type')['time_diff_prev'].agg(['mean', 'std', 'count'])
time_diff_stats_interception_prev = df[df['primary_type'] == 'interception'].groupby('prev_type')['time_diff_prev'].agg(['mean', 'std', 'count'])
time_diff_stats_touch_prev = df[df['primary_type'] == 'touch'].groupby('prev_type')['time_diff_prev'].agg(['mean', 'std', 'count'])

In [41]:
print(time_diff_stats_touch_prev)

                        mean        std  count
prev_type                                     
acceleration        3.447074   1.939557     16
clearance           2.718100   1.846987    153
corner              1.305407   0.524044    209
duel                1.624382   3.684882   2752
fairplay            5.505369  13.131748     44
free_kick           2.112785   1.379621    733
game_interruption  47.340819  41.798073     21
goal_kick           2.198807   1.408124    665
goalkeeper_exit     4.219033   5.639884     31
interception        1.694868   1.454101   2167
pass                1.716840   0.943179  29826
shot                1.850139   1.190894     65
shot_against        8.190162  13.850245    109
throw_in            1.847749   1.223759    736
touch               1.754075   2.979046   1015


Accuracy of the event timestamps

In [18]:
def extract_frames(video_path, df, event_id):
    cap = cv2.VideoCapture(video_path)
    
    # Get frame rate of the video
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    # Filter the DataFrame for the given event_id
    event_row = df[df['event_id'] == event_id].iloc[0]
    #print(event_row)
    
    # Extract timestamps
    important_frame_timestamp = event_row['video_timestamp']
    next_timestamp = event_row['next_timestamp']
    prev_timestamp = event_row['prev_timestamp']
    
    # Calculate start and end times based on timestamps
    start_time = prev_timestamp if pd.notnull(prev_timestamp) else important_frame_timestamp
    end_time = next_timestamp if pd.notnull(next_timestamp) else important_frame_timestamp
    
    # Calculate start and end frame numbers
    start_frame = int(start_time * fps)
    end_frame = int(end_time * fps)
    
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    timestamp_start = 0
    timestamp_end = 0
    frame_count = start_frame
    
    while cap.isOpened() and frame_count <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break
        
        current_time = frame_count / fps
        
        # Define the color for the timestamps
        current_timestamp_color = (0, 0, 255)  # Red for current frame timestamp
        important_timestamp_color = (0, 255, 255)  # Yellow for important frame timestamp
        
        # Put the timestamps on the frame
        current_timestamp_text = f'Current Frame: {current_time:.2f}s'
        important_timestamp_text = f'Important Frame: {important_frame_timestamp:.2f}s'
        
        cv2.putText(frame, current_timestamp_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, current_timestamp_color, 2, cv2.LINE_AA)
        cv2.putText(frame, important_timestamp_text, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, important_timestamp_color, 2, cv2.LINE_AA)
        
        # Display the frame
        cv2.imshow('Frame', frame)
        
        # Wait for user input to show the next frame
        key = cv2.waitKey(0) & 0xFF
        # Break the loop if 'q' is pressed
        if key == ord('q'):
            break
        elif key == ord('s'):
            timestamp_start = float(current_time - 1/fps)
        elif key == ord('e'):
            timestamp_end = float(current_time - 1/fps)
            break  # Exit loop after setting timestamp_end
        
        frame_count += 1
        
    if timestamp_end == 0:
        timestamp_end = end_time
    
    cap.release()
    cv2.destroyAllWindows()
    return timestamp_start, timestamp_end, important_frame_timestamp

In [19]:
def process_events(df, video_folder):
    timestamps_df = pd.DataFrame(columns=['EventID', 'TimestampStart', 'TimestampEnd', 'TimestampEvent'])
    
    sample_df = df.sample(n=50, random_state=42)

    for index, row in sample_df.iterrows():
        event_id = row['event_id']
        game_id = row['match_id']  # Assuming match_id is the column name for game ID
        timestamp_next_event = row['next_timestamp']
        
        video_path = f"{video_folder}/g{game_id}-hd.mp4"
        #print(f'Processing Event ID: {event_id}, Game ID: {game_id}, Video Path: {video_path}')
        
        # Extract timestamps for the current event_id
        timestamp_start, timestamp_end, timestamp_event = extract_frames(video_path, df, event_id)
        
        # Append timestamps to DataFrame
        timestamps_df = timestamps_df.append({
            'EventID': event_id,
            'TimestampStart': timestamp_start,
            'TimestampEnd': timestamp_end,
            'TimestampEvent': timestamp_event,
            'TimestampNextEvent': timestamp_next_event
        }, ignore_index=True)
        
        print(f'Timestamps for Event ID {event_id}: Start={timestamp_start}, End={timestamp_end}, Event={timestamp_event}')
    
    return timestamps_df

In [None]:
shot_following = df[df['primary_type'] == 'shot']
pass_following = df[df['primary_type'] == 'pass']
duel_following = df[df['primary_type'] == 'duel']
interception_following = df[df['primary_type'] == 'interception']
touch_following = df[df['primary_type'] == 'touch']

video_folder = config['data_paths']['video_directory']

results_shot = process_events(shot_following, video_folder)
results_shot.to_csv('results_shot.csv', index=False)
results_pass = process_events(pass_following, video_folder)
results_pass.to_csv('results_pass.csv', index=False)
results_duel = process_events(duel_following, video_folder)
results_duel.to_csv('results_pass.csv', index=False)
results_touch = process_events(touch_following, video_folder)
results_touch.to_csv('results_touch.csv', index=False)
results_interception = process_events(interception_following, video_folder)
results_interception.to_csv('results_interception', index=False)
