In [15]:
%load_ext autoreload
%autoreload 2

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from tqdm import tqdm

from sync import config, elastic, etsy
from sync.preprocessor import Preprocessor, find_spadl_event_types

# Widen pandas' console rendering so the result tables below are shown without truncation.
for _option, _value in (
    ('display.width', 250),
    ('display.max_rows', 50),
    ('display.max_columns', 30),
):
    pd.set_option(_option, _value)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing original data

In [10]:
# Matches that have ground-truth annotations available for evaluation.
game_ids = [
    "8qhuektrx8cmkxs11lxsdd4pg",
    "8t53c07vfe5vmg9jm0w7pq2vo",
    "9gewka7f25bz12mqrfm7ygjro",
]

# Lineups and events are stored for all matches in a single file each.
lineups = pd.read_parquet("data/ajax/lineup/line_up.parquet")
events = pd.read_parquet("data/ajax/event/event.parquet")
events["utc_timestamp"] = pd.to_datetime(events["utc_timestamp"])
events = find_spadl_event_types(events)

# game_id -> {"start_utc", "events", "traces"}; later cells add more entries per game.
data_dict = {}

for game_id in tqdm(game_ids):
    game_lineup = lineups[lineups["stats_perform_match_id"] == game_id].set_index("player_id")

    # Keep only this match's events that have both a SPADL type and a player.
    is_usable_event = (
        (events["stats_perform_match_id"] == game_id)
        & events["spadl_type"].notna()
        & events["player_id"].notna()
    )
    game_events = events.loc[is_usable_event].copy()

    # Tracking data is stored in one file per match.
    traces = pd.read_parquet(f"data/ajax/tracking/{game_id}.parquet")

    proc = Preprocessor(game_lineup, game_events, traces)
    input_events = proc.format_events_for_syncer()
    input_traces = proc.format_traces_for_syncer()
    # NOTE(review): presumably row label 0 is the first tracking frame — confirm against Preprocessor.
    start_utc = proc.traces.at[0, "utc_timestamp"]

    data_dict[game_id] = {
        "start_utc": start_utc,
        "events": input_events,
        "traces": input_traces,
    }

100%|██████████| 3/3 [00:07<00:00,  2.57s/it]


### Aligning indices of event data with ground truth annotation

Because the annotated event data also marks false positives and missing events, the following code aligns the indices of the input events with those of the annotated events so that accuracy can be evaluated.

If you do not have annotated events for evaluation, or if your annotated events already have the same indices as the input event data, you can skip the following cells.

In [8]:
def time_str_to_frame(t: str, fps=25) -> float:
    """Convert a clock string into a frame number.

    The first two characters of ``t`` are read as minutes and everything from
    the fourth character onwards as (possibly fractional) seconds; the
    character in between is ignored.

    Args:
        t: Time string such as ``"12:34.56"``. Any non-string value
            (e.g. NaN for an empty cell) is treated as missing.
        fps: Frames per second used to scale seconds into frames.

    Returns:
        The rounded frame number, or ``np.nan`` when ``t`` is not a string.
    """
    if not isinstance(t, str):
        return np.nan

    minutes = float(t[:2])
    seconds = float(t[3:])
    return round((minutes * 60 + seconds) * fps)

def frame_to_utc_timestamp(frame: float, start_utc: datetime, fps=25) -> datetime:
    """Convert a frame number into an absolute timestamp.

    Args:
        frame: Frame number counted from ``start_utc``; may be NaN.
        start_utc: Timestamp corresponding to frame 0.
        fps: Frames per second used to turn frames into seconds.

    Returns:
        ``start_utc`` shifted by ``frame / fps`` seconds, or ``np.nan`` when
        ``frame`` is NaN.
    """
    if np.isnan(frame):
        return np.nan

    elapsed = timedelta(seconds=frame / fps)
    return start_utc + elapsed

def get_event_attr(event: pd.Series, traces: pd.DataFrame, col="x"):
    """Look up a ball-row attribute of the tracking data at an event's frame.

    Args:
        event: Event row; its ``"frame"`` value selects the tracking frame.
        traces: Tracking data with a ``"frame"`` column, a ``"ball"`` column
            usable as a boolean row mask, and the requested ``col``.
        col: Name of the trace column to return (e.g. ``"x"`` or ``"y"``).

    Returns:
        The value of ``col`` in the ball row whose frame equals ``event["frame"]``.

    Raises:
        KeyError: If no ball row exists for that frame.
    """
    # Use the `traces` argument rather than a same-named notebook global, so the
    # result always comes from the tracking data the caller actually passed in.
    ball_traces = traces[traces["ball"]].set_index("frame")
    return ball_traces.at[event["frame"], col]

In [None]:
for game_id in game_ids:
    game_data = data_dict[game_id]
    start_utc = game_data["start_utc"]
    input_events = game_data["events"]
    input_traces = game_data["traces"]

    # Load the manual annotation and drop the columns that are not used here
    # ("receive_frame" is recomputed from "receive_ts" below).
    data_path = f"data/ajax/event_corrected/{game_id}.csv"
    drop_cols = ["next_player_id", "next_type", "receive_frame", "note"]
    annotated = pd.read_csv(data_path, header=0).drop(columns=drop_cols)
    annotated.loc[annotated["spadl_type"] == "ball_touch", "spadl_type"] = "bad_touch"

    # Only first-period events are evaluated; "shield_ball_oop" rows are excluded.
    keep_rows = (annotated["period_id"] == 1) & (annotated["spadl_type"] != "shield_ball_oop")
    annotated = annotated[keep_rows].copy()

    # Turn the annotated clock strings into frame numbers.
    for frame_col, ts_col in (("frame", "synced_ts"), ("receive_frame", "receive_ts")):
        annotated[frame_col] = annotated[ts_col].apply(time_str_to_frame)

    # Rows not marked "missing" are filled positionally from the first-period
    # input events, so both sides must contain the same number of rows.
    period_events = input_events[input_events["period_id"] == 1]
    not_missing = annotated["error_type"] != "missing"
    for col in ("utc_timestamp", "start_x", "start_y"):
        annotated.loc[not_missing, col] = period_events[col].values
    annotated["utc_timestamp"] = pd.to_datetime(annotated["utc_timestamp"])

    # Rows marked "missing" have no input event: derive their timestamp from the
    # annotated frame and their location from the ball position at that frame.
    missing = annotated[annotated["error_type"] == "missing"].copy()
    annotated.loc[missing.index, "utc_timestamp"] = missing["frame"].apply(frame_to_utc_timestamp, args=(start_utc,))
    for axis_col in ("x", "y"):
        annotated.loc[missing.index, f"start_{axis_col}"] = missing.apply(
            get_event_attr, traces=input_traces, col=axis_col, axis=1
        )

    game_data["annotated_events"] = annotated

### Calculating the accuracy of ELASTIC

In [22]:
def _frame_accuracy(pred_frames: pd.Series, true_frames: np.ndarray, total: int) -> dict:
    """Summarize how closely predicted frames match ground-truth frames.

    ``pred_frames`` and ``true_frames`` are compared position by position.
    Pairs where either side is NaN count towards none of the hit counters and
    are skipped by the mean.
    """
    abs_diff = (pred_frames - true_frames).abs()
    acc = {
        "total": total,
        "mean_diff": abs_diff.mean(),
        "exact": (pred_frames == true_frames).sum(),
    }
    for tolerance in (5, 25, 50):
        acc[f"within_{tolerance}"] = (abs_diff <= tolerance).sum()
    acc["valid"] = pred_frames.notna().sum()
    return acc


def calc_accuracy(synced: pd.DataFrame, annotated: pd.DataFrame, include_receive: bool = True):
    """Compare synchronized event frames against annotated ground-truth frames.

    Row ``i`` of ``synced`` is compared with the ``i``-th row of ``annotated``
    whose ``error_type`` is not ``"false_positive"``.

    Args:
        synced: Synchronized events with ``"spadl_type"`` and ``"frame"``
            columns and, optionally, ``"receive_frame"``. An ``"event_cat"``
            column is added to this frame in place.
        annotated: Ground-truth events with ``"error_type"``, ``"spadl_type"``,
            ``"frame"`` and ``"receive_frame"`` columns.
        include_receive: When True (default) and ``synced`` has a
            ``"receive_frame"`` column, also report the ``"event_end"`` and
            ``"total"`` rows.

    Returns:
        ``(acc_counts, acc_rates)``: per-row counts (plus ``mean_diff``) and
        the same counts divided by each row's ``total``.

    Raises:
        ValueError: If ``synced`` and the non-false-positive annotations have
            different lengths.
    """
    event_cats = {}
    for cat_name, spadl_types in (
        ("pass_like", config.PASS_LIKE_OPEN),
        ("set_piece", config.SET_PIECE),
        ("incoming", config.INCOMING),
        ("minor", config.MINOR),
    ):
        event_cats.update({spadl_type: cat_name for spadl_type in spadl_types})
    synced["event_cat"] = synced["spadl_type"].map(event_cats)

    is_true_event = annotated["error_type"] != "false_positive"
    true_frames_s = annotated.loc[is_true_event, "frame"].round().values
    true_frames_r = annotated.loc[is_true_event, "receive_frame"].round().values

    if len(synced) != len(true_frames_s):
        raise ValueError(
            f"synced has {len(synced)} rows but annotated has {len(true_frames_s)} "
            "non-false-positive rows; they must be aligned one-to-one"
        )

    acc_counts = dict()
    for cat in ["pass_like", "set_piece", "incoming", "minor"]:
        # A positional boolean mask keeps predictions and ground truth aligned
        # even when the index of `synced` is not a plain 0..n-1 range.
        cat_mask = (synced["event_cat"] == cat).to_numpy()
        acc_counts[cat] = _frame_accuracy(
            synced.loc[cat_mask, "frame"], true_frames_s[cat_mask], total=int(cat_mask.sum())
        )

    acc_counts = pd.DataFrame(acc_counts).T
    acc_counts.loc["event_start"] = acc_counts.sum(axis=0)
    # Summing the per-category means is meaningless, so recompute over all events.
    start_diff = (synced["frame"] - true_frames_s).abs()
    acc_counts.at["event_start", "mean_diff"] = start_diff.mean()

    if include_receive and "receive_frame" in synced.columns:
        # Only pass-like and set-piece events are counted in the receive total.
        pass_like = annotated["spadl_type"].isin(config.PASS_LIKE_OPEN + config.SET_PIECE)
        n_receive = len(annotated[is_true_event & pass_like])
        acc_counts.loc["event_end"] = _frame_accuracy(synced["receive_frame"], true_frames_r, total=n_receive)

        acc_counts.loc["total"] = acc_counts.loc["event_start"] + acc_counts.loc["event_end"]
        end_diff = (synced["receive_frame"] - true_frames_r).abs()
        # NOTE(review): this divides by the total count, whereas every other
        # mean_diff averages over valid (non-NaN) pairs only — confirm intended.
        acc_counts.at["total", "mean_diff"] = (start_diff.sum() + end_diff.sum()) / acc_counts.at["total", "total"]

    int_cols = ["total", "exact", "within_5", "within_25", "within_50", "valid"]
    acc_counts[int_cols] = acc_counts[int_cols].astype(int)
    acc_rates = acc_counts.drop(["total", "mean_diff"], axis=1).div(acc_counts["total"], axis=0)

    return acc_counts, acc_rates

In [None]:
for game_id in game_ids:
    print(f"\n{game_id}")

    game_data = data_dict[game_id]
    input_traces: pd.DataFrame = game_data["traces"]
    annotated: pd.DataFrame = game_data["annotated_events"]

    # Feed the syncer the annotated events minus false positives, restricted to
    # the columns of the original input events and re-indexed from 0 so that row
    # positions line up with the ground truth during evaluation.
    is_true_event = annotated["error_type"] != "false_positive"
    input_events: pd.DataFrame = annotated.loc[is_true_event, game_data["events"].columns].reset_index(drop=True)

    syncer = elastic.ELASTIC(input_events, input_traces)
    syncer.run()

    game_data["synced_events"] = syncer.events


8qhuektrx8cmkxs11lxsdd4pg


Syncing major events in period 1: 100%|██████████| 672/672 [00:20<00:00, 33.19it/s]
Detecting receiving events: 100%|██████████| 600/600 [00:08<00:00, 71.15it/s]
Post-syncing minor events: 100%|██████████| 88/88 [00:01<00:00, 47.47it/s]



8t53c07vfe5vmg9jm0w7pq2vo


Syncing major events in period 1: 100%|██████████| 543/543 [00:15<00:00, 35.52it/s]
Detecting receiving events: 100%|██████████| 486/486 [00:06<00:00, 73.16it/s]
Post-syncing minor events: 100%|██████████| 94/94 [00:02<00:00, 36.13it/s]



9gewka7f25bz12mqrfm7ygjro


Syncing major events in period 1: 100%|██████████| 657/657 [00:20<00:00, 32.66it/s]
Detecting receiving events: 100%|██████████| 599/599 [00:08<00:00, 71.43it/s]
Post-syncing minor events: 100%|██████████| 77/77 [00:01<00:00, 44.64it/s]


In [14]:
# Pool the per-game results so that accuracy is computed over all games at once.
synced_per_game = []
annotated_per_game = []

for game_id, data in data_dict.items():
    game_events = data["synced_events"]
    game_events["game_id"] = game_id
    synced_per_game.append(game_events)
    # The aligned ground truth is stored under "annotated_events"; no
    # "corrected_events" entry is ever written, so that key raised KeyError.
    annotated_per_game.append(data["annotated_events"])

# ignore_index gives both frames a fresh 0..n-1 index, which calc_accuracy relies on.
synced = pd.concat(synced_per_game, ignore_index=True)
annotated = pd.concat(annotated_per_game, ignore_index=True)

acc_counts, acc_rates = calc_accuracy(synced, annotated)
display(acc_counts)
display(acc_rates)

Unnamed: 0,total,mean_diff,exact,within_5,within_25,within_50,valid
pass_like,1590,0.566518,1519,1546,1561,1566,1571
set_piece,117,0.122807,108,113,114,114,114
incoming,168,6.662651,144,147,151,157,166
minor,259,11.75,115,174,201,233,248
event_start,2134,2.345879,1886,1980,2027,2070,2099
event_end,1707,1.605952,1579,1623,1645,1661,1680
total,3841,1.984379,3465,3603,3672,3731,3779


Unnamed: 0,exact,within_5,within_25,within_50,valid
pass_like,0.955346,0.972327,0.981761,0.984906,0.98805
set_piece,0.923077,0.965812,0.974359,0.974359,0.974359
incoming,0.857143,0.875,0.89881,0.934524,0.988095
minor,0.444015,0.671815,0.776062,0.899614,0.957529
event_start,0.883786,0.927835,0.949859,0.970009,0.983599
event_end,0.925015,0.950791,0.963679,0.973052,0.984183
total,0.902109,0.938037,0.956001,0.971362,0.983858


### Calculating the accuracy of ETSY

In [89]:
for game_id in game_ids:
    print(f"\n{game_id}")

    game_data = data_dict[game_id]
    input_traces: pd.DataFrame = game_data["traces"]
    annotated: pd.DataFrame = game_data["annotated_events"]

    # Same input as for ELASTIC: annotated events without false positives,
    # limited to the input-event columns and re-indexed from 0.
    is_true_event = annotated["error_type"] != "false_positive"
    input_events: pd.DataFrame = annotated.loc[is_true_event, game_data["events"].columns].reset_index(drop=True)

    syncer = etsy.ETSY(input_events, input_traces)
    syncer.run()

    # Overwrites any earlier "synced_events" entry for this game.
    game_data["synced_events"] = syncer.events


8qhuektrx8cmkxs11lxsdd4pg


Syncing events in period 1: 100%|██████████| 760/760 [00:23<00:00, 31.83it/s]



8t53c07vfe5vmg9jm0w7pq2vo


Syncing events in period 1: 100%|██████████| 637/637 [00:19<00:00, 33.11it/s]



9gewka7f25bz12mqrfm7ygjro


Syncing events in period 1: 100%|██████████| 734/734 [00:23<00:00, 31.90it/s]


In [90]:
# Pool the per-game results so that accuracy is computed over all games at once.
synced_per_game = []
annotated_per_game = []

for game_id, data in data_dict.items():
    game_events = data["synced_events"]
    game_events["game_id"] = game_id
    synced_per_game.append(game_events)
    # The aligned ground truth is stored under "annotated_events"; no
    # "corrected_events" entry is ever written, so that key raised KeyError.
    annotated_per_game.append(data["annotated_events"])

# ignore_index gives both frames a fresh 0..n-1 index, which calc_accuracy relies on.
synced = pd.concat(synced_per_game, ignore_index=True)
annotated = pd.concat(annotated_per_game, ignore_index=True)

acc_counts, acc_rates = calc_accuracy(synced, annotated)
display(acc_counts)
display(acc_rates)

Unnamed: 0,total,mean_diff,exact,within_5,within_25,within_50,valid
pass_like,1590,14.053333,348,995,1242,1375,1500
set_piece,117,11.974138,68,103,107,109,116
incoming,168,24.18125,9,64,107,133,160
minor,259,18.382022,26,82,139,162,178
event_start,2134,15.153531,451,1244,1595,1779,1954


Unnamed: 0,exact,within_5,within_25,within_50,valid
pass_like,0.218868,0.625786,0.781132,0.86478,0.943396
set_piece,0.581197,0.880342,0.91453,0.931624,0.991453
incoming,0.053571,0.380952,0.636905,0.791667,0.952381
minor,0.100386,0.316602,0.53668,0.625483,0.687259
event_start,0.21134,0.582943,0.747423,0.833646,0.915651


### Ablation study on the synchronization order

##### (1) Synchronizing ALL events before detecting receptions

In [None]:
# An empty "post_sync_types" list leaves no event type for the post-sync step.
args = {"fps": 25, "post_sync_types": []}

for game_id in game_ids:
    print(f"\n{game_id}")

    game_data = data_dict[game_id]
    input_traces: pd.DataFrame = game_data["traces"]
    annotated: pd.DataFrame = game_data["annotated_events"]

    # Annotated events without false positives, limited to the input-event
    # columns and re-indexed from 0.
    is_true_event = annotated["error_type"] != "false_positive"
    input_events: pd.DataFrame = annotated.loc[is_true_event, game_data["events"].columns].reset_index(drop=True)

    syncer = elastic.ELASTIC(input_events, input_traces, args)
    syncer.run()

    # Overwrites any earlier "synced_events" entry for this game.
    game_data["synced_events"] = syncer.events


8qhuektrx8cmkxs11lxsdd4pg


Syncing major events in period 1: 100%|██████████| 760/760 [00:23<00:00, 32.76it/s]
Detecting receiving events: 100%|██████████| 588/588 [00:08<00:00, 73.07it/s]



8t53c07vfe5vmg9jm0w7pq2vo


Syncing major events in period 1: 100%|██████████| 637/637 [00:19<00:00, 32.20it/s]
Detecting receiving events: 100%|██████████| 476/476 [00:06<00:00, 72.66it/s]



9gewka7f25bz12mqrfm7ygjro


Syncing major events in period 1: 100%|██████████| 734/734 [00:21<00:00, 34.68it/s] 
Detecting receiving events: 100%|██████████| 540/540 [00:07<00:00, 72.55it/s]


In [24]:
# Pool the per-game results so that accuracy is computed over all games at once.
synced_per_game = []
annotated_per_game = []

for game_id, data in data_dict.items():
    game_events = data["synced_events"]
    game_events["game_id"] = game_id
    synced_per_game.append(game_events)
    # The aligned ground truth is stored under "annotated_events"; no
    # "corrected_events" entry is ever written, so that key raised KeyError.
    annotated_per_game.append(data["annotated_events"])

# ignore_index gives both frames a fresh 0..n-1 index, which calc_accuracy relies on.
synced = pd.concat(synced_per_game, ignore_index=True)
annotated = pd.concat(annotated_per_game, ignore_index=True)

acc_counts, acc_rates = calc_accuracy(synced, annotated)
display(acc_counts)
display(acc_rates)

Unnamed: 0,total,mean_diff,exact,within_5,within_25,within_50,valid
pass_like,1590,0.569993,1434,1469,1484,1488,1493
set_piece,117,0.126126,105,110,111,111,111
incoming,168,7.455696,133,137,140,149,158
minor,259,48.908367,82,142,177,223,251
event_start,2134,7.113264,1754,1858,1912,1971,2013
event_end,1707,1.678549,1488,1543,1565,1580,1599
total,3841,4.426712,3242,3401,3477,3551,3612


Unnamed: 0,exact,within_5,within_25,within_50,valid
pass_like,0.901887,0.923899,0.933333,0.935849,0.938994
set_piece,0.897436,0.940171,0.948718,0.948718,0.948718
incoming,0.791667,0.815476,0.833333,0.886905,0.940476
minor,0.316602,0.548263,0.683398,0.861004,0.969112
event_start,0.821931,0.870665,0.89597,0.923618,0.943299
event_end,0.871705,0.903925,0.916813,0.9256,0.936731
total,0.844051,0.885446,0.905233,0.924499,0.94038


##### (2) Synchronizing only PASS-LIKE and SET-PIECE events before detecting receptions

In [None]:
# Defer both MINOR and INCOMING event types to the post-sync step.
args = {"fps": 25, "post_sync_types": config.MINOR + config.INCOMING}

for game_id in game_ids:
    print(f"\n{game_id}")

    game_data = data_dict[game_id]
    input_traces: pd.DataFrame = game_data["traces"]
    annotated: pd.DataFrame = game_data["annotated_events"]

    # Annotated events without false positives, limited to the input-event
    # columns and re-indexed from 0.
    is_true_event = annotated["error_type"] != "false_positive"
    input_events: pd.DataFrame = annotated.loc[is_true_event, game_data["events"].columns].reset_index(drop=True)

    syncer = elastic.ELASTIC(input_events, input_traces, args)
    syncer.run()

    # Overwrites any earlier "synced_events" entry for this game.
    game_data["synced_events"] = syncer.events


8qhuektrx8cmkxs11lxsdd4pg


Syncing major events in period 1: 100%|██████████| 607/607 [00:18<00:00, 33.57it/s]
Detecting receiving events: 100%|██████████| 600/600 [00:08<00:00, 71.86it/s]
Post-syncing minor events: 100%|██████████| 153/153 [00:02<00:00, 52.52it/s]



8t53c07vfe5vmg9jm0w7pq2vo


Syncing major events in period 1: 100%|██████████| 492/492 [00:13<00:00, 35.25it/s]
Detecting receiving events: 100%|██████████| 488/488 [00:06<00:00, 72.74it/s]
Post-syncing minor events: 100%|██████████| 145/145 [00:02<00:00, 56.24it/s]



9gewka7f25bz12mqrfm7ygjro


Syncing major events in period 1: 100%|██████████| 605/605 [00:17<00:00, 33.78it/s]
Detecting receiving events: 100%|██████████| 600/600 [00:08<00:00, 71.03it/s]
Post-syncing minor events: 100%|██████████| 129/129 [00:02<00:00, 50.03it/s]


In [28]:
# Pool the per-game results so that accuracy is computed over all games at once.
synced_per_game = []
annotated_per_game = []

for game_id, data in data_dict.items():
    game_events = data["synced_events"]
    game_events["game_id"] = game_id
    synced_per_game.append(game_events)
    # The aligned ground truth is stored under "annotated_events"; no
    # "corrected_events" entry is ever written, so that key raised KeyError.
    annotated_per_game.append(data["annotated_events"])

# ignore_index gives both frames a fresh 0..n-1 index, which calc_accuracy relies on.
synced = pd.concat(synced_per_game, ignore_index=True)
annotated = pd.concat(annotated_per_game, ignore_index=True)

acc_counts, acc_rates = calc_accuracy(synced, annotated)
display(acc_counts)
display(acc_rates)

Unnamed: 0,total,mean_diff,exact,within_5,within_25,within_50,valid
pass_like,1590,0.476493,1502,1550,1565,1570,1574
set_piece,117,0.122807,108,113,114,114,114
incoming,168,35.452632,24,34,50,67,95
minor,259,13.74477,108,160,185,221,239
event_start,2134,3.66815,1742,1857,1914,1972,2022
event_end,1707,10.079026,1483,1542,1576,1607,1683
total,3841,6.347305,3225,3399,3490,3579,3705


Unnamed: 0,exact,within_5,within_25,within_50,valid
pass_like,0.944654,0.974843,0.984277,0.987421,0.989937
set_piece,0.923077,0.965812,0.974359,0.974359,0.974359
incoming,0.142857,0.202381,0.297619,0.39881,0.565476
minor,0.416988,0.617761,0.714286,0.853282,0.92278
event_start,0.816307,0.870197,0.896907,0.924086,0.947516
event_end,0.868776,0.903339,0.923257,0.941418,0.98594
total,0.839625,0.884926,0.908618,0.931789,0.964593
