In [2]:
%load_ext autoreload
%autoreload 2

import os, json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from matplotlib import animation
%matplotlib inline

from dataset import SportsDataset
from datatools.trace_animator import TraceAnimator
from datatools.trace_helper import TraceHelper
from datatools.visualize_helper import VisualizeHelper
from datatools.nba_helper import NBADataHelper, NBADataAnimator
from datatools.nfl_helper import NFLDataHelper
from models import load_model
from models.utils import get_dataset_config, print_helper, reshape_tensor, sort_players

from models.graph_imputer.graph_imputer import BidirectionalGraphImputer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Model evaluation on test data

### Load model

In [3]:
# Which saved trial to evaluate, and the device the model is moved to.
device = "cuda:0"
trial = 1357
save_path = f"saved/{trial:03d}"

# Hyperparameters stored alongside the trial's checkpoints.
with open(f"{save_path}/params.json", "r") as f:
    params = json.load(f)

if params["model"] != "nrtsi":
    # Single "best" checkpoint: read it, build the model, then load the weights.
    state_dict = torch.load(
        f"{save_path}/model/{params['model']}_state_dict_best.pt",
        map_location=lambda storage, _: storage,
    )

    model = load_model(params["model"], params).to(device)
    model.load_state_dict(state_dict)
else:
    # NRTSI stores one checkpoint per gap size. The model is only constructed
    # here; the per-gap state dicts are kept in `gap_models`, keyed by gap.
    model = load_model(params["model"], params).to(device)

    gap_models = {
        gap: f"{save_path}/model/nrtsi_state_dict_best_gap_{gap}.pt" for gap in (1, 2, 4, 8, 16)
    }

    # Replace every path with the state dict read from it.
    for k in gap_models:
        gap_models[k] = torch.load(gap_models[k], map_location=lambda storage, _: storage)

In [4]:
# Mirror the trial's settings into notebook-level names read by later cells.
sports, model_type = params["dataset"], params["model"]
naive_baselines = True

# DBHP-only switches, exposed as notebook globals.
if model_type == "dbhp":
    deriv_accum = params["deriv_accum"]
    dynamic_hybrid = params["dynamic_hybrid"]

print(
    f"- Sports: {sports}",
    f"- Model type: {model_type}",
    f"- Compute stats for naive baselines: {naive_baselines}",
    sep="\n",
)

- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True


In [5]:
# Soccer (Metrica): fixed file list.
metrica_files = ["match1.csv", "match2.csv", "match3_valid.csv", "match3_test.csv"]
metrica_paths = [f"data/metrica_traces/{name}" for name in metrica_files]

# Basketball: every file in the folder, in sorted order.
nba_files = os.listdir("data/nba_traces")
nba_paths = sorted(f"data/nba_traces/{name}" for name in nba_files)

# American football: only the CSV files, in sorted order.
nfl_files = os.listdir("data/nfl_traces")
nfl_paths = sorted(f"data/nfl_traces/{name}" for name in nfl_files if name.endswith(".csv"))

# (helper class, test files) per dataset; any other dataset name falls back
# to the American-football entry.
dataset_table = {
    "soccer": (TraceHelper, metrica_paths[3:4]),
    "basketball": (NBADataHelper, nba_paths[90:]),
}
trace_helper, test_data_paths = dataset_table.get(sports, (NFLDataHelper, nfl_paths[0:1]))

print(f"Test data paths: {test_data_paths}")

Test data paths: ['data/metrica_traces/match3_test.csv']


### Function for testing a trial and printing performance statistics

In [69]:
def _resolve_test_data(sports):
    """Return ``(helper_class, test_csv_paths)`` for the dataset named ``sports``.

    Only the directory of the selected dataset is listed, so the other
    datasets do not have to be present on disk.
    """
    if sports == "soccer":
        metrica_files = ["match1.csv", "match2.csv", "match3_valid.csv", "match3_test.csv"]
        metrica_paths = [f"data/metrica_traces/{f}" for f in metrica_files]
        return TraceHelper, metrica_paths[3:4]

    if sports == "basketball":
        nba_paths = sorted(f"data/nba_traces/{f}" for f in os.listdir("data/nba_traces"))
        return NBADataHelper, nba_paths[90:]

    # Any other dataset name is treated as American football.
    nfl_paths = sorted(
        f"data/nfl_traces/{f}" for f in os.listdir("data/nfl_traces") if f.endswith(".csv")
    )
    return NFLDataHelper, nfl_paths[0:1]


def _safe_mean(total, count):
    """Return ``total / count`` rounded to 6 decimals, or NaN when ``count`` is zero."""
    if not count:
        return float("nan")
    return round(total / count, 6)


def print_stats(trial, model, params, sports="soccer", naive_baselines=True, gap_models=None):
    """Evaluate ``model`` on the test matches of its dataset and print a summary.

    Args:
        trial: Trial identifier; only used in the printed banner.
        model: Loaded model. For ``"dbhp"`` models, ``model.params`` decides
            which extra prediction keys are scored.
        params: Trial parameter dict. Reads ``"model"``, ``"window_size"``,
            ``"missing_pattern"`` and ``"dataset"``.
        sports: Fallback dataset name, used only when ``params`` has no
            ``"dataset"`` entry (``params["dataset"]`` always wins, as before).
        naive_baselines: Whether the ``linear``/``knn``/``ffill`` baselines are
            requested and reported.
        gap_models: Per-gap state dicts for NRTSI. When omitted, the
            notebook-level ``gap_models`` variable is used if it exists.

    Returns:
        ``pandas.DataFrame`` indexed by prediction key with the columns
        ``pe``, ``se``, ``sce`` and ``ple``.

    Raises:
        NameError: If the model is NRTSI and no ``gap_models`` is available.
    """
    sports = params.get("dataset", sports)
    model_type = params["model"]

    print(f"- Sports: {sports}")
    print(f"- Model type: {model_type}")
    print(f"- Compute stats for naive baselines: {naive_baselines}")

    trace_helper, test_data_paths = _resolve_test_data(sports)

    print(f"Test data paths: {test_data_paths}")
    print(f"\n---------- Trial {trial} ----------")

    # Prediction outputs that are scored for this run.
    pred_keys = ["pred"]
    if model_type == "dbhp":
        if model.params["deriv_accum"]:
            pred_keys += ["dap_f", "dap_b"]
        if model.params["dynamic_hybrid"]:
            pred_keys += ["hybrid_s", "hybrid_s2", "hybrid_d"]
    if naive_baselines:
        pred_keys += ["linear", "knn", "ffill"]

    metrics = ["pe", "se", "sce", "ple"]
    stat_keys = ["total_frames", "missing_frames"]
    stat_keys += [f"{k}_{m}" for k in pred_keys for m in metrics]
    stats = {k: 0 for k in stat_keys}

    if model_type == "nrtsi" and gap_models is None:
        # Backward compatible with the old behaviour of reading the notebook
        # global, but fails early with an actionable message.
        gap_models = globals().get("gap_models")
        if gap_models is None:
            raise NameError(
                "gap_models is required for NRTSI: pass gap_models=... or run the model-loading cell first"
            )

    for path in test_data_paths:
        print()
        print(f"{path}:")
        match_traces = pd.read_csv(path, header=0, encoding="utf-8-sig")
        helper = trace_helper(traces=match_traces)

        if model_type == "nrtsi":
            _, match_stats = helper.predict(
                model, dataset_type=sports, naive_baselines=naive_baselines, gap_models=gap_models
            )
        elif sports != "soccer":
            # NOTE(review): this call passes `dataset=` and drops
            # `naive_baselines`, unlike the two other branches -- confirm
            # against the NBA/NFL helpers' `predict` signature.
            _, match_stats = helper.predict(model, dataset=sports)
        else:
            _, match_stats = helper.predict(model, dataset_type=sports, naive_baselines=naive_baselines)

        # Accumulate the raw per-match sums; they are normalised below.
        for k, v in match_stats.items():
            stats[k] += v

    n_players, _ = get_dataset_config(sports)
    n_missing = stats["missing_frames"]
    n_total = stats["total_frames"] * n_players
    stats_df = pd.DataFrame(index=pred_keys, columns=metrics)

    for k, v in stats.items():
        if k in ["total_frames", "missing_frames"]:
            continue

        # "<pred_key>_<metric>": split on the last underscore only, because
        # prediction keys such as "hybrid_s2" contain underscores themselves.
        pred_key, _sep, metric = k.rpartition("_")

        if metric in ["pe", "se"]:
            stats[k] = _safe_mean(v, n_missing)
        elif metric in ["sce", "ple"]:
            stats[k] = _safe_mean(v, n_total)

        stats_df.at[pred_key, metric] = stats[k]

    missing_rate = n_missing / n_total if n_total else float("nan")

    print()
    print(f"Window size: {params['window_size']}")
    print(f"Missing pattern: {params['missing_pattern']}")
    print(f"Missing rate: {missing_rate:.4f}")

    # "linear" only exists when the naive baselines were computed; selecting
    # an absent label with .loc would raise KeyError.
    shown_keys = [k for k in ["pred", "linear"] if k in stats_df.index]
    print(stats_df.loc[shown_keys, "pe"])

    return stats_df

In [70]:
# def print_stats(trial, model, params, sports="soccer", naive_baselines=True):
#     print(f"\n---------- Trial {trial} ----------")

#     pred_keys = ["pred"]
#     if model_type == "dbhp":
#         if model.params["deriv_accum"]:
#             pred_keys += ["dap_f", "dap_b"]
#         if model.params["dynamic_hybrid"]:
#             pred_keys += ["hybrid_s", "hybrid_s2", "hybrid_d"]
#     if naive_baselines:
#         pred_keys += ["linear", "knn", "ffill"]

#     stat_keys = ["total_frames", "missing_frames"]
#     stat_keys += [f"{k}_{m}" for k in pred_keys for m in ["pe", "se", "sce", "ple"]]
#     stats = {k: 0 for k in stat_keys}

#     for path in test_data_paths:
#         print()
#         print(f"{path}:")
#         match_traces = pd.read_csv(path, header=0, encoding="utf-8-sig")
#         helper = trace_helper(traces=match_traces)

#         if params["model"] == "nrtsi":
#             match_ret, match_stats = helper.predict(
#                 model, dataset_type=sports, naive_baselines=naive_baselines, gap_models=gap_models
#             )
#         else:
#             match_ret, match_stats = helper.predict(model, dataset_type=sports, naive_baselines=naive_baselines)

#         for k, v in match_stats.items():
#             stats[k] += v

#     # print("Total Performance:")
#     # print_helper(ret, pred_keys, trial=trial, save_txt=True)

#     # torch.save(helper, f"{save_path}/helper")
#     # torch.save(ret, f"{save_path}/df_dict")

#     n_players, _ = get_dataset_config(sports)
#     stats_df = pd.DataFrame(index=pred_keys, columns=["pe", "se", "sce", "ple"])

#     for k, v in stats.items():
#         if k in ["total_frames", "missing_frames"]:
#             continue
        
#         pred_key = "_".join(k.split("_")[:-1])
#         metric = k.split("_")[-1]

#         if metric in ["pe", "se"]:
#             stats[k] = round(v / stats["missing_frames"], 6)
            
#         elif metric in ["sce", "ple"]:
#             stats[k] = round(v / (stats["total_frames"] * n_players), 6)

#         stats_df.at[pred_key, metric] = stats[k]

#     # print(f"Total frames: {stats['total_frames'] * n_players}")
#     # print(f"Missing frames: {stats['missing_frames']}")
#     print()
#     print(f"Window size: {params['window_size']}")
#     print(f"Missing pattern: {params['missing_pattern']}")
#     print(f"Missing rate: {stats['missing_frames'] / (stats['total_frames'] * n_players):.4f}")
#     # print(stats_df.loc[["pred", "dap_f", "dap_b", "hybrid_s2", "hybrid_d", "linear"], "pe"])
#     print(stats_df.loc[["pred", "linear"], "pe"])

#     return stats_df

### Ablation study on Set Transformer architecture

In [71]:
# Evaluation-time overrides of the stored trial settings.
params.update(window_size=200, missing_pattern="camera", missing_rate=0.5)

In [72]:
# Prediction outputs to score: the model itself, the DBHP-specific outputs
# when enabled, and the naive baselines when requested.
pred_keys = ["pred"]
if model_type == "dbhp":
    if model.params["deriv_accum"]:
        pred_keys.extend(["dap_f", "dap_b"])
    if model.params["dynamic_hybrid"]:
        pred_keys.extend(["hybrid_s", "hybrid_s2", "hybrid_d"])
if naive_baselines:
    pred_keys.extend(["linear", "knn", "ffill"])

# One zero-initialised accumulator per (prediction key, metric) pair, plus the
# two frame counters.
stat_keys = ["total_frames", "missing_frames"] + [
    f"{key}_{metric}" for key in pred_keys for metric in ("pe", "se", "sce", "ple")
]
stats = dict.fromkeys(stat_keys, 0)

In [None]:
# Read each test match and wrap it in the dataset's helper class; after the
# loop `match_traces` and `helper` refer to the last match.
for path in test_data_paths:
    print(f"\n{path}:")
    match_traces = pd.read_csv(path, encoding="utf-8-sig", header=0)
    helper = trace_helper(traces=match_traces)


In [75]:
device = "cuda:0"
# trial_ids = [310, 320, 330, 331, 332, 220, 341, 342]
# trial_ids = [i for i in range(1358, 1364)]
trial_ids = [1357, 1360, 1361]
# mode = ['camera', 'playerwise', 'playerwise', 'playerwise', 'uniform', 'uniform', 'uniform']
mode = ['camera', 'playerwise', 'uniform']  # missing pattern per trial, parallel to trial_ids

# Players per team for each dataset name.
team_size_dict = {"soccer": 11, "basketball": 5, "football": 3}

for idx, trial in enumerate(trial_ids):
    save_path = f"saved/{trial:03d}"

    with open(f"{save_path}/params.json", "r") as f:
        params = json.load(f)

    checkpoint_path = f"{save_path}/model/{params['model']}_state_dict_best.pt"
    state_dict = torch.load(checkpoint_path, map_location=lambda storage, _: storage)

    model = load_model(params["model"], params).to(device)
    model.load_state_dict(state_dict)

    # Override the evaluation settings with this trial's missing pattern.
    pattern = mode[idx]
    model.params.update(
        {
            "team_size": team_size_dict[model.dataset],
            "window_size": 200,
            "missing_pattern": pattern,
            "missing_rate": 0.5,
        }
    )
    model.missing_mode = pattern

    print_stats(trial, model, params)

- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True
Test data paths: ['data/metrica_traces/match3_test.csv']

---------- Trial 1357 ----------

data/metrica_traces/match3_test.csv:


  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 2: 100%|████████████████████████| 10/10 [00:12<00:00,  1.21s/it]
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 3: 100%|██████████████████████████| 2/2 [00:01<00:00,  1.42it/s]
Phase 4: 0it [00:00, ?it/s]
Phase 5: 0it [00:00, ?it/s]
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 6: 100%|██████████████████████████| 1/1 [00:02<00:00,  2.39s/it]
  time_gap = torch.tens


Window size: 200
Missing pattern: camera
Missing rate: 0.5375
pred      20.772954
linear     3.166009
Name: pe, dtype: object
- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True
Test data paths: ['data/metrica_traces/match3_test.csv']

---------- Trial 1360 ----------

data/metrica_traces/match3_test.csv:


  time_gap = torch.tensor(time_gap, dtype = torch.float32).unsqueeze(0)
Phase 2:   0%|                                 | 0/10 [00:00<?, ?it/s]


RuntimeError: expand(torch.FloatTensor{[1, 1, 253, 132]}, size=[1, -1, -1]): the number of sizes provided (3) must be greater or equal to the number of dimensions in the tensor (4)

In [77]:
device = "cuda:0"
# trial_ids = [310, 320, 330, 331, 332, 220, 341, 342]
# trial_ids = [i for i in range(1358, 1364)]
trial_ids = [1360, 1361]
# mode = ['camera', 'playerwise', 'playerwise', 'playerwise', 'uniform', 'uniform', 'uniform']
mode = ['playerwise', 'uniform']  # missing pattern per trial, parallel to trial_ids

# Players per team for each dataset name.
team_size_dict = {"soccer": 11, "basketball": 5, "football": 3}

for idx, trial in enumerate(trial_ids):
    save_path = f"saved/{trial:03d}"

    with open(f"{save_path}/params.json", "r") as f:
        params = json.load(f)

    checkpoint_path = f"{save_path}/model/{params['model']}_state_dict_best.pt"
    state_dict = torch.load(checkpoint_path, map_location=lambda storage, _: storage)

    model = load_model(params["model"], params).to(device)
    model.load_state_dict(state_dict)

    # Override the evaluation settings with this trial's missing pattern.
    pattern = mode[idx]
    model.params.update(
        {
            "team_size": team_size_dict[model.dataset],
            "window_size": 200,
            "missing_pattern": pattern,
            "missing_rate": 0.5,
        }
    )
    model.missing_mode = pattern

    print_stats(trial, model, params)

- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True
Test data paths: ['data/metrica_traces/match3_test.csv']

---------- Trial 1360 ----------

data/metrica_traces/match3_test.csv:


  
  
  
  
  
  
  
  
  
Phase 2: 100%|████████████████████████| 10/10 [00:13<00:00,  1.32s/it]
  
  
Phase 3: 100%|██████████████████████████| 2/2 [00:01<00:00,  1.47it/s]
Phase 4: 0it [00:00, ?it/s]
Phase 5: 0it [00:00, ?it/s]
  
Phase 6: 100%|██████████████████████████| 1/1 [00:02<00:00,  2.30s/it]
  
  
Phase 7: 100%|██████████████████████████| 2/2 [00:02<00:00,  1.06s/it]
  
  
  
Phase 8: 100%|██████████████████████████| 3/3 [00:03<00:00,  1.32s/it]
  
  
  
  
Phase 9: 100%|██████████████████████████| 4/4 [00:06<00:00,  1.65s/it]
  
  
  
  
  
  
Phase 10: 100%|█████████████████████████| 6/6 [00:05<00:00,  1.14it/s]
  
  
  
  
  
  
  
Phase 11: 100%|█████████████████████████| 7/7 [00:07<00:00,  1.08s/it]



Window size: 200
Missing pattern: playerwise
Missing rate: 0.5000
pred      20.527509
linear     5.411107
Name: pe, dtype: object
- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True
Test data paths: ['data/metrica_traces/match3_test.csv']

---------- Trial 1361 ----------

data/metrica_traces/match3_test.csv:


Phase 2:   0%|                                 | 0/10 [00:00<?, ?it/s]


UnboundLocalError: local variable 'missing_len' referenced before assignment

In [78]:
device = "cuda:0"
# trial_ids = [310, 320, 330, 331, 332, 220, 341, 342]
# trial_ids = [i for i in range(1358, 1364)]
trial_ids = [1361]
# mode = ['camera', 'playerwise', 'playerwise', 'playerwise', 'uniform', 'uniform', 'uniform']
mode = ['uniform']  # missing pattern per trial, parallel to trial_ids

# Players per team for each dataset name.
team_size_dict = {"soccer": 11, "basketball": 5, "football": 3}

for idx, trial in enumerate(trial_ids):
    save_path = f"saved/{trial:03d}"

    with open(f"{save_path}/params.json", "r") as f:
        params = json.load(f)

    checkpoint_path = f"{save_path}/model/{params['model']}_state_dict_best.pt"
    state_dict = torch.load(checkpoint_path, map_location=lambda storage, _: storage)

    model = load_model(params["model"], params).to(device)
    model.load_state_dict(state_dict)

    # Override the evaluation settings with this trial's missing pattern.
    pattern = mode[idx]
    model.params.update(
        {
            "team_size": team_size_dict[model.dataset],
            "window_size": 200,
            "missing_pattern": pattern,
            "missing_rate": 0.5,
        }
    )
    model.missing_mode = pattern

    print_stats(trial, model, params)

- Sports: soccer
- Model type: latentode
- Compute stats for naive baselines: True
Test data paths: ['data/metrica_traces/match3_test.csv']

---------- Trial 1361 ----------

data/metrica_traces/match3_test.csv:


  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 2: 100%|████████████████████████| 10/10 [00:12<00:00,  1.24s/it]
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 3: 100%|██████████████████████████| 2/2 [00:01<00:00,  1.52it/s]
Phase 4: 0it [00:00, ?it/s]
Phase 5: 0it [00:00, ?it/s]
  time_gap = torch.tensor(time_gap, dtype = torch.float32)
Phase 6: 100%|██████████████████████████| 1/1 [00:02<00:00,  2.17s/it]
  time_gap = torch.tens


Window size: 200
Missing pattern: uniform
Missing rate: 0.4995
pred      20.710135
linear     4.172049
Name: pe, dtype: object





In [55]:
print(f"\n---------- Trial {trial} ----------")

# Prediction outputs to score for the currently loaded model.
pred_keys = ["pred"]
if model_type == "dbhp":
    if model.params["deriv_accum"]:
        pred_keys.extend(["dap_f", "dap_b"])
    if model.params["dynamic_hybrid"]:
        pred_keys.extend(["hybrid_s", "hybrid_s2", "hybrid_d"])
if naive_baselines:
    pred_keys.extend(["linear", "knn", "ffill"])

# Frame counters first, then one accumulator per prediction key and metric,
# all starting at zero.
stat_keys = ["total_frames", "missing_frames"] + [
    f"{key}_{metric}" for key in pred_keys for metric in ("pe", "se", "sce", "ple")
]
stats = dict.fromkeys(stat_keys, 0)


---------- Trial 1358 ----------


In [56]:
# Load only the first test match for step-by-step debugging.
path = test_data_paths[0]
print(f"\n{path}:")
match_traces = pd.read_csv(path, encoding="utf-8-sig", header=0)
helper = trace_helper(traces=match_traces)


data/metrica_traces/match3_test.csv:


In [57]:
# Player identifiers of both teams, team 1 first.
players = helper.team1_players + helper.team2_players

In [20]:
# Debugging scratch: match-level prediction setup unrolled at notebook level.
# NOTE(review): presumably mirrors the setup inside `TraceHelper.predict` --
# confirm against the helper before relying on it.
import random

model_type = model.params["model"]
random.seed(1000)  # fix Python's RNG seed

# Keep only the first `n_features` feature suffixes.
feature_types = ["_x", "_y", "_vx", "_vy", "_ax", "_ay"][: model.params["n_features"]]

players = helper.team1_players + helper.team2_players
# One column name per (player, feature suffix) pair.
player_cols = [f"{p}{x}" for p in players for x in feature_types]

# Prediction outputs to score for this model.
pred_keys = ["pred"]
if model_type == "dbhp":
    if model.params["deriv_accum"]:
        pred_keys += ["dap_f", "dap_b"]
    if model.params["dynamic_hybrid"]:
        pred_keys += ["hybrid_s", "hybrid_s2", "hybrid_d"]
if naive_baselines:
    pred_keys += ["linear", "knn", "ffill"]

# Frame counters plus one accumulator per (prediction key, metric) pair.
stat_keys = ["total_frames", "missing_frames"]
stat_keys += [f"{k}_{m}" for k in pred_keys for m in ["pe", "se", "sce", "ple"]]

stats = {k: 0 for k in stat_keys}

# initialize resulting DataFrames
# These copies are taken BEFORE the optional normalisation below, so they hold
# the coordinates as they were in `helper.traces` at this point.
ret = dict()
ret["target"] = helper.traces.copy(deep=True)
# Mask frame filled with -1 for every player column.
ret["mask"] = pd.DataFrame(-1, index=helper.traces.index, columns=player_cols)
for k in pred_keys:
    ret[k] = helper.traces.copy(deep=True)

# DBHP with dynamic hybrid: three weight columns per player, also filled with -1.
if model_type == "dbhp" and model.params["dynamic_hybrid"]:
    lambda_cols = [f"{p}{w}" for p in players for w in ["_w0", "_w1", "_w2"]]
    ret["lambdas"] = pd.DataFrame(-1, index=helper.traces.index, columns=lambda_cols)

x_cols = [c for c in helper.traces.columns if c.endswith("_x")]
y_cols = [c for c in helper.traces.columns if c.endswith("_y")]

# In-place rescaling of `helper.traces` by the pitch size; `pitch_size` is then
# set to (1, 1), so the helper object itself is mutated by this cell.
if model.params["normalize"]:
    helper.traces[x_cols] /= helper.pitch_size[0]
    helper.traces[y_cols] /= helper.pitch_size[1]
    helper.pitch_size = (1, 1)

# Nothing is accumulated across iterations: each pass overwrites the names
# below, and later cells read whatever the final passes left behind.
for phase in helper.traces["phase"].unique():
    phase_traces = helper.traces[helper.traces["phase"] == phase]

    phase_gks = SportsDataset.detect_goalkeepers(phase_traces)
    # Used below as column-name prefixes (presumably the team codes -- TODO confirm).
    team1_code, team2_code = phase_gks[0][0], phase_gks[1][0]

    # Drop player columns that contain any NaN within this phase.
    phase_player_cols = phase_traces[player_cols].dropna(axis=1).columns
    team1_cols = [c for c in phase_player_cols if c.startswith(team1_code)]
    team2_cols = [c for c in phase_player_cols if c.startswith(team2_code)]
    ball_cols = ["ball_x", "ball_y"]

    # reorder teams so that the left team comes first
    phase_player_cols = team1_cols + team2_cols

    # Skip phases in which either team has fewer columns than
    # n_features * team_size.
    if min(len(team1_cols), len(team2_cols)) < model.params["n_features"] * model.params["team_size"]:
        continue

    # Episode ids of this phase, keeping only the positive ones.
    episodes = [e for e in phase_traces["episode"].unique() if e > 0]


In [22]:
# Pick the first episode left over from the phase loop and turn it into tensors.
min_episode_size = 100
e = episodes[0]

ep_traces = phase_traces[phase_traces["episode"] == e]
if len(ep_traces) < min_episode_size:
    # Only a notice: the short episode is still converted below.
    print('error')

ep_player_traces, ep_ball_traces = (
    torch.FloatTensor(ep_traces[cols].values) for cols in (phase_player_cols, ball_cols)
)


In [None]:
# Keyword arguments for the episode-level prediction call.
predict_kwargs = dict(
    pred_keys=pred_keys,
    window_size=model.params["window_size"],
    min_window_size=min_episode_size,
    naive_baselines=naive_baselines,
    gap_models=gap_models,
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    ep_ret, ep_stats = TraceHelper.predict_episode(
        model, dataset_type, ep_player_traces, ep_ball_traces, **predict_kwargs
    )

In [26]:
# Names used by the unrolled episode-level prediction steps in the cells below.
dataset_type = sports
window_size = model.params["window_size"]
min_window_size = min_episode_size

model_type = model.params["model"]
device = next(model.parameters()).device

player_traces = ep_player_traces.unsqueeze(0).to(device)  # [1, time, x]
# target_traces = input_traces.clone()
if dataset_type == "soccer":
    ball_traces = ep_ball_traces.unsqueeze(0).to(device)  # [1, time, 2]

# American football uses one team's worth of players, every other dataset two.
n_teams = 1 if dataset_type == "afootball" else 2
out_dim = n_teams * model.params["team_size"] * model.params["n_features"]
out_xy_dim = n_teams * model.params["team_size"] * 2

seq_len = player_traces.shape[1]

# Episode-wide result buffers; the mask starts at -1 everywhere.
ret = {
    "input": torch.zeros(seq_len, out_dim),
    "target": torch.zeros(seq_len, out_dim),
    "mask": -torch.ones(seq_len, out_dim),
}

# "pred" carries every feature, the other outputs only two values per player.
for k in pred_keys:
    ret[k] = torch.zeros(seq_len, out_dim if k == "pred" else out_xy_dim)

# Three weights per player for DBHP's dynamic hybrid output.
if model_type == "dbhp" and model.params["dynamic_hybrid"]:
    ret["lambdas"] = torch.zeros(seq_len, n_teams * model.params["team_size"] * 3)

# if model.params["missing_pattern"] == "camera":
#     n_windows = 1

# Full windows, plus one more when the remainder is at least `min_window_size`.
n_windows, remainder = divmod(seq_len, window_size)
if remainder >= min_window_size:
    n_windows += 1


In [29]:
# Slice out window `i`; the last window runs to the end of the episode.
i = 0
i_from = window_size * i
i_to = player_traces.shape[1] if i >= n_windows - 1 else window_size * (i + 1)

window_player_traces = player_traces[:, i_from:i_to]

# Soccer additionally feeds the ball traces (for the simulated camera view).
if dataset_type == "soccer":
    window_ball_traces = ball_traces[:, i_from:i_to]
    window_inputs = [window_player_traces, window_ball_traces]
else:
    window_inputs = [window_player_traces]


In [37]:
# Two inputs means soccer (players + ball); otherwise there is no ball input.
x = window_inputs[0]
ball = window_inputs[1] if len(window_inputs) == 2 else []

input_dict = {"target": x, "ball": ball}

In [38]:
# Fresh result dict for this window, batch dimensions, and `x` moved to the model's device.
ret = {}
bs, seq_len, feat_dim = x.shape
x = x.to(model.device)


In [39]:
from models.utils import *

In [40]:
n_players, _ = get_dataset_config(sports)
player_data = input_dict["target"]  # [bs, time, players * feats]

# Per-sequence count of frames whose first feature is not the padding value
# -100. `.cpu()` leaves a CPU tensor unchanged, so it covers both devices.
valid_frames = np.array(player_data.cpu()[..., 0] != -100).astype(int).sum(axis=-1)  # [bs]


In [41]:
# Work on CPU copies so the original window tensors stay untouched.
player_data = input_dict["target"].clone().cpu()
ball_data = input_dict["ball"].clone().cpu()

player_pos = reshape_tensor(player_data, upscale=True, dataset_type=sports)  # [bs, time, players, 2]
ball_pos = normalize_tensor(ball_data, mode="upscale", dataset_type=sports)

# 1 where a frame's first feature equals the padding value -100, else 0.
# `player_data` was moved to the CPU above, so the former `is_cuda` branch
# could never run and has been removed.
is_pad = np.array(player_data[..., :1] == -100).astype(int)


In [42]:
# Derive a per-player mask from the ball position.
# NOTE(review): presumably `compute_camera_coverage` returns the corners of a
# simulated camera view and `is_inside` tests each player against it --
# confirm in models.utils.
camera_vertices = compute_camera_coverage(ball_pos)
mask = is_inside(camera_vertices, player_pos)  # [bs, time, players]
# Padded frames (is_pad == 1) are forced to 1; everywhere else the mask value
# is kept. A later cell counts `1 - mask` as missing, so 1 appears to mean
# "observed".
mask = (1 - is_pad) * mask + is_pad


In [43]:
# Set the mask to 1 for the first five frames of every sequence ...
mask[:, :5, :] = 1
# ... and from five frames before the end of each sequence's valid part onwards.
for i in range(mask.shape[0]):
    tail_start = valid_frames[i] - 5
    mask[i, tail_start:] = 1

# Share of masked-out (mask == 0) entries among the non-padded frames.
observed = 1 - is_pad
missing_rate = (observed * (1 - mask)).sum() / (observed.sum() * n_players)


In [44]:
# Inspect the mask dimensions.
mask.shape

(1, 200, 22)

In [45]:
# Inspect the missing pattern currently set on the model.
model.missing_mode

'camera'

In [48]:
# Single-window forward pass in test mode. This is inference only, so gradient
# tracking is disabled, matching the `torch.no_grad()` block used for
# `TraceHelper.predict_episode` earlier in the notebook.
with torch.no_grad():
    window_ret = model.forward(window_inputs, mode="test", device=device)

torch.Size([1, 200, 132]) torch.Size([1, 200, 2])


  time_gap = torch.tensor(time_gap, dtype = torch.float32)


In [33]:
def get_dataset_config(dataset):
    """Return the player count and pitch size for a dataset.

    Note: defining this in the notebook shadows the `get_dataset_config`
    imported from `models.utils` in the first cell.

    Args:
        dataset: One of "soccer", "basketball" or "afootball".

    Returns:
        players: number of total players contained in each dataset
        ps: (width, height) pitch size

    Raises:
        ValueError: If `dataset` is not one of the supported names. The
            original fell through and failed with an UnboundLocalError.
    """
    if dataset == "soccer":
        players = 22
        ps = (108, 72)
    elif dataset == "basketball":
        players = 10
        ps = (28.65, 15.24)
    elif dataset == "afootball":
        players = 6
        ps = (110, 49)
        # ps = (1, 1)
    else:
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected 'soccer', 'basketball' or 'afootball'"
        )

    return players, ps


In [None]:
# Score the currently loaded match and add its raw error sums to `stats`.
# `stats` must be freshly zero-initialised before running this cell: the loop
# below overwrites the sums with means, so a second run would mix both.
if params["model"] == "nrtsi":
    match_ret, match_stats = helper.predict(
        model, dataset_type=sports, naive_baselines=naive_baselines, gap_models=gap_models
    )
else:
    match_ret, match_stats = helper.predict(model, dataset_type=sports, naive_baselines=naive_baselines)

for k, v in match_stats.items():
    stats[k] += v

# print("Total Performance:")
# print_helper(ret, pred_keys, trial=trial, save_txt=True)

# torch.save(helper, f"{save_path}/helper")
# torch.save(ret, f"{save_path}/df_dict")

n_players, _ = get_dataset_config(sports)
stats_df = pd.DataFrame(index=pred_keys, columns=["pe", "se", "sce", "ple"])

# The body of this loop had lost its indentation, which made the whole cell an
# IndentationError; it is restored here.
for k, v in stats.items():
    if k in ["total_frames", "missing_frames"]:
        continue

    pred_key = "_".join(k.split("_")[:-1])
    metric = k.split("_")[-1]

    # "pe"/"se" are averaged over the missing frames, "sce"/"ple" over all
    # player-frames.
    if metric in ["pe", "se"]:
        stats[k] = round(v / stats["missing_frames"], 6)
    elif metric in ["sce", "ple"]:
        stats[k] = round(v / (stats["total_frames"] * n_players), 6)

    stats_df.at[pred_key, metric] = stats[k]

# print(f"Total frames: {stats['total_frames'] * n_players}")
# print(f"Missing frames: {stats['missing_frames']}")
print()
print(f"Window size: {params['window_size']}")
print(f"Missing pattern: {params['missing_pattern']}")
print(f"Missing rate: {stats['missing_frames'] / (stats['total_frames'] * n_players):.4f}")

# Only report rows that exist for this model; selecting an absent label with
# .loc raises KeyError (the DBHP rows are missing for every other model).
report_keys = [
    key for key in ["pred", "dap_f", "dap_b", "hybrid_s2", "hybrid_d", "linear"] if key in stats_df.index
]
print(stats_df.loc[report_keys, "pe"])



### Ablation study on window size and missing rate

In [64]:
device = "cuda:0"
# trial_ids = np.sort([int(i) for i in os.listdir("saved") if int(i) >= 200 and int(i) < 250])
trial_ids = [205]

# Load each trial's best checkpoint and report its test statistics.
for trial in trial_ids:
    save_path = f"saved/{trial:03d}"

    with open(f"{save_path}/params.json", "r") as f:
        params = json.load(f)

    checkpoint_path = f"{save_path}/model/{params['model']}_state_dict_best.pt"
    state_dict = torch.load(checkpoint_path, map_location=lambda storage, _: storage)

    model = load_model(params["model"], params).to(device)
    model.load_state_dict(state_dict)

    print_stats(trial, model, params)


---------- Trial 205 ----------

data/metrica_traces/match3_test.csv:


Phase 2: 100%|██████████| 10/10 [00:05<00:00,  1.71it/s]
Phase 3: 100%|██████████| 2/2 [00:00<00:00,  2.79it/s]
Phase 4: 0it [00:00, ?it/s]
Phase 5: 0it [00:00, ?it/s]
Phase 6: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
Phase 7: 100%|██████████| 2/2 [00:00<00:00,  2.07it/s]
Phase 8: 100%|██████████| 3/3 [00:01<00:00,  1.65it/s]
Phase 9: 100%|██████████| 4/4 [00:03<00:00,  1.12it/s]
Phase 10: 100%|██████████| 6/6 [00:02<00:00,  2.06it/s]
Phase 11: 100%|██████████| 7/7 [00:04<00:00,  1.66it/s]


Window size: 50
Missing pattern: playerwise
Missing rate: 0.9000
pred         5.267954
dap_f        1.723606
dap_b        1.748702
hybrid_s2     0.54647
hybrid_d     0.477616
linear       1.501628
Name: pe, dtype: object





In [18]:
# Debugging aid: `self` aliases the helper so code can be pasted from its methods.
self = helper
episode_mask = helper.traces["episode"] == 36
ep_traces = self.traces[episode_mask]

feature_types = ["_x", "_y", "_vx", "_vy", "_ax", "_ay"]
players = self.team1_players + self.team2_players
player_cols = [f"{player}{suffix}" for player in players for suffix in feature_types]

phase_gks = SportsDataset.detect_goalkeepers(ep_traces)
team1_code, team2_code = phase_gks[0][0], phase_gks[1][0]

# Keep the player columns without NaNs in this episode, grouped by prefix.
ep_player_cols = ep_traces[player_cols].dropna(axis=1).columns
team1_cols = [col for col in ep_player_cols if col.startswith(team1_code)]
team2_cols = [col for col in ep_player_cols if col.startswith(team2_code)]
ball_cols = ["ball_x", "ball_y"]

ep_player_cols = team1_cols + team2_cols
ep_player_traces = torch.FloatTensor(ep_traces[ep_player_cols].values).unsqueeze(0)
ep_player_traces.shape

torch.Size([1, 253, 132])

In [20]:
# Split the flat feature axis into 22 players.
bs, seq_len = ep_player_traces.shape[:2]
tensor = ep_player_traces.reshape(bs, seq_len, 22, -1)

# First two features of every player, kept as separate trailing axes.
x, y = tensor[..., 0:1], tensor[..., 1:2]  # each [bs, time, players, 1]
xy = torch.cat((x, y), dim=-1)  # [bs, time, players, 2]

x_plus_y = xy.sum(dim=-1)  # [bs, time, players]

sort_idxs = torch.zeros(bs, n_players, dtype=int)
sorted_tensor = tensor.clone()

# x + y of every player in the first frame of the first sequence.
x_plus_y[0, 0]

tensor([0.6058, 1.0526, 0.8447, 0.5932, 1.2615, 0.9120, 0.8317, 0.9120, 1.1420,
        0.7295, 0.7110, 1.4746, 1.0250, 1.2611, 1.5325, 0.9509, 1.1192, 1.3261,
        1.2470, 0.8331, 0.9942, 1.0473])

## Performance analysis

##### (1) Get Main model results

In [None]:
# Reload the cached evaluation artefacts of the main trial, if they exist.
# NOTE: torch.load unpickles arbitrary objects -- only load files you trust.
trial = 3003
save_path = f"saved/{trial:03d}"
if os.path.isfile(f"{save_path}/df_dict"):
    helper = torch.load(f"{save_path}/helper")
    df_dict = torch.load(f"{save_path}/df_dict")
    with open(f"{save_path}/params.json", "r") as f:
        params = json.load(f)

##### (2) Add baseline model results

In [None]:
# trial_dict = {4000 : "brits", 5000 : "naomi", 214 : "nrtsi"} # Metrica
# trial_dict = {4000 : "brits", 5000 : "naomi", 214 : "nrtsi"} # Metrica
trial_dict = {4003 : "brits", 5001 : "naomi", 6001 : "nrtsi", 9996 : "graphimputer"} # NBA

# Copy each baseline's "pred" frame into the main `df_dict` as "<name>_df".
# The loop uses its own variable names so that it no longer overwrites the
# notebook-level `model_type` and `save_path` of the main trial.
# NOTE: torch.load unpickles arbitrary objects -- only load files you trust.
for baseline_trial, baseline_name in trial_dict.items():
    baseline_path = f"saved/{baseline_trial:03d}"
    # Baselines without cached results are skipped.
    if os.path.isfile(baseline_path + "/df_dict"):
        baseline_df_dict = torch.load(baseline_path + "/df_dict")
        df_dict[f"{baseline_name}_df"] = baseline_df_dict["pred"]

In [None]:
# List the result frames currently available.
df_dict.keys()

### Animation

##### (1) Soccer Animator

In [None]:
# Distinct episode ids present in the loaded traces.
helper.traces["episode"].unique()

In [None]:
# Row range to animate.
i0, i1 = 479, 873
frames = slice(i0, i1)

animator = TraceAnimator(
    trace_dict={"main": df_dict["target"][frames], "pred": df_dict["dbhp_df"][frames]},
    mask=df_dict["mask"][frames],
    show_episodes=True,
    show_events=False,
    show_frames=False,
    show_polygon=True,
    annot_cols=None,
)
anim = animator.run()

# Render the animation to an MP4 at 10 fps through ffmpeg.
path = f"animations/trial_{trial}.mp4"
writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)

##### (2) Basketball Animator

In [None]:
# Row range to animate.
i0, i1 = 326, 737
frames = slice(i0, i1)

animator = NBADataAnimator(
    trace_dict={"main": df_dict["target"][frames], "pred": df_dict["dbhp_df"][frames]},
    show_episodes=True,
    show_frames=True,
    masks=df_dict["mask"][frames],
)
anim = animator.run()

# Render the animation to an MP4 at 10 fps through ffmpeg.
path = f"animations/trial_{trial}.mp4"
writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)

### Plotting

In [None]:
# Dataset of the loaded trial and the kind of figure to draw.
sports = params["dataset"]
plot_mode = "imputed_traj" # "imputed_traj", "dist_heatmap", "weights_heatmap"

visualizer = VisualizeHelper(trial, df_dict, plot_mode, dataset=sports, helper=helper)
visualizer.valid_episodes()

In [None]:
# Draw the figure for the episode at index 0, then close the current figure.
visualizer.plot_run(epi_idx=0)
plt.close()