In [None]:
import os

# Remember the directory the kernel was in the first time this cell ran.
# Resolving the parent from that fixed anchor (rather than from the *current*
# working directory) keeps the cell idempotent: re-running it no longer
# climbs one more level up the tree each time.
if "_launch_dir" not in globals():
    _launch_dir = os.getcwd()

# Later cells use paths relative to the parent directory (e.g. "./data/..."),
# so make the parent the working directory.
wd = os.path.normpath(_launch_dir + '/..')
os.chdir(wd)
os.getcwd()

In [None]:
%load_ext autoreload
%autoreload 2

import torch.nn.functional as F
import matplotlib.pyplot as plt
import os

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

from nfl_helper import NFLDataHelper
from tqdm import tqdm

### NFL data EDA

In [None]:
# Week-1 CSV of the NFL Big Data Bowl 2021 dataset, read relative to the cwd.
week1_csv = "./data/nfl-big-data-bowl-2021/week1.csv"
data = pd.read_csv(week1_csv)
data

In [None]:
# Print "<max> <min>" for each coordinate column: x first, then y.
for coord in ("x", "y"):
    print(data[coord].max(), data[coord].min())

### EDA of the processed data (from the NRTSI paper)

In [None]:
# Load the pre-processed train / test trace arrays from disk.
train_path = "./data/nfl_traces/nfl_train.npy"
test_path = "./data/nfl_traces/nfl_test.npy"
train_data, test_data = np.load(train_path), np.load(test_path)

print(train_data.shape, test_data.shape)

In [None]:
# Collapse all leading axes so that every row holds 12 values.
n_features = 12
data_ = np.reshape(train_data, (-1, n_features))
data_.shape

In [None]:
# Sizes of the first two axes of the training array, reported below as
# batch size and sequence length.
bs, seq_len = train_data.shape[:2]

print(f"Batch size : {bs}")
print(f"Sequence lenght : {seq_len}")

In [None]:
# Split the last axis: the first six entries are treated as x values and the
# rest as y values. Basic slicing returns views onto `train_data`, not copies.
x_data, y_data = train_data[..., :6], train_data[..., 6:]

# Value range of each half.
print(f"x max : {x_data.max()}")
print(f"x min : {x_data.min()}")
print(f"y max : {y_data.max()}")
print(f"y min : {y_data.min()}")

##### (1) Unnormalize the data (from yards to meters)
* x : [0, 120] yards
* y : [0, 53.3] yards  
Pitch size (m) : [110, 49], since 120 yd ≈ 110 m and 53.3 yd ≈ 49 m

In [None]:
# Pitch dimensions in meters used to rescale the coordinates.
PITCH_LENGTH_M = 110
PITCH_WIDTH_M = 49

# Scaling the loaded array with `*=` made this cell non-idempotent (a second
# run multiplied the data again) and silently rescaled every view taken from
# it earlier (`data_`, `x_data`, `y_data`). Keep a pristine snapshot from the
# first run and always rebuild the scaled array from that snapshot.
if "train_data_norm" not in globals():
    train_data_norm = train_data.copy()

train_data = train_data_norm.copy()
train_data[..., :6] *= PITCH_LENGTH_M  # x columns
train_data[..., 6:] *= PITCH_WIDTH_M   # y columns

##### (2) Rearrange the x, y positions

In [None]:
# Give each coordinate half a trailing length-1 axis so the two halves can be
# joined along it, yielding one (x, y) pair per entry of the previous axis.
x_data_ = np.expand_dims(train_data[..., :6], axis=-1)
y_data_ = np.expand_dims(train_data[..., 6:], axis=-1)

xy_data = np.concatenate((x_data_, y_data_), axis=-1)
xy_data.shape

In [None]:
# Flatten the trailing axes of `xy_data` back into one feature axis, so the
# columns become interleaved pairs: x0, y0, x1, y1, ...
# NOTE: this rebinds `data`, which held the week-1 DataFrame in an earlier cell.
data = np.reshape(xy_data, (bs, seq_len, -1))
data.shape

##### (3) Plotting Trajectories

In [None]:
# Pick a single sequence (index 2 along the first axis) to visualise.
seq_index = 2
seq_data = data[seq_index]
seq_data.shape

In [None]:
def plot_nfl(target_data, n_players=None):
    """Plot the 2-D trajectory of every player in one sequence.

    Args:
        target_data: 2-D array indexed as ``[frame, column]`` whose columns
            are interleaved per player: ``x0, y0, x1, y1, ...``.
        n_players: Number of players to draw. Defaults to every complete
            (x, y) column pair in ``target_data`` (6 for a 12-column input).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    if n_players is None:
        n_players = target_data.shape[1] // 2

    # Create a fresh figure on every call. The previous ``plt.figure(0)``
    # re-used figure number 0, so repeated calls could stack new axes on a
    # figure that had already been drawn.
    fig, ax = plt.subplots()

    for k in range(n_players):
        xs, ys = target_data[:, 2 * k], target_data[:, 2 * k + 1]
        ax.scatter(xs, ys, color="b")  # individual frames
        ax.plot(xs, ys, color="m")     # path connecting consecutive frames

    ax.set_xlabel("x")
    ax.set_ylabel("y")
    plt.show()


plot_nfl(seq_data)

##### (4) Plotting the L2 distance between adjacent time steps

In [None]:
# Per-player displacement between consecutive frames of sequence 1.
# xy_data[1] is indexed (frame, player, coordinate): differencing along the
# frame axis and taking the norm over the coordinate axis gives one L2
# distance per (frame transition, player).
frame_diff = np.linalg.norm(np.diff(xy_data[1], axis=0), axis=-1)

n_players = frame_diff.shape[1]
cols = [f"player{p}" for p in range(n_players)]
frame_diff_df = pd.DataFrame(frame_diff, columns=cols)

# Rely on matplotlib's default colour cycle: the previous unseeded random hex
# colours changed on every run and could come out nearly invisible.
ax = frame_diff_df.plot(figsize=(12, 6), legend=True)
ax.set_title(f"Positional Distance for {n_players} Players")
ax.set_xlabel("Time")
ax.set_ylabel("Distance")

plt.tight_layout()
plt.show()

### Processing NFL data

In [None]:
# Point the project helper at the pre-processed test traces.
# NOTE(review): NFLDataHelper comes from nfl_helper; presumably it loads the
# .npy file on construction — confirm in that module.
data_path = "./data/nfl_traces/nfl_test.npy"
helper = NFLDataHelper(data_path=data_path)

In [None]:
# Run the helper's two processing steps in order, then display the result.
# Smoothing and outlier removal are both switched off for the feature step.
# NOTE(review): presumably reconstruct_df() populates `helper.traces` and
# calc_running_features() adds the velocity/speed/acceleration columns used
# later — confirm in nfl_helper.
helper.reconstruct_df()
helper.calc_running_features(smoothing=False, remove_outliers=False)
helper.traces

In [None]:
# Write the traces next to the source array, swapping the extension for .csv.
# Deriving the name from `data_path` keeps the output in sync with the input,
# so switching `data_path` to another split cannot overwrite the test CSV.
csv_path = os.path.splitext(data_path)[0] + ".csv"
helper.traces.to_csv(csv_path, index=False)

### Debug

In [None]:
# Alias the helper's trace table for the checks below and display it.
# This binds the same object; it is not a copy.
traces = helper.traces
traces

In [None]:
# Column-name suffixes, one per feature, in the order they appear per player.
feature_types = ["_x", "_y", "_vx", "_vy", "_speed", "_accel", "_ax", "_ay"]

# Player-major column names: all features of player0, then player1, ...
data_cols = [
    "player" + str(player_idx) + suffix
    for player_idx in range(6)
    for suffix in feature_types
]
len(data_cols)

In [None]:
# Keep only the episode id plus the per-player feature columns, then take
# the rows that belong to episode 1.
wanted_cols = ["episode", *data_cols]
traces = traces[wanted_cols]
episode_traces = traces.loc[traces["episode"] == 1]

In [None]:
# Build the feature matrix for episode 1 directly from `traces`. The previous
# `episode_traces = episode_traces[data_cols].values` overwrote its own input
# with an ndarray, so running the cell a second time raised an indexing error.
episode_traces = traces.loc[traces["episode"] == 1, data_cols].to_numpy()
episode_traces.shape

In [None]:
# `episode_traces` columns are player-major with 8 features per player, so a
# stride-8 slice starting at offset k selects feature k for every player.
# The trailing `None` adds a length-1 axis so x and y can be joined along it.
pos_x, pos_y = (torch.tensor(episode_traces[:, k::8, None]) for k in (0, 1))
pos_xy = torch.cat((pos_x, pos_y), dim=-1)

vel_x, vel_y = (torch.tensor(episode_traces[:, k::8, None]) for k in (2, 3))
vel_xy = torch.cat((vel_x, vel_y), dim=-1)

# Scalar features keep the plain (row, player) layout.
speed, accel = (torch.tensor(episode_traces[:, k::8]) for k in (4, 5))

print(pos_xy.shape, vel_xy.shape)

In [None]:
# Time step used to turn a velocity into a per-frame displacement.
# NOTE(review): 0.1 is presumably the frame interval in seconds — confirm.
dt = 0.1

# Positions reconstructed from velocities alone: start at the first observed
# position and accumulate the displacements vel * dt over time.
# zeros_like keeps pos_xy's shape and dtype; the previous torch.zeros((T, 6, 2))
# hard-coded six players and accumulated in float32, silently downcasting the
# float64 inputs.
vel_pos = torch.zeros_like(pos_xy)
vel_pos[0] = pos_xy[0]
vel_pos[1:] = vel_xy[:-1] * dt
vel_pos = vel_pos.cumsum(dim=0)
vel_pos

In [None]:
# Per-player Euclidean gap between the observed positions and the positions
# reconstructed from velocities. Frame 0 is skipped because `vel_pos` was
# seeded with pos_xy[0].
# torch.norm is deprecated; torch.linalg.vector_norm is its replacement for
# vector norms (default order 2).
pos_dist = torch.linalg.vector_norm(pos_xy[1:] - vel_pos[1:], dim=-1)
pos_dist_np = pos_dist.numpy()
pos_dist_np.shape

In [None]:
# One column per player, named after its position along the second axis.
n_players = pos_dist_np.shape[1]
cols = [f"player{p}" for p in range(n_players)]
pos_dist_df = pd.DataFrame(pos_dist_np, columns=cols)

# Rely on matplotlib's default colour cycle: the previous unseeded random hex
# colours changed on every run and could come out nearly invisible.
ax = pos_dist_df.plot(figsize=(12, 6), legend=True)
# The title is derived from the data; it used to claim "10 Players" although
# only six columns are plotted.
ax.set_title(f"Positional Distance for {n_players} Players")
ax.set_xlabel("Time")
ax.set_ylabel("Distance")

plt.tight_layout()
plt.show()