<h1>Exploring the StatsBomb data</h1>



In [None]:
import os

# Change to the parent directory
# (presumably so the relative "data/..." root below and the later `utils` import
# resolve from the project root — confirm against the repo layout).
# NOTE(review): this is relative to the current working directory, so re-running
# the cell without restarting the kernel moves up one more level each time.
os.chdir('..')

import socceraction.spadl as spadl
from socceraction.data.statsbomb import StatsBombLoader 
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib.lines import Line2D
import warnings
# Install an "ignore" filter for all warnings for the rest of the session.
warnings.filterwarnings('ignore') 
# Loader that reads StatsBomb data from a local directory instead of a remote source.
api = StatsBombLoader(getter="local", root="data/open-data-master/data")


Grab a test event to check whether the actor's coordinates in the 360 freeze frame match the event's own location.

In [None]:
# Load the events of match 3788742, asking the loader to include the 360 data.
example_events = api.events(game_id=3788742,load_360=True)
# Preview rows at positions 10-19 to pick a test event from.
example_events[10:20]


Get the 360 freeze frame and the event location for a single event, then print the actor's coordinates from both sources.

In [None]:
import numpy as np

# One-row slice (position 11) so that each `.item()` call below yields a single value.
event = example_events[11:12]

# Freeze frame of the event; iterated below as a sequence of per-player dicts
# that carry at least the 'actor' and 'location' keys.
three_sixty_data = event.freeze_frame_360.item()
location = event.location.item()
team_id = event.team_id.item()

# Print the actor's freeze-frame position next to the event's own location so
# the two sources can be compared by eye.
for player in three_sixty_data:
    if player['actor'] == True:
        print(f"Player Loation (360 Data): {player['location']}")
        print(f"Player Loation (Reg Data): {location}")
        print(f"Player Team ID: {team_id}")
        print(f"Time: {event.timestamp.item()}")

# Last expression of the cell: display the event's type name.
event.type_name



From what I can see, the two data sources align correctly. Next, convert the coordinates to the SPADL representation.

In [None]:

FL, FW = 105.0, 68.0  # SPADL pitch length and width, in meters


def sb_to_spadl_xy(x, y, fidelity_version=None, assume_cell_center=False):
    """
    Map one StatsBomb (x, y) coordinate onto the SPADL pitch, in meters.

    StatsBomb x is rescaled from a 120-unit axis to FL and y from an 80-unit
    axis to FW; the y axis is inverted and both results are clipped to the
    pitch boundaries.

    - With `assume_cell_center=True`, half a cell is subtracted from x and y
      first (cell side 0.1 when `fidelity_version == 2`, otherwise 1.0).
      Use for old, integer-ish event coords.
    - For 360 freeze-frames, pass assume_cell_center=False (no center shift).

    Returns the tuple (x_m, y_m).
    """
    if assume_cell_center:
        half_cell = (0.1 if fidelity_version == 2 else 1.0) / 2.0
        x, y = x - half_cell, y - half_cell

    scaled_x = x / 120.0 * FL
    flipped_y = FW - (y / 80.0 * FW)  # StatsBomb y and SPADL y run in opposite directions
    return np.clip(scaled_x, 0, FL), np.clip(flipped_y, 0, FW)

def ltr_flip_if_away(x_m, y_m, is_away):
    """
    Mirror a SPADL point through the pitch centre when `is_away` is truthy.

    Both axes are reflected (x -> FL - x, y -> FW - y); otherwise the point
    is returned unchanged. Always returns an (x, y) tuple.
    """
    if not is_away:
        return (x_m, y_m)
    return (FL - x_m, FW - y_m)

# Example: transform a 360 freeze-frame list `ff` for the same LTR convention as SPADL
def transform_freeze_frame(ff, is_away, fidelity_version=None, assume_cell_center=True):
    """
    Return copies of the freeze-frame entries with a SPADL-meter location added.

    Parameters
    ----------
    ff : iterable of dict, None or NaN
        Freeze-frame entries; each needs a "location" key holding an (x, y)
        pair in StatsBomb coordinates. A missing freeze frame (None or a
        float NaN) yields an empty list.
    is_away : bool
        Forwarded to `ltr_flip_if_away`; when truthy the converted point is
        mirrored through the pitch centre.
    fidelity_version : int, optional
        Forwarded to `sb_to_spadl_xy`; only consulted when
        `assume_cell_center` is true.
    assume_cell_center : bool, default True
        Whether to apply the half-cell shift before rescaling. The default
        keeps the shift this function has always applied. Note that the
        `sb_to_spadl_xy` docstring recommends False for 360 freeze-frames,
        so pass False explicitly to follow that advice.
        NOTE(review): decide which of the two conventions is intended.

    Returns
    -------
    list of dict
        Shallow copies of the input entries, each with an extra
        "location_spadl" key set to [x_m, y_m]. The input dicts are not
        modified.
    """
    # A missing freeze frame would otherwise fail when iterated.
    if ff is None or (isinstance(ff, float) and ff != ff):
        return []

    out = []
    for p in ff:
        x, y = p["location"]
        xm, ym = sb_to_spadl_xy(x, y, fidelity_version, assume_cell_center=assume_cell_center)
        xm, ym = ltr_flip_if_away(xm, ym, is_away)
        q = dict(p)  # copy so the caller's freeze frame stays untouched
        q["location_spadl"] = [xm, ym]
        out.append(q)
    return out




In [None]:
# Convert the single selected event to SPADL actions; 776 is passed as the home team's id.
spadl_action = spadl.statsbomb.convert_to_actions(event, home_team_id=776)
spadl_action = (
  spadl
  .add_names(spadl_action)  # add actiontype and result names
  .merge(api.teams(game_id=3788742))  # add team names
  .merge(api.players(game_id=3788742))  # add player names
)
# NOTE(review): `_infer_xy_fidelity_versions` is a private socceraction helper;
# element [1] is presumably the fidelity version of non-shot events — confirm
# against the installed socceraction version.
fidelity = spadl.statsbomb._infer_xy_fidelity_versions(event)[1]

# Re-orient the action for left-to-right play before reading its coordinates.
spadl_action_l2r = spadl.play_left_to_right(spadl_action, home_team_id=776)

spadl_location = [spadl_action_l2r.start_x.item(), spadl_action_l2r.start_y.item()]
spadl_end_location = [spadl_action_l2r.end_x.item(), spadl_action_l2r.end_y.item()]

# The freeze frame is converted without a flip (is_away=False) and with the
# fidelity version inferred above.
transformed_threesixty = transform_freeze_frame(three_sixty_data,is_away=False,fidelity_version=fidelity)

# Compare the actor's converted freeze-frame position with the start location
# of the SPADL action (the second print shows the SPADL action's start point).
for player in transformed_threesixty:
    if player['actor'] == True:
        threesixty_location = player['location_spadl']
        print(f"Player Loation (Transformed 360 Data): {threesixty_location}")
        print(f"Player Loation (Spadl 360 Data): {spadl_location}")
        print(f"Player Team ID: {team_id}")
        print(f"Time (360 Data): {event.timestamp.item()}")
        print(f"Time (Spadl Data): {spadl_action.time_seconds.item()}")





In [None]:
# Rows at positions 5-10 of the event table, converted to SPADL actions with
# the same home team id (776) as in the single-event conversion.
event2 = example_events[5:11]
spadl_action2 = spadl.statsbomb.convert_to_actions(event2, home_team_id=776)
spadl_action2 = (
  spadl
  .add_names(spadl_action2)  # add actiontype and result names
  .merge(api.teams(game_id=3788742))  # add team names
  .merge(api.players(game_id=3788742))  # add player names
)
spadl_actions_l2r = spadl.play_left_to_right(spadl_action2, home_team_id=776)

# Keep only rows whose type_id is 0 — presumably SPADL's "pass" action type,
# judging by the variable name; confirm against spadl.actiontypes.
passes_all = spadl_actions_l2r[spadl_actions_l2r.type_id == 0]
# Last expression of the cell: display the start_y column of those rows.
passes_all.start_y


In [None]:
import matplotsoccer as mps
# Gather points: one x-list / y-list pair per (keeper?, teammate?) combination
xs_tm, ys_tm = [], []
xs_op, ys_op = [], []
xs_gk_tm, ys_gk_tm = [], []
xs_gk_op, ys_gk_op = [], []
actor_xy = None

# Lookup table replacing the nested if/else: (is keeper, is teammate) -> target lists
point_lists = {
    (True, True): (xs_gk_tm, ys_gk_tm),
    (True, False): (xs_gk_op, ys_gk_op),
    (False, True): (xs_tm, ys_tm),
    (False, False): (xs_op, ys_op),
}

for p in transformed_threesixty:
    x, y = p["location_spadl"]
    # Remember the actor's position; the actor is still added to a list below.
    if p["actor"]:
        actor_xy = (x, y)

    xs_target, ys_target = point_lists[bool(p["keeper"]), bool(p["teammate"])]
    xs_target.append(x)
    ys_target.append(y)


            
# Draw the pitch without showing it yet, so the scatter layers below land on the same figure
mps.field(figsize=8, show=False)

# Plot non-keepers
plt.scatter(xs_tm, ys_tm, label="Teammates", s=60, edgecolors="black", zorder=3)
plt.scatter(xs_op, ys_op, label="Opponents", s=60, marker="s", edgecolors="black", zorder=3)

# Plot keepers
plt.scatter(xs_gk_tm, ys_gk_tm, label="GK (team)", s=120, marker="^", edgecolors="black", zorder=4)
plt.scatter(xs_gk_op, ys_gk_op, label="GK (opp)", s=120, marker="v", edgecolors="black", zorder=4)

# Ball end: hollow red ring, so it can be told apart from the (black) start ring
# both on the pitch and in the legend.
plt.scatter([spadl_end_location[0]], [spadl_end_location[1]], s=200, facecolors="none", edgecolors="red", linewidths=2.5, zorder=5, label="Ball End")

# Ball start: hollow black ring around the actor. `actor_xy` stays None when no
# freeze-frame entry is flagged as the actor, so skip the marker in that case
# instead of failing on `None[0]`.
if actor_xy is not None:
    plt.scatter([actor_xy[0]], [actor_xy[1]], s=200, facecolors="none", edgecolors="black", linewidths=2.5, zorder=5, label="Ball Start")

plt.legend(loc="upper right")
plt.tight_layout()
plt.show()


Next, I need to decide how to design the features of the state space. Candidate features are:

- sparse team in possession/out of possession matrix (105,68)
- dense distance to ball and goal of every grid (105,68)
- Two dense matrices containing the sine and cosine of the angle between every location to the goal and the ball location
- one dense matrix containing the angle in radians to the goal location.


Velocity vectors would be difficult to estimate, because the temporal resolution of the data is poor.

Sparse team in possession/out of possession matrix (105,68)

In [None]:
import torch

FL, FW = 105, 68  # grid size in meters → cells (one cell per meter; rebinds FL/FW as ints)

def players_to_onehot(xs, ys, H=FL, W=FW, method="floor", sparse=False, device=None, dtype=torch.float32):
    """
    Rasterize player positions into a binary (H, W) occupancy grid.

    xs, ys: iterables (or tensors) of player coordinates in meters (0..105, 0..68)
    H, W:   output grid size. Rows index x (0..H-1) and columns index y
            (0..W-1), i.e. a player ends up at grid[x_cell, y_cell].
    method: 'round' rounds to the nearest cell; any other value floors (default).
    sparse: if True, returns a coalesced torch.sparse_coo_tensor; else a dense tensor
    device, dtype: device and dtype of the returned tensor.

    Coordinates that fall outside the grid are clamped onto the border cells.
    Several players in the same cell still produce a value of 1.
    """
    x = torch.as_tensor(xs, dtype=torch.float32, device=device)
    y = torch.as_tensor(ys, dtype=torch.float32, device=device)

    discretize = torch.round if method == "round" else torch.floor
    xi = discretize(x).to(torch.long).clamp(0, H - 1)
    yi = discretize(y).to(torch.long).clamp(0, W - 1)

    if sparse:
        idx = torch.stack([xi, yi], dim=0)
        vals = torch.ones(len(xi), dtype=dtype, device=device)
        # coalesce() sums the values of duplicate cells ...
        summed = torch.sparse_coo_tensor(idx, vals, size=(H, W), device=device, dtype=dtype).coalesce()
        # ... so clamp them back to 1 to keep the grid strictly binary.
        binary_vals = torch.clamp(summed.values(), max=1)
        # Indices are already unique here; coalescing again only marks the
        # result as coalesced so callers can use .indices() / .values().
        return torch.sparse_coo_tensor(
            summed.indices(), binary_vals, (H, W), device=device, dtype=dtype
        ).coalesce()

    grid = torch.zeros((H, W), dtype=dtype, device=device)
    grid[xi, yi] = 1.0
    return grid

# Dense occupancy grids built from the teammate / opponent coordinate lists
# gathered in the plotting cell (goalkeepers were collected in separate lists).
teammates_mat = players_to_onehot(xs_tm,ys_tm, sparse = False)
opponents_mat = players_to_onehot(xs_op,ys_op, sparse = False)

# Quick visual check of the teammate grid.
plt.imshow(teammates_mat)

Dense distance to the ball and to the goal for every grid cell (105,68)

In [None]:
import numpy as np
from mplsoccer import Pitch

def point_distance_map(x, y, H=105, W=68, device=None, dtype=torch.float32, normalized=False):
    """
    Returns a (H, W) tensor where entry [i, j] is the Euclidean distance from
    cell (x=i, y=j) to the point (x, y).

    x, y: reference point (e.g. the ball) in the same units as the grid
          indices (floats, 0<=x<=H, 0<=y<=W). Values outside still work.
    H, W: grid size; we use row=x in [0..H-1], col=y in [0..W-1] to match (105,68).
    device, dtype: device and dtype of the returned tensor.
    normalized: if True, divide by the corner-to-corner distance of the grid,
          hypot(H-1, W-1). For a point inside the grid the result then lies
          in [0, 1]. Defaults to False (raw distances).

    Notes:
    - Vectorized via broadcasting; no Python loops.
    - If you prefer conventional row=y, col=x indexing, swap the dx/dy grids below.
    """
    xs = torch.arange(H, device=device, dtype=dtype).unsqueeze(1)  # shape (H, 1)
    ys = torch.arange(W, device=device, dtype=dtype).unsqueeze(0)  # shape (1, W)

    dx = xs - torch.as_tensor(x, device=device, dtype=dtype)  # (H, 1)
    dy = ys - torch.as_tensor(y, device=device, dtype=dtype)  # (1, W)

    dist = torch.hypot(dx, dy)  # broadcasts to (H, W)

    if normalized:
        max_d = ((H - 1) ** 2 + (W - 1) ** 2) ** 0.5
        # A 1x1 grid has no extent; skip the division rather than divide by zero.
        if max_d > 0:
            dist = dist / max_d

    return dist  # shape (H, W)

# Distance from every grid cell to the actor's converted freeze-frame position.
# NOTE(review): `actor_xy` is None when no freeze-frame entry was flagged as the actor.
ball_dis_mat = point_distance_map(actor_xy[0],actor_xy[1])

In [None]:
from utils.visualizer import SoccerVisualizer

from matplotlib.font_manager import FontProperties

import numpy as np

def shrink_polygon(verts, scale=0.9):
    """
    Scale a polygon about the mean of its vertices.

    `verts` is anything that reshapes to (N, 2) — nested pairs or a flat
    x0, y0, x1, y1, ... sequence. A `scale` below 1.0 pulls the vertices
    toward the vertex mean, above 1.0 pushes them away.
    Returns an (N, 2) array of the scaled vertices.
    """
    points = np.asarray(verts).reshape(-1, 2)
    center = points.mean(axis=0)
    offsets = points - center
    return center + scale * offsets


# Project-local plotting helper (imported from utils.visualizer).
visualizer = SoccerVisualizer()
# NOTE(review): this value is overwritten by the hard-coded polygon right
# below, so the event's own visible area never reaches the plot.
vis_area = event.visible_area_360.item()

# Hand-written closed polygon (first and last vertex are identical).
# NOTE(review): coordinate system / units of these vertices are not stated — confirm.
vis_area =  np.array([[ 44.38282544 , 77.76214112],
                    [  35  , 0.        ],
                    [75  , 0.        ],
                    [ 55  ,78.39564601],
                    [ 44.38282544  ,77.76214112]])
#visible_area = np.array(vis_area).reshape(-1, 2)
#visible_area_skinny = shrink_polygon(visible_area, scale=0.7)

fig, ax, artists = visualizer.plot_state(teammates_mat,opponents_mat,visible_area=vis_area)
# Ball marker drawn at the SPADL start location, shifted by a fixed (-1.5, +0.75).
# NOTE(review): the reason for the offset is not stated — presumably a visual nudge; confirm.
ax.scatter([spadl_location[0]-1.5], [spadl_location[1]+0.75], c=("black",1), s=30, marker="o", zorder=6, linewidths=0.5, label="Ball")
# NOTE(review): `handles` and `labels` are not used afterwards in this cell.
handles, labels = ax.get_legend_handles_labels()
legend_font = FontProperties(
    family="serif",   # or "Arial", "Helvetica", etc.
    size=10,
    weight="light"         # "light", "regular", "bold"
)

legend = ax.legend(
    loc="upper left",
    frameon=True,
    facecolor="#aabb97",
    edgecolor="lightgray",
    labelspacing=0.8,
    borderpad=0.8,
    handletextpad=0.8,
    prop=legend_font,       # apply the custom legend font defined above
)
# round the legend box corners
legend.get_frame().set_boxstyle("round,pad=0.4")
legend.get_frame().set_alpha(0.85)


plt.show()
# Print the polygon that was actually passed to the plot.
print(vis_area)

In [None]:
import matplotlib.pyplot as plt
from mplsoccer import Pitch, Sbopen
import numpy as np

## load in Statsbomb360 data remotely
# NOTE(review): this is match 3788741, whereas the cells using the local loader
# work on match 3788742 — confirm the different match is intended.
parser = Sbopen()
frames, visible = parser.frame(3788741)

## get plotting data
# Position of the frame to plot; both lookups below use it so they cannot drift apart.
frame_idx = 22
frame_id = visible.iloc[frame_idx].id

visible_area = np.array(visible.iloc[frame_idx].visible_area).reshape(-1, 2)
player_position_data = frames[frames.id == frame_id]

teammate_locs = player_position_data[player_position_data.teammate]
opponent_locs = player_position_data[~player_position_data.teammate]

## set up pitch
p = Pitch(pitch_type='statsbomb')
fig, ax = p.draw(figsize=(12,8))

# Teammates in orange, opponents in blue, visible area as a translucent red polygon.
p.scatter(teammate_locs.x, teammate_locs.y, c='orange', s=80, ec='k', ax=ax)
p.scatter(opponent_locs.x, opponent_locs.y, c='dodgerblue', s=80, ec='k', ax=ax)
p.polygon([visible_area], color=(1, 0, 0, 0.3), ax=ax)

plt.show()