In [3]:
# Imports (keep minimal)
import pandas as pd
import numpy as np
from src.extract_dataframe import (
    get_open_data_urls_and_game,
    make_tracking_df,
    make_players_df,
)
from src.utils import (
    time_to_seconds,
    get_first_possession_in_phase,
    get_all_possessions_in_phase,
    add_possession_ids_to_phases_from_events,
    enrich_events_with_linked_events,
    add_xthreat_at_start_for_possessions,
    add_xthreat_at_end_for_possessions,
    add_xthreat_potential_max,
    add_passing_option_proximity,
    add_possession_proximity,
    add_xthreat_deltas,
    add_avg_po_proximity_score
)
# Let pandas show up to 100 columns when a DataFrame is displayed.
pd.options.display.max_columns = 100

In [4]:
# Parameters: the match to analyse (used below to resolve URLs and build tracking_df).
match_id = 1886347

# Resolve the Open Data URLs and the databallpy Game in a single helper call,
# then unpack the five returned values. only_alive=False is forwarded as-is.
open_data = get_open_data_urls_and_game(match_id, only_alive=False)
meta_url, tracking_url, events_url, phases_url, game = open_data

# Optional step, currently disabled: add velocities on tracking_data (databallpy)
# game.tracking_data.add_velocity(column_ids=game.get_column_ids())

In [None]:
# Build tracking_df by handing the tracking URL and the match id to make_tracking_df.
# (Original author's note: the source is the Open Data tracking JSONL.)
tracking_df = make_tracking_df(tracking_url, match_id)
# Last expression of the cell: preview the first rows.
tracking_df.head()

In [None]:
# Build players_df by handing the metadata URL to make_players_df.
# (Original author's note: the source is the match metadata JSON.)
players_df = make_players_df(meta_url)
# Last expression of the cell: preview the first rows.
players_df.head()

In [None]:
# Join every tracking row to its player record (tracking `player_id` == players `id`).
# how="inner" is the pandas default, spelled out here: tracking rows whose
# player_id has no match in players_df are dropped by this join.
enriched_tracking_data = pd.merge(
    tracking_df,
    players_df,
    how="inner",
    left_on="player_id",
    right_on="id",
)
enriched_tracking_data.head()

In [None]:
# Read the match's dynamic events CSV (located at events_url) into a DataFrame.
de_match = pd.read_csv(filepath_or_buffer=events_url)
de_match.head()

In [None]:
# Read the phases-of-play CSV for the match.
# The URL is `phases_url`, unpacked from get_open_data_urls_and_game in the
# parameters cell; the name used here previously was never defined in this
# notebook and raised NameError on a fresh kernel.
phases_match = pd.read_csv(phases_url)
phases_match.head()

In [None]:
# --- Collect the possession events of every phase in phases_match ---
# enriched_phase_rows holds one record per phase (kept for debugging or downstream use).
enriched_phase_rows = []

for _, row in phases_match.iterrows():
    # iterrows may upcast values, so cast the identifiers/frames back to int.
    phase_index = int(row["index"])
    frame_start = int(row["frame_start"])
    frame_end = int(row["frame_end"])

    # Possession events of THIS phase only.
    # A phase-local name is used on purpose: `pp_all` is the match-wide
    # possession table built in a later cell, and a per-phase result stored
    # under that name would silently replace it if cells run out of order.
    phase_possessions = get_all_possessions_in_phase(
        de_match,
        phase_frame_start=frame_start,
        phase_frame_end=frame_end,
        phase_index=phase_index,
        enforce_phase_index=False,
        sort=True
    )

    # The record key stays "pp_all" so anything reading these records is unaffected.
    enriched_phase_rows.append({
        "phase_index": phase_index,
        "frame_start": frame_start,
        "frame_end": frame_end,
        "pp_all": phase_possessions
    })

# Build the enriched phases table: the helper is asked to write its result
# into a new "player_possession_event_ids" column.
enriched_phases_match = add_possession_ids_to_phases_from_events(
    phases_match=phases_match,
    de_match=de_match,
    enforce_phase_index=False,
    new_col_name="player_possession_event_ids",
    sort_possessions=True
)

# Preview the first rows of the enriched phases table.
print(enriched_phases_match.head())

In [None]:
# Keep only the 'player_possession' rows of the dynamic events, ordered by
# (frame_start, frame_end) with a fresh 0..n-1 index, and call the result pp_all.
# sort_values returns a new DataFrame, so pp_all does not alias de_match.
is_player_possession = de_match["event_type"].eq("player_possession")
pp_all = (
    de_match.loc[is_player_possession]
    .sort_values(by=["frame_start", "frame_end"])
    .reset_index(drop=True)
)
print(f"Filtered {len(pp_all)} player_possession event(s).")

In [None]:
# Add a 'linked_event' column to the dynamic events, passing pp_all as the
# possession table and restricting child events to the three types below.
# NOTE(review): the original note says child_event_types=None includes all
# types and that only possession rows get a value — confirm against the helper.
linked_child_types = ("passing_option", "off_ball_run", "on_ball_engagement")
enriched_de_match = enrich_events_with_linked_events(
    de_match=de_match,
    pp_all=pp_all,
    new_col_name="linked_event",
    child_event_types=linked_child_types,
)

# Quick inspection: first 10 possession rows together with their linked events.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "player_id", "player_name", "frame_start", "frame_end", "linked_event"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(10))

In [None]:
# Input is the already-enriched events frame (enriched_de_match), handed to the
# helper through its `de_match` parameter; the result replaces enriched_de_match.
# NOTE(review): the original note says xthreat_at_start is numeric on possession
# rows and "" elsewhere — confirm against the helper.
enriched_de_match = add_xthreat_at_start_for_possessions(
    de_match=enriched_de_match,
    new_col_name="xthreat_at_start",
)

# Inspect the first 10 possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "player_id", "player_name", "end_type", "pass_outcome", "xthreat_at_start"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(10))

In [None]:
# Same pattern as the start value: pass the enriched events frame through the
# helper's `de_match` parameter and store the output column as "xthreat_at_end".
enriched_de_match = add_xthreat_at_end_for_possessions(
    new_col_name="xthreat_at_end",
    de_match=enriched_de_match,
)

# Show start and end values side by side for the first 10 possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "end_type", "pass_outcome", "xthreat_at_start", "xthreat_at_end"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(10))

In [None]:
# Compute "xthreat_potential_max" from the events frame, telling the helper that
# the linked-events column is named "linked_event" (change linked_col if yours differs).
# NOTE(review): the original note assumes xthreat/xpass_completion are populated
# on passing_option rows, and that possession rows get floats while other rows
# get "" — confirm against the helper.
enriched_de_match = add_xthreat_potential_max(
    enriched_de_match=enriched_de_match,
    new_col_name="xthreat_potential_max",
    linked_col="linked_event",
)

# Sanity check on the first 12 possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "xthreat_at_start", "xthreat_at_end", "xthreat_potential_max"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(12))

In [None]:
# Requires the three columns added by the preceding cells:
#   xthreat_at_start, xthreat_at_end, xthreat_potential_max.
# The helper is asked to write two output columns:
#   "xthreat_increase" and "potential_xthreat_reduction".
delta_columns = dict(
    col_start="xthreat_at_start",
    col_end="xthreat_at_end",
    col_pmax="xthreat_potential_max",
    out_delta="xthreat_increase",
    out_pot_red="potential_xthreat_reduction",
)
enriched_de_match = add_xthreat_deltas(enriched_de_match=enriched_de_match, **delta_columns)

# Inspect the first 12 player possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = [
    "event_id",
    "xthreat_at_start",
    "xthreat_at_end",
    "xthreat_potential_max",
    "xthreat_increase",
    "potential_xthreat_reduction",
]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(12))

In [None]:
# Inputs: the events frame enriched so far, and the merged per-frame tracking data.
# NOTE(review): the original notes say tracking needs frame, player_id, x, y,
# is_detected (optionally ball_status), that team_map may be a DataFrame of
# ['player_id', 'team_id'] when the events' team_id should not be used, and that
# require_ball_in_play=True skips dead-ball frames — confirm against the helper.
enriched_de_match = add_passing_option_proximity(
    enriched_de_match=enriched_de_match,
    enriched_tracking_data=enriched_tracking_data,
    new_col_name="po_proximity_score",
    team_map=None,
    require_ball_in_play=False,
)

# Quick sanity check on the first 12 passing_option rows.
passing_option_rows = enriched_de_match["event_type"] == "passing_option"
inspect_cols = ["event_id", "player_id", "frame_start", "frame_end", "po_proximity_score"]
print(enriched_de_match.loc[passing_option_rows, inspect_cols].head(12))

In [None]:
# Possession counterpart of the passing-option proximity step: same two inputs
# (events frame + merged tracking data), result stored in "pp_proximity_score".
# NOTE(review): per the original notes, team_map may be a DataFrame of
# ['player_id', 'team_id'] and require_ball_in_play=True ignores frames where
# ball_status == 'dead' — confirm against the helper.
enriched_de_match = add_possession_proximity(
    enriched_de_match=enriched_de_match,
    enriched_tracking_data=enriched_tracking_data,
    new_col_name="pp_proximity_score",
    team_map=None,
    require_ball_in_play=False,
)

# Inspect the first 12 player possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "player_id", "frame_start", "frame_end", "pp_proximity_score"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(12))

In [None]:
# Requires the `linked_event` and `po_proximity_score` columns from earlier cells;
# the helper is pointed at both by name and writes "avg_po_proximity_score".
# NOTE(review): the original note expects a float or NaN on possession rows and
# empty values elsewhere — confirm against the helper.
enriched_de_match = add_avg_po_proximity_score(
    enriched_de_match=enriched_de_match,
    po_score_col="po_proximity_score",
    linked_col="linked_event",
    new_col_name="avg_po_proximity_score",
)

# Quick sanity check on the first 12 possession rows.
possession_rows = enriched_de_match["event_type"] == "player_possession"
inspect_cols = ["event_id", "avg_po_proximity_score"]
print(enriched_de_match.loc[possession_rows, inspect_cols].head(12))

## Notes
- This notebook keeps code minimal and imports reusable helpers from `src/`.
- Adjust `match_id` or phase selection as needed for specific analyses.
- For plotting or pressure maps, extend with functions that use tracking velocities and KDTree over frames.