In [1]:
# Cell 1: Imports and Configuration
import sys
sys.path.append('..')

import pandas as pd

# Import helper functions
from src.visualisation import (
    build_position_group_performance_summary,
    create_comparison_widget,
    build_performance_scatter
)

# Default thresholds used as filter defaults throughout the notebook.
MIN_MINUTES_DEFAULT = 45
MIN_SPRINTS_DEFAULT = 5
MIN_RUNS_DEFAULT = 5
MIN_PRESSES_DEFAULT = 8

# Metric family configuration.
# Each family maps to a dict with four entries:
#   metrics            - ordered list of metric names belonging to the family
#   volume_col         - name of the family's volume column
#                        (presumably a raw event count in the metrics table; confirm)
#   min_volume_default - default minimum for that volume column
#   primary_rate       - the family's headline metric (always one of `metrics`)
METRIC_FAMILIES = {
    "Sprints": dict(
        metrics=[
            "high_value_sprint_pct",
            "attacking_sprint_pct",
            "defensive_sprint_pct",
            "high_value_sprints_per_90",
            "sprint_distance_per_90",
        ],
        volume_col="sprint_count",
        min_volume_default=MIN_SPRINTS_DEFAULT,
        primary_rate="high_value_sprints_per_90",
    ),
    "Off-ball runs": dict(
        metrics=[
            "avg_xthreat",
            "threat_per_90",
            "high_value_run_pct",
            "high_value_runs_per_90",
            "avg_opponents_beaten",
        ],
        volume_col="run_count",
        min_volume_default=MIN_RUNS_DEFAULT,
        primary_rate="threat_per_90",
    ),
    "Pressing": dict(
        metrics=[
            "press_success_rate",
            "regain_rate",
            "disruption_rate",
            "shot_creation_rate",
            "successful_presses_per_90",
            "pressing_actions_per_90",
        ],
        volume_col="pressing_action_count",
        min_volume_default=MIN_PRESSES_DEFAULT,
        primary_rate="successful_presses_per_90",
    ),
}

print("✓ Configuration loaded")

✓ Configuration loaded


### Updated KPI selection for visualisation

The metric families used in the visualisation have been updated to focus on
metrics that are:

- tactically meaningful
- position-agnostic where possible
- stable at match-level sample sizes
- aligned with SkillCorner modelling concepts (value, intent, impact)

#### Sprints
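Removed `sprints_in_attacking_third_pct` because it behaved inconsistently:
small denominators and role-specific skew distort it (e.g., CBs sprint only
occasionally, and often during set pieces). Added:

- `sprint_distance_per_90` for physical intensity
- `sprints_in_shot_possessions_pct` for value alignment

Kept `defensive_sprint_pct` as an optional metric for role orientation.

> **Note:** the `METRIC_FAMILIES` configuration in Cell 1 does not list
> `sprints_in_shot_possessions_pct`; it lists `high_value_sprint_pct`,
> `attacking_sprint_pct` and `high_value_sprints_per_90` instead.
> TODO: reconcile this section with the configuration.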

#### Off-ball runs
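Selected only the most stable, interpretable KPIs:

- `threat_per_90`
- `high_value_runs_per_90`

Other subtype-based or speed metrics were left out for clarity.

> **Note:** the `METRIC_FAMILIES` configuration in Cell 1 also lists
> `avg_xthreat`, `high_value_run_pct` and `avg_opponents_beaten` for this
> family. TODO: reconcile this section with the configuration.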

#### Pressing
Added an outcome-based measure of pressing value:

- `shot_creation_rate`

This captures pressing actions that contribute to shot creation and is a strong
indicator of tactical effectiveness.

In [2]:
# Cell 2: Load and clean data
# Read the player metrics CSV written to the project's output directory.
df = pd.read_csv('../output/player_metrics.csv')

# Basic cleaning: drop rows missing an identifying field, then keep only rows
# that reach the minimum-minutes threshold. Rows whose `minutes_played` is NaN
# fail the >= comparison and are therefore dropped as well.
df = df.dropna(subset=['player_id', 'player_short_name', 'position_group'])
df = df[df['minutes_played'] >= MIN_MINUTES_DEFAULT].copy()

# Summary of what survived the filters.
print(f"Loaded {len(df)} player-match records")
print(f"Unique players: {df['player_short_name'].nunique()}")
print(f"Position groups: {sorted(df['position_group'].unique())}")
# `team_name` is not part of the dropna subset above, so it can still contain
# NaN; drop those before sorting because sorted() cannot order NaN against str.
print(f"Teams: {sorted(df['team_name'].dropna().unique())}")

Loaded 207 player-match records
Unique players: 145
Position groups: ['Center Forward', 'Central Defender', 'Full Back', 'Midfield', 'Other', 'Wide Attacker']
Teams: ['Adelaide United Football Club', 'Auckland FC', 'Brisbane Roar FC', 'Central Coast Mariners Football Club', 'Macarthur FC', 'Melbourne City FC', 'Melbourne Victory Football Club', 'Newcastle United Jets FC', 'Perth Glory Football Club', 'Sydney Football Club', 'Wellington Phoenix FC', 'Western United']


In [None]:
# Cell 4: Unified interactive comparison widget
# Passes the cleaned frame, the metric-family configuration and two default
# thresholds to the widget helper imported from src.visualisation.
# NOTE(review): the last argument is the sprint-specific default
# (MIN_SPRINTS_DEFAULT), although every family in METRIC_FAMILIES carries its
# own "min_volume_default" - confirm against the helper's signature that this
# value is intended for the "Off-ball runs" and "Pressing" families too.
create_comparison_widget(df, METRIC_FAMILIES, MIN_MINUTES_DEFAULT, MIN_SPRINTS_DEFAULT)

VBox(children=(HBox(children=(Dropdown(description='Metric:', options=('Sprints', 'Off-ball runs', 'Pressing')…

Output()

In [4]:
# Cell: Position-group performance by metric
# Prints one summary table per metric family, followed by the (up to) two
# position groups that appear most often in the "best_position_group" column.
print("=== Position-group performance by metric ===\n")

summaries = build_position_group_performance_summary(df, METRIC_FAMILIES, MIN_MINUTES_DEFAULT)

for family_name, summary_df in summaries.items():
    print(f"\n{family_name}:")

    # Guard clause: nothing to tabulate for this family.
    if len(summary_df) == 0:
        print("  (no eligible players for this family)")
        continue

    print(summary_df.to_string(index=False))

    # value_counts() orders groups by how many metrics they lead, so the
    # first two index entries are the most frequent "best" groups.
    top_groups = summary_df["best_position_group"].value_counts()
    print(f"  → Most often best: {', '.join(top_groups.index[:2])}")

=== Position-group performance by metric ===


Sprints:
                   metric best_position_group  player_count  total_volume  mean_volume  mean_metric  median_metric  weighted_metric score_type
    high_value_sprint_pct            Midfield            20         179.0          7.8        0.744          0.800            0.744       mean
     attacking_sprint_pct      Center Forward            17         188.0          8.2        0.543          0.556            0.543       mean
     defensive_sprint_pct    Central Defender            17         171.0          6.8        0.856          0.857            0.856       mean
high_value_sprints_per_90       Wide Attacker            21         278.0          9.6        6.912          7.100            6.912       mean
   sprint_distance_per_90       Wide Attacker            21         278.0          9.6      211.341        192.998          211.341       mean
  → Most often best: Wide Attacker, Midfield

Off-ball runs:
                metric be

In [5]:
# Cell 6: Performance Scatter
print("=== Player Performance Scatter (Interactive) ===\n")
# The helper returns a pair: a frame and the widget to render. Judging by the
# message printed below, the frame holds one row per qualifying player -
# TODO confirm against src.visualisation.
scatter_df, scatter_widget = build_performance_scatter(df, METRIC_FAMILIES)
# `display` is injected by the IPython/Jupyter kernel; outside a notebook it
# has to be imported from IPython.display.
display(scatter_widget)
print(f"\nAnalysed {len(scatter_df)} players meeting minimum criteria")
# NOTE(review): this line describes the helper's styling and is not verified
# here - confirm it still matches what build_performance_scatter draws.
print("Top 5 per position group highlighted in green.")

=== Player Performance Scatter (Interactive) ===







VBox(children=(Dropdown(description='Metric:', options=('HV Sprints/90', 'Threat/90', 'Succ. Presses/90'), sty…


Analysed 135 players meeting minimum criteria
Top 5 per position group highlighted in green.
