In [15]:
import os, json
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import random
import fsspec
import gcsfs
import io

from mplsoccer import Pitch

import umap
import hdbscan
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
pd.set_option('display.max_columns', None)

In [11]:
BUCKET_NAME = "arthur_tmp"
BASE_GCS_PATH = f"gs://{BUCKET_NAME}"

DYNAMIC_DIR = f"{BASE_GCS_PATH}/dynamic"
META_DIR = f"{BASE_GCS_PATH}/meta"
TRACKING_DIR = f"{BASE_GCS_PATH}/tracking"
POSSESSION_DIR = f"{BASE_GCS_PATH}/possession"
PITCH_CONTROL_DIR = f"{BASE_GCS_PATH}/pitch_control"
CLUSTERING_DIR = f"{BUCKET_NAME}/clustering"

# Pitch constants (SkillCorner → 105x68, origin at left-bottom after rescale)
X_MIN, X_MAX = -52, 52
Y_MIN, Y_MAX = -34, 34
PITCH_LENGTH, PITCH_WIDTH = 105.0, 68.0

In [4]:
def prepare_clustering_features():
    """
    Transforme la liste de dictionnaires en matrice de features pour le clustering.
    Lit les informations contextuelles (player, équipe, minute, etc.)
    directement depuis le fichier POSSESSION_DIR/<match_id>.parquet.

    Les frames contenant des NaN dans 'player_position', 
    'distance_to_nearest_teammate' ou 'distance_to_nearest_opponent' 
    sont exclues des résultats.

    Colonnes ajoutées dans meta :
        - match_id
        - player_id
        - player_name
        - player_team
        - opponent_team
        - minute
    """

    fs = gcsfs.GCSFileSystem()
    feature_vectors = []
    meta_info = []
    frames_inputs = []

    pitch_control_files = fs.glob(f"{PITCH_CONTROL_DIR}/*.npz")
    if not pitch_control_files:
        print(f"⚠️ Aucun fichier trouvé dans {PITCH_CONTROL_DIR}")
        return None, None, None

    for gcs_path in sorted(pitch_control_files):
        match_id = gcs_path.split("/")[-1].replace(".npz", "")

        # --- Lecture du fichier .npz ---
        try:
            with fs.open(gcs_path, "rb") as f:
                data = np.load(f, allow_pickle=True)
                results = data["results"].tolist()
        except Exception as e:
            print(f"⚠️ Erreur lecture {gcs_path} : {e}")
            continue

        if not results:
            continue

        # --- Lecture du fichier POSSESSION correspondant ---
        possession_path = f"{POSSESSION_DIR}/{match_id}.parquet"
        try:
            with fs.open(possession_path, "rb") as fp:
                possession_df = pd.read_parquet(fp)
        except Exception as e:
            print(f"⚠️ Impossible de lire {possession_path} : {e}")
            possession_df = None

        # --- Préparer les noms d'équipes (pour déduction adversaire) ---
        all_teams = (
            possession_df["team_name"].unique().tolist()
            if possession_df is not None and "team_name" in possession_df.columns
            else []
        )

        # --- Itération sur les résultats valides ---
        for item in results:
            # Exclusion si NaN critiques
            player_position = item.get("player_position", (np.nan, np.nan))
            if (
                np.isnan(player_position[0]) or np.isnan(player_position[1]) or
                np.isnan(item.get("distance_to_nearest_teammate", np.nan)) or
                np.isnan(item.get("distance_to_nearest_opponent", np.nan))
            ):
                continue  # on ignore cette frame

            player_id = item.get("player_id")
            frame = item.get("frame")

            # (1) Flatten de la carte de Pitch Control
            pitch_flat = item["pitch_control_map"].flatten()

            # (2) Features contextuelles
            defensive_lines = np.array(item["defensive_lines"], dtype=float)
            defensive_lines = np.pad(
                defensive_lines, (0, 3 - len(defensive_lines)),
                mode='constant', constant_values=np.nan
            )

            ball_x, ball_y = item["ball_position"]
            in_possession = 1.0 if item["in_possession"] else 0.0

            context_features = np.array([
                *defensive_lines,
                ball_x,
                ball_y,
                in_possession,
                item["distance_to_ball"],
                item["distance_to_nearest_teammate"],
                item["distance_to_nearest_opponent"]
            ], dtype=float)

            full_vector = np.concatenate([pitch_flat, context_features])
            feature_vectors.append(full_vector)
            frames_inputs.append(item)

            # (3) Récupération des infos depuis le parquet possession
            if possession_df is not None:
                row = possession_df[
                    (possession_df["frame"] == frame) &
                    (possession_df["player_id"] == player_id)
                ]
                if not row.empty:
                    row = row.iloc[0]
                    player_name = row.get("player_short_name", None)
                    player_team = row.get("team_name", None)
                    minute = None
                    time_val = row.get("time", None)
                    if isinstance(time_val, str) and len(time_val) >= 5:
                        try:
                            minute = int(time_val.split(":")[1])
                        except Exception:
                            minute = None

                    # Déduire l’adversaire
                    opponent_team = None
                    if all_teams and player_team in all_teams:
                        opponent_team = [t for t in all_teams if t != player_team]
                        opponent_team = opponent_team[0] if opponent_team else None
                else:
                    player_name = None
                    player_team = None
                    opponent_team = None
                    minute = None
            else:
                player_name = None
                player_team = None
                opponent_team = None
                minute = None

            # (4) Ajouter au meta_info
            meta_info.append({
                "match_id": match_id,
                "frame": frame,
                "player_id": player_id,
                "player_position_role": item.get("player_position_role", None),
                "player_name": player_name,
                "player_team": player_team,
                "opponent_team": opponent_team,
                "minute": minute
            })

    if not feature_vectors:
        print("⚠️ Aucun vecteur de features généré (fichiers vides ?)")
        return None, None, None

    # --- Matrice finale des features ---
    X = np.vstack(feature_vectors)

    # --- Gestion des NaN restants (remplacés par moyenne colonne) ---
    nan_mask = np.isnan(X)
    if np.any(nan_mask):
        col_means = np.nanmean(X, axis=0)
        inds = np.where(nan_mask)
        X[inds] = np.take(col_means, inds[1])

    meta = pd.DataFrame(meta_info)
    return X, meta, frames_inputs

In [5]:
%%time

X, meta, frames_inputs = prepare_clustering_features()

CPU times: user 54.3 s, sys: 4.94 s, total: 59.3 s
Wall time: 7min 56s


In [13]:
fs = gcsfs.GCSFileSystem()
fs.mkdirs(CLUSTERING_DIR, exist_ok=True)

X_path = f"{CLUSTERING_DIR}/X_features_center_forward.npz"
with fs.open(X_path, "wb") as f:
    np.savez_compressed(f, X=X)

print(f"✅ Matrice X sauvegardée sur GCS : {X_path} (shape={X.shape})")

✅ Matrice X sauvegardée sur GCS : arthur_tmp/clustering/X_features_center_forward.npz (shape=(78865, 1609))


In [14]:
meta_path = f"{CLUSTERING_DIR}/meta_frames_center_forward_w_labels.parquet"
with fs.open(meta_path, "wb") as f:
    meta.to_parquet(f, index=False)
    
print(f"✅ DataFrame meta sauvegardé sur GCS : {meta_path} (shape={meta.shape})")

✅ DataFrame meta sauvegardé sur GCS : arthur_tmp/clustering/meta_frames_center_forward_w_labels.parquet (shape=(78865, 8))


In [61]:
pd.DataFrame(X).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1117,1118,1119,1120,1121,1122,1123,1124,1125,1126,1127,1128,1129,1130,1131,1132,1133,1134,1135,1136,1137,1138,1139,1140,1141,1142,1143,1144,1145,1146,1147,1148,1149,1150,1151,1152,1153,1154,1155,1156,1157,1158,1159,1160,1161,1162,1163,1164,1165,1166,1167,1168,1169,1170,1171,1172,1173,1174,1175,1176,1177,1178,1179,1180,1181,1182,1183,1184,1185,1186,1187,1188,1189,1190,1191,1192,1193,1194,1195,1196,1197,1198,1199,1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211,1212,1213,1214,1215,1216,1217,1218,1219,1220,1221,1222,1223,1224,1225,1226,1227,1228,1229,1230,1231,1232,1233,1234,1235,1236,1237,1238,1239,1240,1241,1242,1243,1244,1245,1246,1247,1248,1249,1250,1251,1252,1253,1254,1255,1256,1257,1258,1259,1260,1261,1262,1263,1264,1265,1266,1267,1268,1269,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279,1280,1281,1282,1283,1284,1285,1286,1287,1288,1289,1290,1291,1292,1293,1294,1295,1296,1297,1298,1299,1300,1301,1302,1303,1304,1305,1306,1307,1308,1309,1310,1311,1312,1313,1314,1315,1316,1317,1318,1319,1320,1321,1322,1323,1324,1325,1326,1327,1328,1329,1330,1331,1332,1333,1334,1335,1336,1337,1338,1339,1340,1341,1342,1343,1344,1345,1346,1347,1348,1349,1350,1351,1352,1353,1354,1355,1356,1357,1358,1359,1360,1361,1362,1363,1364,1365,1366,1367,1368,1369,1370,1371,1372,1373,1374,1375,1376,1377,1378,1379,1380,1381,1382,1383,1384,1385,1386,1387,1388,1389,1390,1391,1392,1393,1394,1395,1396,1397,1398,1399,1400,1401,1402,1403,1404,1405,1406,1407,1408,1409,1410,1411,1412,1413,1414,1415,1416,1417,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443,1444,1445,1446,1447,1448,1449,1450,1451,1452,1453,1454,1455,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1476,1477,1478,1479,1480,1481,1482,1483,1484,1485,1486,1487,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,1515,1516,1517,1518,1519,1520,1521,1522,1523,1524,1525,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535,1536,1537,1538,1539,1540,1541,1542,1543,1544,1545,1546,1547,1548,1549,1550,1551,1552,1553,1554,1555,1556,1557,1558,1559,1560,1561,1562,1563,1564,1565,1566,1567,1568,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,1595,1596,1597,1598,1599,1600,1601,1602,1603,1604,1605,1606,1607,1608
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7e-05,8e-05,4.7e-05,2.1e-05,5e-06,0.0,0.0,7e-06,1.4e-05,3e-05,4.7e-05,6.2e-05,0.000103,0.000124,5.9e-05,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5e-05,4.5e-05,4.3e-05,3e-05,1.6e-05,7e-06,1.1e-05,5e-06,6e-06,2.4e-05,4.7e-05,0.000101,0.000143,0.000233,0.000484,0.000557,0.00039,0.00025,0.000146,7.2e-05,5.9e-05,5.2e-05,5.5e-05,5.9e-05,9.6e-05,0.000135,0.000169,0.00021,0.000172,3.1e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,5.6e-05,7.7e-05,8.8e-05,9.5e-05,9.9e-05,0.000107,0.000112,0.00015,0.000293,0.000487,0.000731,0.000983,0.001396,0.002009,0.002841,0.001523,0.000966,0.000597,0.000351,0.000164,8.4e-05,5.1e-05,3.5e-05,6.3e-05,0.000103,0.000144,0.000184,0.000224,0.000282,0.00031,0.000103,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.3e-05,9.3e-05,8.8e-05,9.2e-05,8.6e-05,8.5e-05,8.1e-05,7.8e-05,8e-05,0.000106,0.000156,0.000358,0.000726,0.001261,0.002809,0.005881,0.006362,0.003644,0.002017,0.001145,0.000579,0.000232,7.2e-05,2.4e-05,9.6e-05,0.000116,0.000226,0.000399,0.000591,0.000716,0.000853,0.001029,0.000318,7.5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.6e-05,0.000157,0.000176,0.000173,0.000164,0.000144,0.000117,9.7e-05,7.5e-05,6.7e-05,7.8e-05,0.000149,0.000356,0.000865,0.002153,0.005654,0.01205,0.014451,0.018018,0.008443,0.005145,0.00257,0.001973,0.001483,0.001049,0.00104,0.001068,0.001071,0.000969,0.001368,0.001422,0.001741,0.001936,0.000764,0.000239,6.2e-05,4e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000199,0.000307,0.000333,0.000272,0.00026,0.000236,0.000244,0.000233,0.000206,0.000167,0.000109,5.2e-05,0.00011,0.000493,0.001197,0.004702,0.011293,0.016311,0.017502,0.017126,0.01953,0.030091,0.036587,0.028909,0.022337,0.016292,0.013301,0.011036,0.009395,0.007978,0.00579,0.006824,0.007139,0.007059,0.003377,0.001343,0.00051,0.000183,6.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.6e-05,0.000163,0.000293,0.000465,0.000593,0.000533,0.000468,0.000451,0.000513,0.000611,0.000705,0.000848,0.00092,0.001542,0.004335,0.010045,0.020545,0.021455,0.020109,0.015131,0.029495,0.037652,0.042368,0.063617,0.118888,0.125847,0.084211,0.057001,0.041701,0.02978,0.026741,0.022345,0.019105,0.015097,0.018642,0.019266,0.008001,0.003258,0.001397,0.00059,0.000268,9.9e-05,2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5e-05,6.6e-05,0.000109,0.000195,0.000325,0.00063,0.00116,0.002177,0.002213,0.002594,0.003262,0.004646,0.006454,0.011296,0.031116,0.053935,0.035815,0.023844,0.035677,0.026621,0.041944,0.099568,0.149678,0.187392,0.185814,0.089342,0.056248,0.037845,0.03038,0.02157,0.017905,0.014045,0.014809,0.014796,0.014557,0.018419,0.0211,0.017755,0.007665,0.003201,0.001483,0.000675,0.000346,0.000154,3.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9e-05,5.6e-05,0.000108,0.000229,0.000491,0.001038,0.002445,0.005967,0.016023,0.022231,0.03428,0.040279,0.07079,0.09269,0.110597,0.113657,0.143846,0.201982,0.309566,0.45011,0.586367,0.511309,0.282738,0.132748,0.056475,0.03619,0.025303,0.018909,0.021316,0.024217,0.022633,0.023093,0.018241,0.013487,0.016534,0.019618,0.021416,0.012555,0.006084,0.002989,0.001572,0.000707,0.000219,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3e-05,9.2e-05,0.000312,0.001004,0.003012,0.008253,0.025881,0.022436,0.016661,0.018869,0.033766,0.058193,0.106093,0.248191,0.556306,0.806726,0.918076,0.938644,0.776787,0.501792,0.203514,0.069947,0.034481,0.033157,0.036373,0.039822,0.039204,0.040108,0.040489,0.039338,0.03656,0.023634,0.023201,0.026006,0.024816,0.022163,0.010901,0.004887,0.002225,0.000837,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.1e-05,0.000367,0.001219,0.004068,0.012835,0.010871,0.011323,0.01224,0.015654,0.013877,0.027536,0.118019,0.334225,0.633721,0.834381,0.895536,0.802829,0.584026,0.291241,0.176797,0.1333,0.105192,0.073088,0.059227,0.04727,0.042224,0.037501,0.035244,0.033277,0.028293,0.022055,0.015624,0.018115,0.021528,0.020588,0.019584,0.012162,0.004616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.6e-05,0.000258,0.000793,0.003197,0.010705,0.012903,0.012509,0.010778,0.018788,0.025296,0.035968,0.0644,0.130602,0.337951,0.576133,0.695049,0.504786,0.248145,0.099001,0.044436,0.024912,0.018289,0.017384,0.015997,0.016249,0.016436,0.017561,0.017613,0.017619,0.017568,0.017183,0.013169,0.010327,0.01195,0.01386,0.014468,0.012907,0.005301,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2e-06,3.9e-05,0.000125,0.000407,0.001431,0.00454,0.004555,0.004482,0.004184,0.002973,0.005436,0.011849,0.025117,0.053954,0.10459,0.172871,0.203547,0.213525,0.173061,0.087724,0.031456,0.009921,0.00436,0.004194,0.005519,0.006236,0.007386,0.008098,0.008809,0.01012,0.010231,0.010906,0.011594,0.010754,0.009893,0.008163,0.007858,0.007688,0.006285,0.000874,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.6e-05,6e-05,0.000148,0.000411,0.001139,0.002652,0.00248,0.002457,0.002012,0.001632,0.001171,0.000979,0.002281,0.005543,0.011923,0.022979,0.043957,0.087379,0.095509,0.044012,0.031195,0.019014,0.0092,0.005316,0.004718,0.004459,0.004582,0.00487,0.005497,0.00605,0.006612,0.007186,0.007775,0.007895,0.009016,0.008407,0.007783,0.005822,0.00346,0.001647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8e-06,3.1e-05,4.7e-05,8e-05,0.000226,0.000478,0.001001,0.00206,0.001909,0.001879,0.001785,0.00151,0.001313,0.001014,0.001129,0.001839,0.002845,0.005783,0.012152,0.023922,0.041248,0.067194,0.032831,0.020996,0.011784,0.007248,0.004509,0.004204,0.00411,0.003927,0.004095,0.00478,0.005064,0.005,0.005567,0.005713,0.006203,0.006706,0.006964,0.006668,0.00514,0.002807,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.4e-05,0.00012,0.000274,0.000546,0.000923,0.001788,0.001638,0.001682,0.001524,0.001416,0.001286,0.001082,0.001013,0.001043,0.001552,0.00281,0.005769,0.009642,0.015406,0.026608,0.041484,0.030038,0.015764,0.011703,0.008901,0.005181,0.0036,0.003669,0.003545,0.003528,0.003878,0.003916,0.004249,0.004554,0.004765,0.005313,0.005407,0.004764,0.003789,0.001512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000277,0.000792,0.001552,0.001486,0.001444,0.001309,0.001213,0.001174,0.001117,0.00111,0.001166,0.001312,0.001914,0.003209,0.005784,0.008521,0.01357,0.019505,0.027263,0.027145,0.017759,0.011569,0.008998,0.006229,0.004554,0.00378,0.003115,0.003104,0.003046,0.003537,0.003671,0.004002,0.003967,0.004104,0.003726,0.002478,0.000374,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000723,0.001209,0.00119,0.001182,0.001086,0.001051,0.001068,0.001269,0.001439,0.001572,0.001817,0.003376,0.005,0.007692,0.010777,0.015737,0.02085,0.025545,0.01697,0.011874,0.009044,0.00763,0.005934,0.004376,0.003722,0.003346,0.002848,0.002544,0.002854,0.002926,0.00283,0.002475,0.000985,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.455769,71.616058,82.43762,68.653846,57.5,0.0,11.391938,10.267189,5.084613


In [6]:
meta

Unnamed: 0,match_id,frame,player_id,player_position_role,player_name,player_team,opponent_team,minute
0,1650385,1625,1498,Center Forward,Bruno Fernandes,Manchester United,Fulham,2
1,1650385,1648,1498,Center Forward,Bruno Fernandes,Manchester United,Fulham,2
2,1650385,1692,1498,Center Forward,Bruno Fernandes,Manchester United,Fulham,2
3,1650385,1699,1498,Center Forward,Bruno Fernandes,Manchester United,Fulham,2
4,1650385,1728,1498,Center Forward,Bruno Fernandes,Manchester United,Fulham,2
...,...,...,...,...,...,...,...,...
78860,2018580,54746,13368,Center Forward,Y. Wissa,Brentford FC,Wolverhampton Wanderers,29
78861,2018580,56507,13368,Center Forward,Y. Wissa,Brentford FC,Wolverhampton Wanderers,32
78862,2018580,56545,13368,Center Forward,Y. Wissa,Brentford FC,Wolverhampton Wanderers,32
78863,2018580,56572,13368,Center Forward,Y. Wissa,Brentford FC,Wolverhampton Wanderers,32


In [7]:
meta.isna().sum()

match_id                0
frame                   0
player_id               0
player_position_role    0
player_name             0
player_team             0
opponent_team           0
minute                  0
dtype: int64

In [8]:
meta.player_name.value_counts()

player_name
E. Haaland        7071
R. Jiménez        3689
A. Isak           3526
D. Solanke        3282
N. Jackson        3143
                  ... 
C. Jones            42
E. Smith Rowe       37
M. Gibbs-White      26
E. Ferguson         22
A. Grønbæk          20
Name: count, Length: 87, dtype: int64

In [82]:
with fs.open("arthur_tmp/pitch_control/1650961.npz", "rb") as f:
    data = np.load(f, allow_pickle=True)
    results = data["results"].tolist()

In [83]:
frame = next((d for d in results if d["frame"] == 56795), None)

In [84]:
frame

{'pitch_control_map': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'defensive_lines': [27.199038461538464, 60.95552884615384, 88.21153846153845],
 'frame': 56795,
 'player_id': 2766,
 'player_position': (nan, nan),
 'ball_position': (99.68942307692309, 26.159999999999997),
 'in_possession': False,
 'distance_to_ball': nan,
 'distance_to_nearest_teammate': nan,
 'distance_to_nearest_opponent': nan,
 'player_position_role': 'Center Forward'}

In [14]:
len(frames_inputs)

73120

In [15]:
frames_inputs[0]

{'pitch_control_map': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'defensive_lines': [60.455769230769235, 71.61605769230769, 82.43762019230769],
 'frame': 1625,
 'player_id': 1498,
 'player_position': (77.88173076923077, 50.82),
 'ball_position': (68.65384615384616, 57.5),
 'in_possession': False,
 'distance_to_ball': 11.391938135139778,
 'distance_to_nearest_teammate': 10.26718897565972,
 'distance_to_nearest_opponent': 5.084613021681336,
 'player_position_role': 'Center Forward'}

In [16]:
umap_kwargs = dict(n_neighbors=200, min_dist=0.3, n_components=5, random_state=42)
hdbscan_kwargs = dict(min_cluster_size=6000, min_samples=50, cluster_selection_method='eom',
                      cluster_selection_epsilon=0.0, metric='euclidean')

In [None]:
#umap_kwargs = dict(n_neighbors=100, min_dist=0.2, n_components=5, random_state=42)
#hdbscan_kwargs = dict(min_cluster_size=3000, min_samples=30, cluster_selection_method='eom')

In [None]:
#umap_kwargs = dict(n_neighbors=50, min_dist=0.1, n_components=10, random_state=42)
#hdbscan_kwargs = dict(min_cluster_size=1500, min_samples=15, cluster_selection_method='leaf')

In [None]:
%%time

# 1. Standardisation
X_scaled = StandardScaler().fit_transform(X)

# 2. PCA pour compresser à 50 dimensions
pca = PCA(n_components=50, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# 3. UMAP sur PCA
reducer = umap.UMAP(
    n_neighbors=100,   # structure locale
    min_dist=0.2,     # densité de projection
    n_components=20,   # pour clustering
    random_state=42
)
embedding = reducer.fit_transform(X_pca)

# 4. HDBSCAN
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=3000,
    min_samples=30,
    cluster_selection_epsilon=0.05,
    cluster_selection_method='leaf'
).fit(embedding)

labels = clusterer.labels_
n_clusters = len(set(labels[labels >= 0]))
print("clusters:", n_clusters, "noise:", np.sum(labels==-1))

IOStream.flush timed out


In [None]:
#def cluster_pitch_control_with_context(X, n_neighbors=20, n_components=20, min_dist=0.1, min_cluster_size=30):
#    """
#    Applique UMAP + HDBSCAN sur les features combinées PitchControl + Context.
#    Retourne les embeddings UMAP et les labels HDBSCAN.
#    """
#    # ⚖️ Standardisation : utile ici car les context features sont sur des échelles différentes
#    X_scaled = StandardScaler().fit_transform(np.nan_to_num(X))
#
#    # 🌀 Réduction UMAP
#    reducer = umap.UMAP(
#        n_neighbors=n_neighbors,
#        min_dist=min_dist,
#        n_components=n_components,
#        metric="euclidean",
#        random_state=42
#    )
#    embedding = reducer.fit_transform(X_scaled)
#
#    # 🧩 Clustering HDBSCAN
#    clusterer = hdbscan.HDBSCAN(
#        min_cluster_size=min_cluster_size,
#        min_samples=None,
#        cluster_selection_epsilon=0.01,
#        cluster_selection_method='leaf',
#        metric='euclidean'
#    ).fit(embedding)
#
#    labels = clusterer.labels_
#    probs = clusterer.probabilities_
#
#    return embedding, labels, probs, reducer, clusterer

In [None]:
#embedding, labels, probs, reducer, clusterer = cluster_pitch_control_with_context(
#    X=X,
#    n_neighbors=10,
#    n_components=40,
#    min_dist=0.05,
#    min_cluster_size=5
#)

In [None]:
labels

In [None]:
len(set(labels)) - (1 if -1 in labels else 0)  # nombre de clusters (excluant le bruit)

In [None]:
meta["cluster"] = labels

In [None]:
meta_path = f"{CLUSTERING_DIR}/meta_frames_center_forward_w_labels.parquet"
with fs.open(meta_path, "wb") as f:
    meta.to_parquet(f, index=False)

In [None]:
def plot_cluster_summary_pitch_control_with_pitch(
    results,
    meta,
    n_cols=3,
    pitch_length=105,
    pitch_width=68,
    half_pitch=True
):
    """
    Affiche la carte moyenne de pitch control et les features contextuelles moyennes par cluster.
    """

    # Clusters valides (on exclut les outliers HDBSCAN = -1)
    unique_clusters = sorted([c for c in meta["cluster"].unique() if c != -1])
    if len(unique_clusters) == 0:
        print("Aucun cluster valide (hors -1) trouvé dans meta['cluster'].")
        return

    n_clusters = len(unique_clusters)
    n_rows = int(np.ceil(n_clusters / n_cols))

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 6 * n_rows))
    axes = axes.flatten()

    cmap = plt.cm.RdBu_r
    cmap.set_bad(color="white")

    pcm = None  # utilisé pour la colorbar globale

    for i, cluster_id in enumerate(unique_clusters):
        ax = axes[i]

        # ---- Extraire les éléments du cluster ----
        cluster_indices = meta.index[meta["cluster"] == cluster_id].tolist()
        cluster_items = [results[idx] for idx in cluster_indices]

        if len(cluster_items) == 0:
            ax.axis("off")
            continue

        # ---- Moyenne des cartes de pitch control (ignorer NaN) ----
        maps = np.array([item["pitch_control_map"] for item in cluster_items])
        # maps shape = (n_frames, H, W)
        mean_map = np.nanmean(maps, axis=0)  # si certains éléments contiennent NaN, on les ignore

        # ---- Moyenne des lignes défensives ----
        # On s'assure que chaque defensive_lines est un array de longueur 3 (avec NaN si manquant)
        def_lines_list = []
        for item in cluster_items:
            lines = np.array(item.get("defensive_lines", []), dtype=float)
            if lines.size == 0:
                padded = np.array([np.nan, np.nan, np.nan])
            else:
                padded = np.pad(lines, (0, max(0, 3 - len(lines))), mode="constant", constant_values=np.nan)
            def_lines_list.append(padded)
        all_lines = np.vstack(def_lines_list)  # shape (n_frames, 3)
        mean_lines = np.nanmean(all_lines, axis=0)  # résultat shape (3,)
        # si toutes les valeurs d'une colonne sont NaN, np.nanmean renvoie nan -> on détecte ensuite

        # ---- Moyennes des features contextuelles (ignorer NaN) ----
        # utils : colonne peut contenir NaN
        def nanmean_safe(lst):
            arr = np.array(lst, dtype=float)
            if np.all(np.isnan(arr)):
                return np.nan
            return float(np.nanmean(arr))

        mean_dist_ball = nanmean_safe([item.get("distance_to_ball", np.nan) for item in cluster_items])
        mean_dist_tm = nanmean_safe([item.get("distance_to_nearest_teammate", np.nan) for item in cluster_items])
        mean_dist_op = nanmean_safe([item.get("distance_to_nearest_opponent", np.nan) for item in cluster_items])

        # ---- Possession ratio & nombre de frames ----
        in_poss_array = np.array([1.0 if item.get("in_possession", False) else 0.0 for item in cluster_items], dtype=float)
        possession_ratio = float(np.nanmean(in_poss_array)) if in_poss_array.size > 0 else np.nan
        n_frames = len(cluster_items)

        # ---- Création du pitch + grille ----
        pitch = Pitch(
            pitch_type="custom",
            pitch_length=pitch_length,
            pitch_width=pitch_width,
            line_color="black",
            pitch_color="white"
        )

        H, W = mean_map.shape
        x = np.linspace(0, pitch_length, W)
        y = np.linspace(0, pitch_width, H)
        bin_statistic = dict(statistic=mean_map, x_grid=x, y_grid=y)

        # ---- Plot de la heatmap (utilise mean_map, NaN seront affichés en blanc grâce à cmap.set_bad) ----
        pitch.draw(ax=ax)
        pcm = pitch.heatmap(
            bin_statistic,
            ax=ax,
            cmap=cmap,
            vmin=0,
            vmax=1,
            alpha=0.9,
        )

        if half_pitch:
            ax.set_xlim(pitch_length / 2, pitch_length)
            ax.set_ylim(0, pitch_width)

        # ---- Lignes défensives : ne tracer que les composantes non-NaN ----
        if mean_lines.size > 0 and not np.all(np.isnan(mean_lines)):
            colors = ["black", "gray", "silver"]
            for j, x_mean in enumerate(mean_lines):
                if not np.isnan(x_mean):
                    ax.axvline(x=float(x_mean), color=colors[j % len(colors)], linestyle="--", linewidth=2)

        # ---- Titres & annotations ----
        # formatage des moyennes en remplaçant nan par 'n/a'
        def fmt(v):
            return f"{v:.2f}" if (v is not None and not np.isnan(v)) else "n/a"

        stats_text = (
            f"Frames: {n_frames}  |  Possession: {possession_ratio:.0%}\n"
            f"Dist. Ballon: {fmt(mean_dist_ball)} m  |  "
            f"Dist. Coéquipier: {fmt(mean_dist_tm)} m  |  "
            f"Dist. Adversaire: {fmt(mean_dist_op)} m"
        )

        ax.set_title(f"Cluster {cluster_id}", fontsize=13, fontweight="bold")
        ax.text(0.5, -0.08, stats_text, transform=ax.transAxes, fontsize=10, ha="center", va="top")

    # ---- Supprimer les axes vides (derniers) ----
    for j in range(i + 1, len(axes)):
        axes[j].axis("off")

    # ---- Ajustement des marges pour éviter le chevauchement ----
    plt.subplots_adjust(right=0.92, wspace=0.35, hspace=0.45)

    # ---- Barre de couleur globale (placée à droite, hors de la zone des subplots) ----
    if pcm is not None:
        cbar_ax = fig.add_axes([0.94, 0.25, 0.02, 0.5])  # [left, bottom, width, height] en fraction de la fig
        cbar = fig.colorbar(pcm, cax=cbar_ax)
        cbar.set_label("Mean Pitch Control Probability", fontsize=11)

    fig.suptitle("Cartes moyennes de Pitch Control par Cluster", fontsize=18, fontweight="bold")
    plt.show()

In [None]:
plot_cluster_summary_pitch_control_with_pitch(frames_inputs, meta, n_cols=3)

In [None]:
def plot_representative_frames_for_cluster(
    cluster_id,
    X_embedded,
    meta,
    results,
    n_representatives=3,
    pitch_length=105,
    pitch_width=68,
    half_pitch=True
):
    """
    Affiche les n_representatives frames les plus proches du centroïde du cluster donné.
    Montre aussi les lignes défensives et les infos contextuelles sous chaque frame.
    
    Paramètres
    ----------
    cluster_id : int
        Identifiant du cluster à afficher.
    X_embedded : np.ndarray
        Embeddings (UMAP ou PCA) utilisés pour le clustering. shape=(n_frames, n_dim)
    meta : pd.DataFrame
        Métadonnées alignées avec X_embedded (doit contenir 'cluster').
    results : list
        Liste de dictionnaires correspondant à chaque frame.
    n_representatives : int
        Nombre de frames les plus représentatives à afficher.
    pitch_length, pitch_width : float
        Dimensions du terrain.
    half_pitch : bool
        Si True, n'affiche que la moitié offensive du terrain.
    """
    # --- Sélection des frames du cluster ---
    cluster_mask = (meta["cluster"] == cluster_id)
    cluster_indices = np.where(cluster_mask)[0]
    if len(cluster_indices) == 0:
        print(f"Aucun frame trouvé pour le cluster {cluster_id}.")
        return
    
    X_cluster = X_embedded[cluster_indices]

    # --- Approximation du centroïde (moyenne des embeddings du cluster) ---
    centroid = X_cluster.mean(axis=0, keepdims=True)

    # --- Distance à ce centroïde ---
    distances = pairwise_distances(X_cluster, centroid).flatten()
    top_indices = np.argsort(distances)[:n_representatives]
    representative_indices = cluster_indices[top_indices]

    # --- Setup du plot ---
    fig, axes = plt.subplots(1, n_representatives, figsize=(6 * n_representatives, 6))
    if n_representatives == 1:
        axes = [axes]

    pitch = Pitch(
        pitch_type="custom",
        pitch_length=pitch_length,
        pitch_width=pitch_width,
        line_color="black",
        pitch_color="white"
    )

    cmap = plt.cm.RdBu_r
    cmap.set_bad(color="white")

    # --- Boucle sur les frames représentatives ---
    for i, idx in enumerate(representative_indices):
        ax = axes[i]
        frame_data = results[idx]

        # --- Pitch control map ---
        pc_map = frame_data["pitch_control_map"]
        H, W = pc_map.shape
        x = np.linspace(0, pitch_length, W)
        y = np.linspace(0, pitch_width, H)
        bin_statistic = dict(statistic=pc_map, x_grid=x, y_grid=y)

        pitch.draw(ax=ax)
        pcm = pitch.heatmap(bin_statistic, ax=ax, cmap=cmap, vmin=0, vmax=1, alpha=0.9)

        if half_pitch:
            ax.set_xlim(pitch_length / 2, pitch_length)
            ax.set_ylim(0, pitch_width)

        # --- Lignes défensives ---
        def_lines = np.array(frame_data.get("defensive_lines", []), dtype=float)
        if def_lines.size > 0:
            colors = ["black", "gray", "silver"]
            for j, x_line in enumerate(def_lines[:3]):  # max 3 lignes
                ax.axvline(x=float(x_line), color=colors[j % len(colors)], linestyle="--", linewidth=2)

        # --- Informations contextuelles ---
        def fmt(v):
            return f"{v:.2f}" if v is not None and not np.isnan(v) else "n/a"

        dist_ball = frame_data.get("distance_to_ball", np.nan)
        dist_tm = frame_data.get("distance_to_nearest_teammate", np.nan)
        dist_op = frame_data.get("distance_to_nearest_opponent", np.nan)
        in_poss = "✅" if frame_data.get("in_possession", False) else "❌"

        # --- Infos depuis meta ---
        frame_id = meta.loc[idx, "frame"]
        time_min = meta.loc[idx, "time"]
        player = meta.loc[idx, "player_short_name"]
        team = meta.loc[idx, "team_name"]

        # --- Titre au-dessus et texte en-dessous ---
        ax.set_title(
            f"Cluster {cluster_id} | Frame {frame_id}\n{team} - {player}\nMinute {time_min}",
            fontsize=12, fontweight="bold"
        )

        stats_text = (
            f"Possession: {in_poss}  |  Dist. Ballon: {fmt(dist_ball)} m\n"
            f"Dist. Coéquipier: {fmt(dist_tm)} m  |  Dist. Adversaire: {fmt(dist_op)} m"
        )
        ax.text(0.5, -0.12, stats_text, transform=ax.transAxes,
                fontsize=10, ha="center", va="top", wrap=True)

    plt.suptitle(f"Frames les plus représentatives du cluster {cluster_id}", fontsize=16, fontweight="bold")
    plt.tight_layout()
    plt.show()

In [None]:
plot_representative_frames_for_cluster(
    cluster_id=3,
    X_embedded=embedding,
    meta=meta,
    results=frames_inputs,
    n_representatives=3
)