In [2]:
# 1. open tracklet file
# 2. group tracklets by entity
# 3. extract all tracklet images for a game
# 4. extract all raw results using florence

import os
import pandas as pd
import numpy as np
from glob import glob

# Collect every `.txt` tracklet file in the player-tracklets directory.
tracklet_fps = glob(
    "/mnt/sun/levlevi/nba-plus-statvu-dataset/player-tracklets" + "/*.txt"
)
# First matched path, used as the worked example in the cells below.
# NOTE(review): raises IndexError when the glob matches nothing.
ex = tracklet_fps[0]


def tracklet_to_df(fp: str):
    """Load a comma-separated tracklet file into a float DataFrame.

    Only the first six fields of each line are kept, in the order
    ``frame, id, x1, y1, width, height``; any trailing fields are ignored.

    Args:
        fp: Path of the tracklet text file to read.

    Returns:
        A DataFrame with columns ``frame``, ``id``, ``x1``, ``y1``, ``width``
        and ``height``, all converted to float. A file with no non-blank lines
        yields an empty DataFrame with those columns.

    Raises:
        ValueError: If a kept field cannot be converted to float, or a
            non-blank line has fewer than six fields.
    """
    columns = ["frame", "id", "x1", "y1", "width", "height"]
    # Read the file named by the argument (not a notebook-level global).
    with open(fp, "r") as f:
        # Skip blank lines so a trailing newline does not produce a ragged row.
        data = [
            line.strip().split(",")[: len(columns)] for line in f if line.strip()
        ]
    if not data:
        # Nothing to convert; a 2-D slice of an empty array would fail.
        return pd.DataFrame(columns=columns, dtype=float)
    # Convert to float for numerical operations
    data_array = np.array(data, dtype=float)
    return pd.DataFrame(data_array, columns=columns)


# Parse the example tracklet file and preview its first rows.
df = tracklet_to_df(ex)
df.head()

Unnamed: 0,frame,id,x1,y1,width,height
0,61.0,1.0,485.6,268.6,73.4,107.0
1,61.0,2.0,966.2,255.7,73.5,127.8
2,61.0,3.0,889.3,228.3,65.4,110.7
3,62.0,1.0,489.3,265.6,65.3,108.3
4,62.0,2.0,962.0,254.0,72.1,133.1


In [3]:
def remove_short_tracklets(df, min_frames=30):
    """Drop every id that never appears in a long enough run of frames.

    A run is a maximal sequence of rows with the same ``id`` whose ``frame``
    values each exceed the previous one by exactly 1. An id is kept when its
    longest run contains at least ``min_frames`` rows; all rows of a kept id
    are returned, including those outside the long run.

    Args:
        df: DataFrame with at least ``id`` and ``frame`` columns.
        min_frames: Minimum run length (in rows) required to keep an id.

    Returns:
        The rows of ``df`` belonging to kept ids, with the original index and
        row order. ``df`` itself is not modified.
    """
    ordered = df.sort_values(by=["id", "frame"])
    # A row opens a new run when the id changes or the frame does not directly
    # follow the previous row's frame.
    starts_run = (ordered["id"] != ordered["id"].shift()) | (
        ordered["frame"].diff() != 1
    )
    run_label = starts_run.cumsum().rename("run")
    # Count rows per run so the first frame of every run is included.
    run_sizes = ordered.groupby([ordered["id"], run_label]).size()
    longest_run = run_sizes.groupby(level="id").max()
    valid_ids = longest_run[longest_run >= min_frames].index
    # Filter the DataFrame to keep only valid IDs
    return df[df["id"].isin(valid_ids)]


# Apply the short-tracklet filter with its default threshold and preview the result.
df_filtered = remove_short_tracklets(df)
df_filtered.head()

Unnamed: 0,frame,id,x1,y1,width,height
0,61.0,1.0,485.6,268.6,73.4,107.0
1,61.0,2.0,966.2,255.7,73.5,127.8
2,61.0,3.0,889.3,228.3,65.4,110.7
3,62.0,1.0,489.3,265.6,65.3,108.3
4,62.0,2.0,962.0,254.0,72.1,133.1


In [4]:
# 305290
# 303337


def get_video_fp_from_tracklet_fp(tracklet_fp: str):
    """Placeholder for mapping a tracklet file path to a video file path.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    return None


# Display the first tracklet path to inspect the file naming scheme.
tracklet_fps[0]

'/mnt/sun/levlevi/nba-plus-statvu-dataset/player-tracklets/168270_12-05-2015_3281_utah jazz_9_indiana pacers_period4.txt'

In [5]:
# Directory globbed below for `.mp4` files.
GAME_REPLAYS = "/mnt/sun/levlevi/nba-plus-statvu-dataset/game-replays"
# Directory globbed below for `.txt` tracklet files.
PLAYER_TRACKLETS = "/mnt/sun/levlevi/nba-plus-statvu-dataset/player-tracklets"
replays_dir = GAME_REPLAYS
tracklet_dirs = PLAYER_TRACKLETS
tracklet_file_paths = glob(tracklet_dirs + '/*.txt')
video_file_paths = glob(replays_dir + '/*.mp4')
# Maps each video's LOWERCASED base file name to its full path; lookups must
# lowercase their key to match. Names differing only by case collapse to one entry.
video_file_paths_map = {os.path.basename(fp).lower(): fp for fp in video_file_paths}

In [6]:
# Minimum number of rows a run of consecutive frames must contain for
# format_tracklets_for_reid to emit it as a tracklet.
MIN_ARR_SIZE = 30
# Column names handed to pd.read_csv when loading a tracklet file.
NAMES = ["frame", "entity_id", "x1", "y1", "width", "height", "conf"]
# First tracklet path from the glob above.
# NOTE(review): not referenced by the other visible cells, which use `ex` instead.
example_fp = tracklet_file_paths[0]


def _build_tracklet_row(video_file_path, e_id, entity_rows):
    """Package one run of consecutive detections as a row of the result frame."""
    tracklet = pd.DataFrame(
        entity_rows,
        columns=[
            "video_file_path",
            "frame",
            "x1",
            "y1",
            "width",
            "height",
            "conf",
        ],
    )
    return [video_file_path, e_id, tracklet]


def format_tracklets_for_reid(tracklet_fp: str):
    """Split a tracklet file into runs of consecutive frames per entity.

    Rows are read with the column names in ``NAMES``. For each entity id (in
    order of first appearance) the rows are walked in file order and cut
    wherever a frame is not exactly one greater than the previous row's frame.
    Runs with at least ``MIN_ARR_SIZE`` rows are kept and numbered with a new
    sequential id starting at 0.

    Args:
        tracklet_fp: Path of the tracklet text file.

    Returns:
        A DataFrame with columns ``video_path_name`` (the video path looked up
        in ``video_file_paths_map``, or ``None`` when no video matches),
        ``entity_id`` (the new sequential run id) and ``tracklet_dataframe``
        (a DataFrame of that run's rows).
    """
    # Read CSV into DataFrame
    # NOTE(review): `usecols` looks redundant but is kept as-is; it may affect
    # how pandas treats files with more columns than NAMES - confirm before removing.
    df = pd.read_csv(tracklet_fp, sep=",", names=NAMES, usecols=NAMES[:7])
    # Extract video file path. The map is keyed by lowercased base names, so
    # the lookup key is lowercased as well.
    stem, _ = os.path.splitext(os.path.basename(tracklet_fp))
    video_path_name = (stem + ".mp4").lower()
    video_file_path = video_file_paths_map.get(video_path_name)
    rows = []
    e_id = 0
    # Iterate through each entity in order of first appearance; a single
    # groupby pass avoids re-scanning the whole frame for every id.
    for _, df_entity in df.groupby("entity_id", sort=False):
        last_frame_idx = None
        run_rows = []
        for row in df_entity.itertuples(index=False):
            # A break in frame continuity closes the current run.
            if last_frame_idx is not None and row.frame != last_frame_idx + 1:
                if len(run_rows) >= MIN_ARR_SIZE:
                    rows.append(_build_tracklet_row(video_file_path, e_id, run_rows))
                    e_id += 1
                run_rows = []
            run_rows.append(
                [
                    video_file_path,
                    row.frame,
                    row.x1,
                    row.y1,
                    row.width,
                    row.height,
                    row.conf,
                ]
            )
            last_frame_idx = row.frame
        # Emit the run still open when the entity's rows are exhausted.
        if len(run_rows) >= MIN_ARR_SIZE:
            rows.append(_build_tracklet_row(video_file_path, e_id, run_rows))
            e_id += 1
    tracklets_df = pd.DataFrame(
        rows, columns=["video_path_name", "entity_id", "tracklet_dataframe"]
    )
    return tracklets_df

In [7]:
# Build the per-run tracklet table for the example file and display it.
# NOTE(review): this rebinds `df`, which earlier held the output of tracklet_to_df.
df = format_tracklets_for_reid(ex)
df

Unnamed: 0,video_path_name,entity_id,tracklet_dataframe
0,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,0,video_fi...
1,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,1,video_f...
2,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,2,video_f...
3,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,3,video_f...
4,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,4,video_fi...
...,...,...,...
1278,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,1278,video_f...
1279,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,1279,video_fi...
1280,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,1280,video_fi...
1281,/mnt/sun/levlevi/nba-plus-statvu-dataset/game-...,1281,video_fi...
