In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from pathlib import Path
import os
from tqdm import tqdm
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
import cv2
import re
import shutil
from pathlib import Path
from sorted_alpha import sorted_alpha
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
from scipy import interpolate
from frame_count import frame_count
from scipy.signal import savgol_filter
from scipy.spatial import distance


In [None]:
def process_skeleton_data(csv_file_path):
    """Load one frame's skeleton CSV and return the usable skeletons.

    The code treats the CSV as one leading column followed by groups of three
    columns per skeleton; only the first two columns of each group are kept
    (the original comments call them x and y). Row label 1 is treated as the
    neck keypoint (per the original comments).

    Args:
        csv_file_path: Path of the CSV file to read with ``pd.read_csv``.

    Returns:
        ``(detections, neck_keypoints)`` where ``detections`` is an array
        stacking one ``(num_keypoints, 2)`` block per kept skeleton and
        ``neck_keypoints`` holds row 1 of each block, or ``(None, None)`` when
        fewer than four columns remain after filtering.
    """
    frame_table = pd.read_csv(csv_file_path)
    skeleton_columns = frame_table.columns[1:]

    # Collect the three columns of every skeleton whose neck value (first
    # column of the group, row label 1) is missing or zero.
    columns_to_drop = []
    for start in range(0, len(skeleton_columns), 3):
        neck_value = frame_table[skeleton_columns[start]].loc[1]
        if pd.isna(neck_value) or neck_value == 0:
            columns_to_drop.extend(
                [skeleton_columns[start], skeleton_columns[start + 1], skeleton_columns[start + 2]]
            )
    kept = frame_table.drop(columns_to_drop, axis=1)

    # Nothing usable left: the leading column plus one full skeleton needs 4 columns.
    if len(kept.columns) < 4:
        return None, None

    # Keep at most the leading column plus three skeletons (10 columns).
    if len(kept.columns) > 10:
        kept = kept.drop(kept.columns[10:], axis=1)

    # Each window of three columns starts one column *before* a skeleton's
    # first coordinate, so discarding the window's first column leaves that
    # skeleton's two coordinate columns.
    detections = [
        kept.iloc[:, start:start + 3].values[:, 1:]
        for start in range(0, len(kept.columns) - 1, 3)
    ]
    neck_keypoints = [skeleton[1] for skeleton in detections]

    return np.array(detections), np.array(neck_keypoints)


In [None]:
### COMBINE multiple bbox features
import numpy as np

def relative_position(tracks, detection_buffer):
    """Assign every buffered detection to track 1 or track 2.

    Each detection is scored against both tracks using bounding-box features
    (area, aspect ratio, centre, diagonal, velocity, IoU with the track's last
    detection); the track with the lower score wins, ties go to track 2.

    NOTE: a later cell in this notebook defines another ``relative_position``,
    which shadows this one once that cell has been executed.

    Args:
        tracks: Sequence of tracks; only the first two are used. Each track is
            a sequence of ``(detection, frame_number)`` pairs, a detection
            being a sequence of ``[x, y]`` keypoints (NaN marks a missing
            coordinate). Mean features are taken over consecutive pairs in the
            last 50 entries, so a track with fewer than two entries yields NaN
            means (every detection then falls through to track 2).
        detection_buffer: Sequence of ``(detections, frame_number)`` pairs.

    Returns:
        A list with one entry (``1`` or ``2``) per buffered detection, in
        buffer order.

    Raises:
        ValueError: If a detection is ``None``, empty, or its first element is
            not a list/ndarray.
    """
    # Per-feature weights of the score; read by calculate_score via closure.
    weights = {
        'area': 0.3,
        'aspect_ratio': 0.5,
        'center': 1.0,
        'diagonal': 1.0,
        'velocity': 1.0,
        'iou': 1.0
    }

    def calculate_bounding_box(keypoints):
        """Return (x_min, y_min, x_max, y_max, width, height) over non-NaN keypoints."""
        if keypoints is None or len(keypoints) == 0 or not isinstance(keypoints[0], (list, np.ndarray)):
            print(f"Invalid keypoints format: {keypoints}")
            raise ValueError("Invalid keypoints format")
        x_coordinates = []
        y_coordinates = []
        for point in keypoints:
            if isinstance(point, (list, np.ndarray)) and len(point) == 2:
                if not np.isnan(point).any():
                    x_coordinates.append(point[0])
                    y_coordinates.append(point[1])
            else:
                print(f"Invalid point format: {point}")
        if len(x_coordinates) == 0 or len(y_coordinates) == 0:
            # All keypoints missing: return a degenerate box. This must have
            # six elements because every caller unpacks six values (the
            # previous four-element return raised ValueError at the call site).
            return 0, 0, 0, 0, 0, 0
        x_min, x_max = np.nanmin(x_coordinates), np.nanmax(x_coordinates)
        y_min, y_max = np.nanmin(y_coordinates), np.nanmax(y_coordinates)
        width = x_max - x_min
        height = y_max - y_min
        return x_min, y_min, x_max, y_max, width, height

    def calculate_iou(box1, box2):
        """Intersection-over-union of two (x_min, y_min, x_max, y_max) boxes; 0 for an empty union."""
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2

        xi1 = max(x1_min, x2_min)
        yi1 = max(y1_min, y2_min)
        xi2 = min(x1_max, x2_max)
        yi2 = min(y1_max, y2_max)
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
        union_area = box1_area + box2_area - inter_area

        return inter_area / union_area if union_area != 0 else 0

    def calculate_center(keypoints):
        """Mean x and mean y of the non-NaN coordinates; (0, 0) when none are valid."""
        x_coordinates = [point[0] for point in keypoints if not np.isnan(point[0])]
        y_coordinates = [point[1] for point in keypoints if not np.isnan(point[1])]
        if len(x_coordinates) == 0 or len(y_coordinates) == 0:
            return 0, 0
        return np.mean(x_coordinates), np.mean(y_coordinates)

    def calculate_diagonal(width, height):
        """Length of the bounding-box diagonal."""
        return np.sqrt(width**2 + height**2)

    def calculate_velocity(center1, center2):
        """Euclidean distance between two centres."""
        return np.sqrt((center2[0] - center1[0])**2 + (center2[1] - center1[1])**2)

    def calculate_mean_features(track):
        """Mean area, aspect ratio, centre, diagonal and velocity over the last 50 entries."""
        recent = track[-50:]  # slice once instead of on every iteration
        areas = []
        aspect_ratios = []
        centers = []
        diagonals = []
        velocities = []
        # Starts at 1 because velocity needs the previous detection.
        for i in range(1, len(recent)):
            detection, _ = recent[i]
            prev_detection, _ = recent[i - 1]
            _, _, _, _, width, height = calculate_bounding_box(detection)
            prev_center = calculate_center(prev_detection)
            center = calculate_center(detection)
            areas.append(width * height)
            aspect_ratios.append(width / height if height != 0 else 0)
            centers.append(center)
            diagonals.append(calculate_diagonal(width, height))
            velocities.append(calculate_velocity(prev_center, center))
        return np.mean(areas), np.mean(aspect_ratios), np.mean(centers, axis=0), np.mean(diagonals), np.mean(velocities)

    def calculate_score(detection, mean_area, mean_aspect_ratio, mean_center, mean_diagonal, mean_velocity, last_detection):
        """Weighted distance of a detection from a track's mean features; lower is a better match."""
        x_min, y_min, x_max, y_max, width, height = calculate_bounding_box(detection)
        area = width * height
        aspect_ratio = width / height if height != 0 else 0
        center = calculate_center(detection)
        diagonal = calculate_diagonal(width, height)
        velocity = calculate_velocity(calculate_center(last_detection), center)
        iou = calculate_iou((x_min, y_min, x_max, y_max), calculate_bounding_box(last_detection)[:4])
        # Overlap with the track's last box lowers the score, hence the subtraction.
        return (weights['area'] * abs(area - mean_area) +
                weights['aspect_ratio'] * abs(aspect_ratio - mean_aspect_ratio) +
                weights['center'] * np.linalg.norm(np.array(center) - np.array(mean_center)) +
                weights['diagonal'] * abs(diagonal - mean_diagonal) +
                weights['velocity'] * abs(velocity - mean_velocity) -
                weights['iou'] * iou)

    # Flatten the buffered detections into one list, preserving buffer order.
    all_detections = [det for sublist in detection_buffer for det in sublist[0]]

    track1, track2 = tracks[:2]
    mean_features_track1 = calculate_mean_features(track1)
    mean_features_track2 = calculate_mean_features(track2)

    track_assignments = []
    for detection in all_detections:
        score_track1 = calculate_score(detection, *mean_features_track1, track1[-1][0])
        score_track2 = calculate_score(detection, *mean_features_track2, track2[-1][0])
        # Strictly lower score wins track 1; ties and NaN comparisons fall to track 2.
        track_assignments.append(1 if score_track1 < score_track2 else 2)

    return track_assignments

In [None]:
### with smoothing 
import numpy as np

from scipy.signal import savgol_filter

def savitzky_golay(data, window_size=5, polyorder=2):
    """Smooth ``data`` with ``scipy.signal.savgol_filter``.

    Args:
        data: Sequence to smooth.
        window_size: Filter window length handed to ``savgol_filter``.
        polyorder: Polynomial order handed to ``savgol_filter``.

    Returns:
        The filtered array, or ``data`` itself (same object, unmodified) when
        ``len(data)`` is smaller than ``window_size``.
    """
    # Only filter when there are enough samples to fill one window.
    if len(data) >= window_size:
        return savgol_filter(data, window_size, polyorder)
    return data

def relative_position(tracks, detection_buffer):
    """Assign every buffered detection to track 1 or track 2 (smoothing variant).

    Each detection is scored against both tracks using bounding-box features
    (area, aspect ratio, centre, diagonal, velocity, IoU with the track's last
    detection); the track with the lower score wins, ties go to track 2.

    Side effect: the last 50 entries of every track in ``tracks`` are replaced
    in place by their "smoothed" versions (each detection becomes a list of
    per-keypoint arrays).

    NOTE(review): smoothing is applied to each keypoint on its own. A keypoint
    is an ``[x, y]`` pair, which is shorter than ``savitzky_golay``'s default
    window of 5, so the helper returns it unchanged - the values are not
    actually smoothed over time. Confirm whether temporal smoothing across
    frames was intended.

    Args:
        tracks: Sequence of tracks; only the first two are scored. Each track
            is a mutable list of ``(detection, frame_number)`` pairs, a
            detection being a sequence of ``[x, y]`` keypoints (NaN marks a
            missing coordinate). Mean features use every consecutive pair in
            the whole track, so a track with fewer than two entries yields NaN
            means (every detection then falls through to track 2).
        detection_buffer: Sequence of ``(detections, frame_number)`` pairs.

    Returns:
        A list with one entry (``1`` or ``2``) per buffered detection, in
        buffer order.

    Raises:
        ValueError: If a detection is ``None``, empty, or its first element is
            not a list/ndarray.
    """

    def calculate_bounding_box(keypoints):
        """Return (x_min, y_min, x_max, y_max, width, height) over non-NaN keypoints."""
        if keypoints is None or len(keypoints) == 0 or not isinstance(keypoints[0], (list, np.ndarray)):
            print(f"Invalid keypoints format: {keypoints}")
            raise ValueError("Invalid keypoints format")
        x_coordinates = []
        y_coordinates = []
        for point in keypoints:
            if isinstance(point, (list, np.ndarray)) and len(point) == 2:
                if not np.isnan(point).any():
                    x_coordinates.append(point[0])
                    y_coordinates.append(point[1])
            else:
                print(f"Invalid point format: {point}")
        if len(x_coordinates) == 0 or len(y_coordinates) == 0:
            # All keypoints missing: return a degenerate box. This must have
            # six elements because every caller unpacks six values (the
            # previous four-element return raised ValueError at the call site).
            return 0, 0, 0, 0, 0, 0
        x_min, x_max = np.nanmin(x_coordinates), np.nanmax(x_coordinates)
        y_min, y_max = np.nanmin(y_coordinates), np.nanmax(y_coordinates)
        width = x_max - x_min
        height = y_max - y_min
        return x_min, y_min, x_max, y_max, width, height

    def calculate_iou(box1, box2):
        """Intersection-over-union of two (x_min, y_min, x_max, y_max) boxes; 0 for an empty union."""
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2

        xi1 = max(x1_min, x2_min)
        yi1 = max(y1_min, y2_min)
        xi2 = min(x1_max, x2_max)
        yi2 = min(y1_max, y2_max)
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
        union_area = box1_area + box2_area - inter_area

        return inter_area / union_area if union_area != 0 else 0

    def calculate_center(keypoints):
        """Mean x and mean y of the non-NaN coordinates; (0, 0) when none are valid."""
        x_coordinates = [point[0] for point in keypoints if not np.isnan(point[0])]
        y_coordinates = [point[1] for point in keypoints if not np.isnan(point[1])]
        if len(x_coordinates) == 0 or len(y_coordinates) == 0:
            return 0, 0
        return np.mean(x_coordinates), np.mean(y_coordinates)

    def calculate_diagonal(width, height):
        """Length of the bounding-box diagonal."""
        return np.sqrt(width**2 + height**2)

    def calculate_velocity(center1, center2):
        """Euclidean distance between two centres."""
        return np.sqrt((center2[0] - center1[0])**2 + (center2[1] - center1[1])**2)

    def calculate_mean_features(track):
        """Mean area, aspect ratio, centre, diagonal and velocity over the whole track."""
        areas = []
        aspect_ratios = []
        centers = []
        diagonals = []
        velocities = []
        # Starts at 1 because velocity needs the previous detection.
        for i in range(1, len(track)):
            detection, _ = track[i]
            prev_detection, _ = track[i - 1]
            _, _, _, _, width, height = calculate_bounding_box(detection)
            prev_center = calculate_center(prev_detection)
            center = calculate_center(detection)
            areas.append(width * height)
            aspect_ratios.append(width / height if height != 0 else 0)
            centers.append(center)
            diagonals.append(calculate_diagonal(width, height))
            velocities.append(calculate_velocity(prev_center, center))
        return np.mean(areas), np.mean(aspect_ratios), np.mean(centers, axis=0), np.mean(diagonals), np.mean(velocities)

    def calculate_score(detection, mean_area, mean_aspect_ratio, mean_center, mean_diagonal, mean_velocity, last_detection, weights):
        """Weighted distance of a detection from a track's mean features; lower is a better match."""
        x_min, y_min, x_max, y_max, width, height = calculate_bounding_box(detection)
        area = width * height
        aspect_ratio = width / height if height != 0 else 0
        center = calculate_center(detection)
        diagonal = calculate_diagonal(width, height)
        velocity = calculate_velocity(calculate_center(last_detection), center)
        iou = calculate_iou((x_min, y_min, x_max, y_max), calculate_bounding_box(last_detection)[:4])
        # Overlap with the track's last box lowers the score, hence the subtraction.
        return (weights['area'] * abs(area - mean_area) +
                weights['aspect_ratio'] * abs(aspect_ratio - mean_aspect_ratio) +
                weights['center'] * np.linalg.norm(np.array(center) - np.array(mean_center)) +
                weights['diagonal'] * abs(diagonal - mean_diagonal) +
                weights['velocity'] * abs(velocity - mean_velocity) -
                weights['iou'] * iou)

    def smooth_detection(detection):
        """Run every keypoint of one detection through savitzky_golay."""
        return [savitzky_golay(np.array(keypoint)) for keypoint in detection]

    # Flatten the buffered detections into one list, preserving buffer order,
    # and pass each one through the smoothing helper.
    all_detections = [det for sublist in detection_buffer for det in sublist[0]]
    smoothed_detections = [smooth_detection(detection) for detection in all_detections]

    # Replace the last 50 entries of each track with their smoothed versions.
    # This writes into the caller's lists.
    for track in tracks:
        track[-50:] = [
            (smooth_detection(detection), frame_number)
            for detection, frame_number in track[-50:]
        ]

    track1, track2 = tracks[:2]
    mean_features_track1 = calculate_mean_features(track1)
    mean_features_track2 = calculate_mean_features(track2)

    # Per-feature weights of the score (all features weighted equally here).
    weights = {
        'area': 1.0,
        'aspect_ratio': 1.0,
        'center': 1.0,
        'diagonal': 1.0,
        'velocity': 1.0,
        'iou': 1.0
    }

    track_assignments = []
    for detection in smoothed_detections:
        score_track1 = calculate_score(detection, *mean_features_track1, track1[-1][0], weights)
        score_track2 = calculate_score(detection, *mean_features_track2, track2[-1][0], weights)
        # Strictly lower score wins track 1; ties and NaN comparisons fall to track 2.
        track_assignments.append(1 if score_track1 < score_track2 else 2)

    return track_assignments

In [None]:

    

from scipy.spatial.distance import euclidean
import os
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.optimize import linear_sum_assignment

# Initialize tracking variables
tracks = []   # one list of (detection, frame_number) tuples per tracked skeleton
frames = []   # frame number of every processed file, in processing order
knn = NearestNeighbors(n_neighbors=1, metric='euclidean')
threshold = 80  # not referenced anywhere in this cell
# Example usage
path = '/Users/andrei-macpro/Documents/Data/pose/play_openpose/1264_play'
detects = sorted_alpha(path)
# Frame number = digits between the last '_' and the first following '.' of each file name
numbers = [int(f.split('_')[-1].split('.')[0]) for f in detects]
buffer_size = 5  # Number of frames to buffer
warmup_frames = 50  # files with index below this are matched by nearest neck instead of buffered scoring
detection_buffer = []

for i, (filename, number) in enumerate(zip(detects, numbers)):
    frames.append(number)
    print('frame no:', number)

    # process_skeleton_data reads the CSV itself, so the file is read once per frame.
    detections, necks = process_skeleton_data(os.path.join(path, filename))

    # Skip frames without any usable skeleton (also guards the very first frame,
    # which previously crashed on len(None)).
    if necks is None or len(necks) == 0 or all(neck is None for neck in necks):
        continue

    if not tracks:
        # First usable frame: every detection starts its own track.
        for detection in detections:
            tracks.append([(detection, number)])
        continue

    if i < warmup_frames:
        # Warm-up phase: match detections to tracks by the distance between the
        # new neck keypoint and each track's most recent neck keypoint.
        last_necks = np.array([track[-1][0][1] for track in tracks])

        # Fit the KNN model on the last neck positions
        knn.fit(last_necks)
        # Keep track of which detections have been assigned
        assigned_detections = set()
        # (distance, track index, detection index) triples
        distances_indices = []
        necks_array = np.array(necks)
        num_detections = len(necks_array)

        if num_detections == 1:
            if len(tracks) == 1:
                tracks[0].append((detections[0], number))
            else:
                # Assign the single detection to the closest track
                dist, index = knn.kneighbors([necks[0]])
                tracks[index[0][0]].append((detections[0], number))
        else:
            # Find the closest track for each new detection. The original had
            # three branches with byte-identical bodies here; they are merged.
            for j, neck in enumerate(necks):
                dist, index = knn.kneighbors([neck])
                distances_indices.append((dist[0][0], index[0][0], j))
            # Closest pairs first; keep at most one detection per existing track slot
            distances_indices.sort()
            for dist, closest_track_index, j in distances_indices[:len(tracks)]:
                tracks[closest_track_index].append((detections[j], number))
                assigned_detections.add(j)

    else:
        # Buffered phase. Detections are buffered only here: buffering during
        # the warm-up as well (as before) meant the first flush re-appended
        # every warm-up detection to the tracks a second time.
        detection_buffer.append((detections, number))

        # Process buffer if it reaches the buffer size
        # NOTE(review): a final partial buffer (fewer than buffer_size frames)
        # is never flushed, and relative_position unpacks exactly two tracks.
        if len(detection_buffer) >= buffer_size:
            # Flatten the buffered detections, repeating each frame number once per detection
            all_detections = [det for sublist in detection_buffer for det in sublist[0]]
            all_frame_numbers = [frame_number for sublist in detection_buffer for frame_number in [sublist[1]] * len(sublist[0])]

            # Assign detections to tracks using buffered data (entries are 1 or 2)
            track_assignments = relative_position(tracks, detection_buffer)

            # Update tracks with buffered detections
            for detection, frame_number, assignment in zip(all_detections, all_frame_numbers, track_assignments):
                if assignment == 1:
                    tracks[0].append((detection, frame_number))
                else:
                    tracks[1].append((detection, frame_number))

            # Clear the buffer
            detection_buffer = []

In [None]:
# Display the first track: a list of (detection, frame_number) tuples.
tracks[0]

In [None]:
import pandas as pd

## Tabulate the tracks: one row per (frame, keypoint), one X/Y column pair per track

# A track is a list of (detection, frame) pairs and a detection is a sequence of
# keypoints, each unpacked below as (x, y).

# One (detection, frame, track_index) triple per tracked detection
flattened_tracks = [
    (detection, frame, track_index)
    for track_index, track in enumerate(tracks)
    for detection, frame in track
]

# Long format: one record per keypoint of every detection
data = [
    {'Frame': frame, 'Track': track_index, 'Keypoint': keypoint_index, 'X': x, 'Y': y}
    for detection, frame, track_index in flattened_tracks
    for keypoint_index, (x, y) in enumerate(detection)
]
df_tracks = pd.DataFrame(data)

# Wide format: rows indexed by (Frame, Keypoint), columns by (coordinate, track).
# pivot_table aggregates duplicate (Frame, Keypoint, Track) rows with its default aggfunc.
df_pivot = df_tracks.pivot_table(
    values=['X', 'Y'],
    index=['Frame', 'Keypoint'],
    columns='Track',
)

# Collapse the two-level column index into names such as "X_Track0"
df_pivot.columns = [f'{coord}_Track{track}' for coord, track in df_pivot.columns]

# Turn 'Frame' and 'Keypoint' back into ordinary columns
df_pivot = df_pivot.reset_index()

# Show the resulting table
print(df_pivot)

In [None]:
video_path = '/Users/andrei-macpro/Documents/Data/videos/play_videos/1264_play.mp4'
output_path = '/Users/andrei-macpro/Documents/Data/videos/1264_play_annotated_multiple_.mp4'
colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]

cap = cv2.VideoCapture(video_path)
# Get the width and height of the frames
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as reported: truncating to int (e.g. 29.97 -> 29) makes
# the written video play at a different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Flag to toggle frame number display
show_frame_number = True

# 1-based index of the video frame being annotated; matched against df_pivot['Frame'].
# Deliberately NOT named `frame_count`: that name is the function imported at the
# top of the notebook (`from frame_count import frame_count`) and must not be overwritten.
frame_idx = 1
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Get the rows for the current frame
        frame_data = df_pivot[df_pivot['Frame'] == frame_idx]

        # Iterate over each track
        for track in range(len(tracks)):
            x_col = f'X_Track{track}'
            y_col = f'Y_Track{track}'
            # Skip tracks that have no columns in the pivoted table instead of
            # failing with a KeyError.
            if x_col not in frame_data.columns or y_col not in frame_data.columns:
                continue

            # Get the keypoints for the current track
            keypoints = frame_data[[x_col, y_col]].values

            # Choose a color for the current track (cycles when there are more tracks than colors)
            color = colors[track % len(colors)]

            # Draw a small box around each keypoint and label it with the track number
            for x, y in keypoints:
                if not pd.isna(x) and not pd.isna(y):
                    cv2.rectangle(frame, (int(x) - 5, int(y) - 5), (int(x) + 5, int(y) + 5), color, 2)
                    cv2.putText(frame, f'Track {track}', (int(x) + 10, int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)

        # Display the frame number if the flag is set
        if show_frame_number:
            cv2.putText(frame, f'Frame {frame_idx}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Write the frame to the output video
        out.write(frame)

        frame_idx += 1
finally:
    # Always release the capture and the writer, even if annotation fails
    # part-way, so the output file is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [None]:
# Explicit %pip magic: installs into the running kernel's environment and, unlike
# a bare `pip ...` line (automagic), keeps working when the cell has several lines.
# Only one OpenCV wheel is installed: opencv-python and opencv-python-headless both
# provide the `cv2` module and must not be installed together.
%pip install opencv-python numpy torch torchvision

In [None]:
# Explicit %pip magic: installs into the running kernel's environment and does
# not depend on automagic being enabled.
%pip install deep_sort_realtime

In [None]:
import cv2
import torch
from torchvision import transforms
from deep_sort_realtime.deepsort_tracker import DeepSort

# Fetch the 'yolov5s' entry point of the ultralytics/yolov5 repository via torch.hub
model = torch.hub.load(
    'ultralytics/yolov5',
    'yolov5s',
)

# Create the DeepSORT tracker
deepsort = DeepSort(
    max_age=30,
    n_init=3,
    nms_max_overlap=1.0,
    max_cosine_distance=0.2,
)

In [None]:
# Explicit %pip magic: upgrades inside the running kernel's environment and does
# not depend on automagic being enabled. Restart the kernel afterwards so the
# upgraded packages are the ones imported.
%pip install --upgrade torch torchvision

In [None]:
# Explicit %conda magic: targets the running kernel's environment and does not
# depend on automagic being enabled.
# NOTE(review): another cell installs torch/torchvision with pip; mixing the pip
# and conda builds of the same packages in one environment should be confirmed
# as intentional.
%conda install pytorch torchvision torchaudio -c pytorch-nightly

In [None]:
import time
import cv2
import torch
from torchvision import transforms
from deep_sort_realtime.deepsort_tracker import DeepSort

# Reference timestamp for every elapsed-time report below
start_time = time.time()

# Use the "mps" device when torch reports it as available, otherwise the CPU
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

# Elapsed time for the device selection
device_time = time.time()
print(f"Time to set device: {device_time - start_time:.2f} seconds")

# Fetch the 'yolov5n' entry point of ultralytics/yolov5 via torch.hub on the chosen device
model = torch.hub.load(
    'ultralytics/yolov5',
    'yolov5n',
    device=device,
)

# Elapsed time for the model load
model_time = time.time()
print(f"Time to load YOLO model: {model_time - device_time:.2f} seconds")

# Create the DeepSORT tracker
deepsort = DeepSort(
    max_age=30,
    n_init=3,
    nms_max_overlap=1.0,
    max_cosine_distance=0.2,
)

# Elapsed time for the tracker construction
deepsort_time = time.time()
print(f"Time to initialize DeepSORT: {deepsort_time - model_time:.2f} seconds")

# Elapsed time for the whole cell
total_time = time.time()
print(f"Total initialization time: {total_time - start_time:.2f} seconds")

In [None]:
import os
import time
import cv2
import torch
from torchvision import transforms
from deep_sort_realtime.deepsort_tracker import DeepSort

# Reference timestamp for every elapsed-time report below
start_time = time.time()

# Use the "mps" device when torch reports it as available, otherwise the CPU
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

# Local checkout of the YOLOv5 repository and the weights file to load from it
repo_path = os.path.expanduser('/Users/andrei-macpro/Downloads/yolov5')
weights_path = os.path.expanduser('/Users/andrei-macpro/Downloads/yolov5n.pt')

# Load the 'custom' entry point from the local repository with the local weights
model = torch.hub.load(
    repo_path,
    'custom',
    path=weights_path,
    source='local',
    device=device,
)

# Elapsed time since the start of the cell (covers device selection and model load)
model_time = time.time()
print(f"Time to load YOLO model: {model_time - start_time:.2f} seconds")

# Create the DeepSORT tracker
deepsort = DeepSort(
    max_age=30,
    n_init=3,
    nms_max_overlap=1.0,
    max_cosine_distance=0.2,
)

# Elapsed time for the tracker construction
deepsort_time = time.time()
print(f"Time to initialize DeepSORT: {deepsort_time - model_time:.2f} seconds")

# Elapsed time for the whole cell
total_time = time.time()
print(f"Total initialization time: {total_time - start_time:.2f} seconds")