In [27]:
import os
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist, euclidean
from scipy.optimize import linear_sum_assignment

def splitCSV(input_folder, output_folder, distance_threshold=1600):
    """Split every tracking CSV in ``input_folder`` into one CSV per body.

    Each input file is walked row by row. A row is attached to the already
    known body whose last recorded details are most similar to it, where
    similarity is the sum of

    * the Euclidean distance between the pelvis positions, and
    * the absolute differences of the left-arm length
      (shoulder -> elbow -> wrist) and the left-leg length
      (pelvis -> knee -> ankle).

    If no body is known yet, or the best score exceeds
    ``distance_threshold``, the row starts a new body. Body ids start at 1
    and restart for every input file.

    Parameters
    ----------
    input_folder : str
        Directory scanned (non-recursively) for files ending in ``.csv``.
        Each file must provide a ``timestamp`` column and ``_x``/``_y``/``_z``
        columns for PELVIS, SHOULDER_LEFT, ELBOW_LEFT, WRIST_LEFT, KNEE_LEFT
        and ANKLE_LEFT.
    output_folder : str
        Directory that receives the per-body files; created if missing.
        Files are named ``<first timestamp>_<body id>.csv`` with ``:`` and
        ``-`` stripped from the timestamp and spaces replaced by ``_``.
    distance_threshold : float, optional
        Largest combined score at which a row is still attached to an
        existing body. Defaults to 1600, the value previously hard-coded.

    Raises
    ------
    KeyError
        If a required column is missing from an input file.
    """

    def joint_xyz(row, joint):
        # (x, y, z) of one joint, read from the '<JOINT>_x/_y/_z' columns.
        return (row[f'{joint}_x'], row[f'{joint}_y'], row[f'{joint}_z'])

    def calculate_additional_distances(row):
        # Returns (left-arm length, left-leg length) for one row; these act
        # as per-body identifiers in addition to the pelvis position.
        shoulder = joint_xyz(row, 'SHOULDER_LEFT')
        elbow = joint_xyz(row, 'ELBOW_LEFT')
        wrist = joint_xyz(row, 'WRIST_LEFT')
        pelvis = joint_xyz(row, 'PELVIS')
        knee = joint_xyz(row, 'KNEE_LEFT')
        ankle = joint_xyz(row, 'ANKLE_LEFT')

        arm_length = euclidean(shoulder, elbow) + euclidean(elbow, wrist)
        leg_length = euclidean(pelvis, knee) + euclidean(knee, ankle)
        return arm_length, leg_length

    def find_closest_body(new_details, last_known_details):
        # Returns (body_id, score) of the most similar known body, or
        # (None, None) when no body has been seen yet.
        if not last_known_details:
            return None, None

        body_ids = list(last_known_details)
        known = list(last_known_details.values())

        # Distance between the new pelvis position and every known one.
        pelvis_gap = cdist([new_details['coords']],
                           [d['coords'] for d in known],
                           metric='euclidean').flatten()

        # Per-body sum of |arm difference| + |leg difference|.
        limb_gap = np.abs(
            np.array([d['additional_distances'] for d in known])
            - np.array(new_details['additional_distances'])
        ).sum(axis=1)

        score = pelvis_gap + limb_gap
        best = int(np.argmin(score))
        return body_ids[best], score[best]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that runs are reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(input_folder, filename))

        # Per-file state: rows collected per body, and the most recent
        # details of each body used for re-identification.
        bodies_data = {}
        last_known_details = {}

        # NOTE(review): rows with missing (NaN) joint values are not filtered
        # here; a NaN score compares False against the threshold, so confirm
        # the input never contains them.
        for _, row in df.iterrows():
            new_details = {
                'coords': joint_xyz(row, 'PELVIS'),
                'additional_distances': calculate_additional_distances(row),
            }

            body_id, combined_distance = find_closest_body(new_details, last_known_details)

            if body_id is None or combined_distance > distance_threshold:
                # First row of the file, or nothing similar enough: new body.
                body_id = max(bodies_data.keys(), default=0) + 1
                bodies_data[body_id] = []

            bodies_data[body_id].append(row)
            last_known_details[body_id] = new_details

        for body_id, data in bodies_data.items():
            body_df = pd.DataFrame(data)
            # str() keeps this working when pandas parsed the timestamp
            # column as a number instead of text.
            first_timestamp = (str(body_df.iloc[0]['timestamp'])
                               .replace(':', '')
                               .replace('-', '')
                               .replace(' ', '_'))
            output_path = os.path.join(output_folder, f'{first_timestamp}_{body_id}.csv')
            body_df.to_csv(output_path, index=False)
            print(f"Saved: {output_path}")





In [28]:
# Run the splitter over the processed recordings; it writes one CSV per
# detected body into the second folder and prints each saved path.
# NOTE(review): both paths are absolute and machine-specific — consider
# deriving them from a configurable base directory.
splitCSV('/Users/fjonalutaj/Desktop/processed_data', '/Users/fjonalutaj/Desktop/split')

Saved: /Users/fjonalutaj/Desktop/split/20240305_120055.961456_1.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120056.596412_2.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120056.596412_3.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120105.901436_4.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120149.556472_5.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120456.015719_1.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120456.015719_2.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120520.254551_3.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120543.207543_4.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120256.005447_1.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120300.240021_2.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120313.807878_3.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_120325.288435_4.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_121056.474567_1.csv
Saved: /Users/fjonalutaj/Desktop/split/20240305_