# Functions for process and analyze labels of Yolov8

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

class YOLOv8DataFrameBuilder:
    def __init__(self, labels_dir):
        self.labels_dir = labels_dir
        self.tracking_data = []
        self.malformed_lines_count = 0

    def load_data(self):
        """Load and process all tracking data from text files in the directory."""
        for filename in os.listdir(self.labels_dir):
            if filename.endswith(".txt"):
                treatment, rep, frame = self.extract_treatment_rep_frame(filename)
                file_path = os.path.join(self.labels_dir, filename)
                self.parse_file(file_path, treatment, rep, frame)

        print(f"Number of malformed lines skipped: {self.malformed_lines_count}")

    def extract_treatment_rep_frame(self, filename):
        """Extract treatment, repetition, and frame information from the filename."""
        parts = filename.split('_')
        treatment = parts[0]
        rep = parts[1]
        frame = parts[2].split('.')[0]  # Assuming the frame number is before the .txt extension
        return treatment, rep, frame

    def parse_file(self, file_path, treatment, rep, frame):
        """Parse a single file and return the tracking data."""
        with open(file_path, 'r') as file:
            for line in file:
                self.parse_line(line.strip(), treatment, rep, frame)

    def parse_line(self, line, treatment, rep, frame):
        """Parse a single line of tracking data."""
        values = line.split()
        if len(values) != 6:
            self.malformed_lines_count += 1
            return
        self.tracking_data.append({
            'frame': int(frame),
            'x': float(values[1]),
            'y': float(values[2]),
            'trajectory': int(values[5]),
            'treatment': treatment,
            'rep': rep
        })

    def to_dataframe(self):
        """Convert tracking data to a pandas DataFrame."""
        return pd.DataFrame(self.tracking_data)

    def save_to_csv(self, output_file_path):
        """Save the tracking data to a CSV file."""
        df = self.to_dataframe()
        df.to_csv(output_file_path, index=False)

    def plot_x_y_by_trajectory(self, output_dir=None):
        """Plot x and y coordinates by trajectory for each treatment."""
        df = self.to_dataframe()
        
        for treatment in df['treatment'].unique():
            subset_treatment = df[df['treatment'] == treatment]
            plt.figure(figsize=(10, 6))
            
            for trajectory in subset_treatment['trajectory'].unique():
                subset_trajectory = subset_treatment[subset_treatment['trajectory'] == trajectory]
                plt.plot(subset_trajectory['x'], subset_trajectory['y'], marker='o', linestyle='', alpha=0.2)
            
            plt.title(f'Treatment: {treatment}')
            plt.xlabel('X Coordinate')
            plt.ylabel('Y Coordinate')
            plt.grid(True)
            
            if output_dir:
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                plt.savefig(os.path.join(output_dir, f'{treatment}_trajectories.png'))
            plt.show()

    def sort_and_reassign_trajectories(self):
        """Sort trajectories by treatment and reassign trajectory numbers if duplicates are found across repetitions."""
        df = self.to_dataframe()
        new_tracking_data = []
        
        for treatment in df['treatment'].unique():
            treatment_df = df[df['treatment'] == treatment]
            rep_trajectories = {}

            # Collect all unique trajectories for each repetition
            for rep in treatment_df['rep'].unique():
                rep_df = treatment_df[treatment_df['rep'] == rep]
                rep_trajectories[rep] = sorted(rep_df['trajectory'].unique())

            # Reassign trajectory numbers to avoid duplicates across repetitions
            global_trajectory = 1
            trajectory_mapping = {}

            for rep in sorted(rep_trajectories.keys()):
                for trajectory in rep_trajectories[rep]:
                    if trajectory not in trajectory_mapping:
                        trajectory_mapping[trajectory] = global_trajectory
                        global_trajectory += 1

            # Apply the new trajectory numbers to the dataframe
            for _, row in treatment_df.iterrows():
                row['trajectory'] = trajectory_mapping[row['trajectory']]
                new_tracking_data.append(row)

        self.tracking_data = new_tracking_data

    def save_sum_of_traj(self, output_file_path):
        """Save the sum of unique trajectories by treatment and time interval to a CSV file."""
        
        df = self.to_dataframe()

        # Define movie duration in minutes
        movie_duration = 10

        # Define the sampling rate (frames per second)
        sampling_rate = 25

        # Calculate the number of frames per 10-minute interval
        frames_per_interval = sampling_rate * 60

        # Calculate the number of intervals
        num_intervals = movie_duration // 1

        # Create intervals
        intervals = pd.cut(df['frame'], bins=num_intervals, labels=False)

        # Assign intervals to DataFrame
        df['time_interval'] = intervals

        # Group by treatment, time_interval, and rep to count unique trajectories
        general_visits = df.groupby(['treatment', 'time_interval', 'rep'])['trajectory'].nunique().reset_index(name='counts')
        
        # Save the results to a CSV file
        general_visits.to_csv(output_file_path, index=False)




## Main

In [3]:

if __name__ == "__main__":
    # Create an instance of the YOLOv8DataFrameBuilder class
    builder = YOLOv8DataFrameBuilder('Your/path/to/labels')

    # Load and process all tracking data from the directory
    builder.load_data()

    # Sort and reassign trajectories
    builder.sort_and_reassign_trajectories()

    # Convert the tracking data to a pandas DataFrame
    df = builder.to_dataframe()

    # Print the DataFrame
    print(df)

    # Save the DataFrame to a CSV file
    builder.save_to_csv('tracking_data.csv')

    # Save the sum of unique trajectories by treatment and time interval to a CSV file
    builder.save_sum_of_traj('general_visits.csv')

    # Plot x and y coordinates by trajectory for each treatment
    #builder.plot_x_y_by_trajectory('plots')

Number of malformed lines skipped: 1505
       frame         x         y  trajectory treatment   rep
0      14507  0.642501  0.836773         372       cnt  rep1
1      14507  0.700810  0.158550         375       cnt  rep1
3      10893  0.360041  0.834082         264       cnt  rep1
4      10893  0.929697  0.537853         270       cnt  rep1
5      10893  0.833472  0.186961         269       cnt  rep1
...      ...       ...       ...         ...       ...   ...
83351   9155  0.443195  0.935426          40        b2  rep3
83354   6067  0.666741  0.973270          43        b2  rep2
83355   1595  0.892041  0.760637          12        b2  rep1
83365   1637  0.794672  0.953286          12        b2  rep1
83366   5232  0.246546  0.506426          35        b2  rep2

[83368 rows x 6 columns]
