In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def read_annotation_file(file_path):
    """Load a single tab-separated annotation file.

    Parameters
    ----------
    file_path : str or path-like
        Location of the annotation file; its first line is used as the header.

    Returns
    -------
    pandas.DataFrame
        The file contents, one row per line of the file.
    """
    return pd.read_csv(file_path, sep='\t')

def process_annotation_aggregated(directory_path):
    """Summarise annotation labels across the non-background files of a directory.

    Every ``.txt`` file in ``directory_path`` whose name does not contain
    ``"BG"`` is read with ``read_annotation_file``. For each label found in a
    file's ``'Annotation'`` column, the number of rows carrying that label is
    added to ``SampleCount`` and the file's largest ``'End Time (s)'`` value is
    added to ``TotalSampleTime``.

    Parameters
    ----------
    directory_path : str or path-like
        Directory containing the tab-separated annotation files.

    Returns
    -------
    pandas.DataFrame
        One row per label with the columns ``'Label'``, ``'SampleCount'`` and
        ``'TotalSampleTime'``, ordered by first appearance of the label.
        The frame is empty (but keeps these columns) when no file qualifies.

    Raises
    ------
    KeyError
        If a processed file lacks the ``'Annotation'`` or ``'End Time (s)'``
        column.
    """
    result_columns = ['Label', 'SampleCount', 'TotalSampleTime']

    # label -> [sample count, summed per-file duration]. Accumulating in a
    # dict replaces the per-row DataFrame.append calls (removed in pandas 2.0)
    # and the repeated boolean-mask updates of the result frame.
    totals = {}

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the result reproducible between runs and machines.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith(".txt") or "BG" in file_name:
            continue

        annotation_df = read_annotation_file(os.path.join(directory_path, file_name))

        label_counts = annotation_df['Annotation'].value_counts()
        # The largest end time in the file is what gets credited to every
        # label that occurs in the file.
        total_sample_time = annotation_df['End Time (s)'].max()

        for label, count in label_counts.items():
            entry = totals.setdefault(label, [0, 0])
            entry[0] += count
            entry[1] += total_sample_time

    records = [
        {'Label': label, 'SampleCount': count, 'TotalSampleTime': duration}
        for label, (count, duration) in totals.items()
    ]
    # Passing `columns` keeps the expected schema even when `records` is empty.
    return pd.DataFrame(records, columns=result_columns)

def process_annotation_directory(directory_path):
    """Stack every annotation file of a directory into one DataFrame.

    All ``.txt`` files in ``directory_path`` (including those with ``"BG"`` in
    their name) are read with ``read_annotation_file``; each row is tagged
    with the name of the file it came from in a ``'File'`` column.

    Parameters
    ----------
    directory_path : str or path-like
        Directory containing the tab-separated annotation files.

    Returns
    -------
    pandas.DataFrame
        The rows of all files with a fresh 0..n-1 index. The columns
        ``'Selection'``, ``'View'``, ``'Channel'``, ``'Begin Time (s)'``,
        ``'End Time (s)'``, ``'Low Freq (Hz)'``, ``'High Freq (Hz)'``,
        ``'Annotation'`` and ``'File'`` always come first (filled with NaN
        where a file does not provide them), followed by any additional
        columns found in the files. Empty, with just those nine columns, when
        the directory holds no ``.txt`` file.
    """
    expected_columns = ['Selection', 'View', 'Channel', 'Begin Time (s)', 'End Time (s)',
                        'Low Freq (Hz)', 'High Freq (Hz)', 'Annotation', 'File']

    # Collect the per-file frames and concatenate once: calling pd.concat
    # inside the loop copies all previously accumulated rows for every file,
    # and seeding it with an empty object-dtype frame is deprecated behaviour
    # that can turn numeric columns into object dtype.
    frames = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the result reproducible.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith(".txt"):
            continue

        annotation_df = read_annotation_file(os.path.join(directory_path, file_name))
        # Remember which file each row came from.
        annotation_df['File'] = file_name
        frames.append(annotation_df)

    if not frames:
        return pd.DataFrame(columns=expected_columns)

    combined = pd.concat(frames, ignore_index=True)

    # Keep the documented column layout: expected columns first, extras after.
    extra_columns = [col for col in combined.columns if col not in expected_columns]
    return combined.reindex(columns=expected_columns + extra_columns)


In [None]:

# Directories holding the annotation files to summarise.
# NOTE(review): these are absolute paths on one specific Windows machine, so
# the cell will not run elsewhere as-is; consider a configurable base directory.
directory_path = 'C:/Users/amitg/Documents/Deep_Voice/ocean-whispers/Anotations Ocean Wispers'
directory_2_path = 'C:/Users/amitg/Documents/Deep_Voice/ocean-whispers/Background noise annotations'

# Build the per-label summary (sample count and summed file duration) for each
# directory separately.
# NOTE(review): process_annotation_aggregated skips every file whose name
# contains "BG"; if the background-noise files are named that way, the second
# summary will be empty — confirm this is intended.
aggregated_data1 = process_annotation_aggregated(directory_path)
aggregated_data2 = process_annotation_aggregated(directory_2_path)

# Stack the two summaries with a fresh index. Rows are not merged here, so a
# label present in both directories appears as two separate rows.
aggregated_data = pd.concat([aggregated_data1, aggregated_data2], ignore_index=True)

# Display the aggregated data
print(aggregated_data)