In [1]:
import pandas as pd
from viame_annotation import Viame
import os

In [8]:
def viame_to_standard(csv_path, source):
    """Convert a VIAME annotation CSV into a standard-format DataFrame.

    The file is read with its first line as the header; the line directly
    after the header (file line index 1) is skipped.
    NOTE(review): presumably that line is a VIAME metadata/comment row -- confirm.

    Each remaining row is passed to the ``Viame`` helper to obtain the
    filename, the taxonomy triple and the bounding box. How those values are
    derived is defined by ``Viame`` and is not visible here.

    Args:
        csv_path: Path of the VIAME CSV file to read.
        source: Source identifier; forwarded to ``Viame.get_id`` and stored
            in the ``Source`` column of every output row.

    Returns:
        A DataFrame with the columns Filename, Family, Genus, Species, ymin,
        xmin, xmax, ymax, Augmentation and Source, one row per input
        annotation. An empty DataFrame with those columns is returned when
        the input has no annotation rows. Nothing is written to disk here.
    """
    standard_columns = ['Filename', 'Family', 'Genus', 'Species', 'ymin', 'xmin', 'xmax', 'ymax', 'Augmentation', 'Source']

    raw_annotations = pd.read_csv(csv_path, skiprows=lambda line_no: line_no in [1])
    parser = Viame()

    def _standard_record(record):
        # Translate one VIAME row into a Series in the standard layout.
        image_name = parser.get_id(record, source)
        family, genus, species = parser.get_taxonomy(record)
        xmin, ymin, xmax, ymax = parser.get_bbox(record)
        return pd.Series({
            'Filename': image_name,
            'Family': family,
            'Genus': genus,
            'Species': species,
            'ymin': ymin,
            'xmin': xmin,
            'xmax': xmax,
            'ymax': ymax,
            'Augmentation': "none",
            'Source': source
        })

    converted_rows = [_standard_record(record) for _, record in raw_annotations.iterrows()]

    # Nothing to concatenate: hand back an empty frame that still has the schema.
    if not converted_rows:
        return pd.DataFrame(columns=standard_columns)

    # Each Series becomes a column; transposing turns them into rows.
    return pd.concat(converted_rows, axis=1).transpose()

In [3]:
def standard_to_viame(standard_df, original_csv_path, video_folder, frame_divisor=30):
    """Convert a standard-format annotation DataFrame back into VIAME-style rows.

    TODO: this converter was generated with ChatGPT and has not been checked
    against real VIAME files -- verify the column names and value formats
    before relying on its output.

    Args:
        standard_df: DataFrame with at least the columns Filename, Genus,
            Species, xmin, ymin, xmax and ymax. ``Filename`` must contain
            ``_frame<integer>`` followed by a ``.`` (e.g. ``clip_frame60.jpg``).
        original_csv_path: Path to the original VIAME CSV; only its header is
            read, to fix the column order of the result.
        video_folder: Value written to the '2: Video or Image Identifier' column.
        frame_divisor: The frame number parsed from ``Filename`` is
            integer-divided by this value to obtain the frame identifier.
            Defaults to 30, the value previously hard-coded here.
            NOTE(review): presumably the frame-extraction step -- confirm.

    Returns:
        A DataFrame with one row per row of ``standard_df``. Its columns are
        the columns of the original CSV, followed by any generated column that
        the original header does not contain. Columns that are never filled
        hold NaN. With an empty ``standard_df`` the result is an empty frame
        that carries only the original columns.

    Raises:
        IndexError: If a ``Filename`` does not contain ``_frame``.
        ValueError: If the text after ``_frame`` is not an integer.
    """
    # Only the header is needed, so no data rows are parsed.
    template_columns = list(pd.read_csv(original_csv_path, nrows=0).columns)

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every row.
    records = []
    for _, row in standard_df.iterrows():
        # '<prefix>_frame<number>.<ext>' -> <number>
        frame_number = int(row['Filename'].split('_frame')[1].split('.')[0])

        records.append({
            '1: Detection or Track-id': "",  # Fill in or calculate as needed
            '2: Video or Image Identifier': video_folder,
            '3: Unique Frame Identifier': frame_number // frame_divisor,
            '4-7: Img-bbox(TL_x,TL_y,BR_x,BR_y)': f"{row['xmin']},{row['ymin']},{row['xmax']},{row['ymax']}",
            '8: Detection or Length Confidence': "",  # Fill in or calculate as needed
            '9: Target Length (0 or -1 if invalid)': 0,  # Assumed default value
            '10-11+: Repeated Species,Confidence Pairs or Attributes': f"{row['Genus']} {row['Species']}",
        })

    viame_df = pd.DataFrame(records)

    # Keep the original column order and append any generated column the
    # original header lacks (the same column union that append produced).
    extra_columns = [column for column in viame_df.columns if column not in template_columns]
    return viame_df.reindex(columns=template_columns + extra_columns)

# USAGE

In [16]:
# Example usage: convert every VIAME CSV in `annotation_folder` to the standard
# format and write it, under the same file name, into `output_folder`.
annotation_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/frame_extraction_raw/sp/cleaned_annotations/annotations_viame'
output_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/frame_extraction_raw/sp/cleaned_annotations/annotations_standard'

# DataFrame.to_csv does not create missing directories, so make sure the
# destination exists before the first file is written.
os.makedirs(output_folder, exist_ok=True)

for annotation in os.listdir(annotation_folder):
  if annotation.endswith(".csv"):
    # The source name is the file name up to its first dot.
    annotation_name = annotation.split('.')[0]
    print(f"Converting {annotation}")
    # Convert the CSV file
    standard_df = viame_to_standard(os.path.join(annotation_folder, annotation), annotation_name)
    # Write the converted DataFrame to a new CSV file
    standard_df.to_csv(os.path.join(output_folder, annotation), index=False)
    print(f"Converted {annotation}")

Converting sp_palau4.csv
Converted sp_palau4.csv
Converting sp_palau3.csv
Converted sp_palau3.csv
Converting sp_palau.csv
Converted sp_palau.csv
Converting sp_palau2.csv
Converted sp_palau2.csv
Converting sp_palau5.csv
Converted sp_palau5.csv


## Move Annotations to the folders in phase2

In [None]:
import shutil

# Copy each standard-format annotation CSV into the phase2 sub-folder named
# after its source (the file name up to its first dot). Sources without a
# matching sub-folder are reported and skipped.
output_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/phase2'
base_folder = '/vol/biomedic3/bglocker/ugproj2324/fv220/datasets/frame_extraction_raw/'
annotations_folders = [
  base_folder + 'sp/cleaned_annotations/annotations_standard',
  base_folder + 'shlife/cleaned_annotations/annotations_standard',
  base_folder + 'gfp/cleaned_annotations/annotations_standard',
]

for annotation_folder in annotations_folders:
  for annotation in os.listdir(annotation_folder):
    if not annotation.endswith(".csv"):
      continue
    source_name = annotation.split('.')[0]
    source_folder = os.path.join(output_folder, source_name)
    # Check for an actual directory: a plain file with the same name is not a
    # valid copy target, and this avoids re-listing output_folder per file.
    if os.path.isdir(source_folder):
      print(f"Source {source_name} already exists, copying")
      # copy the file to the source folder
      shutil.copy(os.path.join(annotation_folder, annotation), os.path.join(source_folder, annotation))
    else:
      print(f'not copying {source_name} as it does not exist in the output folder')

## Clamp Negative Bounding-Box Coordinates to Zero in Phase2

In [None]:
# For every annotation CSV anywhere under output_folder, clamp the bounding-box
# columns (xmin, ymin, xmax, ymax) to a minimum of 0 and rewrite the file in place.
# Relies on `output_folder` being defined by an earlier cell.
bbox_columns = ['xmin', 'ymin', 'xmax', 'ymax']

for root, dirs, files in os.walk(output_folder):
  for file in files:
    if file.endswith(".csv"):
      print(f"Processing {file}")
      csv_path = os.path.join(root, file)
      df = pd.read_csv(csv_path)
      # Vectorised clamp of all four columns at once. Missing values stay
      # missing (the per-element max(0, NaN) used before turned them into 0).
      df[bbox_columns] = df[bbox_columns].clip(lower=0)
      df.to_csv(csv_path, index=False)