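## Adding Bounding Boxes to the Dataset
The dataset is already annotated with bounding boxes, but each box is stored as a single `bbox` list inside the species' `dataset.json` file, which is awkward to work with. The code below flattens the annotations into one table with one row per annotation and separate `bbox_x`, `bbox_y`, `bbox_width`, and `bbox_height` columns.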

In [5]:
import os
import pandas as pd
import json

In [8]:
# Species whose annotation files are loaded below. Each name is also turned
# into a directory name by replacing spaces with underscores.
species_list = [
    "Heterochone calyx",
    "Heteropolypus ritteri",
    "Hexactinellida",
    "Isididae",
    "Paragorgia arborea",
    "Pennatulacea",
    "Porifera"
]

def annotated_df(data):
    """Flatten a dataset dict into a table with one row per annotation.

    ``data['images']`` and ``data['annotations']`` are each normalized into
    a DataFrame and joined where the image ``id`` equals the annotation
    ``image_id``. Every ``bbox`` value is split into ``bbox_x``, ``bbox_y``,
    ``bbox_width`` and ``bbox_height`` (elements 0 to 3, in that order).
    The image-side ``id`` column and the original ``bbox`` column are
    dropped, the annotation-side ``id`` is kept under the name ``id``, and
    the rows are returned sorted by ``file_name``.
    """
    image_table = pd.json_normalize(data['images'])
    annotation_table = pd.json_normalize(data['annotations'])
    merged = image_table.merge(annotation_table, left_on='id', right_on='image_id')

    # One scalar column per bbox component, in list order
    bbox_columns = ('bbox_x', 'bbox_y', 'bbox_width', 'bbox_height')
    for position, column in enumerate(bbox_columns):
        merged[column] = merged['bbox'].apply(lambda box, i=position: box[i])

    # 'id_x' is the image id (duplicated by 'image_id'); 'id_y' is the
    # annotation id, which becomes the row's 'id'
    merged = merged.drop(columns=['bbox', 'id_x'])
    merged = merged.rename(columns={'id_y': 'id'})

    return merged.sort_values(by='file_name')

# Load every species' annotation file and stack the per-species tables into
# a single DataFrame, tagging each row with the species it came from.
# The tables are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies all earlier rows every time.
species_frames = []

for species in species_list:
    # Construct the file path with underscores instead of spaces
    species_name_formatted = species.replace(' ', '_')
    file_path = f"/Users/jaskiratkaur/Documents/ACV/Reef-madness/data/big_species/Annotations/{species_name_formatted}/dataset.json"

    try:
        # Read as UTF-8 explicitly rather than relying on the platform's
        # default text encoding
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Best effort: report the missing species and carry on with the rest
        print(f"File not found: {file_path}")
        continue

    # Flatten this species' annotations and remember which species they are
    species_df = annotated_df(data)
    species_df['species'] = species
    species_frames.append(species_df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when no annotation file could be loaded
all_species_df = (
    pd.concat(species_frames, ignore_index=True)
    if species_frames
    else pd.DataFrame()
)

print(all_species_df.head())

   width  height                                 file_name  license  \
0   1920    1080  000c1820-ce3f-441a-ac8a-b3881958fd68.png        0   
1   1920    1080  000c1820-ce3f-441a-ac8a-b3881958fd68.png        0   
2   1920    1080  000c1820-ce3f-441a-ac8a-b3881958fd68.png        0   
3   1920    1080  000c1820-ce3f-441a-ac8a-b3881958fd68.png        0   
4   1920    1080  000c1820-ce3f-441a-ac8a-b3881958fd68.png        0   

                                          flickr_url  \
0  https://fathomnet.org/static/m3/framegrabs/Doc...   
1  https://fathomnet.org/static/m3/framegrabs/Doc...   
2  https://fathomnet.org/static/m3/framegrabs/Doc...   
3  https://fathomnet.org/static/m3/framegrabs/Doc...   
4  https://fathomnet.org/static/m3/framegrabs/Doc...   

                                            coco_url        date_captured  \
0  https://fathomnet.org/static/m3/framegrabs/Doc...  2014-06-08 00:44:05   
1  https://fathomnet.org/static/m3/framegrabs/Doc...  2014-06-08 00:44:05   
2  ht