In [1]:
# Create COCO JSON files for CT classifier

import pandas as pd
import os
import json

# Input and output paths for one dataset variant. The "ground" paths are the
# active ones; the commented-out lines in each group are the "trail" and "both"
# alternatives. When switching variant, change every group in this cell
# together so the inputs and outputs stay consistent.

# Path to the original COCO JSON
old_coco_path = '/home/cara/oregon_critters/json/dataset_ground_620_adjxy.json' #ground
#old_coco_path = '/home/cara/oregon_critters/json/dataset_trail_180_adjxy.json' #trail
#old_coco_path = '/home/cara/oregon_critters/json/dataset_bothB_adjxy.json' #both

# Folder containing the cropped images (note the trailing slash)
crops_folder = '/home/cara/ct_classifier/data/ground_cropped_images/' #ground
#crops_folder = '/home/cara/ct_classifier/data/trail_cropped_images/' #trail
#crops_folder = '/home/cara/ct_classifier/data/bothB_cropped_images/' #both

# Path where the new (cropped-image) COCO JSON is saved
new_coco_path = '/home/cara/ct_classifier/coco_ground_crop.json' #ground
#new_coco_path = '/home/cara/ct_classifier/coco_trail_crop.json' #trail
#new_coco_path = '/home/cara/ct_classifier/coco_bothB_crop.json' #both

# Paths to the train/val/test .txt files (one filename per line).
# NOTE(review): the commented-out trail/both val and test names contain a
# double underscore ("__val", "__test") while the train names do not --
# confirm against the files on disk before enabling them.
train_file_path = '/home/cara/oregon_critters/sampled_ds_ground_620_train.txt'
val_file_path = '/home/cara/oregon_critters/sampled_ds_ground_620_val.txt'
test_file_path = '/home/cara/oregon_critters/sampled_ds_ground_620_test.txt'
# train_file_path = '/home/cara/oregon_critters/sampled_ds_trail_180_train.txt'
# val_file_path = '/home/cara/oregon_critters/sampled_ds_trail_180__val.txt'
# test_file_path = '/home/cara/oregon_critters/sampled_ds_trail_180__test.txt'
# train_file_path = '/home/cara/oregon_critters/sampled_ds_bothB_train.txt'
# val_file_path = '/home/cara/oregon_critters/sampled_ds_bothB__val.txt'
# test_file_path = '/home/cara/oregon_critters/sampled_ds_bothB__test.txt'

# File names for the per-split COCO JSONs. These are relative paths (no
# directory component) and are named for the ground variant only; there are no
# trail/both alternatives listed here, so rename them by hand when switching.
train_json = 'coco_ground_crop_train.json'
val_json = 'coco_ground_crop_val.json'
test_json = 'coco_ground_crop_test.json'

# Load the original COCO JSON into memory
with open(old_coco_path, 'r') as f:
    old_coco_data = json.load(f)


In [2]:
def filename_to_image_id(filename):
    """Return the image ID for a filename: the name with its final extension removed.

    Only the last extension is stripped (via ``os.path.splitext``); any
    directory part of ``filename`` is left in place.
    """
    stem, _extension = os.path.splitext(filename)
    return stem

In [3]:
# Empty skeleton of the new COCO JSON. The "categories", "images" and
# "annotations" lists start empty; "info" carries blank placeholder fields.
new_coco_data = dict(
    info=dict(version="", description=""),
    categories=[],
    images=[],
    annotations=[],
)

In [4]:
def extract_base_filename(filename):
    """Return the part of ``filename`` that precedes the first ``'_cropped'``.

    This drops the "_croppedX" suffix together with everything after it
    (including the extension). A name without ``'_cropped'`` is returned
    unchanged.
    """
    before_marker, _marker, _rest = filename.partition('_cropped')
    return before_marker

In [5]:
# Build the image and annotation lists of the new COCO JSON from the crop files.
#
# Each '.JPG' file in crops_folder is matched to its source image in the
# original COCO by base filename (directory and extension ignored). The crop
# entry records the source image's width/height (not the crop's own size) and
# receives a copy of the source image's annotations with category ids shifted
# up by one, leaving id 0 free.
#
# NOTE(review): the image id is derived from the source image's base filename,
# so several crops of one source image share the same "id", and each of them
# receives every annotation of that image (the same annotation "id" repeated).
# Confirm that whatever consumes this JSON expects that.

# Index the original images by base filename once instead of rescanning the
# whole list for every crop. Later duplicates overwrite earlier ones, so the
# last matching entry wins.
old_images_by_base = {
    os.path.splitext(os.path.basename(image['file_name']))[0]: image
    for image in old_coco_data['images']
}

# Group the original annotations by image id, keeping their original order.
old_annotations_by_image = {}
for annotation in old_coco_data['annotations']:
    old_annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

crop_images = []
crop_annotations = []
unmatched_crops = []

# os.listdir returns entries in arbitrary order; sort so the output is reproducible.
for crop_filename in sorted(os.listdir(crops_folder)):
    if not crop_filename.endswith('.JPG'):
        continue

    base_image_filename = extract_base_filename(crop_filename)
    image_id = filename_to_image_id(base_image_filename)

    old_image_info = old_images_by_base.get(base_image_filename)
    if old_image_info is None:
        # No source image for this crop: skip it and report it below instead
        # of failing with an AttributeError on None.
        unmatched_crops.append(crop_filename)
        continue

    # Create new image entry
    crop_images.append({
        "id": image_id,
        "file_name": crop_filename,
        "width": old_image_info.get("width"),
        "height": old_image_info.get("height"),
        # Add other optional fields
    })

    # Copy over the annotations of the source image
    for annotation in old_annotations_by_image.get(old_image_info['id'], []):
        crop_annotations.append({
            "id": str(annotation['id']),
            "image_id": image_id,
            "category_id": annotation['category_id'] + 1,
            "bbox": annotation['bbox'],
            # Add other optional fields
        })

# Assign (rather than append to) the lists so re-running this cell does not
# duplicate entries.
new_coco_data['images'] = crop_images
new_coco_data['annotations'] = crop_annotations

if unmatched_crops:
    print("Skipped", len(unmatched_crops),
          "crop file(s) with no matching image in the original COCO JSON:",
          unmatched_crops)

In [6]:
## Fix categories (they now have to start with 0: empty)

# Find original categories
original_categories = old_coco_data['categories']

# The "empty" category takes ID 0; every original category moves up by one.
empty_category = {
    "id": 0,
    "name": "empty"
}

# Mapping from old category IDs to new category IDs. It is seeded with 0 -> 1
# and filled in below; nothing in this cell reads it back.
category_mapping = {0: 1}

# Transform the original categories into the new format
new_categories = []
for original_category in original_categories:
    new_category_entry = {
        "id": original_category["id"] + 1,  # Increment IDs by 1
        "name": original_category["name"]
    }
    new_categories.append(new_category_entry)
    # Update the mapping
    category_mapping[original_category["id"]] = new_category_entry["id"]

# Assign the full list (instead of append/extend) so that re-running this cell
# does not add the categories a second time.
new_coco_data["categories"] = [empty_category] + new_categories

print(new_coco_data["categories"])

[{'id': 0, 'name': 'empty'}, {'id': 1, 'name': 'Bat'}, {'id': 2, 'name': 'CanadaJay'}, {'id': 3, 'name': 'Coyote'}, {'id': 4, 'name': 'PileatedWoodpecker'}, {'id': 5, 'name': 'Owl'}, {'id': 6, 'name': 'Mink'}, {'id': 7, 'name': 'RedFox'}, {'id': 8, 'name': 'Marten'}, {'id': 9, 'name': 'AmericanRobin'}, {'id': 10, 'name': 'DouglasSquirrel'}, {'id': 11, 'name': 'FlyingSquirrel'}, {'id': 12, 'name': 'Grouse'}, {'id': 13, 'name': 'Invertebrate'}, {'id': 14, 'name': 'MountainBeaver'}, {'id': 15, 'name': 'OtherBird'}, {'id': 16, 'name': 'SmallMammal'}, {'id': 17, 'name': 'Warbler'}, {'id': 18, 'name': 'Chickadee'}, {'id': 19, 'name': 'MountainQuail'}, {'id': 20, 'name': 'StripedSkunk'}, {'id': 21, 'name': 'Human'}, {'id': 22, 'name': 'Dog'}, {'id': 23, 'name': 'BlackBear'}, {'id': 24, 'name': 'BlackTailedDeer'}, {'id': 25, 'name': 'Bobcat'}, {'id': 26, 'name': 'Cougar'}, {'id': 27, 'name': 'DarkEyedJunco'}, {'id': 28, 'name': 'GraySquirrel'}, {'id': 29, 'name': 'Herp'}, {'id': 30, 'name': 'N

In [7]:
# Write the assembled COCO structure to new_coco_path as indented JSON
with open(new_coco_path, 'w') as out_file:
    json.dump(new_coco_data, out_file, indent=2)

In [8]:
# Split JSON by train, val, test
#
# NOTE(review): the splits are taken from old_coco_data (the original images
# and un-shifted categories), not from the cropped new_coco_data, although the
# output file names contain "crop". Confirm this is intended.

def _read_filenames(path):
    """Return the lines of the text file at ``path``, stripped of surrounding whitespace."""
    with open(path, 'r') as f:
        return [line.strip() for line in f]


def _subset_coco(coco, filenames):
    """Return a COCO dict restricted to the images whose file_name is in ``filenames``.

    "info" and "categories" are passed through unchanged; "annotations" keeps
    only the entries whose image_id belongs to a selected image. The relative
    order of images and annotations is preserved.
    """
    # Sets give constant-time membership tests; the selection is unchanged.
    wanted_names = set(filenames)
    images = [img for img in coco["images"] if img["file_name"] in wanted_names]
    image_ids = {img["id"] for img in images}
    return {
        "info": coco["info"],
        "images": images,
        "categories": coco["categories"],
        "annotations": [anno for anno in coco["annotations"] if anno["image_id"] in image_ids],
    }


# Read filenames from the .txt files
val_filenames = _read_filenames(val_file_path)
train_filenames = _read_filenames(train_file_path)
test_filenames = _read_filenames(test_file_path)

# Create separate JSON objects for each split
test_data = _subset_coco(old_coco_data, test_filenames)
val_data = _subset_coco(old_coco_data, val_filenames)
train_data = _subset_coco(old_coco_data, train_filenames)

# Save each JSON object to separate JSON files
for split_path, split_data in (
    (test_json, test_data),
    (val_json, val_data),
    (train_json, train_data),
):
    with open(split_path, 'w') as f:
        json.dump(split_data, f)
