# Imports

In [43]:
import os
import boto3
import pandas as pd
import json
from botocore import UNSIGNED
from botocore.config import Config


# Load data

In [26]:
# Set up the S3 client with request signing disabled (signature_version=UNSIGNED),
# so no AWS credentials are attached to the requests.
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
bucket_name = 'airborne-obj-detection-challenge-training'
prefix = 'part1/'

# Local folder to save the data. The AOT_DATASET_FOLDER environment variable
# overrides the default, so the notebook is not tied to one machine's directory
# layout; when it is unset the original location is used.
dataset_folder = os.environ.get(
    'AOT_DATASET_FOLDER',
    '/Users/renatoboemer/code/developer/aot-object-tracking-aws/raw_data/airborne_data',
)
os.makedirs(dataset_folder, exist_ok=True)


In [None]:
# load annotations
def download_annotations(bucket, key, local_path):
    """Download a single S3 object to a local file using the module-level `s3` client.

    Args:
        bucket: Name of the S3 bucket that holds the object.
        key: Key of the object inside the bucket.
        local_path: Local file path the object is written to.
    """
    s3.download_file(Bucket=bucket, Key=key, Filename=local_path)

# (key suffix under `prefix`, local file name) for each annotation file to fetch
annotation_files = [
    ('ImageSets/groundtruth.json', 'groundtruth.json'),
    ('ImageSets/valid_encounters_maxRange700_maxGap3_minEncLen30.json', 'valid_encounters.json'),
]
for remote_name, local_name in annotation_files:
    download_annotations(bucket_name, prefix + remote_name, os.path.join(dataset_folder, local_name))
print("Annotations downloaded successfully.")

In [42]:
# `download_annotations` is defined once, in the earlier "load annotations" cell.
# It is reused here instead of being redefined, so the notebook keeps a single
# definition of the helper (run that cell first).
# Fetch the CSV variant of the valid-encounters file.
download_annotations(bucket_name, prefix + 'ImageSets/valid_encounters_maxRange700_maxGap3_minEncLen30.csv', os.path.join(dataset_folder, 'valid_encounters.csv'))
print("Annotations downloaded successfully.")

Annotations downloaded successfully.


## Groundtruth

In [27]:
# Parse the downloaded groundtruth.json and list its top-level keys
groundtruth_path = os.path.join(dataset_folder, 'groundtruth.json')
with open(groundtruth_path, 'r') as groundtruth_file:
    groundtruth_data = json.load(groundtruth_file)

print(groundtruth_data.keys())


dict_keys(['metadata', 'samples'])


In [28]:
# Display the top-level 'metadata' entry of the loaded ground-truth JSON
groundtruth_data['metadata']

{'description': 'camera0',
 'last_modified': 'Jan-08-2021 23:27:55',
 'version': '1.0'}

In [38]:
# Summarize every top-level entry: container type, size, and a short preview.
preview_count = 3  # number of keys/elements previewed; also drives the printed labels
for key, value in groundtruth_data.items():
    if isinstance(value, dict):
        print(f"{key}: Dict with {len(value)} keys. Showing first {preview_count} keys: {list(value.keys())[:preview_count]}")
    elif isinstance(value, list):
        # The slice and the label share preview_count; previously the label
        # announced 3 elements while the slice only took 2.
        first_elements = value[:preview_count]
        print(f"{key}: List with {len(value)} elements. First {preview_count} elements: {first_elements}")


metadata: Dict with 3 keys. Showing first 3 keys: ['description', 'last_modified', 'version']
samples: Dict with 1311 keys. Showing first 3 keys: ['673f29c3e4b4428fa26bc55d812d45d9', '63c5597a57b04b448723f1f1844a2b78', '00bb96a5a68f4fa5bc5c5dc66ce314d2']


## Valid encounters

In [67]:
def fix_json_commas(file_path, output_path):
    """Rewrite a file of concatenated JSON values as a single valid JSON array.

    The input is expected to hold a sequence of top-level JSON values (typically
    objects) separated by whitespace and/or stray commas. Each value is located
    with the JSON parser rather than by matching lines that consist of a lone
    brace, so nested objects, `},` line endings, and commas inside nested arrays
    are left untouched. The original text of every value is copied verbatim; only
    the separators between top-level values and the surrounding brackets are
    generated.

    Args:
        file_path: Path of the file to read.
        output_path: Path of the file to write. It is opened only after the
            input has been read and scanned completely.

    Raises:
        json.JSONDecodeError: If the input contains text that cannot be parsed
            as a JSON value. Nothing is written to `output_path` in that case.
    """
    with open(file_path, 'r') as f:
        text = f.read()

    decoder = json.JSONDecoder()
    separators = ' \t\r\n,'  # characters allowed between top-level values
    length = len(text)
    chunks = []
    pos = 0
    while True:
        # raw_decode does not skip leading whitespace, so advance past
        # whitespace and stray commas manually.
        while pos < length and text[pos] in separators:
            pos += 1
        if pos >= length:
            break
        # Only the end offset is needed: the source text is reused as-is.
        _, end = decoder.raw_decode(text, pos)
        chunks.append(text[pos:end])
        pos = end

    with open(output_path, 'w') as f_out:
        f_out.write('[\n')  # Start array
        f_out.write(',\n'.join(chunks))
        f_out.write('\n]')  # End array

# Source file and destination of the repaired copy, both inside dataset_folder
input_file, output_file = (
    os.path.join(dataset_folder, file_name)
    for file_name in ('valid_encounters.json', 'cleaned_valid_encounters.json')
)

# Write the repaired copy
fix_json_commas(input_file, output_file)

print(f"Fixed JSON file written to: {output_file}")

Fixed JSON file written to: /Users/renatoboemer/code/developer/aot-object-tracking-aws/raw_data/airborne_data/cleaned_valid_encounters.json


In [68]:
# Parse the repaired valid-encounters file
valid_encounters_path = os.path.join(dataset_folder, 'cleaned_valid_encounters.json')
with open(valid_encounters_path, 'r') as encounters_file:
    valid_encounters_data = json.load(encounters_file)

# Quick look at what was loaded
print(type(valid_encounters_data))  # container type of the parsed data (a list is expected)
print(len(valid_encounters_data))   # how many entries the JSON array holds
print(valid_encounters_data[0])     # first entry

<class 'list'>
879
{'encounter_id': 0.0, 'flight_id': '0001ba865c8e410e88609541b8f55ffc', 'framemin': 229, 'framemax': 325, 'framecount': 97, 'enc_len_with_gaps': 97, 'is_above_horizon': -1.0, 'min_enc_range': 188.5532181915, 'max_enc_range': 698.0991541037, 'median_enc_range': 437.8502544826, 'mean_enc_range': 439.4182395796, 'img_name': ['15549773720667933040001ba865c8e410e88609541b8f55ffc.png', '15549773721671169930001ba865c8e410e88609541b8f55ffc.png', '15549773722724494270001ba865c8e410e88609541b8f55ffc.png', '15549773723943993760001ba865c8e410e88609541b8f55ffc.png', '15549773724669998190001ba865c8e410e88609541b8f55ffc.png', '15549773725845259780001ba865c8e410e88609541b8f55ffc.png', '15549773726991626310001ba865c8e410e88609541b8f55ffc.png', '15549773727668135120001ba865c8e410e88609541b8f55ffc.png', '15549773728666936070001ba865c8e410e88609541b8f55ffc.png', '15549773729765164320001ba865c8e410e88609541b8f55ffc.png', '15549773731063586990001ba865c8e410e88609541b8f55ffc.png', '15549773

In [69]:
# Collect the 'flight_id' of every encounter, in file order, and preview the first five
flight_ids = [
    record['flight_id']
    for record in valid_encounters_data
]
flight_ids[:5]

['0001ba865c8e410e88609541b8f55ffc',
 '0011f3f114a741b5b02326c9e96e597a',
 '0036dcc16c474b1abaf14d1735a7e1cf',
 '006345553ed64e77a52d94035e1e747a',
 '0071600242424070b5e0c8cb237192db']