In [None]:
import glob
import fiftyone as fo
import os
import requests
import json
from pathlib import Path
from eyepop import EyePopSdk
from eyepop.data.data_endpoint import DataEndpoint
from eyepop.data.data_jobs import DataJob
from eyepop.data.data_types import DatasetCreate, AssetImport, AutoAnnotateParams, Dataset, Asset, ChangeEvent, ChangeType, DatasetUpdate, UserReview, Model, ModelCreate, ModelStatus
import asyncio
from tqdm.notebook import tqdm
import hashlib
from fiftyone import Dataset
from fiftyone.core.labels import Detection, Detections
from fiftyone.core.metadata import ImageMetadata

# 🔧 Configuration
accountUUID = "034cb8e37f5444e98a78f1be65fd0bff"
fullModelUUID = "0687884e19e87bb38000eda578e1e386"
fullModelQuantizedUUID = "" #technically same 
# https://localhost:3000/wizardModel?type=object&step=deploy&accountUUID=49326f2e085a46c39ba73f91c52e436c&modelUUID=067ad32ae1bf704b80003bc57fbe3694&datasetUUID=066f5aed921579b280009378da7c0049
sub300ModelUUID = "" #blythe running this
sub300ModelQuantizedUUID = "" # haven't quantized yet

datasetUUID = "068775771e0a7d2e8000aa10e818018a"
fullDatasetQuantizedUUID = ""
sub300datasetUUID = "06881580891175618000bf9989ccd2e5"
sub300datasetQuantizedUUID = "" # haven't quantized yet

modelLabel = {
    datasetUUID: "EyePop.ai Trained Model (full)",
    fullDatasetQuantizedUUID: "EyePop.ai Trained Model (full, quantized)",
    sub300datasetUUID: "EyePop.ai Trained Model (sub-300)",
    sub300datasetQuantizedUUID: "EyePop.ai Trained Model (sub-300, quantized)",
}

#apikey = "AAE_w6lCcrCa27chNAbZO-WdZ0FBQUFBQmwyUFk5bmtLZnJBQ2RFVWVDbzU1MnkwTUMzYXhQWjA4a0ZEczFKWWdONjdRS0NGWUZ5aF90aXVQZ3FrcWdkZWwwUEx6Q0luM0F3b3ItMjdqRmhUQkxyTWVvSndFLWRCUENjZGNlanZhbGhRTDdtV289"
apikey = "AAGcsWj8N2PlKQl9c9ydz3QFZ0FBQUFBQm1mZDB5eDUwalNlYi12NWotd3hsVGJiMW1sVXF1dE9aOU9oSGVBOWtBQXoxZmNjUE5Nb1YzY3RROUdzbVUwUkZtcDhZcG5vSWROTzR1TU8ybGhZckx6RTgzYVZwMjZEREZjalZubnpYaUNMWVdBODg9"
cache_directory = "./.cache/voxel51/" + accountUUID + "/" + datasetUUID

def checkCacheDirectory():
    if not os.path.exists(os.path.expanduser(cache_directory)):
        print("Cache directory does not exist. Creating it...")
        os.makedirs(os.path.expanduser(cache_directory))
    cache_files = os.listdir(os.path.expanduser(cache_directory))
    if not cache_files:
        print("Cache directory is empty.")
        return False
    print("Cache directory contains files:", cache_files)    
    return True

def convert_annotations_to_coco(asset_uuid, annotations, image_id, starting_annotation_id=1):
    if(not annotations or len(annotations) == 0):
        print("No annotations found.")
        return None
    
    image_width = annotations[0].annotation.source_height
    image_height = annotations[0].annotation.source_height
    
    coco = {
        "images": [
            {
                "id": image_id,
                "width": image_width,
                "height": image_height,
                "file_name": f"{asset_uuid}.jpg",
            }
        ],
        "annotations": [],
        "categories": []
    }

    category_name_to_id = {}
    annotation_id = starting_annotation_id

    for annotation in annotations:
        if not hasattr(annotation, "annotation") or not hasattr(annotation.annotation, "objects"):
            continue

        for obj in annotation.annotation.objects:
            label = obj.classLabel
            if label not in category_name_to_id:
                category_id = len(category_name_to_id) + 1
                category_name_to_id[label] = category_id
                coco["categories"].append({
                    "id": category_id,
                    "name": label,
                    "supercategory": "none"
                })
            else:
                category_id = category_name_to_id[label]

            bbox = [
                obj.x,
                obj.y,
                obj.width,
                obj.height
            ]
            area = obj.width * obj.height

            coco["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": bbox,
                "area": area,
                "iscrowd": 0
            })
            annotation_id += 1

    return coco

async def downloadDatasetToCache(accountUUID, datasetUUID):
    print("Downloading dataset from EyePop.ai to cache directory...")
    print("Account UUID:", accountUUID)
    print("Dataset UUID:", datasetUUID)
    print("Cache directory:", cache_directory)
    print("Using API key:", apikey)

    async with EyePopSdk.dataEndpoint(
        #eyepop_url = "https://dataset-api.staging.eyepop.xyz/",
        eyepop_url = "https://web-api.staging.eyepop.xyz/",
        secret_key=apikey, 
        account_id=accountUUID, 
        is_async=True, 
        disable_ws=False) as endpoint:

        asset_list = await endpoint.list_assets(dataset_uuid=datasetUUID, include_annotations=True)
        print(f"Found {len(asset_list)} assets in the dataset.")
        
        os.makedirs(os.path.expanduser(cache_directory), exist_ok=True)
        cache_path = Path(os.path.expanduser(cache_directory))
        images_dir = cache_path / "data"
        annotations_dir = cache_path / "annotations"
        images_dir.mkdir(exist_ok=True)
        annotations_dir.mkdir(exist_ok=True)

        # write entire asset_list to a JSON file for debugging
        asset_list_path = cache_path / "eyepop_asset_list.json"
        asset_list_val_path = cache_path / "eyepop_asset_list_val.json"
        def default_serializer(obj):
            if hasattr(obj, "isoformat"):
                return obj.isoformat()
            raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
        with open(asset_list_path, "w") as f:
            json.dump([asset.model_dump() for asset in asset_list], f, indent=2, default=default_serializer)
        print(f"Asset list saved to {asset_list_path}")

        # make a copy of the asset_list and filter out assets that do not have "partition": "val"
        asset_list_val = [asset for asset in asset_list if asset.partition == "val"]
        with open(asset_list_val_path, "w") as f:
            json.dump([asset.model_dump() for asset in asset_list_val], f, indent=2, default=default_serializer)
        print(f"Asset list VAL saved to {asset_list_val_path}")

        combined_coco = {
            "images": [],
            "annotations": [],
            "categories": []
        }
        category_name_to_id = {}
        uuid_to_image_id = {}
        annotation_id = 1
        next_image_id = 1

        pbar = tqdm(asset_list, desc="Starting downloads")
        for asset in pbar:
            pbar.set_description(f"Downloading {asset.uuid}")

            image_path = images_dir / f"{asset.uuid}.jpg"
            if not image_path.exists():
                image_response = await endpoint.download_asset(asset.uuid, datasetUUID, dataset_version=None)
                image_bytes = await image_response.read()
                with open(image_path, "wb") as f:
                    f.write(image_bytes)

            # Assign numeric image_id
            image_id = next_image_id
            uuid_to_image_id[asset.uuid] = image_id
            next_image_id += 1

            metadata = convert_annotations_to_coco(
                asset_uuid=asset.uuid,
                annotations=asset.annotations,
                image_id=image_id,
                starting_annotation_id=annotation_id
            )

            if not metadata or "images" not in metadata or "annotations" not in metadata:
                print(f"⚠️ Skipping asset {asset.uuid} due to unexpected metadata format")
                continue

            combined_coco["images"].extend(metadata["images"])

            for ann in metadata["annotations"]:
                ann["id"] = annotation_id
                annotation_id += 1
                combined_coco["annotations"].append(ann)

            for cat in metadata.get("categories", []):
                if cat["name"] not in category_name_to_id:
                    cat_id = len(category_name_to_id) + 1
                    category_name_to_id[cat["name"]] = cat_id
                    combined_coco["categories"].append({
                        "id": cat_id,
                        "name": cat["name"],
                        "supercategory": "none"
                    })

        # Normalize category IDs across annotations
        name_to_id = {cat["name"]: cat["id"] for cat in combined_coco["categories"]}
        for ann in combined_coco["annotations"]:
            cat_id = ann["category_id"]
            cat_name = next((c["name"] for c in combined_coco["categories"] if c["id"] == cat_id), None)
            if cat_name:
                ann["category_id"] = name_to_id[cat_name]

        annotations_path = annotations_dir / "annotations.json"
        with open(annotations_path, "w") as f:
            json.dump(combined_coco, f, indent=2)

        await endpoint.disconnect()

    print("✅ Dataset downloaded successfully.")
    dataset = export_fiftyone_format(
        asset_list=asset_list_val,
        cache_dir=cache_directory,
        datasetUUID=datasetUUID
    )
    print("✅ FiftyOne dataset exported.")
    return dataset


def export_fiftyone_format(asset_list, cache_dir, datasetUUID):
    name = f"eyepop_dataset_{datasetUUID}"
    if name in fo.list_datasets():
        print(f"Dataset {name} already exists. Deleting it...")
        fo.delete_dataset(name)

    dataset = Dataset(name=name)

    for asset in asset_list:
        image_path = os.path.join(cache_dir, "data", f"{asset.uuid}.jpg")
        if not os.path.exists(image_path):
            continue

        sample = fo.Sample(
            filepath=image_path,
            tags=[asset.partition] if hasattr(asset, "partition") else [],
            metadata=ImageMetadata(),  # ✅ use correct type
        )

        # ✅ Store custom metadata as separate fields
        sample["partition"] = getattr(asset, "partition", None)
        sample["uuid"] = asset.uuid
        
        detections = []
        predictions = []
        if hasattr(asset, "annotations"):
            for annotation in asset.annotations:
                if not hasattr(annotation, "annotation") or not hasattr(annotation.annotation, "objects"):
                    continue
                for obj in annotation.annotation.objects:
                    if(obj.classLabel != "stenosis"):
                        continue

                    if(obj.confidence < 0.5):
                        continue

                    if annotation.type == "prediction" or annotation.type == "auto":
                        predictions.append(
                            Detection(
                                label=obj.classLabel  +" - "+ modelLabel.get(datasetUUID, "Unknown Model"),
                                bounding_box=[obj.x, obj.y, obj.width, obj.height],
                                confidence=obj.confidence if hasattr(obj, "confidence") else 0
                            )
                        )
                    elif annotation.type == "ground_truth":
                        #if(annotation.source is None):
                        detections.append(
                            Detection(
                                label=obj.classLabel + (annotation.source if annotation.source is not None else ""),
                                bounding_box=[obj.x, obj.y, obj.width, obj.height],
                                confidence=1
                            )
                        )

        if detections:
            sample["ground_truth"] = Detections(detections=detections)
            
        if predictions:
            sample["predictions"] = Detections(detections=predictions)

        dataset.add_sample(sample)

    export_path = os.path.join(cache_dir, "fiftyone_dataset")
    dataset.export(
        export_dir=export_path,
        dataset_type=fo.types.FiftyOneDataset,
        label_field="ground_truth"
    )

    print(f"✅ FiftyOne dataset exported to {export_path}")
    return dataset

In [2]:

async def run():
    print("Cache directory:", cache_directory)
    dataset = await downloadDatasetToCache(accountUUID, datasetUUID)

    print("Images directory:", os.listdir(os.path.join(cache_directory, "data"))[:5])
    print("Annotations file exists:", os.path.exists(os.path.join(cache_directory, "annotations", "annotations.json")))
    with open(os.path.join(cache_directory, "annotations", "annotations.json")) as f:
        coco = json.load(f)
        print("Total images in COCO:", len(coco["images"]))
        print("Total annotations:", len(coco["annotations"]))

   
    print("Loading dataset into FiftyOne...")    
    
    session = fo.launch_app(dataset, browser=True)
    
await run()

Cache directory: ./.cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a
Downloading dataset from EyePop.ai to cache directory...
Account UUID: 034cb8e37f5444e98a78f1be65fd0bff
Dataset UUID: 068775771e0a7d2e8000aa10e818018a
Cache directory: ./.cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a
Using API key: AAGcsWj8N2PlKQl9c9ydz3QFZ0FBQUFBQm1mZDB5eDUwalNlYi12NWotd3hsVGJiMW1sVXF1dE9aOU9oSGVBOWtBQXoxZmNjUE5Nb1YzY3RROUdzbVUwUkZtcDhZcG5vSWROTzR1TU8ybGhZckx6RTgzYVZwMjZEREZjalZubnpYaUNMWVdBODg9
Found 3000 assets in the dataset.
Asset list saved to .cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a/eyepop_asset_list.json
Asset list VAL saved to .cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a/eyepop_asset_list_val.json


Starting downloads:   0%|          | 0/3000 [00:00<?, ?it/s]

✅ Dataset downloaded successfully.
Dataset eyepop_dataset_068775771e0a7d2e8000aa10e818018a already exists. Deleting it...
Directory './.cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a/fiftyone_dataset' already exists; export will be merged with existing files
Exporting samples...
 100% |████████████████████| 179/179 [173.7ms elapsed, 0s remaining, 1.0K docs/s]       
✅ FiftyOne dataset exported to ./.cache/voxel51/034cb8e37f5444e98a78f1be65fd0bff/068775771e0a7d2e8000aa10e818018a/fiftyone_dataset
✅ FiftyOne dataset exported.
Images directory: ['0687757e173e722e800053a8af29fa7f.jpg', '068775798299716580005f08e7ff5011.jpg', '0687757893eb72348000888fd91003ca.jpg', '0687757b64df713f8000ae1eb6450822.jpg', '0687757bb58c7b488000a7233d92f98c.jpg']
Annotations file exists: True
Total images in COCO: 3000
Total annotations: 65628
Loading dataset into FiftyOne...


In [3]:
import webbrowser

webbrowser.open("http://localhost:5151/")

True

In [4]:
import os
from pathlib import Path
import json

def extract_val_assets():
    dataset_path = Path(os.path.expanduser(cache_directory)) / "eyepop_asset_list_val.json"
    if not dataset_path.exists():
        print(f"Dataset file {dataset_path} does not exist.")
        return []

    with open(dataset_path, "r") as f:
        data = json.load(f)  # Load as full array
    
    assets = [Asset.model_validate(obj) for obj in data]  # or pydantic.parse_obj_as(List[Asset], data)
    return assets

def extract_coco_val_annotations(val_assets):
    annotations_path = Path(os.path.expanduser(cache_directory)) / "annotations" / "annotations.json"
    if not annotations_path.exists():
        print(f"Annotations file {annotations_path} does not exist.")
        return None

    with open(annotations_path, "r") as f:
        coco_data = json.load(f)

    # Filter annotations for the validation assets
    asset_uuids = {asset.uuid for asset in val_assets}
    coco_data["images"] = [img for img in coco_data["images"] if img["file_name"].replace(".jpg", "") in asset_uuids]

    #save filtered annotations
    filtered_annotations_path = Path(os.path.expanduser(cache_directory)) / "annotations" / "annotations_val.json"
    with open(filtered_annotations_path, "w") as f:
        json.dump(coco_data, f, indent=2)
    print(f"Filtered annotations saved to {filtered_annotations_path}")
    
    return coco_data

async def run():
    assets = extract_val_assets()
    print(f"Extracted {len(assets)} assets from the dataset.")
    #list all asset UUIDs
    # for asset in assets:
    #     print(f"Asset UUID: {asset.uuid}, Partition: {asset.partition}, Annotations: {len(asset.annotations)}")
    
    coco_annotations = extract_coco_val_annotations(assets)

    print("Loading dataset into FiftyOne...")    
    dataset = fo.Dataset.from_dir(
        dataset_dir=os.path.expanduser(cache_directory),
        dataset_type=fo.types.COCODetectionDataset,
        labels_path="annotations/annotations_val.json",
    )
    session = fo.launch_app(dataset, browser=True)
    
# await run()