# 🧠 EyePop + Voxel51 Dataset Download & Visualization

This notebook downloads a dataset from EyePop.ai, extracts the assets and annotations into a local cache,
converts the annotations to COCO format, and visualizes them using FiftyOne (Voxel51's open-source dataset tool).


In [None]:
%pip install fiftyone eyepop tqdm

In [None]:
import glob
import fiftyone as fo
import os
import requests
import json
from pathlib import Path
from eyepop import EyePopSdk
from eyepop.data.data_endpoint import DataEndpoint
from eyepop.data.data_jobs import DataJob
from eyepop.data.data_types import DatasetCreate, AssetImport, AutoAnnotateParams, Dataset, Asset, ChangeEvent, ChangeType, DatasetUpdate, UserReview, Model, ModelCreate, ModelStatus
import asyncio
from tqdm.notebook import tqdm
import hashlib


In [None]:
# 🔧 Configuration
# EyePop.ai account and dataset to download.
accountUUID = "60107c508dde44b19c3e7d1f30a1e2c0"
datasetUUID = "0686c4296f957d3980005a829876d16e"

# SECURITY: the API key is a credential and must never be committed with the
# notebook. It is read from the EYEPOP_SECRET_KEY environment variable instead.
# Any key that was previously hard-coded in this cell should be treated as
# leaked and rotated.
apikey = os.environ.get("EYEPOP_SECRET_KEY")
if not apikey:
    print("⚠️ EYEPOP_SECRET_KEY is not set; EyePop.ai requests are expected to fail authentication.")

# Cache location, namespaced per account and per dataset so that different
# datasets never share files. Relative to the current working directory.
cache_directory = "./.cache/voxel51/" + accountUUID + "/" + datasetUUID


In [None]:
def checkCacheDirectory():
    """Ensure the cache directory exists and report whether it has content.

    The directory named by the module-level ``cache_directory`` (with ``~``
    expanded) is created when missing.

    Returns:
        True when the directory contains at least one entry, False when it
        is empty (including when it was just created).
    """
    resolved_dir = os.path.expanduser(cache_directory)

    if not os.path.exists(resolved_dir):
        print("Cache directory does not exist. Creating it...")
        os.makedirs(resolved_dir)

    entries = os.listdir(resolved_dir)
    if entries:
        print("Cache directory contains files:", entries)
        return True

    print("Cache directory is empty.")
    return False


In [None]:
def convert_annotations_to_coco(asset_uuid, annotations, image_id, starting_annotation_id=1):
    """Build a single-image COCO dict from one asset's annotations.

    Args:
        asset_uuid: Asset identifier; the image entry's file name is
            ``"<asset_uuid>.jpg"``.
        annotations: Iterable of annotation wrappers. Each wrapper is expected
            to expose ``.annotation`` with ``source_width``, ``source_height``
            and ``objects``; each object exposes ``classLabel``, ``x``, ``y``,
            ``width`` and ``height``. Wrappers without a usable
            ``.annotation`` are ignored.
        image_id: Numeric COCO image id to assign to this asset.
        starting_annotation_id: Id given to the first emitted annotation;
            subsequent annotations count up from it.

    Returns:
        A dict with ``"images"`` (one entry), ``"annotations"`` and
        ``"categories"``, or ``None`` when there is nothing to convert.
        Category ids are local to this call: they are numbered from 1 in
        order of first appearance of each label.
    """
    # Keep only wrappers that actually carry a prediction payload, so the
    # image-size lookup below is guarded the same way as the object loop.
    predictions = [
        entry.annotation
        for entry in (annotations or [])
        if getattr(entry, "annotation", None) is not None
    ]
    if not predictions:
        print("No annotations found.")
        return None

    # Image dimensions come from the first prediction. Width must be read
    # from source_width (it was previously read from source_height).
    first_prediction = predictions[0]
    coco = {
        "images": [
            {
                "id": image_id,
                "width": first_prediction.source_width,
                "height": first_prediction.source_height,
                "file_name": f"{asset_uuid}.jpg",
            }
        ],
        "annotations": [],
        "categories": []
    }

    category_name_to_id = {}
    annotation_id = starting_annotation_id

    for prediction in predictions:
        # `objects` may be missing or None; treat both as "no objects".
        for obj in getattr(prediction, "objects", None) or ():
            label = obj.classLabel
            category_id = category_name_to_id.get(label)
            if category_id is None:
                category_id = len(category_name_to_id) + 1
                category_name_to_id[label] = category_id
                coco["categories"].append({
                    "id": category_id,
                    "name": label,
                    "supercategory": "none"
                })

            coco["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                # COCO bbox layout: [x, y, width, height].
                "bbox": [obj.x, obj.y, obj.width, obj.height],
                "area": obj.width * obj.height,
                "iscrowd": 0
            })
            annotation_id += 1

    return coco

In [None]:


def _merge_asset_coco(combined_coco, category_name_to_id, metadata, annotation_id):
    """Fold one asset's COCO fragment into the dataset-wide COCO dict.

    The fragment's category ids are local to that asset, so every annotation
    is remapped (local id -> label name -> dataset-wide id) before it is
    appended. New labels are registered in ``category_name_to_id`` and in
    ``combined_coco["categories"]`` in order of first appearance.

    Args:
        combined_coco: Dataset-wide COCO dict, mutated in place.
        category_name_to_id: Dataset-wide label -> category id map, mutated
            in place.
        metadata: Per-asset COCO dict with "images", "annotations" and
            optionally "categories".
        annotation_id: Id to assign to the first merged annotation.

    Returns:
        The next unused annotation id.
    """
    local_id_to_name = {}
    for cat in metadata.get("categories", []):
        name = cat["name"]
        local_id_to_name[cat["id"]] = name
        if name not in category_name_to_id:
            cat_id = len(category_name_to_id) + 1
            category_name_to_id[name] = cat_id
            combined_coco["categories"].append({
                "id": cat_id,
                "name": name,
                "supercategory": "none"
            })

    combined_coco["images"].extend(metadata["images"])

    for ann in metadata["annotations"]:
        ann["id"] = annotation_id
        annotation_id += 1
        name = local_id_to_name.get(ann["category_id"])
        if name is not None:
            # Translate the per-asset category id into the dataset-wide one.
            ann["category_id"] = category_name_to_id[name]
        combined_coco["annotations"].append(ann)

    return annotation_id


async def downloadDatasetToCache():
    """Download every asset of the configured dataset and write a COCO file.

    Images are stored as ``<cache_directory>/data/<asset uuid>.jpg`` (files
    that already exist are not downloaded again) and the merged annotations
    are written to ``<cache_directory>/annotations/annotations.json``.
    Assets for which ``convert_annotations_to_coco`` yields nothing usable
    are downloaded but left out of the COCO file.

    Reads the module-level ``apikey``, ``accountUUID``, ``datasetUUID`` and
    ``cache_directory``.
    """
    print("Downloading dataset from EyePop.ai to cache directory...")

    async with EyePopSdk.dataEndpoint(secret_key=apikey, account_id=accountUUID, is_async=True, disable_ws=False) as endpoint:
        asset_list = await endpoint.list_assets(dataset_uuid=datasetUUID, include_annotations=True)
        print(f"Found {len(asset_list)} assets in the dataset.")

        cache_path = Path(os.path.expanduser(cache_directory))
        images_dir = cache_path / "data"
        annotations_dir = cache_path / "annotations"
        os.makedirs(cache_path, exist_ok=True)
        images_dir.mkdir(exist_ok=True)
        annotations_dir.mkdir(exist_ok=True)

        combined_coco = {
            "images": [],
            "annotations": [],
            "categories": []
        }
        category_name_to_id = {}
        annotation_id = 1

        pbar = tqdm(asset_list, desc="Starting downloads")
        # Image ids are assigned per asset position (starting at 1), including
        # assets that end up skipped, so ids stay stable across the listing.
        for image_id, asset in enumerate(pbar, start=1):
            pbar.set_description(f"Downloading {asset.uuid}")

            image_path = images_dir / f"{asset.uuid}.jpg"
            if not image_path.exists():
                image_response = await endpoint.download_asset(asset.uuid, datasetUUID, dataset_version=None)
                image_bytes = await image_response.read()
                with open(image_path, "wb") as f:
                    f.write(image_bytes)

            metadata = convert_annotations_to_coco(
                asset_uuid=asset.uuid,
                annotations=asset.annotations,
                image_id=image_id,
                starting_annotation_id=annotation_id
            )

            if not metadata or "images" not in metadata or "annotations" not in metadata:
                print(f"⚠️ Skipping asset {asset.uuid} due to unexpected metadata format")
                continue

            annotation_id = _merge_asset_coco(
                combined_coco, category_name_to_id, metadata, annotation_id
            )

        annotations_path = annotations_dir / "annotations.json"
        with open(annotations_path, "w") as f:
            json.dump(combined_coco, f, indent=2)

        # NOTE(review): the `async with` block presumably disconnects on exit
        # as well; the explicit call is kept from the original — confirm
        # whether it is still needed.
        await endpoint.disconnect()

    print("✅ Dataset downloaded successfully.")

In [None]:
async def run():
    """Download the dataset, print a short sanity report, and open FiftyOne.

    Steps: call ``downloadDatasetToCache()``, list a few cached images,
    report image/annotation counts from the generated COCO file, load the
    cache directory as a FiftyOne COCO detection dataset, and launch the
    FiftyOne App on it.
    """
    print("Cache directory:", cache_directory)
    await downloadDatasetToCache()

    # Expand "~" once and reuse it, so the sanity checks below look at the
    # same directory that is handed to FiftyOne.
    cache_root = os.path.expanduser(cache_directory)
    annotations_file = os.path.join(cache_root, "annotations", "annotations.json")

    print("Images directory:", os.listdir(os.path.join(cache_root, "data"))[:5])
    print("Annotations file exists:", os.path.exists(annotations_file))
    with open(annotations_file, encoding="utf-8") as f:
        coco = json.load(f)
    print("Total images in COCO:", len(coco["images"]))
    print("Total annotations:", len(coco["annotations"]))

    print("Loading dataset into FiftyOne...")
    dataset = fo.Dataset.from_dir(
        dataset_dir=cache_root,
        dataset_type=fo.types.COCODetectionDataset,
        # Relative to dataset_dir.
        labels_path="annotations/annotations.json",
    )
    session = fo.launch_app(dataset, browser=True)
    
# Top-level `await` works inside IPython/Jupyter cells; in a plain Python
# script this would have to be `asyncio.run(run())` instead.
await run()


In [None]:
import webbrowser

# Open the local app URL in the system's default browser.
# NOTE(review): presumably this is the FiftyOne App launched by run() — confirm the port.
webbrowser.open(url="http://localhost:5151/")