# Export Images and Annotations from Datature Nexus using Datature Python SDK

In [None]:
#!/usr/bin/env python
# -*-coding:utf-8 -*-
"""
  ████
██    ██   Datature
  ██  ██   Powering Breakthrough AI
    ██

@File    :   export_images_annotations.ipynb
@Author  :   Wei Loon Cheng
@Version :   1.0
@Contact :   hello@datature.io
@License :   Apache License 2.0
@Desc    :   Export images and annotations from Datature Nexus using Python SDK
"""

### Install prerequisites

In [1]:
%pip install datature
%pip install wget

Looking in indexes: https://pypi.org/simple, https://asia-python.pkg.dev/datature-puppeteer/python/simple/
Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://pypi.org/simple, https://asia-python.pkg.dev/datature-puppeteer/python/simple/
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import urllib
import urllib.request
from pathlib import Path

from datature.nexus import Client, ApiTypes

In [3]:
# Secret key of the Nexus project you want to export images and annotations from.
# It can be located on Nexus under the Integrations tab in your project page.
# To keep credentials out of the saved notebook, set the DATATURE_SECRET_KEY
# environment variable; otherwise replace the placeholder below.
SECRET_KEY = os.environ.get("DATATURE_SECRET_KEY", "<YOUR_SECRET_KEY>")

# Your project ID on Nexus (or set the DATATURE_PROJECT_ID environment variable).
# This can be found via two methods:
# 1. In the URL of the project page (https://nexus.datature.io/project/<YOUR_PROJECT_ID>)
# 2. Project Key in the Integrations page
PROJECT_ID = os.environ.get("DATATURE_PROJECT_ID", "proj_<YOUR_PROJECT_ID>")

In [4]:
# Create an SDK client from the secret key, then look up the project by its ID.
# The cells below use `project` for every asset and annotation call.
client = Client(SECRET_KEY)
project = client.get_project(PROJECT_ID)

### Download images

Images from your Nexus project will be downloaded and saved to the `images` folder. The Datature Python SDK has an asset pagination limit of 100, so multiple calls will be made to the API to download all images if your project has more than 100 images.

In [5]:
# Change this to the directory you want to store your images
image_dir = Path("images")
image_dir.mkdir(parents=True, exist_ok=True)

# Number of assets requested per call to the asset listing API
ASSETS_PER_PAGE = 100

# Download images from Nexus
print("Downloading images from Nexus...")

# Walk every page of the asset listing, collecting (filename, url) pairs.
# The first request carries no "page" key; each later request passes the
# "next_page" value returned by the previous one until it comes back empty.
urls = []
next_page = None
while True:
    query = {"limit": ASSETS_PER_PAGE}
    if next_page:
        query["page"] = next_page
    assets = project.assets.list(query)
    urls.extend((asset["filename"], asset["url"]) for asset in assets["data"])
    next_page = assets["next_page"]
    if not next_page:
        break

for filename, url in urls:
    # Keep only the final path component so a filename containing directory
    # separators cannot point outside (or into a missing subfolder of) image_dir
    urllib.request.urlretrieve(url, image_dir / Path(filename).name)
print(f"{len(urls)} images saved to: {image_dir}")

Downloading images from Nexus...
100 images saved to: images


### Export and download annotations

Datature supports the annotation formats listed [here](https://developers.datature.io/docs/exporting-annotations#annotation-formats). You can choose various formats depending on whether your annotations are bounding boxes or polygons.

In [6]:
# Announce the export, then choose the local folder handed to the SDK download call
print("Downloading annotations from Nexus...")
annotation_dir = Path("annotations")

# Export options:
#   split_ratio - 0.0 means all annotations are stored in the same file;
#                 change this value if you want a train-test split
export_options = ApiTypes.AnnotationExportOptions(
    split_ratio=0.0,
    normalized=False,
    seed=0,
)

# Change `format` to your desired annotation format; supported formats are listed at
# https://developers.datature.io/docs/types-sdk-functions#annotationformat
annotation_export_metadata = ApiTypes.AnnotationExportMetadata(
    format="coco",
    options=export_options,
)

# Start the export, then wait on that operation before downloading its result
operation = project.annotations.create_export(annotation_export_metadata)
export_operation_id = operation["id"]
project.operations.wait_until_done(export_operation_id)

# Download the exported file(s) and report the last file name returned
annotation_results = project.annotations.download_exported_file(
    export_operation_id, annotation_dir
)
annotation_filename = annotation_results["file_names"][-1]
print(f"Annotations saved to: {annotation_filename}")

Downloading annotations from Nexus...
Annotations saved to: 41b8e35ec179b34adf19e3c670daa3de/dump.json
