<a target="_blank" href="https://colab.research.google.com/github/biigle/community-scripts/blob/async-grab-largo-patches/grab-largo-patches/grab-largo-patches.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

### Install packages

In [None]:
# Install the third-party packages this notebook imports.
%pip install nest-asyncio       #we only need this for jupyter notebooks
%pip install aiohttp
%pip install aiofiles
%pip install tqdm
# Download biigle.py into the working directory so that `from biigle import Api` works.
!wget https://raw.githubusercontent.com/biigle/community-scripts/master/biigle/biigle.py

### Import all needed modules

In [None]:
from biigle import Api
import os
import nest_asyncio                   #we only need this for jupyter notebooks         
nest_asyncio.apply()
import asyncio
import aiohttp 
import aiofiles
import shutil
import tqdm

### Enter Biigle Credentials and the volume/project and label id you are interested in

In [None]:
# Email address of your BIIGLE user account.
email = ''
# Your BIIGLE API token.
token = ''
# Type of the model to process: either 'project' or 'volume'.
model_type = 'project'
# ID of the project/volume to process.
model_id = 0
# ID of the label to fetch Largo patches for. Use -1 to fetch patches for all labels.
label_id = -1

### Create the API client

In [None]:
# Client object used for all BIIGLE API requests below, built from the credentials entered above.
api = Api(email, token)

### Get the labels of the project or volume. If `label_id` is set to -1, all available labels are used; otherwise only the requested label is kept.

In [None]:
# Collect an [id, name] pair for every label available in the requested model.
if model_type == 'project':
    # For a project, gather the labels of all label trees that the project uses.
    labelresponse = api.get(f"projects/{model_id}/label-trees").json()
    labels = [[label['id'], label['name']] for labeltree in labelresponse for label in labeltree['labels']]
else:
    # For a volume, gather all labels that are used in it.
    labelresponse = api.get(f"volumes/{model_id}/annotation-labels").json()
    labels = [[label['id'], label['name']] for label in labelresponse]

# Restrict the list to the single requested label unless all labels (-1) were requested.
if label_id != -1:
    # There is no API function that returns the name for a label id, so we
    # filter the list of all labels by the configured `label_id` instead.
    labels = [[label[0], label[1]] for label in labels if label[0] == label_id]
    if not labels:
        # Make the "nothing will be downloaded" case visible instead of silently continuing.
        print(f"Label {label_id} was not found in {model_type} {model_id}.")

### Define a function to download the images asynchronously

In [None]:
def asyncDownloadImages(annotations,id,name, numberConccurentTasks=10,
                        storageUrl='https://biigle.de/storage/largo-patches'):
    """Download the Largo patches of one label concurrently.

    Every patch is written to ``patches/{id}_{name}/{annotation_id}.jpg``.

    Args:
        annotations: Mapping of annotation id to the UUID (string) of the image
            the annotation belongs to. The patch URL is built from both values.
        id: Label id, used as the first part of the target directory name.
        name: Label name, used as the second part of the target directory name.
        numberConccurentTasks: Maximum number of HTTP requests in flight at once.
        storageUrl: Base URL of the Largo patch storage (no trailing slash).

    Returns:
        None. Downloads that fail (non-200 response, network or file error) do
        not abort the remaining downloads; they are reported with ``print``.
    """
    target_dir = os.path.join("patches", f"{id}_{name}")
    os.makedirs(target_dir, exist_ok=True)

    # Nothing to download: return early instead of scheduling an empty batch.
    if not annotations:
        return

    async def fetch_file(session, sema, annotation_id, image_uuid):
        # Patches are stored under <uuid[0:2]>/<uuid[2:4]>/<uuid>/<annotation id>.jpg
        fname = f"{annotation_id}.jpg"
        url = f"{storageUrl}/{image_uuid[:2]}/{image_uuid[2:4]}/{image_uuid}/{fname}"
        # The semaphore only guards the HTTP request, not the file write.
        async with sema:
            async with session.get(url) as resp:
                # Explicit check instead of `assert`, which is stripped under `python -O`.
                if resp.status != 200:
                    raise RuntimeError(f"HTTP {resp.status} for {url}")
                data = await resp.read()

        async with aiofiles.open(os.path.join(target_dir, fname), "wb") as outfile:
            await outfile.write(data)

    async def download_all():
        # Create the semaphore inside the running loop and share a single
        # session across all requests instead of opening one per file.
        sema = asyncio.BoundedSemaphore(numberConccurentTasks)
        async with aiohttp.ClientSession() as session:
            jobs = [
                fetch_file(session, sema, annotation_id, image_uuid)
                for annotation_id, image_uuid in annotations.items()
            ]
            # return_exceptions=True keeps one failing patch from cancelling the rest
            # and hands the errors back to us instead of leaving them unretrieved.
            return await asyncio.gather(*jobs, return_exceptions=True)

    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(download_all())

    # gather() preserves input order, so results line up with the annotation ids.
    failed = [
        (annotation_id, error)
        for annotation_id, error in zip(annotations, results)
        if isinstance(error, BaseException)
    ]
    if failed:
        print(f"{len(failed)} of {len(annotations)} patches for label {id} ({name}) could not be downloaded:")
        # Cap the listing so a broken label does not flood the notebook output.
        for annotation_id, error in failed[:10]:
            print(f"  annotation {annotation_id}: {error!r}")
        if len(failed) > 10:
            print(f"  ... and {len(failed) - 10} more")

### Fetch the annotation ids and their image UUIDs for each label, then call `asyncDownloadImages` to download the patches

In [None]:
# Walk over all selected labels (with a progress bar) and download their patches.
progress = tqdm.tqdm(labels)
for lbl_id, lbl_name in progress:
    # The response is passed on as the annotation-id -> image-UUID mapping for this label.
    endpoint = f"{model_type}s/{model_id}/image-annotations/filter/label/{lbl_id}"
    annotations = api.get(endpoint).json()
    # Skip labels that have no annotations.
    if not annotations:
        continue
    asyncDownloadImages(annotations, lbl_id, lbl_name)

### After everything is downloaded, zip the patches into one file for easier downloading


In [None]:
# Pack the contents of the "patches" directory into "patches.zip" in the working directory.
shutil.make_archive(base_name='patches', format='zip', root_dir='patches')