In [None]:
!pip install --upgrade roboflow pycocotools dioptra

In [None]:
####
#
# Setup credentials
#
####

import os

# Placeholder credentials: replace every '....' before running the notebook,
# and avoid committing the filled-in values.
roboflow_api_key = '....'

# The remaining two settings are exported as environment variables of the
# kernel process.
os.environ.update({
    'DIOPTRA_API_KEY': '....',
    'GOOGLE_APPLICATION_CREDENTIALS': '....',
})

In [None]:
####
#
# Download Roboflow project
#
####

import roboflow

# Authenticate against Roboflow with the key from the credentials cell.
rf = roboflow.Roboflow(api_key=roboflow_api_key)

# Placeholders: the Roboflow project to export and which of its versions.
roboflow_project_name = '...'
project_version = '...'

# Resolve project -> version, then download that version in COCO format.
project = rf.project(roboflow_project_name)
version = project.version(project_version)
version.download('coco')

In [None]:
####
#
# Convert from Coco format to Dioptra format
#
####

from pycocotools.coco import COCO
import os
import json


#
# Utility to convert Roboflow image file names to Google Cloud Storage (gs://) URIs
#

# Placeholder: GCS prefix under which the original (pre-Roboflow) images live.
img_prefix = 'gs://....'

# Image extensions that may appear as a trailing "_<ext>" in an exported name.
_IMAGE_EXTENSIONS = frozenset(
    {'jpg', 'jpeg', 'png', 'bmp', 'gif', 'tif', 'tiff', 'webp'})


def process_img_path(img_path):
    """Map a Roboflow-exported image file name to the URI of the original image.

    Assumes exported names look like ``<stem>_<ext>.rf.<hash>.<ext>`` (the
    original extension folded into the stem with an underscore) -- TODO confirm
    against the export being converted.

    Args:
        img_path: the ``file_name`` of an image entry in the COCO annotations.

    Returns:
        ``img_prefix`` followed by the reconstructed original file name. When
        the stem does not end in a known ``_<ext>`` suffix, the stem is
        appended as-is.
    """
    # Cut at the last ".rf." marker so dots inside the original name survive;
    # splitting on the first "." would truncate names such as "img.001_jpg...".
    stem, marker, _ = img_path.rpartition('.rf.')
    if not marker:
        # No Roboflow marker: only drop the final extension.
        stem = img_path.rsplit('.', 1)[0]

    # Restore the extension from the *trailing* "_<ext>" only, so an earlier
    # "_jpg" inside the name is left untouched.
    base, sep, ext = stem.rpartition('_')
    if sep and base and ext.lower() in _IMAGE_EXTENSIONS:
        original_img_name = f'{base}.{ext}'
    else:
        original_img_name = stem
    return f'{img_prefix}{original_img_name}'


# Directory the conversion reads the COCO export from, built from the
# version's project and version attributes.
project_path = f'{version.project}-{version.version}'

# Tag attached to every record so the datapoints can be selected by project.
dataset_tag = {
    'dataset_name': roboflow_project_name
}


def _first_polygon(annotation):
    """Return the first polygon of a COCO annotation, or None if it has none.

    COCO ``segmentation`` is either a list of polygons or an RLE dict; only
    the list form can be indexed, so anything else is reported as "no polygon"
    instead of raising.
    """
    segmentation = annotation.get('segmentation')
    if isinstance(segmentation, list) and segmentation:
        # NOTE(review): only the first polygon is kept; an instance made of
        # several disjoint polygons loses the others -- confirm this is intended.
        return segmentation[0]
    return None


# One record per image, across all splits that exist on disk.
my_records = []

for split in ['train', 'valid', 'test']:
    split_dir = os.path.join(project_path, split)
    if not os.path.isdir(split_dir):
        # Not every export contains all three splits.
        continue
    annotation_file = os.path.join(split_dir, '_annotations.coco.json')
    coco = COCO(annotation_file)

    for image_id, image_info in coco.imgs.items():
        polygons = []
        for annotation in coco.loadAnns(coco.getAnnIds([image_id])):
            polygon = _first_polygon(annotation)
            if polygon is None:
                # Annotation without a polygon segmentation: nothing to upload.
                continue
            polygons.append({
                'class_name': coco.cats[annotation['category_id']]['name'],
                'coco_polygon': polygon,
            })

        # A single groundtruth per image, present even when it has no polygons.
        my_records.append({
            'type': 'IMAGE',
            'metadata': {
                'uri': process_img_path(image_info['file_name']),
                'width': image_info['width'],
                'height': image_info['height'],
            },
            'groundtruths': [{
                'task_type': 'INSTANCE_SEGMENTATION',
                'bboxes': polygons,
            }],
            'tags': {
                **dataset_tag,
                'data_split': split
            }
        })


In [None]:
####
#
# Delete this project's datapoints from the lake (optional: uncomment to run)
#
###

# from dioptra.lake.utils import delete_datapoints

# delete_datapoints(
#     [{'left': 'tags.name', 'op': '=', 'right': 'dataset_name'},
#      {'left': 'tags.value', 'op': '=', 'right': roboflow_project_name}])

# print('done')

In [None]:
####
#
# Upload to Lake ML
#
###

from dioptra.lake.utils import upload_to_lake, wait_for_upload

# Submit the converted records, then hand the result of the submission to
# wait_for_upload (presumably blocks until ingestion completes -- TODO confirm).
upload = upload_to_lake(my_records)
wait_for_upload(upload)

In [None]:
####
#
# Create a Dioptra dataset
#
###


from dioptra.lake.datasets import Dataset as DioptraDataset

# Name passed to get_or_create below; change it to target another dataset.
dioptra_dataset_name = 'chess board'

my_dataset = DioptraDataset()
my_dataset.get_or_create(dioptra_dataset_name)

In [None]:
####
#
# Create a Random Miner
#
###


from dioptra.miners.random_miner import RandomMiner

# Restrict the miner to datapoints carrying this project's dataset_name tag
# (the tag written on every record by the conversion cell).
project_filters = [
    {'left': 'tags.name', 'op': '=', 'right': 'dataset_name'},
    {'left': 'tags.value', 'op': '=', 'right': roboflow_project_name},
]

my_miner = RandomMiner(
    display_name='My random Miner',
    select_filters=project_filters,
    size=10,  # presumably the number of datapoints to sample -- TODO confirm
)
my_miner.run()

In [None]:
####
#
# Add to the dataset and commit a new version
#
###

# Take whatever the miner selected and stage it in the dataset ...
mined_datapoints = my_miner.get_results()
my_dataset.add_datapoints(mined_datapoints)

# ... then commit with a message.
my_dataset.commit('my first run')

In [None]:
####
#
# Download the dataset, get the groundtruth and create a pytorch dataset
#
###

from dioptra.lake.utils import select_groundtruths, join_on_datapoints
from dioptra.lake.torch.object_store_datasets import ImageDataset

# Download the dataset's datapoints once and reuse the frame below.
datapoints_df = my_dataset.download_datapoints()

# Fetch the groundtruths of exactly those datapoints. The ids come from
# datapoints_df rather than a second download_datapoints() call, which avoids
# a redundant download and keeps both frames on the same snapshot.
gt_df = select_groundtruths(
    [{'left': 'datapoint', 'op': 'in', 'right': list(datapoints_df['id'])}],
    fields=['*', 'bboxes.*'])

# Combine datapoints with their groundtruths and wrap the result as a dataset.
joined_df = join_on_datapoints(datapoints_df, gt_df)

my_torch_dataset = ImageDataset(joined_df)

# Show the first sample's image as the cell output.
my_torch_dataset[0]['image']