In [7]:
import oci
import json

In [9]:
# --- Notebook configuration ---------------------------------------------
# Location of the OCI configuration file that the SDK config is read from.
OCI_CONFIG = '~/.oci/config'

# Compartment and display name used when the dataset is created.
# NOTE: "COMPARMENT" is misspelled, but the other cells reference the
# constant under this exact name, so it is left unchanged here.
COMPARMENT_OCID = 'ocid1.compartment.****'
DATASET_NAME = 'my-dataset'

# Object Storage namespace and bucket passed as the dataset source.
OBJECT_STORAGE_NAMESPACE = 'namespace'
BUCKET_NAME = 'my-bucket'

# File names for the dataset-level metadata and the per-record metadata.
DATASET_FILENAME = 'dataset.jsonl'
RECORDS_FILENAME = 'records.jsonl'

# SDK configuration dictionary shared by every client created below.
config = oci.config.from_file(OCI_CONFIG)


In [4]:
# Dataset-level metadata: the whole file is parsed as one JSON document.
# Later cells read its 'labelsSet' and 'annotationFormat' keys.
# The file name comes from the config cell so that changing DATASET_FILENAME
# actually takes effect.
with open(DATASET_FILENAME, encoding='utf-8') as dataset_file:
    dataset = json.load(dataset_file)

# Per-record metadata: one JSON object per line, indexed by the record's
# source path (record['sourceDetails']['path']).
records = {}
with open(RECORDS_FILENAME, encoding='utf-8') as records_file:
    for line in records_file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # letting json.loads raise on them.
            continue
        record_metadata = json.loads(line)
        records[record_metadata['sourceDetails']['path']] = record_metadata

In [10]:
from oci.data_labeling_service import DataLabelingManagementClient, DataLabelingManagementClientCompositeOperations
from oci.data_labeling_service.models import CreateDatasetDetails, ImageDatasetFormatDetails, \
    ObjectStorageSourceDetails, InitialRecordGenerationConfiguration, Label, LabelSet, AnnotationFormat

# Data labeling clients: the management client and the composite-operations
# helper built on top of it (used below to create the dataset and wait).
data_labeling_management_client = DataLabelingManagementClient(config)
data_labeling_composite_ops = DataLabelingManagementClientCompositeOperations(data_labeling_management_client)


# Request param with object storage detail
dataset_source_details = ObjectStorageSourceDetails(source_type='OBJECT_STORAGE',
                                                    namespace=OBJECT_STORAGE_NAMESPACE,
                                                    bucket=BUCKET_NAME)
dataset_format_details = ImageDatasetFormatDetails()
dataset_format_details.format_type = 'IMAGE'

initial_record_generation_configuration = InitialRecordGenerationConfiguration()
initial_record_generation_configuration.limit = 5000

# One Label per entry of the dataset's 'labelsSet', named after its 'name' key.
label_set = LabelSet(items=[Label(name=category['name']) for category in dataset['labelsSet']])


# Request param to create dataset
create_dataset_details = CreateDatasetDetails(display_name=DATASET_NAME, description=DATASET_NAME,
                                              compartment_id=COMPARMENT_OCID,
                                              labeling_instructions='Labeling instructions',
                                              annotation_format=dataset['annotationFormat'],
                                              label_set=label_set,
                                              dataset_source_details=dataset_source_details,
                                              dataset_format_details=dataset_format_details,
                                              initial_record_generation_configuration=initial_record_generation_configuration)

# Service call to create the dataset and wait until the operation reaches one
# of the listed terminal states.
data_label_response = data_labeling_composite_ops.create_dataset_and_wait_for_state(
    create_dataset_details=create_dataset_details,
    wait_for_states=['SUCCEEDED', 'FAILED'])

# The waiter also returns on 'FAILED', so the outcome must be checked before
# the created resource is read; otherwise later cells would run against a
# dataset that was never created.
dataset_work_request = data_label_response.data
if dataset_work_request.status != 'SUCCEEDED' or not dataset_work_request.resources:
    raise RuntimeError(
        "Dataset creation did not succeed (status: {}); cannot continue.".format(dataset_work_request.status))

dataset_ocid = dataset_work_request.resources[0].identifier

In [14]:
from oci.data_labeling_service_dataplane import DataLabelingClient
from oci.data_labeling_service_dataplane.models import CreateRecordDetails, RecordMetadata, CreateObjectStorageSourceDetails, \
    CreateAnnotationDetails, GenericEntity, Label, ImageObjectSelectionEntity, BoundingPolygon, NormalizedVertex

# Data labeling client
data_labeling_client = DataLabelingClient(config)

# Index the records that already exist in the dataset by name -> record id,
# following pagination until no further page is reported.
dls_records = {}
list_records = data_labeling_client.list_records(compartment_id=COMPARMENT_OCID, dataset_id=dataset_ocid, limit=1000)
while True:
    for record_summary in list_records.data.items:
        dls_records[record_summary.name] = record_summary.id
    if not list_records.has_next_page:
        break
    list_records = data_labeling_client.list_records(compartment_id=COMPARMENT_OCID, dataset_id=dataset_ocid,
                                                     limit=1000, page=list_records.next_page)


for img_path, record in records.items():
    if img_path in dls_records:
        # The record already exists in the dataset: reuse its id.
        record_id = dls_records[img_path]
    else:
        # Fail-safe: create the record when the listing above did not contain
        # it. The request models are only built when they are actually needed.
        record_metadata = RecordMetadata()
        record_metadata.record_type = record_metadata.RECORD_TYPE_IMAGE_METADATA

        create_source_details = CreateObjectStorageSourceDetails()
        create_source_details.source_type = create_source_details.SOURCE_TYPE_OBJECT_STORAGE
        create_source_details.relative_path = img_path

        create_record_details = CreateRecordDetails(compartment_id=COMPARMENT_OCID,
                                                    dataset_id=dataset_ocid,
                                                    name=img_path,
                                                    record_metadata=record_metadata,
                                                    source_details=create_source_details)

        record_info = data_labeling_client.create_record(create_record_details)
        record_id = record_info.data.id

    # One annotation is created per entity of every source annotation.
    for annotation in record["annotations"]:
        for entity in annotation["entities"]:
            # Only the first label of the entity is used. A dedicated name is
            # used so the LabelSet bound to `label_set` by the dataset-creation
            # cell is not overwritten with a plain list.
            entity_labels = [Label(label=entity["labels"][0]["label_name"])]

            # Convert every vertex of the polygon instead of assuming exactly
            # four: shorter lists no longer raise IndexError and longer ones
            # are no longer silently truncated.
            vertices = [NormalizedVertex(x=float(vertex['x']), y=float(vertex['y']))
                        for vertex in entity["boundingPolygon"]["normalizedVertices"]]

            polygon = BoundingPolygon(normalized_vertices=vertices)

            image_entity = ImageObjectSelectionEntity(entity_type=ImageObjectSelectionEntity.ENTITY_TYPE_IMAGEOBJECTSELECTION,
                                                      labels=entity_labels, bounding_polygon=polygon)

            create_annotation_details = CreateAnnotationDetails(compartment_id=COMPARMENT_OCID,
                                                                record_id=record_id,
                                                                entities=[image_entity])

            data_labeling_client.create_annotation(create_annotation_details)