diff --git a/.gitignore b/.gitignore index 51f9834..dec17a7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .vs/PythonSettings.json .vs/VSWorkspaceState.json +*.pyc .idea/ sdk/diffgram/__pycache__/ diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py index c769908..8e2980e 100644 --- a/sdk/diffgram/core/core.py +++ b/sdk/diffgram/core/core.py @@ -240,7 +240,6 @@ def set_default_directory(self, self.directory_id = self.default_directory['id'] self.directory_list = data["directory_list"] - self.session.headers.update( {'directory_id': str(self.directory_id)}) diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py new file mode 100644 index 0000000..81fd7af --- /dev/null +++ b/sdk/diffgram/core/diffgram_dataset_iterator.py @@ -0,0 +1,137 @@ +from PIL import Image, ImageDraw +from imageio import imread +import numpy as np + +class DiffgramDatasetIterator: + + def __init__(self, project, diffgram_file_id_list): + """ + + :param project (sdk.core.core.Project): A Project object from the Diffgram SDK + :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. + """ + self.diffgram_file_id_list = diffgram_file_id_list + + self.project = project + self._internal_file_list = [] + self.__validate_file_ids() + self.current_file_index = 0 + + def __iter__(self): + self.current_file_index = 0 + return self + + def __len__(self): + return len(self.diffgram_file_id_list) + + def __getitem__(self, idx): + diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True) + instance_data = self.get_file_instances(diffgram_file) + return instance_data + + def __next__(self): + file_id = self.diffgram_file_id_list[self.current_file_index] + diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) + instance_data = self.get_file_instances(diffgram_file) + self.current_file_index += 1 + return instance_data + + def __validate_file_ids(self): + result = self.project.file.file_list_exists(self.diffgram_file_id_list) + if not result: + raise Exception( + 'Some file IDs do not belong to the project. Please provide only files from the same project.') + + def get_image_data(self, diffgram_file): + if hasattr(diffgram_file, 'image'): + image = imread(diffgram_file.image.get('url_signed')) + return image + else: + raise Exception('Pytorch datasets only support images. 
Please provide only file_ids from images') + + def get_file_instances(self, diffgram_file): + if diffgram_file.type not in ['image', 'frame']: + raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file.type)) + + image = self.get_image_data(diffgram_file) + instance_list = diffgram_file.instance_list + instance_types_in_file = set([x['type'] for x in instance_list]) + # Process the instances of each file + sample = {'image': image, 'diffgram_file': diffgram_file} + has_boxes = False + has_poly = False + if 'box' in instance_types_in_file: + has_boxes = True + x_min_list, x_max_list, y_min_list, y_max_list = self.extract_bbox_values(instance_list, diffgram_file) + sample['x_min_list'] = x_min_list + sample['x_max_list'] = x_max_list + sample['y_min_list'] = y_min_list + sample['y_max_list'] = y_max_list + + if 'polygon' in instance_types_in_file: + has_poly = True + mask_list = self.extract_masks_from_polygon(instance_list, diffgram_file) + sample['polygon_mask_list'] = mask_list + + if len(instance_types_in_file) > 2 and has_boxes and has_poly: + raise NotImplementedError( + 'SDK only supports boxes and polygon types currently. If you want a new instance type to be supported please contact us!' + ) + + label_id_list, label_name_list = self.extract_labels(instance_list) + sample['label_id_list'] = label_id_list + sample['label_name_list'] = label_name_list + + return sample + + def extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0): + nx, ny = diffgram_file.image['width'], diffgram_file.image['height'] + mask_list = [] + for instance in instance_list: + if instance['type'] != 'polygon': + continue + poly = [(p['x'], p['y']) for p in instance['points']] + + img = Image.new(mode = 'L', size = (nx, ny), color = 0) # mode L = 8-bit pixels, black and white + draw = ImageDraw.Draw(img) + draw.polygon(poly, outline = 1, fill = 1) + mask = np.array(img).astype('float32') + # mask[np.where(mask == 0)] = empty_value + mask_list.append(mask) + return mask_list + + def extract_labels(self, instance_list, allowed_instance_types = None): + label_file_id_list = [] + label_names_list = [] + + for inst in instance_list: + if allowed_instance_types and inst['type'] not in allowed_instance_types: + continue + + label_file_id_list.append(inst['label_file']['id']) + label_names_list.append(inst['label_file']['label']['name']) + + return label_file_id_list, label_names_list + + def extract_bbox_values(self, instance_list, diffgram_file): + """ + Extracts bounding box values, normalized by the image dimensions. + For now we are assuming box shapes here, but we can extend it + to accept custom shapes specified by the user.
+ :param instance_list: + :return: + """ + x_min_list = [] + x_max_list = [] + y_min_list = [] + y_max_list = [] + + for inst in instance_list: + if inst['type'] != 'box': + continue + x_min_list.append(inst['x_min'] / diffgram_file.image['width']) + x_max_list.append(inst['x_max'] / diffgram_file.image['width']) + y_min_list.append(inst['y_min'] / diffgram_file.image['height']) + y_max_list.append(inst['y_max'] / diffgram_file.image['height']) + + return x_min_list, x_max_list, y_min_list, y_max_list diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index a4df272..b178f1c 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -1,6 +1,9 @@ from diffgram.file.file import File from ..regular.regular import refresh_from_dict import logging +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset +from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset +from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator def get_directory_list(self): @@ -71,14 +74,80 @@ def set_directory_by_name(self, name): str(names_attempted)) -class Directory(): +class Directory(DiffgramDatasetIterator): - def __init__(self, - client): + def __init__(self, client, file_id_list_sliced = None): self.client = client self.id = None + self.file_list_metadata = {} + if file_id_list_sliced is None: + self.file_id_list = self.all_file_ids() + else: + self.file_id_list = file_id_list_sliced + super(Directory, self).__init__(self.client, self.file_id_list) + + def all_files(self): + """ + Get all the files of the directory. + Warning! This can be an expensive function and take a long time. + :return: + """ + page_num = 1 + result = [] + while page_num is not None: + diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base') + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_files + return result + + def all_file_ids(self): + page_num = 1 + result = [] + while page_num is not None: + diffgram_ids = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only') + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_ids + return result + + def slice(self, query): + from diffgram.core.sliced_directory import SlicedDirectory + # Get the first page to validate syntax. + self.list_files( + limit = 25, + page_num = 1, + file_view_mode = 'ids_only', + query = query, + ) + sliced_dataset = SlicedDirectory( + client = self.client, + query = query, + original_directory = self + ) + return sliced_dataset + + def to_pytorch(self, transform = None): + """ + Transforms the file list inside the dataset into a pytorch dataset. + :return: + """ + file_id_list = self.all_file_ids() + pytorch_dataset = DiffgramPytorchDataset( + project = self.client, + diffgram_file_id_list = file_id_list, + transform = transform + + ) + return pytorch_dataset + + def to_tensorflow(self): + file_id_list = self.all_file_ids() + diffgram_tensorflow_dataset = DiffgramTensorflowDataset( + project = self.client, + diffgram_file_id_list = file_id_list + ) + return diffgram_tensorflow_dataset def new(self, name: str): """ @@ -131,9 +200,12 @@ def new(self, name: str): def list_files( - self, - limit=None, - search_term: str =None): + self, + page_num=1, + limit=100, + search_term: str =None, + file_view_mode: str = 'annotation', + query: str = None): """ Get a list of files in directory (from Diffgram service).
@@ -158,7 +230,6 @@ def list_files( else: logging.info("Using Default Dataset ID " + str(self.client.directory_id)) directory_id = self.client.directory_id - #print("directory_id", directory_id) metadata = {'metadata' : { @@ -167,10 +238,10 @@ def list_files( 'annotation_status': "All", 'limit': limit, 'media_type': "All", - 'request_next_page': False, - 'request_previous_page': False, - 'file_view_mode': "annotation", - 'search_term': search_term + 'page': page_num, + 'file_view_mode': file_view_mode, + 'search_term': search_term, + 'query': query } } @@ -190,17 +261,20 @@ def list_files( # Success data = response.json() file_list_json = data.get('file_list') - + self.file_list_metadata = data.get('metadata') # TODO would like this to perhaps be a seperate function # ie part of File_Constructor perhaps - file_list = [] - for file_json in file_list_json: - file = File.new( - client = self.client, - file_json = file_json) - file_list.append(file) - - return file_list + if file_view_mode == 'ids_only': + return file_list_json + else: + file_list = [] + for file_json in file_list_json: + file = File.new( + client = self.client, + file_json = file_json) + file_list.append(file) + + return file_list def get(self, diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py new file mode 100644 index 0000000..629f945 --- /dev/null +++ b/sdk/diffgram/core/sliced_directory.py @@ -0,0 +1,49 @@ +from diffgram.core.directory import Directory +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset +from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset + + +class SlicedDirectory(Directory): + + def __init__(self, client, original_directory: Directory, query: str): + self.original_directory = original_directory + self.query = query + self.client = client + # Share the same ID from the original directory as this is just an in-memory construct for better semantics. + self.id = original_directory.id + self.file_id_list = self.all_file_ids() + super(Directory, self).__init__(self.client, self.file_id_list) + + def all_file_ids(self): + page_num = 1 + result = [] + while page_num is not None: + diffgram_files = self.list_files(limit = 1000, + page_num = page_num, + file_view_mode = 'ids_only', + query = self.query) + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_files + return result + + def to_pytorch(self, transform = None): + """ + Transforms the file list inside the dataset into a pytorch dataset. 
+ :return: + """ + + pytorch_dataset = DiffgramPytorchDataset( + project = self.client, + diffgram_file_id_list = self.file_id_list, + transform = transform + + ) + return pytorch_dataset + + def to_tensorflow(self): + file_id_list = self.all_file_ids() + diffgram_tensorflow_dataset = DiffgramTensorflowDataset( + project = self.client, + diffgram_file_id_list = file_id_list + ) + return diffgram_tensorflow_dataset diff --git a/sdk/diffgram/file/file.py b/sdk/diffgram/file/file.py index ea32305..4fedf11 100644 --- a/sdk/diffgram/file/file.py +++ b/sdk/diffgram/file/file.py @@ -1,6 +1,5 @@ from ..regular.regular import refresh_from_dict - class File(): """ file literal object @@ -11,11 +10,12 @@ class File(): def __init__( self, - id=None, - client=None): + id = None, + client = None): self.id = id self.client = client + @staticmethod def new( client, file_json): @@ -62,7 +62,8 @@ def update( packet['instance_list'] = instance_list # Current default server side is to not overwrite - # packet['overwrite'] = overwrite + if overwrite: + packet['mode'] = "update_with_existing" self.client.file.from_packet(packet=packet) diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py index b950656..2a14f00 100644 --- a/sdk/diffgram/file/file_constructor.py +++ b/sdk/diffgram/file/file_constructor.py @@ -7,436 +7,458 @@ class FileConstructor(): - """ + """ - Construct files and communicate with client + Construct files and communicate with client - Caution class needs client in order to do effective communication - with server + Caution class needs client in order to do effective communication + with server - """ - - def __init__(self, client): - - self.client = client - - - def file_from_response( - self, - file_dict): - """ - file_dict, dict, file information from Project - - returns file, class File object - """ - - file = File(client=self.client) - refresh_from_dict(file, file_dict) - - return file - - - - def from_local( - self, - path: str, - instance_list: list = None, - frame_packet_map: dict = None, - assume_new_instances_machine_made: bool = True, - convert_names_to_label_files: bool = True - ): - """ - Create a Project file from local path - - path, string, file path - - returns file, class File object - """ - - files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')} + """ - headers = { - 'immediate_mode' : 'True', - } + def __init__(self, client): - payload = {} + self.client = client - if instance_list: - payload['instance_list'] = self.__validate_and_format_instance_list( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - if frame_packet_map: - payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map( - frame_packet_map = frame_packet_map, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - files['json'] = (None, json.dumps(payload), 'application/json') + def file_from_response( + self, + file_dict): + """ + file_dict, dict, file information from Project - endpoint = "/api/walrus/v1/project/" + self.client.project_string_id \ - + "/input/from_local" + returns file, class File object + """ - response = self.client.session.post( - self.client.host + endpoint, - files = files, - headers = headers) + file = File(client = self.client) + refresh_from_dict(file, file_dict) - 
self.client.handle_errors(response) - - data = response.json() + return file - #print(data) + def from_local( + self, + path: str, + instance_list: list = None, + frame_packet_map: dict = None, + assume_new_instances_machine_made: bool = True, + convert_names_to_label_files: bool = True + ): + """ + Create a Project file from local path - if data["log"]["success"] is True: - file = self.file_from_response(file_dict = data['file']) - return file - - - - def from_url( - self, - url: str, - media_type: str = "image", - job: Job = None, - job_id: int = None, - video_split_duration: int = None, - instance_list: list = None, # for Images - frame_packet_map: dict = None # for Video - ): - """ - - {'frame_packet_map' : { - 0 : instance_list, # Where the key is the integer of the frame of the video, 0 indexed. - 6 : instance_list, - 9 : instance_list - }, - - instance_example - { 'type': 'box', # options ['tag', 'box', 'polygon'] - label_file_id:, Integer # Project label_file id. - accessible through diffgram.get_label_file_dict() See sample - 'x_max': 128, Integer - 'x_min': 1, - 'y_min': 1, - 'y_max': 128, - 'points': [] # Required for polygon more on this coming soon - 'number': 0 # A number is optional, and only relates to video instances - } - - - """ - - packet = {'media' : {}} - packet['media']['url'] = url - packet['media']['type'] = media_type - - # Existing Instances - packet['frame_packet_map'] = frame_packet_map - packet['instance_list'] = instance_list - - if job: - packet["job_id"] = job.id - else: - packet["job_id"] = job_id - - if video_split_duration: - packet["video_split_duration"] = video_split_duration - - self.from_packet(packet = packet) - - return True - - - - def format_packet(): - raise NotImplementedError - - - @staticmethod - def __media_packet_sanity_checks(packet) -> None: - """ - Relevant to new media, ie not existing media - """ - - if type(packet) != dict: - raise Exception("packet is not a dict") - - if "media" not in packet: - raise Exception(" 'media' key is not defined in packet.") - - if "url" not in packet["media"]: - raise Exception(" 'url' key is not defined in packet['media'] .") - - media_type = packet["media"].get("type", None) - if not media_type: - raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']") - - - def __validate_existing_instances(): - pass - - def from_packet( - self, - packet, - job=None, - convert_names_to_label_files=True, - assume_new_instances_machine_made=True - ): - """ - Import single packet of data of the form: - - image_packet_example - {'instance_list' : - [instance_alpha, # Array of instance dicts as defined below - instance_bravo, - ... n instances], - 'media' : { - 'url' : "https://something", - 'type' : 'image' # ['image', 'video'] - } - } - - video_packet_example - {'frame_packet_map' : { - 0 : instance_list, - # Where the key is the integer of the frame of the video, 0 indexed. - 6 : instance_list, - 9 : instance_list - }, - 'media' : { - 'url' : "https://something", - 'type' : 'video' - } - } - - instance_example - { 'type': 'box', # options ['tag', 'box', 'polygon'] - label_file_id:, Integer # Project label_file id. - accessible through diffgram.get_label_file_dict() See sample - 'x_max': 128, Integer - 'x_min': 1, - 'y_min': 1, - 'y_max': 128, - 'points': [] # Required for polygon more on this coming soon - 'number': 0 # A number is optional, and only relates to video instances - } - - - Validates basics of packet form - and makes request to /input/packet endpoint. 
- - """ - file_id = packet.get('file_id') - if not file_id: - FileConstructor.__media_packet_sanity_checks(packet = packet) - - instance = None - - if packet.get("instance_list"): - packet['instance_list'] = self.__validate_and_format_instance_list( - instance_list = packet.get('instance_list'), - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - if packet.get("frame_packet_map"): - packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map( - frame_packet_map = packet['frame_packet_map'], - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - # Test one of the instances - # QUESTION Should we be testing all? User option maybe? - # (Otherwise invalid ones get discarded when it hits API) - - # TODO due to changes, this no longer tests anything , choose new way to sample - # instance list / packets here. - - if instance: - instance_type = instance.get("type", None) - if not instance_type: - raise Exception(" type is not defined in the first instance \ - of instance_list. Options are 'tag', 'box', 'polygon'.") + path, string, file path - if instance_type not in ['tag', 'box', 'polygon']: - raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.") + returns file, class File object + """ - if "label_file_id" not in instance: - raise Exception(" label_file_id is not defined in the first instance \ - of instance_list. ") + files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')} + + headers = { + 'immediate_mode': 'True', + } + + payload = {} + + if instance_list: + payload['instance_list'] = self.__validate_and_format_instance_list( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + + if frame_packet_map: + payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map( + frame_packet_map = frame_packet_map, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + files['json'] = (None, json.dumps(payload), 'application/json') - if job: - packet["job_id"] = job.id - packet["mode"] = "attach_to_job" + endpoint = "/api/walrus/v1/project/" + self.client.project_string_id \ + + "/input/from_local" + response = self.client.session.post( + self.client.host + endpoint, + files = files, + headers = headers) + self.client.handle_errors(response) - endpoint = "/api/walrus/v1/project/" + \ - self.client.project_string_id + "/input/packet" + data = response.json() - response = self.client.session.post( - self.client.host + endpoint, - json = packet) + # print(data) + + if data["log"]["success"] is True: + file = self.file_from_response(file_dict = data['file']) + return file + + def from_url( + self, + url: str, + media_type: str = "image", + job: Job = None, + job_id: int = None, + video_split_duration: int = None, + instance_list: list = None, # for Images + frame_packet_map: dict = None # for Video + ): + """ + + {'frame_packet_map' : { + 0 : instance_list, # Where the key is the integer of the frame of the video, 0 indexed. + 6 : instance_list, + 9 : instance_list + }, + + instance_example + { 'type': 'box', # options ['tag', 'box', 'polygon'] + label_file_id:, Integer # Project label_file id. 
+ accessible through diffgram.get_label_file_dict() See sample + 'x_max': 128, Integer + 'x_min': 1, + 'y_min': 1, + 'y_max': 128, + 'points': [] # Required for polygon more on this coming soon + 'number': 0 # A number is optional, and only relates to video instances + } + + + """ + + packet = {'media': {}} + packet['media']['url'] = url + packet['media']['type'] = media_type + + # Existing Instances + packet['frame_packet_map'] = frame_packet_map + packet['instance_list'] = instance_list + + if job: + packet["job_id"] = job.id + else: + packet["job_id"] = job_id + + if video_split_duration: + packet["video_split_duration"] = video_split_duration + + self.from_packet(packet = packet) + + return True + + def format_packet(): + raise NotImplementedError + + @staticmethod + def __media_packet_sanity_checks(packet) -> None: + """ + Relevant to new media, ie not existing media + """ + + if type(packet) != dict: + raise Exception("packet is not a dict") + + if "media" not in packet: + raise Exception(" 'media' key is not defined in packet.") + + if "url" not in packet["media"]: + raise Exception(" 'url' key is not defined in packet['media'] .") + + media_type = packet["media"].get("type", None) + if not media_type: + raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']") + + def __validate_existing_instances(): + pass + + def from_packet( + self, + packet, + job = None, + convert_names_to_label_files = True, + assume_new_instances_machine_made = True + ): + """ + Import single packet of data of the form: + + image_packet_example + {'instance_list' : + [instance_alpha, # Array of instance dicts as defined below + instance_bravo, + ... n instances], + 'media' : { + 'url' : "https://something", + 'type' : 'image' # ['image', 'video'] + } + } + + video_packet_example + {'frame_packet_map' : { + 0 : instance_list, + # Where the key is the integer of the frame of the video, 0 indexed. + 6 : instance_list, + 9 : instance_list + }, + 'media' : { + 'url' : "https://something", + 'type' : 'video' + } + } + + instance_example + { 'type': 'box', # options ['tag', 'box', 'polygon'] + label_file_id:, Integer # Project label_file id. + accessible through diffgram.get_label_file_dict() See sample + 'x_max': 128, Integer + 'x_min': 1, + 'y_min': 1, + 'y_max': 128, + 'points': [] # Required for polygon more on this coming soon + 'number': 0 # A number is optional, and only relates to video instances + } + + + Validates basics of packet form + and makes request to /input/packet endpoint. + + """ + file_id = packet.get('file_id') + if not file_id: + FileConstructor.__media_packet_sanity_checks(packet = packet) + + instance = None + + if packet.get("instance_list"): + packet['instance_list'] = self.__validate_and_format_instance_list( + instance_list = packet.get('instance_list'), + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + + if packet.get("frame_packet_map"): + packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map( + frame_packet_map = packet['frame_packet_map'], + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + + # Test one of the instances + # QUESTION Should we be testing all? User option maybe? + # (Otherwise invalid ones get discarded when it hits API) + + # TODO due to changes, this no longer tests anything , choose new way to sample + # instance list / packets here. 
+ + if instance: + instance_type = instance.get("type", None) + if not instance_type: + raise Exception(" type is not defined in the first instance \ + of instance_list. Options are 'tag', 'box', 'polygon'.") + + if instance_type not in ['tag', 'box', 'polygon']: + raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.") + + if "label_file_id" not in instance: + raise Exception(" label_file_id is not defined in the first instance \ + of instance_list. ") - self.client.handle_errors(response) - - data = response.json() + if job: + packet["job_id"] = job.id + packet["mode"] = "attach_to_job" - # TODO better handling input vs file + endpoint = "/api/walrus/v1/project/" + \ + self.client.project_string_id + "/input/packet" - if data["log"]["success"] is True: - - return True + response = self.client.session.post( + self.client.host + endpoint, + json = packet) - # TODO return file data here if in immediate mode - # else return input class? / handle this properly - #file = self.file_from_response(file_dict = data['file']) - #return file + self.client.handle_errors(response) + data = response.json() - def __validate_and_format_frame_packet_map( - self, - frame_packet_map: dict, - assume_new_instances_machine_made: bool = True, - convert_names_to_label_files: bool = True): - """ - Warning: Mutates packet map - """ + # TODO better handling input vs file - if type(frame_packet_map) != dict: - raise Exception("frame_packet_map is not a dict") + if data["log"]["success"] is True: + return True - for frame, instance_list in frame_packet_map.items(): - - if type(frame) != int: - raise Exception("frame is not a integer. The key should be the integer frame number.") + # TODO return file data here if in immediate mode + # else return input class? / handle this properly - if type(instance_list) != list: - raise Exception("instance_list is not a list. The value of the frame should be a list of instance dicts.") + # file = self.file_from_response(file_dict = data['file']) + # return file - frame_packet_map[frame] = self.__validate_and_format_instance_list( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) + def __validate_and_format_frame_packet_map( + self, + frame_packet_map: dict, + assume_new_instances_machine_made: bool = True, + convert_names_to_label_files: bool = True): + """ + Warning: Mutates packet map + """ - return frame_packet_map + if type(frame_packet_map) != dict: + raise Exception("frame_packet_map is not a dict") + for frame, instance_list in frame_packet_map.items(): - def __validate_and_format_instance_list( - self, - instance_list: list, - assume_new_instances_machine_made: bool, - convert_names_to_label_files: bool): + if type(frame) != int: + raise Exception("frame is not a integer. The key should be the integer frame number.") + if type(instance_list) != list: + raise Exception( + "instance_list is not a list. 
The value of the frame should be a list of instance dicts.") - FileConstructor.sanity_check_instance_list(instance_list) + frame_packet_map[frame] = self.__validate_and_format_instance_list( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) - instance_list = FileConstructor.format_assumptions( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made) + return frame_packet_map - if convert_names_to_label_files is True: - instance_list = self.instance_list_label_strings_to_ids( - instance_list = instance_list - ) + def __validate_and_format_instance_list( + self, + instance_list: list, + assume_new_instances_machine_made: bool, + convert_names_to_label_files: bool): - return instance_list + FileConstructor.sanity_check_instance_list(instance_list) + instance_list = FileConstructor.format_assumptions( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made) - def instance_list_label_strings_to_ids(self, instance_list: list): + if convert_names_to_label_files is True: + instance_list = self.instance_list_label_strings_to_ids( + instance_list = instance_list + ) - # Convert "name" label (ie == "cat") to Project label_file id - for index, instance in enumerate(instance_list): - - instance = convert_label(self, instance) - instance_list[index] = instance + return instance_list - return instance_list + def instance_list_label_strings_to_ids(self, instance_list: list): - @staticmethod - def __check_for_duplicates_on_instance_list(instance_list): - id_list = [] - duplicates = [] - for elm in instance_list: - if elm.get('id'): - if elm.get('id') not in id_list: - id_list.append(elm.get('id')) - else: - duplicates.append(elm.get('id')) - if len(duplicates) > 0: - raise Exception('Instance list must not have duplicate IDs. \n Duplicate IDs are: {}'.format(str(duplicates))) + # Convert "name" label (ie == "cat") to Project label_file id + for index, instance in enumerate(instance_list): + instance = convert_label(self, instance) + instance_list[index] = instance - @staticmethod - def sanity_check_instance_list(instance_list: list): + return instance_list - if type(instance_list) != list: - raise Exception("instance_list is not array like") + @staticmethod + def __check_for_duplicates_on_instance_list(instance_list): + id_list = [] + duplicates = [] + for elm in instance_list: + if elm.get('id'): + if elm.get('id') not in id_list: + id_list.append(elm.get('id')) + else: + duplicates.append(elm.get('id')) + if len(duplicates) > 0: + raise Exception( + 'Instance list must not have duplicate IDs. 
\n Duplicate IDs are: {}'.format(str(duplicates))) - if len(instance_list) == 0: - raise Warning("'instance_list' is empty") + @staticmethod + def sanity_check_instance_list(instance_list: list): - FileConstructor.__check_for_duplicates_on_instance_list(instance_list) + if type(instance_list) != list: + raise Exception("instance_list is not array like") - return + if len(instance_list) == 0: + raise Warning("'instance_list' is empty") + FileConstructor.__check_for_duplicates_on_instance_list(instance_list) - @staticmethod - def format_assumptions( - instance_list: list, - assume_new_instances_machine_made: bool): + return - if assume_new_instances_machine_made is True: - for i in range(len(instance_list)): - instance_list[i]['machine_made'] = True + @staticmethod + def format_assumptions( + instance_list: list, + assume_new_instances_machine_made: bool): - return instance_list + if assume_new_instances_machine_made is True: + for i in range(len(instance_list)): + instance_list[i]['machine_made'] = True + return instance_list + + def import_bulk(self): + """ + Import multiple packets + FUTURE + Accept a dict of packets + Each packet is defined as + { packet_id : { packet }} + + """ + raise NotImplementedError + def get_file_list(self, id_list: list, with_instances: bool = False): + """ + returns Diffgram File object + """ - def import_bulk(): - """ - Import multiple packets - FUTURE - Accept a dict of packets - Each packet is defined as - { packet_id : { packet }} + raise NotImplementedError - """ - pass + def file_list_exists(self, id_list): + """ + Verifies that the given ID list exists inside the project. + :param id_list: + :return: Boolean + """ + url = '/api/v1/project/{}/file/exists'.format( + self.client.project_string_id + ) + spec_dict = { + 'file_id_list': id_list + } + response = self.client.session.post( + self.client.host + url, + json = spec_dict) + self.client.handle_errors(response) + + response_json = response.json() - def get_by_id(self, - id: int): - """ - returns Diffgram File object - """ - - endpoint = "/api/v1/file/view" + if response_json.get('result'): + return response_json.get('result').get('exists') - spec_dict = { - 'file_id': id, - 'project_string_id': self.client.project_string_id - } - response = self.client.session.post( - self.client.host + endpoint, - json = spec_dict) - - self.client.handle_errors(response) - response_json = response.json() + def get_by_id(self, + id: int, + with_instances: bool = False): + """ + returns Diffgram File object + """ + + if not with_instances: + endpoint = "/api/v1/file/view" - return File.new( - client = self.client, - file_json = response_json.get('file')) + spec_dict = { + 'file_id': id, + 'project_string_id': self.client.project_string_id, + } + + file_response_key = 'file' + + else: + endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id) + spec_dict = { + 'directory_id': self.client.directory_id + } + file_response_key = 'file_serialized' + + response = self.client.session.post( + self.client.host + endpoint, + json = spec_dict) + self.client.handle_errors(response) + response_json = response.json() + file_data = response_json.get(file_response_key) + return File.new( + client = self.client, + file_json = file_data) diff --git a/sdk/diffgram/pytorch_diffgram/__init__.py b/sdk/diffgram/pytorch_diffgram/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py 
b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py new file mode 100644 index 0000000..4239f51 --- /dev/null +++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py @@ -0,0 +1,47 @@ +from torch.utils.data import Dataset, DataLoader +import torch as torch # type: ignore +from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator + + +class DiffgramPytorchDataset(DiffgramDatasetIterator, Dataset): + + def __init__(self, project, diffgram_file_id_list = None, transform = None): + """ + + :param project (sdk.core.core.Project): A Project object from the Diffgram SDK + :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. + :param transform (callable, optional): Optional transforms to be applied on a sample + """ + super(DiffgramPytorchDataset, self).__init__(project, diffgram_file_id_list) + + self.diffgram_file_id_list = diffgram_file_id_list + + self.project = project + self.transform = transform + + def __len__(self): + return len(self.diffgram_file_id_list) + + def __get_next_page_of_data(self): + raise NotImplementedError + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + + diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True) + + sample = self.get_file_instances(diffgram_file) + if 'x_min_list' in sample: + sample['x_min_list'] = torch.Tensor(sample['x_min_list']) + if 'x_max_list' in sample: + sample['x_max_list'] = torch.Tensor(sample['x_max_list']) + if 'y_min_list' in sample: + sample['y_min_list'] = torch.Tensor(sample['y_min_list']) + if 'y_max_list' in sample: + sample['y_max_list'] = torch.Tensor(sample['y_max_list']) + + if self.transform: + sample = self.transform(sample) + + return sample diff --git a/sdk/diffgram/tensorflow_diffgram/__init__.py b/sdk/diffgram/tensorflow_diffgram/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py new file mode 100644 index 0000000..7c9bf01 --- /dev/null +++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py @@ -0,0 +1,86 @@ +from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator +import os + +try: + import tensorflow as tf # type: ignore +except ModuleNotFoundError: + raise ModuleNotFoundError( + "'tensorflow' module should be installed to convert the Dataset into tensorflow format" + ) + + +class DiffgramTensorflowDataset(DiffgramDatasetIterator): + + def __init__(self, project, diffgram_file_id_list): + """ + + :param project (sdk.core.core.Project): A Project object from the Diffgram SDK + :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. 
+ :param transform (callable, optional): Optional transforms to be applied on a sample + """ + super(DiffgramTensorflowDataset, self).__init__(project, diffgram_file_id_list) + + self.diffgram_file_id_list = diffgram_file_id_list + + self.project = project + self.__validate_file_ids() + + def int64_feature(self, value): + return tf.train.Feature(int64_list = tf.train.Int64List(value = [value])) + + def int64_list_feature(self, value): + return tf.train.Feature(int64_list = tf.train.Int64List(value = value)) + + def bytes_feature(self, value): + return tf.train.Feature(bytes_list = tf.train.BytesList(value = [value])) + + def bytes_list_feature(self, value): + return tf.train.Feature(bytes_list = tf.train.BytesList(value = value)) + + def float_feature(self, value): + return tf.train.Feature(float_list = tf.train.FloatList(value = [value])) + + def float_list_feature(self, value): + return tf.train.Feature(float_list = tf.train.FloatList(value = value)) + + def __validate_file_ids(self): + result = self.project.file.file_list_exists(self.diffgram_file_id_list) + if not result: + raise Exception( + 'Some file IDs do not belong to the project. Please provide only files from the same project.') + + def __getitem__(self, idx): + tf_example = self.get_tf_train_example(idx) + return tf_example + + def get_tf_train_example(self, idx): + file_id = self.diffgram_file_id_list[idx] + diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) + image = self.get_image_data(diffgram_file) + instance_data = self.get_file_instances(diffgram_file) + filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename']) + label_names_bytes = [x.encode() for x in instance_data['label_name_list']] + tf_example_dict = { + 'image/height': self.int64_feature(instance_data['diffgram_file'].image['height']), + 'image/width': self.int64_feature(instance_data['diffgram_file'].image['width']), + 'image/filename': self.bytes_feature(filename.encode()), + 'image/source_id': self.bytes_feature(filename.encode()), + 'image/encoded': self.bytes_feature(image.tobytes()), + 'image/format': self.bytes_feature(file_extension.encode()), + 'image/object/bbox/xmin': self.float_list_feature(instance_data['x_min_list']), + 'image/object/bbox/xmax': self.float_list_feature(instance_data['x_max_list']), + 'image/object/bbox/ymin': self.float_list_feature(instance_data['y_min_list']), + 'image/object/bbox/ymax': self.float_list_feature(instance_data['y_max_list']), + 'image/object/class/text': self.bytes_list_feature(label_names_bytes), + 'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']), + } + tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict)) + return tf_example + + def __next__(self): + tf_example = self.get_tf_train_example(self.current_file_index) + self.current_file_index += 1 + return tf_example + + # def get_dataset_obj(self): + # return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape = (1,))) diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py new file mode 100644 index 0000000..ab5987b --- /dev/null +++ b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py @@ -0,0 +1,45 @@ +import diffgram +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset + +project = diffgram.Project(project_string_id = "voc-test", + client_id = "LIVE__p0blrrm6p5fnan5sh8ec", + client_secret = 
"d14sl5vtg672ms8rg97yp1vc9do1ao3ee2xlzktk29kbk49t8mklpt7bvnmh", + debug = True) + +file = project.file.get_by_id(1554, with_instances = True) + +diffgram_dataset = DiffgramPytorchDataset( + project = project, + diffgram_file_id_list = [1554] +) + + + + + +# Draw +def display_masks(): + import matplotlib.pyplot as plt + from PIL import Image, ImageDraw + img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], + diffgram_dataset[0]['diffgram_file'].image['height']], 0) + mask1 = diffgram_dataset[0]['polygon_mask_list'][0] + mask2 = diffgram_dataset[0]['polygon_mask_list'][1] + plt.figure() + plt.subplot(1, 2, 1) + # plt.imshow(img, 'gray', interpolation='none') + plt.imshow(mask1, 'jet', interpolation = 'none', alpha = 0.7) + plt.imshow(mask2, 'Oranges', interpolation = 'none', alpha = 0.7) + plt.show() + + +# Dataset Example + +dataset = project.directory.get('Default') + +pytorch_dataset = dataset.to_pytorch() +tf_dataset = dataset.to_tensorflow() + + +sliced_dataset = dataset.slice(query = 'labels.sheep > 0 or labels.sofa > 0') + diff --git a/sdk/requirements.txt b/sdk/requirements.txt index 05e9482..02704a9 100644 --- a/sdk/requirements.txt +++ b/sdk/requirements.txt @@ -3,4 +3,6 @@ opencv-python>=4.0.0.21 scipy>=1.1.0 six>=1.9.0 tensorflow>=1.12.0 -pillow \ No newline at end of file +pillow +torch +imageio \ No newline at end of file diff --git a/sdk/tests/__init__.py b/sdk/tests/__init__.py new file mode 100644 index 0000000..e69de29