From 3b23f807df252d21763ca303569a698bf606888a Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 29 Jun 2021 14:55:30 -0600 Subject: [PATCH 01/17] feat: add pagination and file_view mode To allow fetching of more than 1000 files for cases where all files are needed. Also added file_view mode in case we need lighter versions of the serialized files (ie no annotations) --- .gitignore | 1 + sdk/diffgram/core/directory.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 45d4970..3b05b95 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ sdk/diffgram/regular/__pycache__/ sdk/diffgram/task/__pycache__/ sdk/diffgram/utils/__pycache__/ +venv/* \ No newline at end of file diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 115f3b3..df08d24 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -76,6 +76,7 @@ def __init__(self, self.client = client self.id = None + self.file_list_metadata = {} def new(self, name: str): @@ -129,9 +130,11 @@ def new(self, name: str): def list_files( - self, + self, + page_num=1, limit=None, - search_term: str =None): + search_term: str =None, + file_view_mode: str = 'annotation'): """ Get a list of files in directory (from Diffgram service). 
@@ -165,9 +168,10 @@ def list_files( 'annotation_status': "All", 'limit': limit, 'media_type': "All", + 'page': page_num, 'request_next_page': False, 'request_previous_page': False, - 'file_view_mode': "annotation", + 'file_view_mode': file_view_mode, 'search_term': search_term } } @@ -188,7 +192,7 @@ def list_files( # Success data = response.json() file_list_json = data.get('file_list') - + self.file_list_metadata = data.get('metadata') # TODO would like this to perhaps be a seperate function # ie part of File_Constructor perhaps file_list = [] From e85f04c833fca73ed9b1e8bb02255d455f6d253c Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 29 Jun 2021 15:29:27 -0600 Subject: [PATCH 02/17] wip: remove request_next_page params --- sdk/diffgram/core/directory.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index df08d24..8eb1e4a 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -132,7 +132,7 @@ def new(self, name: str): def list_files( self, page_num=1, - limit=None, + limit=100, search_term: str =None, file_view_mode: str = 'annotation'): """ @@ -169,8 +169,6 @@ def list_files( 'limit': limit, 'media_type': "All", 'page': page_num, - 'request_next_page': False, - 'request_previous_page': False, 'file_view_mode': file_view_mode, 'search_term': search_term } From b9e032c0db162125966e5d0f2257fec1a4d9ceae Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 30 Jun 2021 14:32:31 -0600 Subject: [PATCH 03/17] feat: initial support to export to pytorch Gives users the ability to export any dataset into a pytorch dataset. Pending support for other instance types different from boxes and video support. 
--- sdk/add_file_id_to_json.py | 46 +++++++++++++++ sdk/diffgram/core/directory.py | 30 +++++++++- sdk/diffgram/file/file.py | 6 +- sdk/diffgram/file/file_constructor.py | 29 ++++++--- sdk/diffgram/pytorch_diffgram/__init__.py | 0 .../diffgram_pytorch_dataset.py | 59 +++++++++++++++++++ sdk/requirements.txt | 4 +- 7 files changed, 161 insertions(+), 13 deletions(-) create mode 100644 sdk/add_file_id_to_json.py create mode 100644 sdk/diffgram/pytorch_diffgram/__init__.py create mode 100644 sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py diff --git a/sdk/add_file_id_to_json.py b/sdk/add_file_id_to_json.py new file mode 100644 index 0000000..bfcbfbf --- /dev/null +++ b/sdk/add_file_id_to_json.py @@ -0,0 +1,46 @@ +from diffgram.core.core import Project +import json + +project = Project(project_string_id = "coco-dataset", + debug = True, + client_id = "LIVE__rj6whqkwxkups7oczqis", + client_secret = "fr5vy64v2096qad9av0dgw3fr0kjavt4c156soiwx51ntyv9qswpuxkhg0lf") + + +def find_file(file_list, name): + for f in file_list: + if f.original_filename == name: + return f + return None + + +with open('/home/pablo/Downloads/coco2017.json') as json_file: + data = json.load(json_file) + + dataset_default = project.directory.get(name = "Default") + + page_num = 1 + all_files = [] + print('start') + while page_num != None: + print('Current page', page_num) + diffgram_files = dataset_default.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base') + page_num = dataset_default.file_list_metadata['next_page'] + print('{} of {}'.format(page_num, dataset_default.file_list_metadata['total_pages'])) + all_files = all_files + diffgram_files + + print('') + print('Files fetched: ', len(all_files)) + result = [] + for elm in data: + file = find_file(all_files, name = elm['image_name']) + if file: + print('Adding file ID {} to {}'.format(file.id, elm['image_name'])) + elm['file_id'] = file.id + result.append(elm) + else: + print(elm['image_name'], 'not found.') + + s = 
json.dumps(result) + f = open('/home/pablo/Downloads/coco2017_with_ids.json', 'w') + f.write(s) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 8eb1e4a..fc247fd 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -1,7 +1,7 @@ from diffgram.file.file import File from ..regular.regular import refresh_from_dict import logging - +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset def get_directory_list(self): """ @@ -78,6 +78,34 @@ def __init__(self, self.id = None self.file_list_metadata = {} + def all_files(self): + """ + Get all the files of the directory. + Warning! This can be an expensive function and take a long time. + :return: + """ + page_num = 1 + result = [] + while page_num is not None: + diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base') + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_files + return result + + def to_pytorch(self, transform = None): + """ + Transforms the file list inside the dataset into a pytorch dataset. 
+ :return: + """ + dataset_files = self.all_files() + file_id_list = [file.id for file in dataset_files] + pytorch_dataset = DiffgramPytorchDataset( + project = self.client, + diffgram_file_id_list = file_id_list, + transform = transform + + ) + return pytorch_dataset def new(self, name: str): """ diff --git a/sdk/diffgram/file/file.py b/sdk/diffgram/file/file.py index ea32305..6df65b4 100644 --- a/sdk/diffgram/file/file.py +++ b/sdk/diffgram/file/file.py @@ -1,6 +1,5 @@ from ..regular.regular import refresh_from_dict - class File(): """ file literal object @@ -11,11 +10,12 @@ class File(): def __init__( self, - id=None, - client=None): + id = None, + client = None): self.id = id self.client = client + @staticmethod def new( client, file_json): diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py index b950656..3b1b526 100644 --- a/sdk/diffgram/file/file_constructor.py +++ b/sdk/diffgram/file/file_constructor.py @@ -414,29 +414,42 @@ def import_bulk(): def get_by_id(self, - id: int): + id: int, + with_instances: bool = False): """ returns Diffgram File object """ - - endpoint = "/api/v1/file/view" - spec_dict = { - 'file_id': id, - 'project_string_id': self.client.project_string_id + if not with_instances: + endpoint = "/api/v1/file/view" + + spec_dict = { + 'file_id': id, + 'project_string_id': self.client.project_string_id, + } + + + file_response_key = 'file' + + else: + endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id) + spec_dict = { + 'directory_id': self.client.directory_id } + file_response_key = 'file_serialized' response = self.client.session.post( self.client.host + endpoint, json = spec_dict) - + self.client.handle_errors(response) response_json = response.json() + file_data = response_json.get(file_response_key) return File.new( client = self.client, - file_json = response_json.get('file')) + file_json = file_data) diff --git a/sdk/diffgram/pytorch_diffgram/__init__.py 
b/sdk/diffgram/pytorch_diffgram/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py new file mode 100644 index 0000000..716b97a --- /dev/null +++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py @@ -0,0 +1,59 @@ +from torch.utils.data import Dataset, DataLoader +import torch +import os +from imageio import imread +import numpy as np + + +class DiffgramPytorchDataset(Dataset): + + def __init__(self, project, diffgram_file_id_list, transform = None): + """ + + :param project (sdk.core.core.Project): A Project object from the Diffgram SDK + :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. + :param transform (callable, optional): Optional transforms to be applied on a sample + """ + self.diffgram_file_id_list = diffgram_file_id_list + self.project = project + self.transform = transform + + def __process_instance(self, instance): + """ + Creates a pytorch tensor based on the instance type. + For now we are assuming shapes here, but we can extend it + to accept custom shapes specified by the user. + :param instance: + :return: + """ + if instance['type'] == 'box': + result = np.array([instance['x_min'], instance['y_min'], instance['x_max'], instance['y_max']]) + result = torch.tensor(result) + return result + + def __len__(self): + return len(self.diffgram_file_id_list) + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + + diffgram_file = self.project.file.get_by_id(idx, with_instances = True) + if hasattr(diffgram_file, 'image'): + image = imread(diffgram_file.image.get('url_signed')) + else: + raise Exception('Pytorch datasets only support images. 
Please provide only file_ids from images') + + instance_list = diffgram_file.instance_list + + # Process the instances of each file + processed_instance_list = [] + for instance in instance_list: + instnace_tensor = self.__process_instance(instance) + processed_instance_list.append(instnace_tensor) + sample = {'image': image, 'instance_list': instance_list} + + if self.transform: + sample = self.transform(sample) + + return sample diff --git a/sdk/requirements.txt b/sdk/requirements.txt index 05e9482..02704a9 100644 --- a/sdk/requirements.txt +++ b/sdk/requirements.txt @@ -3,4 +3,6 @@ opencv-python>=4.0.0.21 scipy>=1.1.0 six>=1.9.0 tensorflow>=1.12.0 -pillow \ No newline at end of file +pillow +torch +imageio \ No newline at end of file From e4a0ecf58c2b1fada79087690736ed4b6ecc217d Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 26 Jul 2021 14:54:10 -0600 Subject: [PATCH 04/17] wip pytorch --- .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 172 bytes .../diffgram_pytorch_dataset.cpython-38.pyc | Bin 0 -> 2370 bytes .../pytorch_diffgram/diffgram_pytorch_dataset.py | 1 + 3 files changed, 1 insertion(+) create mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc create mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fdac839a07c00c36bcf5d3fb8ad798cc4d22242 GIT binary patch literal 172 zcmWIL<>g`kg6-$;B!KA0AOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yY=R{fzwFRQ-a) zq?~+xm!kZFr2Go~l+3iW^rFOE{esGpjQl*^;*@NC5JncyFG|jchYQ5VXXa&=#K-Fu VRNmsS$<0qG%}KQbS@ju+831c;Ehhi~ literal 0 HcmV?d00001 diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..6ad21c32779815367fa42401b4528230c5be8060 GIT binary patch literal 2370 zcmZ`)&2Ah;5bmDYnVntRNo0`_a|o>nL`H)36i33Ym>dw0i~zjXH! zuQk5l91s#b0SEiY({LYg?#3}mOI_35Q(aY4_0>1uZfy+-jGzAa^YMN} z$X~d*JbJL%hiN_sA&I0pDQH3qFYzeuz1&ZH_v|G-c=~xzgh{x3*H5C)i0sMWIgx>) zXMQq}-Xju+e}gp=du{(fSYecHhq(Bv5>n~In6_b7XeDHdYL|x(n|+vO1VWL7N|JbT zC_U*vrx1v91%ZUJ|C}U&jN|~Gp`_q+qrG*IO(s(2$VqewgA{t_H3>m=9o@ zTOb;*A)GW6ByD_d-n9PhD@*>cUQ zvD2LBs$h1e*eW{q=-|t%X5}(+KFM;$^UQz;lxVPILXR`6gVa@zGVwaIZ9zN*C;2D)L;6bM-!}YMWK1$ie*A%OzVY8fGgtR!r1&o+-&H2!I(; z)VW$4KaBnM=F356(#75OmnmuoYm0HO?HQF%bRY7qLlgi;@;lLp&VR-3tSZ!Q4Y5_b z2fC`q)$#7i_-?(hv#R_6^1X}m@;MfnuMbuW4mY|t9r#A|qK!Du%FJ?(4h=>?`XLSJ z$=e67#LQ@=pCVRP1L29E;IMNa77dkj2fE~mr$4mhoWkDU!PB0Tfs^45nf5N|sb{@& zzwvO@V3+hy{#oY}LG}Pz!YTvFUVZ?xsx*~mV`w2)21HR-8Y{|FF}tYMi)y2RPeHe= zp0Q_8I}OvAd6AVL?auk}()+~&5QYt~>6xeZ zZNKr3qVs_SaGudqzl`iq250m~3g;WAy(u{jn%)8V{`P6ukn_zZ1nBmg;C!p;UrP7N8iAi{`e)F()*Pdl|w$8q3gRBr!l?>-P=O&w$N}e1p z37g5TV-0XlzF=lv*Oj)+T`(i|FjvqCNWnj3Qgu3xFEhDh!A>*w^bTaB-CC905mS6! 
zzX7MoATzu}Q5)I{N!HO)+e1^yV5%%1FL)-~{!AEQt=5?1&Y}-*+rcNt*d9Qj`Ym)5 z^HN5^;ZEiu1z7#`^c@O zC=}=Ikn^IFbNGYtc9Zj`bCGv%^d`8|48?5}@1Vezxg@Y(bpQf;(=}-14L72#$akml zFuCD4U^KT`Zbk?djZLGGm0f9DuQEyBVIZqq)!G1lnDs($qYB!- Date: Wed, 4 Aug 2021 09:28:50 -0600 Subject: [PATCH 05/17] wip: adding support for bounding box to pytorch --- sdk/diffgram/core/core.py | 1 - sdk/diffgram/file/file_constructor.py | 761 +++++++++--------- .../diffgram_pytorch_dataset.py | 48 +- sdk/tests/__init__.py | 0 4 files changed, 421 insertions(+), 389 deletions(-) create mode 100644 sdk/tests/__init__.py diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py index c769908..8e2980e 100644 --- a/sdk/diffgram/core/core.py +++ b/sdk/diffgram/core/core.py @@ -240,7 +240,6 @@ def set_default_directory(self, self.directory_id = self.default_directory['id'] self.directory_list = data["directory_list"] - self.session.headers.update( {'directory_id': str(self.directory_id)}) diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py index 3b1b526..0c4c930 100644 --- a/sdk/diffgram/file/file_constructor.py +++ b/sdk/diffgram/file/file_constructor.py @@ -7,449 +7,458 @@ class FileConstructor(): - """ + """ - Construct files and communicate with client + Construct files and communicate with client - Caution class needs client in order to do effective communication - with server + Caution class needs client in order to do effective communication + with server - """ - - def __init__(self, client): - - self.client = client - - - def file_from_response( - self, - file_dict): - """ - file_dict, dict, file information from Project - - returns file, class File object - """ - - file = File(client=self.client) - refresh_from_dict(file, file_dict) - - return file - - - - def from_local( - self, - path: str, - instance_list: list = None, - frame_packet_map: dict = None, - assume_new_instances_machine_made: bool = True, - convert_names_to_label_files: bool = True - ): - """ - Create 
a Project file from local path - - path, string, file path - - returns file, class File object - """ - - files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')} + """ - headers = { - 'immediate_mode' : 'True', - } + def __init__(self, client): - payload = {} + self.client = client - if instance_list: - payload['instance_list'] = self.__validate_and_format_instance_list( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - if frame_packet_map: - payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map( - frame_packet_map = frame_packet_map, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - files['json'] = (None, json.dumps(payload), 'application/json') + def file_from_response( + self, + file_dict): + """ + file_dict, dict, file information from Project - endpoint = "/api/walrus/v1/project/" + self.client.project_string_id \ - + "/input/from_local" + returns file, class File object + """ - response = self.client.session.post( - self.client.host + endpoint, - files = files, - headers = headers) + file = File(client = self.client) + refresh_from_dict(file, file_dict) - self.client.handle_errors(response) - - data = response.json() + return file - #print(data) + def from_local( + self, + path: str, + instance_list: list = None, + frame_packet_map: dict = None, + assume_new_instances_machine_made: bool = True, + convert_names_to_label_files: bool = True + ): + """ + Create a Project file from local path - if data["log"]["success"] is True: - file = self.file_from_response(file_dict = data['file']) - return file - - - - def from_url( - self, - url: str, - media_type: str = "image", - job: Job = None, - job_id: int = None, - video_split_duration: int = None, - instance_list: list = None, # for Images - 
frame_packet_map: dict = None # for Video - ): - """ - - {'frame_packet_map' : { - 0 : instance_list, # Where the key is the integer of the frame of the video, 0 indexed. - 6 : instance_list, - 9 : instance_list - }, - - instance_example - { 'type': 'box', # options ['tag', 'box', 'polygon'] - label_file_id:, Integer # Project label_file id. - accessible through diffgram.get_label_file_dict() See sample - 'x_max': 128, Integer - 'x_min': 1, - 'y_min': 1, - 'y_max': 128, - 'points': [] # Required for polygon more on this coming soon - 'number': 0 # A number is optional, and only relates to video instances - } - - - """ - - packet = {'media' : {}} - packet['media']['url'] = url - packet['media']['type'] = media_type - - # Existing Instances - packet['frame_packet_map'] = frame_packet_map - packet['instance_list'] = instance_list - - if job: - packet["job_id"] = job.id - else: - packet["job_id"] = job_id - - if video_split_duration: - packet["video_split_duration"] = video_split_duration - - self.from_packet(packet = packet) - - return True - - - - def format_packet(): - raise NotImplementedError - - - @staticmethod - def __media_packet_sanity_checks(packet) -> None: - """ - Relevant to new media, ie not existing media - """ - - if type(packet) != dict: - raise Exception("packet is not a dict") - - if "media" not in packet: - raise Exception(" 'media' key is not defined in packet.") - - if "url" not in packet["media"]: - raise Exception(" 'url' key is not defined in packet['media'] .") - - media_type = packet["media"].get("type", None) - if not media_type: - raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']") - - - def __validate_existing_instances(): - pass - - def from_packet( - self, - packet, - job=None, - convert_names_to_label_files=True, - assume_new_instances_machine_made=True - ): - """ - Import single packet of data of the form: - - image_packet_example - {'instance_list' : - [instance_alpha, # Array of instance 
dicts as defined below - instance_bravo, - ... n instances], - 'media' : { - 'url' : "https://something", - 'type' : 'image' # ['image', 'video'] - } - } - - video_packet_example - {'frame_packet_map' : { - 0 : instance_list, - # Where the key is the integer of the frame of the video, 0 indexed. - 6 : instance_list, - 9 : instance_list - }, - 'media' : { - 'url' : "https://something", - 'type' : 'video' - } - } - - instance_example - { 'type': 'box', # options ['tag', 'box', 'polygon'] - label_file_id:, Integer # Project label_file id. - accessible through diffgram.get_label_file_dict() See sample - 'x_max': 128, Integer - 'x_min': 1, - 'y_min': 1, - 'y_max': 128, - 'points': [] # Required for polygon more on this coming soon - 'number': 0 # A number is optional, and only relates to video instances - } - - - Validates basics of packet form - and makes request to /input/packet endpoint. - - """ - file_id = packet.get('file_id') - if not file_id: - FileConstructor.__media_packet_sanity_checks(packet = packet) - - instance = None - - if packet.get("instance_list"): - packet['instance_list'] = self.__validate_and_format_instance_list( - instance_list = packet.get('instance_list'), - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - if packet.get("frame_packet_map"): - packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map( - frame_packet_map = packet['frame_packet_map'], - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) - - # Test one of the instances - # QUESTION Should we be testing all? User option maybe? - # (Otherwise invalid ones get discarded when it hits API) - - # TODO due to changes, this no longer tests anything , choose new way to sample - # instance list / packets here. 
- - if instance: - instance_type = instance.get("type", None) - if not instance_type: - raise Exception(" type is not defined in the first instance \ - of instance_list. Options are 'tag', 'box', 'polygon'.") + path, string, file path - if instance_type not in ['tag', 'box', 'polygon']: - raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.") + returns file, class File object + """ - if "label_file_id" not in instance: - raise Exception(" label_file_id is not defined in the first instance \ - of instance_list. ") + files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')} + headers = { + 'immediate_mode': 'True', + } - if job: - packet["job_id"] = job.id - packet["mode"] = "attach_to_job" + payload = {} + if instance_list: + payload['instance_list'] = self.__validate_and_format_instance_list( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + if frame_packet_map: + payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map( + frame_packet_map = frame_packet_map, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) - endpoint = "/api/walrus/v1/project/" + \ - self.client.project_string_id + "/input/packet" + files['json'] = (None, json.dumps(payload), 'application/json') - response = self.client.session.post( - self.client.host + endpoint, - json = packet) + endpoint = "/api/walrus/v1/project/" + self.client.project_string_id \ + + "/input/from_local" - self.client.handle_errors(response) - - data = response.json() + response = self.client.session.post( + self.client.host + endpoint, + files = files, + headers = headers) - # TODO better handling input vs file + self.client.handle_errors(response) - if data["log"]["success"] is True: - - return True + data = response.json() - # TODO return 
file data here if in immediate mode - # else return input class? / handle this properly - #file = self.file_from_response(file_dict = data['file']) - #return file + # print(data) + if data["log"]["success"] is True: + file = self.file_from_response(file_dict = data['file']) + return file + + def from_url( + self, + url: str, + media_type: str = "image", + job: Job = None, + job_id: int = None, + video_split_duration: int = None, + instance_list: list = None, # for Images + frame_packet_map: dict = None # for Video + ): + """ + + {'frame_packet_map' : { + 0 : instance_list, # Where the key is the integer of the frame of the video, 0 indexed. + 6 : instance_list, + 9 : instance_list + }, + + instance_example + { 'type': 'box', # options ['tag', 'box', 'polygon'] + label_file_id:, Integer # Project label_file id. + accessible through diffgram.get_label_file_dict() See sample + 'x_max': 128, Integer + 'x_min': 1, + 'y_min': 1, + 'y_max': 128, + 'points': [] # Required for polygon more on this coming soon + 'number': 0 # A number is optional, and only relates to video instances + } + + + """ + + packet = {'media': {}} + packet['media']['url'] = url + packet['media']['type'] = media_type + + # Existing Instances + packet['frame_packet_map'] = frame_packet_map + packet['instance_list'] = instance_list + + if job: + packet["job_id"] = job.id + else: + packet["job_id"] = job_id + + if video_split_duration: + packet["video_split_duration"] = video_split_duration + + self.from_packet(packet = packet) + + return True + + def format_packet(): + raise NotImplementedError + + @staticmethod + def __media_packet_sanity_checks(packet) -> None: + """ + Relevant to new media, ie not existing media + """ + + if type(packet) != dict: + raise Exception("packet is not a dict") + + if "media" not in packet: + raise Exception(" 'media' key is not defined in packet.") + + if "url" not in packet["media"]: + raise Exception(" 'url' key is not defined in packet['media'] .") + + media_type = 
packet["media"].get("type", None) + if not media_type: + raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']") + + def __validate_existing_instances(): + pass + + def from_packet( + self, + packet, + job = None, + convert_names_to_label_files = True, + assume_new_instances_machine_made = True + ): + """ + Import single packet of data of the form: + + image_packet_example + {'instance_list' : + [instance_alpha, # Array of instance dicts as defined below + instance_bravo, + ... n instances], + 'media' : { + 'url' : "https://something", + 'type' : 'image' # ['image', 'video'] + } + } + + video_packet_example + {'frame_packet_map' : { + 0 : instance_list, + # Where the key is the integer of the frame of the video, 0 indexed. + 6 : instance_list, + 9 : instance_list + }, + 'media' : { + 'url' : "https://something", + 'type' : 'video' + } + } + + instance_example + { 'type': 'box', # options ['tag', 'box', 'polygon'] + label_file_id:, Integer # Project label_file id. + accessible through diffgram.get_label_file_dict() See sample + 'x_max': 128, Integer + 'x_min': 1, + 'y_min': 1, + 'y_max': 128, + 'points': [] # Required for polygon more on this coming soon + 'number': 0 # A number is optional, and only relates to video instances + } + + + Validates basics of packet form + and makes request to /input/packet endpoint. 
+ + """ + file_id = packet.get('file_id') + if not file_id: + FileConstructor.__media_packet_sanity_checks(packet = packet) + + instance = None + + if packet.get("instance_list"): + packet['instance_list'] = self.__validate_and_format_instance_list( + instance_list = packet.get('instance_list'), + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + + if packet.get("frame_packet_map"): + packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map( + frame_packet_map = packet['frame_packet_map'], + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) + + # Test one of the instances + # QUESTION Should we be testing all? User option maybe? + # (Otherwise invalid ones get discarded when it hits API) + + # TODO due to changes, this no longer tests anything , choose new way to sample + # instance list / packets here. + + if instance: + instance_type = instance.get("type", None) + if not instance_type: + raise Exception(" type is not defined in the first instance \ + of instance_list. Options are 'tag', 'box', 'polygon'.") + + if instance_type not in ['tag', 'box', 'polygon']: + raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.") + + if "label_file_id" not in instance: + raise Exception(" label_file_id is not defined in the first instance \ + of instance_list. 
") - def __validate_and_format_frame_packet_map( - self, - frame_packet_map: dict, - assume_new_instances_machine_made: bool = True, - convert_names_to_label_files: bool = True): - """ - Warning: Mutates packet map - """ + if job: + packet["job_id"] = job.id + packet["mode"] = "attach_to_job" - if type(frame_packet_map) != dict: - raise Exception("frame_packet_map is not a dict") + endpoint = "/api/walrus/v1/project/" + \ + self.client.project_string_id + "/input/packet" - for frame, instance_list in frame_packet_map.items(): - - if type(frame) != int: - raise Exception("frame is not a integer. The key should be the integer frame number.") + response = self.client.session.post( + self.client.host + endpoint, + json = packet) - if type(instance_list) != list: - raise Exception("instance_list is not a list. The value of the frame should be a list of instance dicts.") + self.client.handle_errors(response) - frame_packet_map[frame] = self.__validate_and_format_instance_list( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made, - convert_names_to_label_files = convert_names_to_label_files - ) + data = response.json() - return frame_packet_map + # TODO better handling input vs file + if data["log"]["success"] is True: + return True - def __validate_and_format_instance_list( - self, - instance_list: list, - assume_new_instances_machine_made: bool, - convert_names_to_label_files: bool): + # TODO return file data here if in immediate mode + # else return input class? 
/ handle this properly + # file = self.file_from_response(file_dict = data['file']) + # return file - FileConstructor.sanity_check_instance_list(instance_list) + def __validate_and_format_frame_packet_map( + self, + frame_packet_map: dict, + assume_new_instances_machine_made: bool = True, + convert_names_to_label_files: bool = True): + """ + Warning: Mutates packet map + """ - instance_list = FileConstructor.format_assumptions( - instance_list = instance_list, - assume_new_instances_machine_made = assume_new_instances_machine_made) + if type(frame_packet_map) != dict: + raise Exception("frame_packet_map is not a dict") - if convert_names_to_label_files is True: - instance_list = self.instance_list_label_strings_to_ids( - instance_list = instance_list - ) + for frame, instance_list in frame_packet_map.items(): - return instance_list + if type(frame) != int: + raise Exception("frame is not a integer. The key should be the integer frame number.") + if type(instance_list) != list: + raise Exception( + "instance_list is not a list. 
The value of the frame should be a list of instance dicts.") - def instance_list_label_strings_to_ids(self, instance_list: list): + frame_packet_map[frame] = self.__validate_and_format_instance_list( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made, + convert_names_to_label_files = convert_names_to_label_files + ) - # Convert "name" label (ie == "cat") to Project label_file id - for index, instance in enumerate(instance_list): - - instance = convert_label(self, instance) - instance_list[index] = instance + return frame_packet_map - return instance_list + def __validate_and_format_instance_list( + self, + instance_list: list, + assume_new_instances_machine_made: bool, + convert_names_to_label_files: bool): - @staticmethod - def __check_for_duplicates_on_instance_list(instance_list): - id_list = [] - duplicates = [] - for elm in instance_list: - if elm.get('id'): - if elm.get('id') not in id_list: - id_list.append(elm.get('id')) - else: - duplicates.append(elm.get('id')) - if len(duplicates) > 0: - raise Exception('Instance list must not have duplicate IDs. 
\n Duplicate IDs are: {}'.format(str(duplicates))) + FileConstructor.sanity_check_instance_list(instance_list) - @staticmethod - def sanity_check_instance_list(instance_list: list): + instance_list = FileConstructor.format_assumptions( + instance_list = instance_list, + assume_new_instances_machine_made = assume_new_instances_machine_made) - if type(instance_list) != list: - raise Exception("instance_list is not array like") + if convert_names_to_label_files is True: + instance_list = self.instance_list_label_strings_to_ids( + instance_list = instance_list + ) - if len(instance_list) == 0: - raise Warning("'instance_list' is empty") + return instance_list - FileConstructor.__check_for_duplicates_on_instance_list(instance_list) + def instance_list_label_strings_to_ids(self, instance_list: list): - return + # Convert "name" label (ie == "cat") to Project label_file id + for index, instance in enumerate(instance_list): + instance = convert_label(self, instance) + instance_list[index] = instance + return instance_list - @staticmethod - def format_assumptions( - instance_list: list, - assume_new_instances_machine_made: bool): + @staticmethod + def __check_for_duplicates_on_instance_list(instance_list): + id_list = [] + duplicates = [] + for elm in instance_list: + if elm.get('id'): + if elm.get('id') not in id_list: + id_list.append(elm.get('id')) + else: + duplicates.append(elm.get('id')) + if len(duplicates) > 0: + raise Exception( + 'Instance list must not have duplicate IDs. 
\n Duplicate IDs are: {}'.format(str(duplicates))) - if assume_new_instances_machine_made is True: - for i in range(len(instance_list)): - instance_list[i]['machine_made'] = True + @staticmethod + def sanity_check_instance_list(instance_list: list): - return instance_list + if type(instance_list) != list: + raise Exception("instance_list is not array like") + if len(instance_list) == 0: + raise Warning("'instance_list' is empty") + FileConstructor.__check_for_duplicates_on_instance_list(instance_list) - def import_bulk(): - """ - Import multiple packets - FUTURE - Accept a dict of packets - Each packet is defined as - { packet_id : { packet }} + return - """ - pass + @staticmethod + def format_assumptions( + instance_list: list, + assume_new_instances_machine_made: bool): + if assume_new_instances_machine_made is True: + for i in range(len(instance_list)): + instance_list[i]['machine_made'] = True - def get_by_id(self, - id: int, - with_instances: bool = False): - """ - returns Diffgram File object - """ + return instance_list + + def import_bulk: + """ + Import multiple packets + FUTURE + Accept a dict of packets + Each packet is defined as + { packet_id : { packet }} + + """ + pass - if not with_instances: - endpoint = "/api/v1/file/view" + def get_file_list(self, id_list: list, with_instances: bool = False): + """ + returns Diffgram File object + """ - spec_dict = { - 'file_id': id, - 'project_string_id': self.client.project_string_id, - } + raise NotImplementedError + def file_list_exists(self, id_list): + """ + Verifies that the given ID list exists inside the project. 
+ :param id_list: + :return: Boolean + """ + url = '/api/v1/project/{}/file/exists'.format( + self.client.project_string_id + ) + spec_dict = { + 'file_id_list': id_list + } + response = self.client.session.post( + self.client.host + url, + json = spec_dict) - file_response_key = 'file' + self.client.handle_errors(response) + + response_json = response.json() - else: - endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id) - spec_dict = { - 'directory_id': self.client.directory_id - } - file_response_key = 'file_serialized' + if response_json.get('result'): + return response_json.get('result').get('exists') - response = self.client.session.post( - self.client.host + endpoint, - json = spec_dict) - self.client.handle_errors(response) - response_json = response.json() - file_data = response_json.get(file_response_key) + def get_by_id(self, + id: int, + with_instances: bool = False): + """ + returns Diffgram File object + """ + + if not with_instances: + endpoint = "/api/v1/file/view" - return File.new( - client = self.client, - file_json = file_data) + spec_dict = { + 'file_id': id, + 'project_string_id': self.client.project_string_id, + } + + file_response_key = 'file' + + else: + endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id) + spec_dict = { + 'directory_id': self.client.directory_id + } + file_response_key = 'file_serialized' + + response = self.client.session.post( + self.client.host + endpoint, + json = spec_dict) + self.client.handle_errors(response) + response_json = response.json() + file_data = response_json.get(file_response_key) + return File.new( + client = self.client, + file_json = file_data) diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py index 200230f..71a06db 100644 --- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py +++ 
b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py @@ -7,7 +7,7 @@ class DiffgramPytorchDataset(Dataset): - def __init__(self, project, diffgram_file_id_list, transform = None): + def __init__(self, project, diffgram_file_id_list = None, transform = None): """ :param project (sdk.core.core.Project): A Project object from the Diffgram SDK @@ -15,10 +15,17 @@ def __init__(self, project, diffgram_file_id_list, transform = None): :param transform (callable, optional): Optional transforms to be applied on a sample """ self.diffgram_file_id_list = diffgram_file_id_list + self.__validate_file_ids() self.project = project self.transform = transform + self._internal_file_list = [] - def __process_instance(self, instance): + + def __validate_file_ids(self): + url = '/api/' + raise NotImplementedError + + def __extract_bbox_values(self, instance_list): """ Creates a pytorch tensor based on the instance type. For now we are assuming shapes here, but we can extend it @@ -26,15 +33,27 @@ def __process_instance(self, instance): :param instance: :return: """ - if instance['type'] == 'box': - result = np.array([instance['x_min'], instance['y_min'], instance['x_max'], instance['y_max']]) - result = torch.tensor(result) + x_min_list = [] + x_max_list = [] + y_min_list = [] + y_max_list = [] + + for inst in instance_list: + if inst['type'] != 'box': + continue + x_min_list.append(inst['x_min']) + x_max_list.append(inst['x_max']) + y_min_list.append(inst['y_min']) + y_max_list.append(inst['y_max']) - return result + return x_min_list, x_max_list, y_min_list, y_max_list def __len__(self): return len(self.diffgram_file_id_list) + def __get_next_page_of_data(self): + raise NotImplementedError + def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() @@ -46,15 +65,20 @@ def __getitem__(self, idx): raise Exception('Pytorch datasets only support images. 
Please provide only file_ids from images') instance_list = diffgram_file.instance_list - + instance_types_in_file = set([x['type'] for x in instance_list]) # Process the instances of each file processed_instance_list = [] - for instance in instance_list: - instnace_tensor = self.__process_instance(instance) - processed_instance_list.append(instnace_tensor) - sample = {'image': image, 'instance_list': instance_list} + + sample = {'image': image} + if 'box' in instance_types_in_file: + x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list) + sample['x_min_list'] = torch.Tensor(x_min_list) + sample['x_max_list'] = torch.Tensor(x_max_list) + sample['y_min_list'] = torch.Tensor(y_min_list) + sample['y_max_list'] = torch.Tensor(y_max_list) + if 'polygon' in instance_types_in_file: if self.transform: sample = self.transform(sample) - return sample + return sample \ No newline at end of file diff --git a/sdk/tests/__init__.py b/sdk/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 1d00f6bc6429413ee8746be4c9a0e35bc928222e Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 4 Aug 2021 18:03:16 -0600 Subject: [PATCH 06/17] wip: slice class and segmentation mask --- pytorch_test.py | 33 +++++++++++ sdk/diffgram/core/directory.py | 29 +++++++++- sdk/diffgram/core/sliced_directory.py | 37 ++++++++++++ sdk/diffgram/file/file_constructor.py | 4 +- .../__pycache__/__init__.cpython-38.pyc | Bin 172 -> 172 bytes .../diffgram_pytorch_dataset.cpython-38.pyc | Bin 2370 -> 4466 bytes .../diffgram_pytorch_dataset.py | 54 ++++++++++++------ 7 files changed, 136 insertions(+), 21 deletions(-) create mode 100644 pytorch_test.py create mode 100644 sdk/diffgram/core/sliced_directory.py diff --git a/pytorch_test.py b/pytorch_test.py new file mode 100644 index 0000000..d7c55e2 --- /dev/null +++ b/pytorch_test.py @@ -0,0 +1,33 @@ +import diffgram +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset + +project = 
diffgram.Project(project_string_id = "voc-test", + client_id = "LIVE__p0blrrm6p5fnan5sh8ec", + client_secret = "d14sl5vtg672ms8rg97yp1vc9do1ao3ee2xlzktk29kbk49t8mklpt7bvnmh", + debug = True) + +file = project.file.get_by_id(1554, with_instances = True) + +diffgram_dataset = DiffgramPytorchDataset( + project = project, + diffgram_file_id_list = [1554] +) + + + + + +# Draw +import matplotlib.pyplot as plt +from PIL import Image, ImageDraw +img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0) +mask1 = diffgram_dataset[0]['polygon_mask_list'][0] +mask2 = diffgram_dataset[0]['polygon_mask_list'][1] +print(mask1) +for x in mask1: + print(x) +plt.figure() +plt.subplot(1,2,1) +# plt.imshow(img, 'gray', interpolation='none') +plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7) +plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7) \ No newline at end of file diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index fc247fd..901bda4 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -92,13 +92,35 @@ def all_files(self): result = result + diffgram_files return result + def all_file_ids(self): + page_num = 1 + result = [] + while page_num is not None: + diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only') + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_files + return result + + def slice(self, query): + from diffgram.core.sliced_directory import SlicedDirectory + result = self.list_files( + limit = 25, + page_num = 1, + file_view_mode = 'ids_only' + ) + sliced_dataset = SlicedDirectory( + query = query, + original_directory = self + ) + return sliced_dataset + def to_pytorch(self, transform = None): """ Transforms the file list inside the dataset into a pytorch dataset. 
:return: """ - dataset_files = self.all_files() - file_id_list = [file.id for file in dataset_files] + from diffgram.core.sliced_directory import SlicedDirectory + file_id_list = self.all_file_ids() pytorch_dataset = DiffgramPytorchDataset( project = self.client, diffgram_file_id_list = file_id_list, @@ -162,7 +184,8 @@ def list_files( page_num=1, limit=100, search_term: str =None, - file_view_mode: str = 'annotation'): + file_view_mode: str = 'annotation', + query: str = None): """ Get a list of files in directory (from Diffgram service). diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py new file mode 100644 index 0000000..187b0bb --- /dev/null +++ b/sdk/diffgram/core/sliced_directory.py @@ -0,0 +1,37 @@ +from diffgram.core.directory import Directory +from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset + +class SlicedDirectory(Directory): + + def __init__(self, client, original_directory: Directory, query: str): + self.original_directory = original_directory + self.query = query + self.client = client + + def all_file_ids(self): + page_num = 1 + result = [] + while page_num is not None: + diffgram_files = self.list_files(limit = 1000, + page_num = page_num, + file_view_mode = 'ids_only', + query = self.query) + page_num = self.file_list_metadata['next_page'] + result = result + diffgram_files + return result + + + def to_pytorch(self, transform = None): + """ + Transforms the file list inside the dataset into a pytorch dataset. 
+ :return: + """ + file_id_list = self.all_file_ids() + pytorch_dataset = DiffgramPytorchDataset( + project = self.client, + diffgram_file_id_list = file_id_list, + transform = transform + + ) + return pytorch_dataset + diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py index 0c4c930..2a14f00 100644 --- a/sdk/diffgram/file/file_constructor.py +++ b/sdk/diffgram/file/file_constructor.py @@ -383,7 +383,7 @@ def format_assumptions( return instance_list - def import_bulk: + def import_bulk(self): """ Import multiple packets FUTURE @@ -392,7 +392,7 @@ def import_bulk: { packet_id : { packet }} """ - pass + raise NotImplementedError def get_file_list(self, id_list: list, with_instances: bool = False): """ diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc index 8fdac839a07c00c36bcf5d3fb8ad798cc4d22242..956dd7bfa87685db1edb53c6b09a34f90b73a6a5 100644 GIT binary patch delta 20 acmZ3(xQ3A@l$V!_0SGQ@aU@RUSpWbmDg=W7 delta 20 acmZ3(xQ3A@l$V!_0SLCAzmqVLX8`~+WCh;< diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc index 6ad21c32779815367fa42401b4528230c5be8060..7cde116072e495af96c8f60b8f6b8e7fede4b1a9 100644 GIT binary patch literal 4466 zcmbVP%WvGq8Rzh|+|}w~`KiQk*$0xW>qth7_TZ|n11m}07>WU>7a{>m?QkW^+$B9T z8)*eDP*lG9P@u;iB%l|c`#%)@8|KPEf>-&*E>1#xR z>1#$U{M~$}-;Ub-*=UwopIO}F{%ec-Dcg6VIc~qOI*o6!i`B7pV=YmMOqFg?;aQRJ zRIGQH_Orf76OIwy(|)p<>ZbXviDaisQ%^n)Dv$BWUxK7o#JCmN{0g_Z^O}KamwT@* zFulkdJV1-bo4kb-29+)i#W%WAd4$@8#GuuoWuVms}s z@B_(TE_Vx&*5rBJ+<6#&98St%v0+B`M9~k`RvJ!u4PUH%es)f@-)sfi5-a2 zf`c#-8<`S`7=?pjefF)viqQo@hdSA|OJ(^41cw`29Os*(&i%jHN1F^-U_IBsJ@ zX&klT+diXr?|W+(n&I+vP8XJR^pGi%KNxciQK+nKYnvUwD7T$MHf9x*x0TD;KKmW~ zgiLXL@2eL$;8f=_Up_KZC#FJUq0Zw8 z9nZ*ByJS<*!jTEa=`KuA8hPvHC%fI$*h68|Y|Z-mx8p|D 
z;LZ||@1-N|zq(T+9@-j9)|$1?#(`qnHV-Pd@_IJG&l_jlRL+50HKCN2^7erYRcqU? z0)P@=>5aoI)i?;K|DefdF=|G6d``8hnSHhoOs@QcnQG>cG3(Wx%7-H7*R{8kakZuW ztu))*QrcCcGKI?&`Dn8koU;=TCwqZU-UcC%`uZB#2e)ugM#t^hzqenxWFwmGYBn-@ zK*e6ZuUUCUHM+G*n9?o!<)im#RYFD|qT2iHQeC9G@ZzqD`-yxhV}hD^!tC;DUI0<# zBNV2qMt)hq)1_w5jLtfacKZcS;l0^js=aQJ7vfveCIyyasPb%pPFP$XHF|lGs0SZm z2KMT-wU<#?5hMfc4ALDz3Xfb0Fxo2x_7OLz&mNT8ON2;9+E1hrsFSAyRLU|P@J_on z*V$|!RWj(-@TzBzyxmy+OxiEih%9%QV#Qs2Bi$rUYE%oRv$ad-*G|^o)Gh}v;)h`3 zjHIL6F`VZz$%U9;G-|OM@P>f3;Sr1WHP!-{EU*Q84lR2R{tumW30S5N!Q_#T`~eks zgM-h-ckCI$cm_;~+whF{UR|r41FQBG_!Q&rlJ%}dHe9);-7`LRvfVT7{<(JFw8Otn zuR}YKmsNg`3^&;O=TTIlQ2PW)g-T_ZgcB4FRXUJ`2seOfCKwTZO|=n{x{qTXE$}JE z4vL*{2N6u*lXBS4fRb`6DY5QWDi8oShKdGslR*eQ!AinRonix+nsf>9!tPKiMDWRU zS*G2rM`3YeWKee~Q*q3|BukjG_wZ=Vhay!&F*t4z*P#*ZAW~^>HwK5|Dm73!LDPtu z%rDsm;QknWf^wHmV60i(qlOP@M$62ut(3~i?xbv%I4)1yU1IL6y&sgpR^Y-=@!GYq-eUB=WAEMjP9r_B+$ne?5ID-pD4r?`ga~>stha0(N4|?T6MEjV z`p6XOYrZMY=tdmF%j5VdDw0;R*xsE>*)c=UF+fZ_q50%1%Bth7_ysCRaIdZx>M3PC z{S?Uy|3rvFh{xzVE4-U=yqT(a085HXKvrDzVx%w0GmM>bwtOjP7f$V(17SWrbZ~-d z!|A_JP}TvWz#$Wh74n8d3o!w?!?wHa;g5g^XJ*`fV~=gssE}jt+u~Cdzzv%RE$$vd z$YW;^s2T22j)C^}*j2OK=Z)8Ja|(wTQA31-`N1FUaf3H2@1-yPiX?ds?rR^~U%BI8 z+#I(M_N;^XDmY|-jn*o0H%~zkwV6q+Woqq74IvSIv#S=LGi&}GYcAAlj@wlmeeU=Tt4gh~&I{EWydk)IQxa2Z{h#BUSQ4Lo!f zwPzCEsF}&QhFk5cg2H;8apO8_=_Xm~#t71>Gs4DJA`_*AAaR)OwSytz3RMZ8v_sd% zdzYs9(+GJ2A|n4wgZBEtcg{MWk(fI{XSNRaPR4g@aTvi6Khce;&BPQoBuvQMFru4C zmAk1#d^xhJ8aMs=+H;{W@6$0)>r>AdB&gN&1^OjzYay-&_ASswJHVA;(RQhYU1M{T zmq0Z@>~Gn7KlpC_<351G1-&zcA22S9ZzRrBKU`%XzsIpux zlVb{UCE*ZR1JQONqlWRgtS~mEJzRIok$4}ykte$u?qRkpwe$SxGm{pYjL@uq5mP!? 
Ut>y8EVBT=H05R9X4%mY8Uzp6SzW@LL delta 1187 zcmZuv&5IOA6t9o|sOkA|oS2ETMiUm7*eoPB&4-APl?VY5l6YtgW$#q&Y@3;$Np(qP zq=tp;9)ggNrLQ@RM{fcD1Fs%(F9gB=Ai3xz#8*AL8&MnjS5>dxd-dM${r*{f)M>oVJKy^pW%uV3hXg@h;kJtTaIcWF`+1+iv-{*Db%sr@CDTH^d;03sJuWY$6dvRGovuT0lxbIJXQwl~nrBmpE#^!@CxMoEF;+2EH7LfiEH8AL_oWS# zEba`oa-A)q4!fko`9zKTvMBhW>pSnlMd9c`tbtc?gzv_#{r2mQ<+JGACo#Sj6HA9O zuV{>M_3&%Y*pPGZ;C!|GtiHbTDuJ#{5B`e4-^z21GdsYbAiok|ConMjU7{}Q(2(0~ zQ4_%0E}qkz=}7py_-73E`rI3!xo^A;^!>?sV9=sr0^l?>{-SBZ`xv7iZ-53sIDe2i zGw60p-2<VkS9LO3LPI) zC%~H2vVt$Ew}BqJn@U>N+scMAsSTv&n=_sX8*ZmXs19R=x3+d=+LsfZjdQi) zo{9NJwzV}->FD~*jkf&HV>@pS%LeIWB6Cs6H?A$GPBSr0n&KdyyArCGs?5oI2XmC- zsL`qw<>sCobM576Ummqnq@xKW0O5S`igIk7bErGha>V%>L>6ZN5U7Qb4q%5c5>O8_ z*f(%QO5Bb;&`MaIZ?A1Mz*T+d;G~1I4*pkWwR7@RK7dnVB}Yc0I%_goS>vdD+CGg> il>fF*f8IIteOnUCE{?|H&QM;$E|W{%hO!ux#pp}dULR}# diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py index 71a06db..e912a6c 100644 --- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py +++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py @@ -3,6 +3,8 @@ import os from imageio import imread import numpy as np +import scipy as sp +from PIL import Image, ImageDraw class DiffgramPytorchDataset(Dataset): @@ -15,17 +17,37 @@ def __init__(self, project, diffgram_file_id_list = None, transform = None): :param transform (callable, optional): Optional transforms to be applied on a sample """ self.diffgram_file_id_list = diffgram_file_id_list - self.__validate_file_ids() + self.project = project self.transform = transform self._internal_file_list = [] - + self.__validate_file_ids() def __validate_file_ids(self): - url = '/api/' - raise NotImplementedError - - def __extract_bbox_values(self, instance_list): + result = self.project.file.file_list_exists(self.diffgram_file_id_list) + if not result: + raise Exception( + 'Some file IDs do not belong to the project. 
Please provide only files from the same project.') + + def __extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0): + nx, ny = diffgram_file.image['width'], diffgram_file.image['height'] + mask_list = [] + for instance in instance_list: + if instance['type'] != 'polygon': + continue + poly = [(p['x'], p['y']) for p in instance['points']] + + img = Image.new(mode = 'L', size = (nx, ny), color = 0) # mode L = 8-bit pixels, black and white + draw = ImageDraw.Draw(img) + print() + draw.polygon(poly, outline = 1, fill = 1) + mask = np.array(img).astype('float32') + # mask[np.where(mask == 0)] = empty_value + print('mask', len(mask)) + mask_list.append(mask) + return mask_list + + def __extract_bbox_values(self, instance_list, diffgram_file): """ Creates a pytorch tensor based on the instance type. For now we are assuming shapes here, but we can extend it @@ -41,10 +63,10 @@ def __extract_bbox_values(self, instance_list): for inst in instance_list: if inst['type'] != 'box': continue - x_min_list.append(inst['x_min']) - x_max_list.append(inst['x_max']) - y_min_list.append(inst['y_min']) - y_max_list.append(inst['y_max']) + x_min_list.append(inst['x_min'] / diffgram_file.image['width']) + x_max_list.append(inst['x_max'] / diffgram_file.image['width']) + y_min_list.append(inst['y_min'] / diffgram_file.image['width']) + y_max_list.append(inst['y_max'] / diffgram_file.image['width']) return x_min_list, x_max_list, y_min_list, y_max_list @@ -58,7 +80,7 @@ def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() - diffgram_file = self.project.file.get_by_id(idx, with_instances = True) + diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True) if hasattr(diffgram_file, 'image'): image = imread(diffgram_file.image.get('url_signed')) else: @@ -68,17 +90,17 @@ def __getitem__(self, idx): instance_types_in_file = set([x['type'] for x in instance_list]) # Process the instances of each file 
processed_instance_list = [] - - sample = {'image': image} + sample = {'image': image, 'diffgram_file': diffgram_file} if 'box' in instance_types_in_file: - x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list) + x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list, diffgram_file) sample['x_min_list'] = torch.Tensor(x_min_list) sample['x_max_list'] = torch.Tensor(x_max_list) sample['y_min_list'] = torch.Tensor(y_min_list) sample['y_max_list'] = torch.Tensor(y_max_list) if 'polygon' in instance_types_in_file: - + mask_list = self.__extract_masks_from_polygon(instance_list, diffgram_file) + sample['polygon_mask_list'] = mask_list if self.transform: sample = self.transform(sample) - return sample \ No newline at end of file + return sample From c9e953cbbb556fb05d5dcaf88de5205ac0b8bc49 Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 9 Aug 2021 14:21:58 -0600 Subject: [PATCH 07/17] wip: adding dataset iterator class --- pytorch_test.py | 14 +- .../core/diffgram_dataset_iterator.py | 129 ++++++++++++++++++ sdk/diffgram/core/directory.py | 1 + .../diffgram_pytorch_dataset.cpython-38.pyc | Bin 4466 -> 4425 bytes .../diffgram_pytorch_dataset.py | 94 ++++--------- sdk/diffgram/tensorflow_diffgram/__init__.py | 0 .../diffgram_tensorflow_dataset.py | 80 +++++++++++ 7 files changed, 243 insertions(+), 75 deletions(-) create mode 100644 sdk/diffgram/core/diffgram_dataset_iterator.py create mode 100644 sdk/diffgram/tensorflow_diffgram/__init__.py create mode 100644 sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py diff --git a/pytorch_test.py b/pytorch_test.py index d7c55e2..83fe139 100644 --- a/pytorch_test.py +++ b/pytorch_test.py @@ -23,11 +23,17 @@ img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0) mask1 = diffgram_dataset[0]['polygon_mask_list'][0] mask2 = diffgram_dataset[0]['polygon_mask_list'][1] 
-print(mask1) -for x in mask1: - print(x) plt.figure() plt.subplot(1,2,1) # plt.imshow(img, 'gray', interpolation='none') plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7) -plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7) \ No newline at end of file +plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7) +plt.show() + + +# Dataset Example + +dataset = project.directory.get('Default') + +sliced_dataset = dataset.slice(query = 'labels.sheep > 0 or labels.sofa > 0') + diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py new file mode 100644 index 0000000..53e831f --- /dev/null +++ b/sdk/diffgram/core/diffgram_dataset_iterator.py @@ -0,0 +1,129 @@ +from PIL import Image, ImageDraw +from imageio import imread + + +class DiffgramDatasetIterator: + + def __init__(self, project, diffgram_file_id_list): + """ + + :param project (sdk.core.core.Project): A Project object from the Diffgram SDK + :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. + """ + self.diffgram_file_id_list = diffgram_file_id_list + + self.project = project + self._internal_file_list = [] + self.__validate_file_ids() + self.current_file_index = 0 + + def __iter__(self): + self.current_file_index = 0 + return self + + def __next__(self): + file_id = self.diffgram_file_id_list[self.current_file_index] + diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) + instance_data = self.get_file_instances(diffgram_file) + self.current_file_index += 1 + return instance_data + + def __validate_file_ids(self): + result = self.project.file.file_list_exists(self.diffgram_file_id_list) + if not result: + raise Exception( + 'Some file IDs do not belong to the project. 
Please provide only files from the same project.') + + def get_image_data(self, diffgram_file): + if hasattr(diffgram_file, 'image'): + image = imread(diffgram_file.image.get('url_signed')) + return image + else: + raise Exception('Pytorch datasets only support images. Please provide only file_ids from images') + + def get_file_instances(self, diffgram_file): + if diffgram_file['type'] not in ['image', 'frame']: + raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file['type'])) + + image = self.get_image_data(diffgram_file) + instance_list = diffgram_file.instance_list + instance_types_in_file = set([x['type'] for x in instance_list]) + # Process the instances of each file + sample = {'image': image, 'diffgram_file': diffgram_file} + has_boxes = False + has_poly = False + if 'box' in instance_types_in_file: + has_boxes = True + x_min_list, x_max_list, y_min_list, y_max_list = self.extract_bbox_values(instance_list, diffgram_file) + sample['x_min_list'] = x_min_list + sample['x_max_list'] = x_max_list + sample['y_min_list'] = y_min_list + sample['y_max_list'] = y_max_list + + if 'polygon' in instance_types_in_file: + has_poly = True + mask_list = self.extract_masks_from_polygon(instance_list, diffgram_file) + sample['polygon_mask_list'] = mask_list + + if len(instance_types_in_file) > 2 and has_boxes and has_boxes: + raise NotImplementedError( + 'SDK only supports boxes and polygon types currently. If you want a new instance type to be supported please contact us!' 
+ ) + + label_id_list, label_name_list = self.extract_labels(instance_list) + sample['label_id_list'] = label_id_list + sample['label_name_list'] = label_name_list + + return sample + + def extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0): + nx, ny = diffgram_file.image['width'], diffgram_file.image['height'] + mask_list = [] + for instance in instance_list: + if instance['type'] != 'polygon': + continue + poly = [(p['x'], p['y']) for p in instance['points']] + + img = Image.new(mode = 'L', size = (nx, ny), color = 0) # mode L = 8-bit pixels, black and white + draw = ImageDraw.Draw(img) + draw.polygon(poly, outline = 1, fill = 1) + mask = np.array(img).astype('float32') + # mask[np.where(mask == 0)] = empty_value + mask_list.append(mask) + return mask_list + + def extract_labels(self, instance_list, allowed_instance_types = None): + label_file_id_list = [] + label_names_list = [] + + for inst in instance_list: + if allowed_instance_types and inst['type'] in allowed_instance_types: + continue + + label_file_id_list.append(inst['label_file']['id']) + label_names_list.append(inst['label_file']['label']['name']) + + return label_file_id_list, label_names_list + + def extract_bbox_values(self, instance_list, diffgram_file): + """ + Creates a pytorch tensor based on the instance type. + For now we are assuming shapes here, but we can extend it + to accept custom shapes specified by the user. 
+ :param instance: + :return: + """ + x_min_list = [] + x_max_list = [] + y_min_list = [] + y_max_list = [] + + for inst in instance_list: + if inst['type'] != 'box': + continue + x_min_list.append(inst['x_min'] / diffgram_file.image['width']) + x_max_list.append(inst['x_max'] / diffgram_file.image['width']) + y_min_list.append(inst['y_min'] / diffgram_file.image['width']) + y_max_list.append(inst['y_max'] / diffgram_file.image['width']) + + return x_min_list, x_max_list, y_min_list, y_max_list diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 901bda4..e737e07 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -109,6 +109,7 @@ def slice(self, query): file_view_mode = 'ids_only' ) sliced_dataset = SlicedDirectory( + client = self.client, query = query, original_directory = self ) diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc index 7cde116072e495af96c8f60b8f6b8e7fede4b1a9..afdb50b38f9e7da81a9ef35d57cfffae642e6b76 100644 GIT binary patch delta 417 zcmZ9GO-lk%6o%)F&WCfo;}!d)4?znW!-#TGi4+w?K~O~0DjP_OGO??hYFf4mAvpd3 zT}4ZA?GI=bwVYkh52#kP2)fs0=fK0^@V@7~Ux}v#-KEr0M4hch-CB#LHMqyDmVhE2 zXiGDmb*g#h5fL4(1wg6l4Z{mHp)fnuS=O|gCi9>!KAhSS7^HJm5gki$DN1dI4UCh3NdR8#>4uBPxa zIN1Is%K%Bt(hlt74_%APM?57lD={ZAFR>tz#k_sUq{a^%7pgoemkvJLx1{Uox*g0% ya=rhZQkfRn;;@M?kvfrC_`1XrmRUyMlq1DA8p96j0fw(E1uOW=MqnI=qI-Y3s8z(F3+|En!0qwA6lLrdFHFpL>@9Ey<3obY={rnYJ^A7d&BYH?dPbgrP3e z=7^_!44^$dotk>7GiKs&GRXq2J%F Date: Mon, 9 Aug 2021 14:27:41 -0600 Subject: [PATCH 08/17] fix: remove file --- sdk/add_file_id_to_json.py | 46 ------------------- .../tensorflow_diffgram/pytorch_test.py | 0 2 files changed, 46 deletions(-) delete mode 100644 sdk/add_file_id_to_json.py rename pytorch_test.py => sdk/diffgram/tensorflow_diffgram/pytorch_test.py (100%) diff --git a/sdk/add_file_id_to_json.py b/sdk/add_file_id_to_json.py 
deleted file mode 100644 index bfcbfbf..0000000 --- a/sdk/add_file_id_to_json.py +++ /dev/null @@ -1,46 +0,0 @@ -from diffgram.core.core import Project -import json - -project = Project(project_string_id = "coco-dataset", - debug = True, - client_id = "LIVE__rj6whqkwxkups7oczqis", - client_secret = "fr5vy64v2096qad9av0dgw3fr0kjavt4c156soiwx51ntyv9qswpuxkhg0lf") - - -def find_file(file_list, name): - for f in file_list: - if f.original_filename == name: - return f - return None - - -with open('/home/pablo/Downloads/coco2017.json') as json_file: - data = json.load(json_file) - - dataset_default = project.directory.get(name = "Default") - - page_num = 1 - all_files = [] - print('start') - while page_num != None: - print('Current page', page_num) - diffgram_files = dataset_default.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base') - page_num = dataset_default.file_list_metadata['next_page'] - print('{} of {}'.format(page_num, dataset_default.file_list_metadata['total_pages'])) - all_files = all_files + diffgram_files - - print('') - print('Files fetched: ', len(all_files)) - result = [] - for elm in data: - file = find_file(all_files, name = elm['image_name']) - if file: - print('Adding file ID {} to {}'.format(file.id, elm['image_name'])) - elm['file_id'] = file.id - result.append(elm) - else: - print(elm['image_name'], 'not found.') - - s = json.dumps(result). 
- f = open('/home/pablo/Downloads/coco2017_with_ids.json', 'w') - f.write(s) diff --git a/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py similarity index 100% rename from pytorch_test.py rename to sdk/diffgram/tensorflow_diffgram/pytorch_test.py From 060f2eb91f6d25303b71bffdc756788c606f137a Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 9 Aug 2021 14:30:41 -0600 Subject: [PATCH 09/17] ignore pyc --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3b05b95..0a12786 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .vs/PythonSettings.json .vs/VSWorkspaceState.json +*.pyc .idea/ sdk/diffgram/__pycache__/ From 4ece9b6e352aa90af6b106eed3d2f3339ea55fbf Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 9 Aug 2021 14:31:43 -0600 Subject: [PATCH 10/17] fix: remove pyc --- .../__pycache__/__init__.cpython-38.pyc | Bin 172 -> 0 bytes .../diffgram_pytorch_dataset.cpython-38.pyc | Bin 4425 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc delete mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 956dd7bfa87685db1edb53c6b09a34f90b73a6a5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 172 zcmWIL<>g`kg3DSQi6Hthh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o10SKO;XkRlguH zDJNgwr6|83DZfHLB{MB8y(lqPzo4=tBR@~KI3-&jgptMbi;^?q;R5mTnR%Hd@$q^E VmA5!-a`RJ4b5iXMahzHxq3H9iC(r*khOybDB9FbQ^dPz+SCn@El?1ZQqRfEMUuAMD5clm3QzZD01i1&Y43-x*5sy#Y#M4(HA@=X~cfziqdh7M@od zOYyr!%laENPChOgk5T0BLAb?PX7yRf`gUkDYTKC;I_B$!F1}7y>wBR$8LNlB8Eb@1 zeBErW-wIp(cGzatQ;XNQ_txTG!rnOHJhxw3o%%QUi`B7peIr(}Oq6a?<#`eFL~M4L z_R_vcVvZTLXZ?6P(GBz25b<7@mY#eZG#;bKUw|Z5$hZ~Se2LrKdCS1G%WH2fFuly{ 
z+(%E1H+U009-reaeCxc;=kfLV0$;?p!5LV*qOWhHz23Hn`!7bS5Z#>#tD9Yt)%2md zjq(IVeix*)ODnQU2FgkY)ZVs0Q9E>xEXa#Cx8s}4vYO3c@;n^G_)jnp#ctA7!5zt8 zt#%8MROCg~-FX=NJebsjV$00ziJ~82)!A+NKW-Peh~}vmL`LQJSkn z;E%@#)OVvP)2Fo;MPJ8R%43yOlT$7`u6AXT^+W^e6Fw2LmG_otrj-aQs*wu$tAmmDBOD=BQPjc@r7_gH zXM2p^gKuqI2*cIsnl6wudkBhT4+f3_Dy6k+?Xp9Fa@RRxV^%VGOSzoAVSj*~5EM5L zzI=%TPIWE=UIcleprowGw@F=uuZi4OgBMu>gQfP@DNmrkY-E;Bph9z@&f^80nvtt^ z2~*+X41!Uz4+u&lZ{7H8znd7WbZkLF(4I)-FjGba6`4mjbbVfbA7P*p8e3#8vzh&$ zgS(ds?VS1r$?ud+69XQK5#k?FDeYa@?Mn5vS77jCcU;@H#$L%btgo_hy{vO*1)lGf zBksSxU3on8)mN+y>kS+GitXCmFWs`%v&sFubH)wj9J*x#N@_{udWgnxP?P8I$F>Ell|HyJZQGB*~rKo4SV&zW`i?o(9L!7AKjuqc=QSF zN?y=|SPq_ED)(d`7TZ-(KbEg#L~bRTFuS^*74RPN5h~MNLvK*Ps-%FXq)q@{&n!1)!@DOx*?c~XxAhm`rg%NR` zI_-R*Yq1dVNPDp~GW6h@lAL#16%gBLE>)a&E61tZGdnhTo=f@zH6lz76KuARGSUq? zW96^>>1yrLX|hZ1!FvBM_PEX) zW$l$G{)*gp9_DKw*1$8+Fkq4bX!+(vVqxLcrzht}Mr)ikZvq=o2*vG%&f z=gppf$DWJTp5s>8!q~z(fniDf10zdi9dQ#;8O~yH^9v|uelJx!$g(P5E{$D;&7sI5 znQrF^KiHh)i2-Q=HxgN;HgY%^6hZ|ikjTr^p9qO$l`K~?Pr-&1L&`7__$n$U0=K3S zcoKP;F>eDDSnmp^i5o;tV_1BQzTcr72i+h3FPx+*=||B;F%~}|?_fNFctEuu5&1EZ zheUotgu-OFGzrxvdK>r9S=Kd^$A*nmMwPqOo+>DUSBW%knU-!4QnyBsPL=4@cVZbU zB?O7X{9ZelB9^F0{z*G@8@zXE_CAe{CtO76ePO)4e(=Fn=aUbU>UY{ztannrUx~vA zhWH7uPcaiw*pPsbxj2M3kO6lSiTE-@RpmFm>NazsFh8YZp4O+HGoGMQ)2}fu?}1o| z>ArmvblLWC6Iixg>S0&e{NyF4>f?UUv=6@hVf`b`$F;gjrJ8i#$dRK*k#0s2(yAe@ zg^_MW(d%KHRU=}7Se+w6E=fEi@)#ssy;RBlS^S;+y z^l$i%@0hX9LbzxkwmMX4CRfQY1(}j?h>&N~b|J&MvADD_n9?=eVh1DfJ&cAm*-df1 nvgJTKFP=R&d7w!F&HfiLrTz6v9*@Y)8_pIX=1SN;TXg;l+qk2n From 15241d42012a4d6a4101fff90888f06dd4d029aa Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 9 Aug 2021 16:24:22 -0600 Subject: [PATCH 11/17] wip: added sliced dataset to pytorch functionality --- .../core/diffgram_dataset_iterator.py | 4 +-- sdk/diffgram/core/directory.py | 34 +++++++++++-------- sdk/diffgram/core/sliced_directory.py | 6 ++-- .../diffgram_pytorch_dataset.py | 19 +++-------- .../tensorflow_diffgram/pytorch_test.py | 26 ++++++++------ 5 files changed, 44 
insertions(+), 45 deletions(-) diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py index 53e831f..15dc9a9 100644 --- a/sdk/diffgram/core/diffgram_dataset_iterator.py +++ b/sdk/diffgram/core/diffgram_dataset_iterator.py @@ -1,6 +1,6 @@ from PIL import Image, ImageDraw from imageio import imread - +import numpy as np class DiffgramDatasetIterator: @@ -42,7 +42,7 @@ def get_image_data(self, diffgram_file): raise Exception('Pytorch datasets only support images. Please provide only file_ids from images') def get_file_instances(self, diffgram_file): - if diffgram_file['type'] not in ['image', 'frame']: + if diffgram_file.type not in ['image', 'frame']: raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file['type'])) image = self.get_image_data(diffgram_file) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index e737e07..14c2d80 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -96,17 +96,19 @@ def all_file_ids(self): page_num = 1 result = [] while page_num is not None: - diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only') + diffgram_ids = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only') page_num = self.file_list_metadata['next_page'] - result = result + diffgram_files + result = result + diffgram_ids return result def slice(self, query): from diffgram.core.sliced_directory import SlicedDirectory - result = self.list_files( + # Get the first page to validate syntax. + self.list_files( limit = 25, page_num = 1, - file_view_mode = 'ids_only' + file_view_mode = 'ids_only', + query = query, ) sliced_dataset = SlicedDirectory( client = self.client, @@ -120,7 +122,6 @@ def to_pytorch(self, transform = None): Transforms the file list inside the dataset into a pytorch dataset. 
:return: """ - from diffgram.core.sliced_directory import SlicedDirectory file_id_list = self.all_file_ids() pytorch_dataset = DiffgramPytorchDataset( project = self.client, @@ -211,7 +212,6 @@ def list_files( else: logging.info("Using Default Dataset ID " + str(self.client.directory_id)) directory_id = self.client.directory_id - #print("directory_id", directory_id) metadata = {'metadata' : { @@ -222,7 +222,8 @@ def list_files( 'media_type': "All", 'page': page_num, 'file_view_mode': file_view_mode, - 'search_term': search_term + 'search_term': search_term, + 'query': query } } @@ -245,14 +246,17 @@ def list_files( self.file_list_metadata = data.get('metadata') # TODO would like this to perhaps be a seperate function # ie part of File_Constructor perhaps - file_list = [] - for file_json in file_list_json: - file = File.new( - client = self.client, - file_json = file_json) - file_list.append(file) - - return file_list + if file_view_mode == 'ids_only': + return file_list_json + else: + file_list = [] + for file_json in file_list_json: + file = File.new( + client = self.client, + file_json = file_json) + file_list.append(file) + + return file_list def get(self, diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index 187b0bb..cb84d3c 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -1,17 +1,21 @@ from diffgram.core.directory import Directory from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset + class SlicedDirectory(Directory): def __init__(self, client, original_directory: Directory, query: str): self.original_directory = original_directory self.query = query self.client = client + # Share the same ID from the original directory as this is just an in-memory construct for better semantics. 
+ self.id = original_directory.id def all_file_ids(self): page_num = 1 result = [] while page_num is not None: + print('slcied query', self.query) diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only', @@ -20,7 +24,6 @@ def all_file_ids(self): result = result + diffgram_files return result - def to_pytorch(self, transform = None): """ Transforms the file list inside the dataset into a pytorch dataset. @@ -34,4 +37,3 @@ def to_pytorch(self, transform = None): ) return pytorch_dataset - diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py index 14ccb95..4239f51 100644 --- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py +++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py @@ -1,8 +1,5 @@ -import os - -import numpy as np -import scipy as sp - +from torch.utils.data import Dataset, DataLoader +import torch as torch # type: ignore from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator @@ -15,20 +12,12 @@ def __init__(self, project, diffgram_file_id_list = None, transform = None): :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram. 
:param transform (callable, optional): Optional transforms to be applied on a sample """ - super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list) - global torch, Dataset, DataLoader - try: - import torch as torch # type: ignore - from torch.utils.data import Dataset, DataLoader - except ModuleNotFoundError: - raise ModuleNotFoundError( - "'torch' module should be installed to convert the Dataset into pytorch format" - ) + super(DiffgramPytorchDataset, self).__init__(project, diffgram_file_id_list) + self.diffgram_file_id_list = diffgram_file_id_list self.project = project self.transform = transform - self.__validate_file_ids() def __len__(self): return len(self.diffgram_file_id_list) diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py index 83fe139..616ccad 100644 --- a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py +++ b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py @@ -18,22 +18,26 @@ # Draw -import matplotlib.pyplot as plt -from PIL import Image, ImageDraw -img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0) -mask1 = diffgram_dataset[0]['polygon_mask_list'][0] -mask2 = diffgram_dataset[0]['polygon_mask_list'][1] -plt.figure() -plt.subplot(1,2,1) -# plt.imshow(img, 'gray', interpolation='none') -plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7) -plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7) -plt.show() +def display_masks(): + import matplotlib.pyplot as plt + from PIL import Image, ImageDraw + img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], + diffgram_dataset[0]['diffgram_file'].image['height']], 0) + mask1 = diffgram_dataset[0]['polygon_mask_list'][0] + mask2 = diffgram_dataset[0]['polygon_mask_list'][1] + plt.figure() + plt.subplot(1, 2, 1) + # plt.imshow(img, 'gray', interpolation='none') + plt.imshow(mask1, 'jet', interpolation = 'none', 
alpha = 0.7) + plt.imshow(mask2, 'Oranges', interpolation = 'none', alpha = 0.7) + plt.show() # Dataset Example dataset = project.directory.get('Default') +pytorch_dataset = dataset.to_pytorch() + sliced_dataset = dataset.slice(query = 'labels.sheep > 0 or labels.sofa > 0') From 04edb30d0ffbd7ad898820dc6406916890a44fa3 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 24 Aug 2021 08:18:54 -0600 Subject: [PATCH 12/17] wip: pytorch tensorflow --- sdk/diffgram/core/directory.py | 10 +++++ sdk/diffgram/core/sliced_directory.py | 11 ++++- .../diffgram_tensorflow_dataset.py | 44 ++++++++++--------- .../tensorflow_diffgram/pytorch_test.py | 2 + 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 14c2d80..7478644 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -2,6 +2,7 @@ from ..regular.regular import refresh_from_dict import logging from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset +from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset def get_directory_list(self): """ @@ -131,6 +132,15 @@ def to_pytorch(self, transform = None): ) return pytorch_dataset + def to_tensorflow(self): + file_id_list = self.all_file_ids() + diffgram_tensorflow_dataset = DiffgramTensorflowDataset( + project = self.client, + diffgram_file_id_list = file_id_list + ) + tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj() + return tf_dataset + def new(self, name: str): """ Create a new directory and update directory list. 
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index cb84d3c..be36c27 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -1,5 +1,6 @@ from diffgram.core.directory import Directory from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset +from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset class SlicedDirectory(Directory): @@ -15,7 +16,6 @@ def all_file_ids(self): page_num = 1 result = [] while page_num is not None: - print('slcied query', self.query) diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only', @@ -37,3 +37,12 @@ def to_pytorch(self, transform = None): ) return pytorch_dataset + + def to_tensorflow(self): + file_id_list = self.all_file_ids() + diffgram_tensorflow_dataset = DiffgramTensorflowDataset( + project = self.client, + diffgram_file_id_list = file_id_list + ) + tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj() + return tf_dataset diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py index 36a9dc4..3109d6c 100644 --- a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py +++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py @@ -1,24 +1,23 @@ from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator import os - +try: + import tensorflow as tf # type: ignore +except ModuleNotFoundError: + raise ModuleNotFoundError( + "'tensorflow' module should be installed to convert the Dataset into tensorflow format" + ) class DiffgramTensorflowDataset(DiffgramDatasetIterator): - def __init__(self, project, diffgram_file_id_list = None): + def __init__(self, project, diffgram_file_id_list): """ :param project (sdk.core.core.Project): A Project object from the Diffgram SDK :param diffgram_file_list (list): An arbitrary 
number of file ID's from Diffgram. :param transform (callable, optional): Optional transforms to be applied on a sample """ - super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list) - global tf - try: - import tensorflow as tf # type: ignore - except ModuleNotFoundError: - raise ModuleNotFoundError( - "'tensorflow' module should be installed to convert the Dataset into tensorflow format" - ) + super(DiffgramTensorflowDataset, self).__init__(project, diffgram_file_id_list) + self.diffgram_file_id_list = diffgram_file_id_list self.project = project @@ -52,24 +51,29 @@ def __iter__(self): self.current_file_index = 0 return self + def get_next_elm(self): + yield self.__next__() + def __next__(self): file_id = self.diffgram_file_id_list[self.current_file_index] diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) + print('AAA', diffgram_file.id) + image = self.get_image_data(diffgram_file) instance_data = self.get_file_instances(diffgram_file) - filename, file_extension = os.path.splitext(instance_data['diffgram_file']['image']['original_filename']) - print('instance_data', instance_data) + filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename']) + label_names_bytes = [x.encode() for x in instance_data['label_name_list']] tf_example_dict = { - 'image/height': self.int64_feature(instance_data['diffgram_file']['height']), - 'image/width': self.int64_feature(instance_data['diffgram_file']['width']), - 'image/filename': self.bytes_feature(filename), - 'image/source_id': self.bytes_feature(filename), - 'image/encoded': self.bytes_feature(instance_data['image']), - 'image/format': self.bytes_feature(file_extension), + 'image/height': self.int64_feature(instance_data['diffgram_file'].image['height']), + 'image/width': self.int64_feature(instance_data['diffgram_file'].image['width']), + 'image/filename': self.bytes_feature(filename.encode()), + 'image/source_id': 
self.bytes_feature(filename.encode()), + 'image/encoded': self.bytes_feature(image.tobytes()), + 'image/format': self.bytes_feature(file_extension.encode()), 'image/object/bbox/xmin': self.float_list_feature(instance_data['x_min_list']), 'image/object/bbox/xmax': self.float_list_feature(instance_data['x_max_list']), 'image/object/bbox/ymin': self.float_list_feature(instance_data['y_min_list']), 'image/object/bbox/ymax': self.float_list_feature(instance_data['y_max_list']), - 'image/object/class/text': self.bytes_list_feature(instance_data['label_name_list']), + 'image/object/class/text': self.bytes_list_feature(label_names_bytes), 'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']), } tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict)) @@ -77,4 +81,4 @@ def __next__(self): return tf_example def get_dataset_obj(self): - return tf.data.Dataset.from_generator(self.__iter__) + return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape=(1,))) diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py index 616ccad..ab5987b 100644 --- a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py +++ b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py @@ -38,6 +38,8 @@ def display_masks(): dataset = project.directory.get('Default') pytorch_dataset = dataset.to_pytorch() +tf_dataset = dataset.to_tensorflow() + sliced_dataset = dataset.slice(query = 'labels.sheep > 0 or labels.sofa > 0') From 4d61f77b67e06760805dfa0fe383a4c94c2ecea6 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 24 Aug 2021 15:12:01 -0600 Subject: [PATCH 13/17] wip: implemented len and getitem on iterators --- sdk/diffgram/core/diffgram_dataset_iterator.py | 8 ++++++++ sdk/diffgram/core/directory.py | 13 ++++++++++--- sdk/diffgram/core/sliced_directory.py | 6 ++++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git 
a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py index 15dc9a9..81fd7af 100644 --- a/sdk/diffgram/core/diffgram_dataset_iterator.py +++ b/sdk/diffgram/core/diffgram_dataset_iterator.py @@ -21,6 +21,14 @@ def __iter__(self): self.current_file_index = 0 return self + def __len__(self): + return len(self.diffgram_file_id_list) + + def __getitem__(self, idx): + diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True) + instance_data = self.get_file_instances(diffgram_file) + return instance_data + def __next__(self): file_id = self.diffgram_file_id_list[self.current_file_index] diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 7478644..4ddc57f 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -3,6 +3,8 @@ import logging from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset +from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator + def get_directory_list(self): """ @@ -70,14 +72,19 @@ def set_directory_by_name(self, name): str(names_attempted)) -class Directory(): +class Directory(DiffgramDatasetIterator): - def __init__(self, - client): + def __init__(self, client, file_id_list_sliced = None): self.client = client self.id = None self.file_list_metadata = {} + + if file_id_list_sliced is None: + self.file_id_list = self.all_file_ids() + else: + self.file_id_list = file_id_list_sliced + super(Directory, self).__init__(self.client, self.file_id_list) def all_files(self): """ diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index be36c27..514a204 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -11,6 +11,8 @@ 
def __init__(self, client, original_directory: Directory, query: str): self.client = client # Share the same ID from the original directory as this is just an in-memory construct for better semantics. self.id = original_directory.id + self.file_id_list = self.all_file_ids() + super(Directory, self).__init__(self.client, self.file_id_list) def all_file_ids(self): page_num = 1 @@ -29,10 +31,10 @@ def to_pytorch(self, transform = None): Transforms the file list inside the dataset into a pytorch dataset. :return: """ - file_id_list = self.all_file_ids() + pytorch_dataset = DiffgramPytorchDataset( project = self.client, - diffgram_file_id_list = file_id_list, + diffgram_file_id_list = self.file_id_list, transform = transform ) From 066f10dc53e27fd653d2243952d9069f22318801 Mon Sep 17 00:00:00 2001 From: Pablo Date: Tue, 24 Aug 2021 15:38:16 -0600 Subject: [PATCH 14/17] fix: tf dataset get item --- sdk/diffgram/core/directory.py | 5 ++-- sdk/diffgram/core/sliced_directory.py | 3 +-- .../diffgram_tensorflow_dataset.py | 24 ++++++++++--------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 4ddc57f..057563b 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -79,7 +79,7 @@ def __init__(self, client, file_id_list_sliced = None): self.client = client self.id = None self.file_list_metadata = {} - + if file_id_list_sliced is None: self.file_id_list = self.all_file_ids() else: @@ -145,8 +145,7 @@ def to_tensorflow(self): project = self.client, diffgram_file_id_list = file_id_list ) - tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj() - return tf_dataset + return diffgram_tensorflow_dataset def new(self, name: str): """ diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index 514a204..629f945 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -46,5 +46,4 @@ def 
to_tensorflow(self): project = self.client, diffgram_file_id_list = file_id_list ) - tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj() - return tf_dataset + return diffgram_tensorflow_dataset diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py index 3109d6c..7c9bf01 100644 --- a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py +++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py @@ -1,5 +1,6 @@ from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator import os + try: import tensorflow as tf # type: ignore except ModuleNotFoundError: @@ -7,6 +8,7 @@ "'tensorflow' module should be installed to convert the Dataset into tensorflow format" ) + class DiffgramTensorflowDataset(DiffgramDatasetIterator): def __init__(self, project, diffgram_file_id_list): @@ -47,17 +49,13 @@ def __validate_file_ids(self): raise Exception( 'Some file IDs do not belong to the project. 
Please provide only files from the same project.') - def __iter__(self): - self.current_file_index = 0 - return self - - def get_next_elm(self): - yield self.__next__() + def __getitem__(self, idx): + tf_example = self.get_tf_train_example(idx) + return tf_example - def __next__(self): - file_id = self.diffgram_file_id_list[self.current_file_index] + def get_tf_train_example(self, idx): + file_id = self.diffgram_file_id_list[idx] diffgram_file = self.project.file.get_by_id(file_id, with_instances = True) - print('AAA', diffgram_file.id) image = self.get_image_data(diffgram_file) instance_data = self.get_file_instances(diffgram_file) filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename']) @@ -77,8 +75,12 @@ def __next__(self): 'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']), } tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict)) + return tf_example + + def __next__(self): + tf_example = self.get_tf_train_example(self.current_file_index) self.current_file_index += 1 return tf_example - def get_dataset_obj(self): - return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape=(1,))) + # def get_dataset_obj(self): + # return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape = (1,))) From 6c3be64e7d43ca907f626916e7e4a339ac8dc0a3 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 25 Aug 2021 09:31:48 -0600 Subject: [PATCH 15/17] feat: add explore() function --- sdk/diffgram/core/directory.py | 9 +++++++++ sdk/diffgram/core/sliced_directory.py | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index 057563b..a1f8d17 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -109,6 +109,15 @@ def all_file_ids(self): result = result + diffgram_ids return result + def 
explore(self): + message = '{}/studio/annotate/{}/explorer?dataset_id={}'.format( + self.client.host, + self.project.project_string_id, + self.id + ) + print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m') + print('\033[96m' + message + '\033[0m') + def slice(self, query): from diffgram.core.sliced_directory import SlicedDirectory # Get the first page to validate syntax. diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index 629f945..5dd64a1 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -1,7 +1,7 @@ from diffgram.core.directory import Directory from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset - +import urllib class SlicedDirectory(Directory): @@ -26,6 +26,21 @@ def all_file_ids(self): result = result + diffgram_files return result + def explore(self): + + + payload = {'dataset_id': self.original_directory.id, 'query': self.query} + params = urllib.parse.urlencode(payload, quote_via = urllib.parse.quote) + + message = '{}/studio/annotate/{}/explorer?{}'.format( + self.client.host, + self.project.project_string_id, + params + + ) + print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m') + print('\033[96m' + message + '\033[0m') + def to_pytorch(self, transform = None): """ Transforms the file list inside the dataset into a pytorch dataset. From c6aa1042211dbf34604ae0c5f97d3b90ea780bbb Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 25 Aug 2021 09:31:48 -0600 Subject: [PATCH 16/17] Revert "feat: add explore() function" This reverts commit 6c3be64e7d43ca907f626916e7e4a339ac8dc0a3. 
--- sdk/diffgram/core/directory.py | 9 --------- sdk/diffgram/core/sliced_directory.py | 17 +---------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index a1f8d17..057563b 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -109,15 +109,6 @@ def all_file_ids(self): result = result + diffgram_ids return result - def explore(self): - message = '{}/studio/annotate/{}/explorer?dataset_id={}'.format( - self.client.host, - self.project.project_string_id, - self.id - ) - print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m') - print('\033[96m' + message + '\033[0m') - def slice(self, query): from diffgram.core.sliced_directory import SlicedDirectory # Get the first page to validate syntax. diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py index 5dd64a1..629f945 100644 --- a/sdk/diffgram/core/sliced_directory.py +++ b/sdk/diffgram/core/sliced_directory.py @@ -1,7 +1,7 @@ from diffgram.core.directory import Directory from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset -import urllib + class SlicedDirectory(Directory): @@ -26,21 +26,6 @@ def all_file_ids(self): result = result + diffgram_files return result - def explore(self): - - - payload = {'dataset_id': self.original_directory.id, 'query': self.query} - params = urllib.parse.urlencode(payload, quote_via = urllib.parse.quote) - - message = '{}/studio/annotate/{}/explorer?{}'.format( - self.client.host, - self.project.project_string_id, - params - - ) - print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m') - print('\033[96m' + message + '\033[0m') - def to_pytorch(self, transform = None): """ Transforms the file list inside the dataset into a pytorch dataset. 
From 68fedf85a4211cc64c138099a3bf0a055a942f27 Mon Sep 17 00:00:00 2001 From: Pablo Date: Wed, 25 Aug 2021 10:19:45 -0600 Subject: [PATCH 17/17] feat: add overwrite flag --- sdk/diffgram/file/file.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/diffgram/file/file.py b/sdk/diffgram/file/file.py index 6df65b4..4fedf11 100644 --- a/sdk/diffgram/file/file.py +++ b/sdk/diffgram/file/file.py @@ -62,7 +62,8 @@ def update( packet['instance_list'] = instance_list # Current default server side is to not overwrite - # packet['overwrite'] = overwrite + if overwrite: + packet['mode'] = "update_with_existing" self.client.file.from_packet(packet=packet)