From 3b23f807df252d21763ca303569a698bf606888a Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Tue, 29 Jun 2021 14:55:30 -0600
Subject: [PATCH 01/17] feat: add pagination and file_view mode

Allow fetching more than 1000 files for cases where all files are needed. Also add a file_view_mode parameter for cases where lighter versions of the serialized files are enough (i.e. without annotations).
---
 .gitignore                     |  1 +
 sdk/diffgram/core/directory.py | 12 ++++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 45d4970..3b05b95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ sdk/diffgram/regular/__pycache__/
 sdk/diffgram/task/__pycache__/
 
 sdk/diffgram/utils/__pycache__/
+venv/*
\ No newline at end of file
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 115f3b3..df08d24 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -76,6 +76,7 @@ def __init__(self,
 
 		self.client = client
 		self.id = None
+		self.file_list_metadata = {}
 
 
 	def new(self, name: str):
@@ -129,9 +130,11 @@ def new(self, name: str):
 
 
 	def list_files(
-			self, 
+			self,
+			page_num=1,
 			limit=None,
-			search_term: str =None):
+			search_term: str =None,
+			file_view_mode: str = 'annotation'):
 		"""
 		Get a list of files in directory (from Diffgram service). 
 	
@@ -165,9 +168,10 @@ def list_files(
 				'annotation_status': "All",
 				'limit': limit,
 				'media_type': "All",
+				'page': page_num,
 				'request_next_page': False,
 				'request_previous_page': False,
-				'file_view_mode': "annotation",
+				'file_view_mode': file_view_mode,
 				'search_term': search_term
 			}
 		}
@@ -188,7 +192,7 @@ def list_files(
 		# Success
 		data = response.json()
 		file_list_json = data.get('file_list')
-
+		self.file_list_metadata = data.get('metadata')
 		# TODO would like this to perhaps be a seperate function
 		# ie part of File_Constructor perhaps
 		file_list = []

From e85f04c833fca73ed9b1e8bb02255d455f6d253c Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Tue, 29 Jun 2021 15:29:27 -0600
Subject: [PATCH 02/17] wip: remove request_next_page params

---
 sdk/diffgram/core/directory.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index df08d24..8eb1e4a 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -132,7 +132,7 @@ def new(self, name: str):
 	def list_files(
 			self,
 			page_num=1,
-			limit=None,
+			limit=100,
 			search_term: str =None,
 			file_view_mode: str = 'annotation'):
 		"""
@@ -169,8 +169,6 @@ def list_files(
 				'limit': limit,
 				'media_type': "All",
 				'page': page_num,
-				'request_next_page': False,
-				'request_previous_page': False,
 				'file_view_mode': file_view_mode,
 				'search_term': search_term
 			}

From b9e032c0db162125966e5d0f2257fec1a4d9ceae Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 30 Jun 2021 14:32:31 -0600
Subject: [PATCH 03/17] feat: initial support to export to pytorch

Gives users the ability to export any dataset into a PyTorch dataset. Support for instance types other than boxes, and for video, is still pending.
---
 sdk/add_file_id_to_json.py                    | 46 +++++++++++++++
 sdk/diffgram/core/directory.py                | 30 +++++++++-
 sdk/diffgram/file/file.py                     |  6 +-
 sdk/diffgram/file/file_constructor.py         | 29 ++++++---
 sdk/diffgram/pytorch_diffgram/__init__.py     |  0
 .../diffgram_pytorch_dataset.py               | 59 +++++++++++++++++++
 sdk/requirements.txt                          |  4 +-
 7 files changed, 161 insertions(+), 13 deletions(-)
 create mode 100644 sdk/add_file_id_to_json.py
 create mode 100644 sdk/diffgram/pytorch_diffgram/__init__.py
 create mode 100644 sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py

diff --git a/sdk/add_file_id_to_json.py b/sdk/add_file_id_to_json.py
new file mode 100644
index 0000000..bfcbfbf
--- /dev/null
+++ b/sdk/add_file_id_to_json.py
@@ -0,0 +1,46 @@
+from diffgram.core.core import Project
+import json
+
+project = Project(project_string_id = "coco-dataset",
+                  debug = True,
+                  client_id = "LIVE__rj6whqkwxkups7oczqis",
+                  client_secret = "fr5vy64v2096qad9av0dgw3fr0kjavt4c156soiwx51ntyv9qswpuxkhg0lf")
+
+
+def find_file(file_list, name):
+    for f in file_list:
+        if f.original_filename == name:
+            return f
+    return None
+
+
+with open('/home/pablo/Downloads/coco2017.json') as json_file:
+    data = json.load(json_file)
+
+    dataset_default = project.directory.get(name = "Default")
+
+    page_num = 1
+    all_files = []
+    print('start')
+    while page_num != None:
+        print('Current page', page_num)
+        diffgram_files = dataset_default.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base')
+        page_num = dataset_default.file_list_metadata['next_page']
+        print('{} of {}'.format(page_num, dataset_default.file_list_metadata['total_pages']))
+        all_files = all_files + diffgram_files
+
+    print('')
+    print('Files fetched: ', len(all_files))
+    result = []
+    for elm in data:
+        file = find_file(all_files, name = elm['image_name'])
+        if file:
+            print('Adding file ID {} to {}'.format(file.id, elm['image_name']))
+            elm['file_id'] = file.id
+            result.append(elm)
+        else:
+            print(elm['image_name'], 'not found.')
+
+    s = json.dumps(result).
+    f = open('/home/pablo/Downloads/coco2017_with_ids.json', 'w')
+    f.write(s)
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 8eb1e4a..fc247fd 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -1,7 +1,7 @@
 from diffgram.file.file import File
 from ..regular.regular import refresh_from_dict
 import logging
-
+from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
 
 def get_directory_list(self):
 	"""
@@ -78,6 +78,34 @@ def __init__(self,
 		self.id = None
 		self.file_list_metadata = {}
 
+	def all_files(self):
+		"""
+			Get all the files of the directoy.
+			Warning! This can be an expensive function and take a long time.
+		:return:
+		"""
+		page_num = 1
+		result = []
+		while page_num is not None:
+			diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base')
+			page_num = self.file_list_metadata['next_page']
+			result = result + diffgram_files
+		return result
+
+	def to_pytorch(self, transform = None):
+		"""
+			Transforms the file list inside the dataset into a pytorch dataset.
+		:return:
+		"""
+		dataset_files = self.all_files()
+		file_id_list = [file.id for file in dataset_files]
+		pytorch_dataset = DiffgramPytorchDataset(
+			project = self.client,
+			diffgram_file_id_list = file_id_list,
+			transform = transform
+
+		)
+		return pytorch_dataset
 
 	def new(self, name: str):
 		"""
diff --git a/sdk/diffgram/file/file.py b/sdk/diffgram/file/file.py
index ea32305..6df65b4 100644
--- a/sdk/diffgram/file/file.py
+++ b/sdk/diffgram/file/file.py
@@ -1,6 +1,5 @@
 from ..regular.regular import refresh_from_dict
 
-
 class File():
     """
     file literal object
@@ -11,11 +10,12 @@ class File():
 
     def __init__(
             self,
-            id=None,
-            client=None):
+            id = None,
+            client = None):
         self.id = id
         self.client = client
 
+    @staticmethod
     def new(
             client,
             file_json):
diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py
index b950656..3b1b526 100644
--- a/sdk/diffgram/file/file_constructor.py
+++ b/sdk/diffgram/file/file_constructor.py
@@ -414,29 +414,42 @@ def import_bulk():
 
 
 	def get_by_id(self, 
-				  id: int):
+				  id: int,
+				  with_instances: bool = False):
 		"""
 		returns Diffgram File object
 		"""
-	
-		endpoint = "/api/v1/file/view"
 
-		spec_dict = {
-			'file_id': id,
-			'project_string_id': self.client.project_string_id
+		if not with_instances:
+			endpoint = "/api/v1/file/view"
+
+			spec_dict = {
+				'file_id': id,
+				'project_string_id': self.client.project_string_id,
+				}
+
+
+			file_response_key = 'file'
+
+		else:
+			endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id)
+			spec_dict = {
+				'directory_id': self.client.directory_id
 			}
+			file_response_key = 'file_serialized'
 
 		response = self.client.session.post(
 			self.client.host + endpoint,
 			json = spec_dict)
-		
+
 		self.client.handle_errors(response)
 
 		response_json = response.json()
+		file_data = response_json.get(file_response_key)
 
 		return File.new(
 			client = self.client,
-			file_json = response_json.get('file'))
+			file_json = file_data)
 
 
diff --git a/sdk/diffgram/pytorch_diffgram/__init__.py b/sdk/diffgram/pytorch_diffgram/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
new file mode 100644
index 0000000..716b97a
--- /dev/null
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -0,0 +1,59 @@
+from torch.utils.data import Dataset, DataLoader
+import torch
+import os
+from imageio import imread
+import numpy as np
+
+
+class DiffgramPytorchDataset(Dataset):
+
+    def __init__(self, project, diffgram_file_id_list, transform = None):
+        """
+
+        :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
+        :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
+        :param transform (callable, optional): Optional transforms to be applied on a sample
+        """
+        self.diffgram_file_id_list = diffgram_file_id_list
+        self.project = project
+        self.transform = transform
+
+    def __process_instance(self, instance):
+        """
+            Creates a pytorch tensor based on the instance type.
+            For now we are assuming shapes here, but we can extend it
+            to accept custom shapes specified by the user.
+        :param instance:
+        :return:
+        """
+        if instance['type'] == 'box':
+            result = np.array([instance['x_min'], instance['y_min'], instance['x_max'], instance['y_max']])
+            result = torch.tensor(result)
+        return result
+
+    def __len__(self):
+        return len(self.diffgram_file_id_list)
+
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+
+        diffgram_file = self.project.file.get_by_id(idx, with_instances = True)
+        if hasattr(diffgram_file, 'image'):
+            image = imread(diffgram_file.image.get('url_signed'))
+        else:
+            raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
+
+        instance_list = diffgram_file.instance_list
+
+        # Process the instances of each file
+        processed_instance_list = []
+        for instance in instance_list:
+            instnace_tensor = self.__process_instance(instance)
+            processed_instance_list.append(instnace_tensor)
+        sample = {'image': image, 'instance_list': instance_list}
+
+        if self.transform:
+            sample = self.transform(sample)
+
+        return sample
diff --git a/sdk/requirements.txt b/sdk/requirements.txt
index 05e9482..02704a9 100644
--- a/sdk/requirements.txt
+++ b/sdk/requirements.txt
@@ -3,4 +3,6 @@ opencv-python>=4.0.0.21
 scipy>=1.1.0
 six>=1.9.0
 tensorflow>=1.12.0
-pillow
\ No newline at end of file
+pillow
+torch
+imageio
\ No newline at end of file

From e4a0ecf58c2b1fada79087690736ed4b6ecc217d Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 26 Jul 2021 14:54:10 -0600
Subject: [PATCH 04/17] wip pytorch

---
 .../__pycache__/__init__.cpython-38.pyc          | Bin 0 -> 172 bytes
 .../diffgram_pytorch_dataset.cpython-38.pyc      | Bin 0 -> 2370 bytes
 .../pytorch_diffgram/diffgram_pytorch_dataset.py |   1 +
 3 files changed, 1 insertion(+)
 create mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc
 create mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc

diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8fdac839a07c00c36bcf5d3fb8ad798cc4d22242
GIT binary patch
literal 172
zcmWIL<>g`kg6-$;B!KA0AOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yY=R{fzwFRQ-a)
zq?~+xm!kZFr2Go~l+3iW^rFOE{esGpjQl*^;*@NC5JncyFG|jchYQ5VXXa&=#K-Fu
VRNmsS$<0qG%}KQbS@ju+831c;Ehhi~

literal 0
HcmV?d00001

diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ad21c32779815367fa42401b4528230c5be8060
GIT binary patch
literal 2370
zcmZ`)&2Ah;5bmDYnVntRNo0`_a|o>nL`H)36<HB7iun`BL<(>i33Ym>dw0i~zjXH!
zuQk5l91s#b0SEiY({LYg<HQ4S;Y3x>?#3}mOI_35Q(aY4_0>1uZfy+-jGzAa^YMN}
z$X~d*JbJL%hiN_sA&I0pDQH3qFYzeuz1&ZH_v|G-c=~xzgh{x3*H5C)i0sMWIgx>)
zXMQq}-Xju+e}gp=du{(fSYecHhq(Bv5>n~In6_b7XeDHdYL|x(n|+vO1VWL7N|JbT
zC_U*vrx1v91%ZUJ|C}U&jN|~Gp`_q+qrG*IO(s(<iiZnZ>2$VqewgA{t_H3>m=9o@
zTOb;*A)GW6ByD_<o-^l(+mw*ukS)hvEx<CXb#<gt%kCNZWRzA~b>d-n9PhD@*>cUQ
zvD2LBs$h1e*eW{q=-|t%X5}(+KFM;$^UQz;lxVPILXR`6g<i07UW}Dy)r6rU`{Lj|
z!<>Va@zGVwaIZ9zN*C;2D)L;6bM-!}YMWK1$ie*A%OzVY8fGgtR!r1&o+-&H2!I(;
z)VW$4KaBnM=F356(#75OmnmuoYm0HO?HQF%bRY7qLlgi;@;lLp&VR-3tSZ!Q4Y5_b
z2fC`q)$#7i_-?(hv#R_6^1X}m@;MfnuMbuW4mY|t9r#A|qK!Du%FJ?(4h=>?`XLSJ
z$=e67#LQ@=pCVRP1L29E;IMNa77dkj2fE~mr$4mhoWkDU!PB0Tfs^45nf5N|sb{@&
zzwvO@V3+hy{#oY}LG}Pz!YTvFUVZ?xsx*~mV`w2)21HR-8Y{|FF}tYMi)y2RPeHe=
zp0Q_8I}OvAd6AV<W@e%WyECPs^y9fjg;bPG9fJeOGW${(P<)Z5s<teh8w=Q7UN^N$
zvk3xXyl{A)8>L?au<VajczbJ`ds^AKE?*q9Jq$hZp~`J=%pt@!Sac8A2{?qEI0n#o
zW!(lsYq4kpSJEWxQWE=m0O@a|mAf|7%FJ`?0M&^NP)N2p2RNq67^r`jxSMcp5Pl@u
zq<86%p2V-kz}34cP{MlPdd(z=D@z)3M9+|n=pnG5pR3ZLJ*F<)E5WsW&Ow9oyKvVy
zoY9jzuZ_j2z63{10n(X+<F76Ngy03$+YLSUF32g6+L3?MI|>k}()+~&5QYt~>6xeZ
zZNKr3qVs_SaGudqzl`iq250m~3g;WAy(u{jn%)8V{`P6ukn_zZ1nBmg;C!p;Ur<V(
zeg||3T>P7N8iAi{`e)F()<mEcH4y^*@SB(>*Pdl|w$8q3gRBr!l?>-P=O&w$N}e1p
z37g5TV-0XlzF=lv*Oj)+T`(i|FjvqCNWnj3Qgu3xFEhDh!A>*w^bTaB-CC905mS6!
zzX7MoATzu}Q5)I{N!HO)+e1^yV5%%1FL)-~{!AEQt=5?1&Y}-*+rcNt*d9Qj`Ym)5
z^H<mG|Ilkg;8bWi$A0mk!?2&p<77)-{sA~=&Uo_X5`s#uQ|P`%BaD;l_=TlN)%uG%
zp1z8-4bCC*%&LO(cj2PB1A+kdBX1kzwikJ!-=h(%H|fv|VV>N5^;ZEiu1z7#`^c@O
zC=}=Ikn^IFbNGYtc9Zj`bCGv%^d`8|48?5}@1Vezxg@Y(bpQf;(=}-14L72#$akml
zFuCD4U^KT`Zbk?djZLGGm0f9DuQEyBVIZqq)!G1lnDs($qYB!-<j=WBr)_k<i~1w%
PXoKfBAQcg$BJ%$OaFT^e

literal 0
HcmV?d00001

diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
index 716b97a..200230f 100644
--- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -29,6 +29,7 @@ def __process_instance(self, instance):
         if instance['type'] == 'box':
             result = np.array([instance['x_min'], instance['y_min'], instance['x_max'], instance['y_max']])
             result = torch.tensor(result)
+
         return result
 
     def __len__(self):

From a9c16ca5d8ab0c71ae38de8ac492c7227f4ebaa7 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 4 Aug 2021 09:28:50 -0600
Subject: [PATCH 05/17] wip: adding support for bounding box to pytorch

---
 sdk/diffgram/core/core.py                     |   1 -
 sdk/diffgram/file/file_constructor.py         | 761 +++++++++---------
 .../diffgram_pytorch_dataset.py               |  48 +-
 sdk/tests/__init__.py                         |   0
 4 files changed, 421 insertions(+), 389 deletions(-)
 create mode 100644 sdk/tests/__init__.py

diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py
index c769908..8e2980e 100644
--- a/sdk/diffgram/core/core.py
+++ b/sdk/diffgram/core/core.py
@@ -240,7 +240,6 @@ def set_default_directory(self,
 			self.directory_id = self.default_directory['id']
 
 			self.directory_list = data["directory_list"]
-
 		self.session.headers.update(
 			{'directory_id': str(self.directory_id)})
 
diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py
index 3b1b526..0c4c930 100644
--- a/sdk/diffgram/file/file_constructor.py
+++ b/sdk/diffgram/file/file_constructor.py
@@ -7,449 +7,458 @@
 
 
 class FileConstructor():
-	"""
+    """
 
-	Construct files and communicate with client
+    Construct files and communicate with client
 
-	Caution class needs client in order to do effective communication
-	with server
+    Caution class needs client in order to do effective communication
+    with server
 
 
-	"""
-
-	def __init__(self, client):
-
-		self.client = client
-
-
-	def file_from_response(
-			self, 
-			file_dict):
-			"""
-			file_dict, dict, file information from Project
-
-			returns file, class File object
-			"""
-
-			file = File(client=self.client)
-			refresh_from_dict(file, file_dict)
-
-			return file
-
-
-
-	def from_local(
-			self,
-			path: str,
-			instance_list: list = None,
-			frame_packet_map: dict = None,
-			assume_new_instances_machine_made: bool = True,
-			convert_names_to_label_files: bool = True
-			):
-		"""
-		Create a Project file from local path
-
-		path, string, file path
-
-		returns file, class File object
-		"""
-		
-		files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')}
+    """
 
-		headers = {
-			'immediate_mode' : 'True',
-		}
+    def __init__(self, client):
 
-		payload = {}
+        self.client = client
 
-		if instance_list:					
-			payload['instance_list'] = self.__validate_and_format_instance_list(
-				instance_list = instance_list,
-				assume_new_instances_machine_made = assume_new_instances_machine_made,
-				convert_names_to_label_files = convert_names_to_label_files
-				)
-				
-		if frame_packet_map:
-			payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map(
-				frame_packet_map = frame_packet_map,
-				assume_new_instances_machine_made = assume_new_instances_machine_made,
-				convert_names_to_label_files = convert_names_to_label_files
-				)
-		
-		files['json'] = (None, json.dumps(payload), 'application/json')
+    def file_from_response(
+            self,
+            file_dict):
+        """
+        file_dict, dict, file information from Project
 
-		endpoint = "/api/walrus/v1/project/" +  self.client.project_string_id \
-			+ "/input/from_local"
+        returns file, class File object
+        """
 
-		response = self.client.session.post(
-			self.client.host + endpoint,
-			files = files,
-			headers = headers)
+        file = File(client = self.client)
+        refresh_from_dict(file, file_dict)
 
-		self.client.handle_errors(response)
-		
-		data = response.json()
+        return file
 
-		#print(data)
+    def from_local(
+            self,
+            path: str,
+            instance_list: list = None,
+            frame_packet_map: dict = None,
+            assume_new_instances_machine_made: bool = True,
+            convert_names_to_label_files: bool = True
+    ):
+        """
+        Create a Project file from local path
 
-		if data["log"]["success"] is True:
-			file = self.file_from_response(file_dict = data['file'])
-			return file
-		
-		
-
-	def from_url(
-			self,
-			url: str, 
-			media_type: str = "image",
-			job: Job = None,
-			job_id: int = None,
-			video_split_duration: int = None,
-			instance_list: list = None,		# for Images
-			frame_packet_map: dict = None	# for Video
-			):
-		"""
-
-		{'frame_packet_map' : {
-			0 : instance_list,    # Where the key is the integer of the frame of the video, 0 indexed.
-			6 : instance_list,
-			9 : instance_list
-		},
-
-		instance_example
-		{  'type': 'box', # options ['tag', 'box', 'polygon']
-			label_file_id:, Integer   # Project label_file id. 
-								accessible through diffgram.get_label_file_dict() See sample
-			'x_max': 128, Integer
-			'x_min': 1,
-			'y_min': 1,
-			'y_max': 128,
-			'points': [] # Required for polygon more on this coming soon
-			'number': 0  # A number is optional, and only relates to video instances
-		}
-
-
-		"""
-
-		packet = {'media' : {}}
-		packet['media']['url'] = url
-		packet['media']['type'] = media_type
-
-		# Existing Instances
-		packet['frame_packet_map'] = frame_packet_map
-		packet['instance_list'] = instance_list
-
-		if job:
-			packet["job_id"] = job.id
-		else:
-			packet["job_id"] = job_id
-
-		if video_split_duration:
-			packet["video_split_duration"] = video_split_duration
-
-		self.from_packet(packet = packet)
-		
-		return True
-		
-
-
-	def format_packet():
-		raise NotImplementedError
-
-
-	@staticmethod
-	def __media_packet_sanity_checks(packet) -> None:
-		"""
-		Relevant to new media, ie not existing media
-		"""
-
-		if type(packet) != dict:
-			raise Exception("packet is not a dict")
-
-		if "media" not in packet:
-			raise Exception(" 'media' key is not defined in packet.")  
-
-		if "url" not in packet["media"]:
-			raise Exception(" 'url' key is not defined in packet['media'] .")
-
-		media_type = packet["media"].get("type", None)
-		if not media_type:
-			raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']")
-
-
-	def __validate_existing_instances():
-		pass
-
-	def from_packet(
-			self, 
-			packet,
-			job=None,
-			convert_names_to_label_files=True,
-			assume_new_instances_machine_made=True
-		):
-		"""
-		Import single packet of data of the form:
-
-		image_packet_example
-		{'instance_list' : 
-			[instance_alpha,    # Array of instance dicts as defined below
-				instance_bravo,
-				... n instances],
-		'media' : {
-			'url' : "https://something",
-			'type' : 'image'   # ['image', 'video']
-			}
-		}
-
-		video_packet_example 
-		{'frame_packet_map' : {
-			0 : instance_list,    
-		# Where the key is the integer of the frame of the video, 0 indexed.
-			6 : instance_list,
-			9 : instance_list
-		},
-		'media' : {
-			'url' : "https://something",
-			'type' : 'video'
-			}
-		}
-
-		instance_example
-		{  'type': 'box', # options ['tag', 'box', 'polygon']
-			label_file_id:, Integer   # Project label_file id. 
-								accessible through diffgram.get_label_file_dict() See sample
-			'x_max': 128, Integer
-			'x_min': 1,
-			'y_min': 1,
-			'y_max': 128,
-			'points': [] # Required for polygon more on this coming soon
-			'number': 0  # A number is optional, and only relates to video instances
-		}
-
-
-		Validates basics of packet form
-		and makes request to /input/packet endpoint.
-
-		"""
-		file_id = packet.get('file_id')
-		if not file_id:
-			FileConstructor.__media_packet_sanity_checks(packet = packet)
-
-		instance = None
-		
-		if packet.get("instance_list"):					
-			packet['instance_list'] = self.__validate_and_format_instance_list(
-				instance_list = packet.get('instance_list'),
-				assume_new_instances_machine_made = assume_new_instances_machine_made,
-				convert_names_to_label_files = convert_names_to_label_files
-				)
-				
-		if packet.get("frame_packet_map"):
-			packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map(
-				frame_packet_map = packet['frame_packet_map'],
-				assume_new_instances_machine_made = assume_new_instances_machine_made,
-				convert_names_to_label_files = convert_names_to_label_files
-				)
-
-		# Test one of the instances
-		# QUESTION Should we be testing all? User option maybe?
-		# (Otherwise invalid ones get discarded when it hits API)
-
-		# TODO due to changes, this no longer tests anything , choose new way to sample
-		# instance list / packets here.
-
-		if instance:
-			instance_type = instance.get("type", None)
-			if not instance_type:
-				raise Exception(" type is not defined in the first instance \
-									of instance_list. Options are 'tag', 'box', 'polygon'.")
+        path, string, file path
 
-			if instance_type not in ['tag', 'box', 'polygon']:
-				raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.")
+        returns file, class File object
+        """
 
-			if "label_file_id" not in instance:
-				raise Exception(" label_file_id is not defined in the first instance \
-									of instance_list. ")
+        files = {'file': (os.path.basename(path), open(path, 'rb'), 'application/octet-stream')}
 
+        headers = {
+            'immediate_mode': 'True',
+        }
 
-		if job:
-			packet["job_id"] = job.id
-			packet["mode"] = "attach_to_job"
+        payload = {}
 
+        if instance_list:
+            payload['instance_list'] = self.__validate_and_format_instance_list(
+                instance_list = instance_list,
+                assume_new_instances_machine_made = assume_new_instances_machine_made,
+                convert_names_to_label_files = convert_names_to_label_files
+            )
 
+        if frame_packet_map:
+            payload['frame_packet_map'] = self.__validate_and_format_frame_packet_map(
+                frame_packet_map = frame_packet_map,
+                assume_new_instances_machine_made = assume_new_instances_machine_made,
+                convert_names_to_label_files = convert_names_to_label_files
+            )
 
-		endpoint = "/api/walrus/v1/project/" + \
-			self.client.project_string_id + "/input/packet"
+        files['json'] = (None, json.dumps(payload), 'application/json')
 
-		response = self.client.session.post(
-			self.client.host + endpoint, 
-			json = packet)
+        endpoint = "/api/walrus/v1/project/" + self.client.project_string_id \
+                   + "/input/from_local"
 
-		self.client.handle_errors(response)
-		
-		data = response.json()
+        response = self.client.session.post(
+            self.client.host + endpoint,
+            files = files,
+            headers = headers)
 
-		# TODO better handling input vs file
+        self.client.handle_errors(response)
 
-		if data["log"]["success"] is True:
-			
-			return True
+        data = response.json()
 
-			# TODO return file data here if in immediate mode
-			# else return input class? / handle this properly
-			#file = self.file_from_response(file_dict = data['file'])
-			#return file
+        # print(data)
 
+        if data["log"]["success"] is True:
+            file = self.file_from_response(file_dict = data['file'])
+            return file
+
+    def from_url(
+            self,
+            url: str,
+            media_type: str = "image",
+            job: Job = None,
+            job_id: int = None,
+            video_split_duration: int = None,
+            instance_list: list = None,  # for Images
+            frame_packet_map: dict = None  # for Video
+    ):
+        """
+
+        {'frame_packet_map' : {
+            0 : instance_list,    # Where the key is the integer of the frame of the video, 0 indexed.
+            6 : instance_list,
+            9 : instance_list
+        },
+
+        instance_example
+        {  'type': 'box', # options ['tag', 'box', 'polygon']
+            label_file_id:, Integer   # Project label_file id.
+                                accessible through diffgram.get_label_file_dict() See sample
+            'x_max': 128, Integer
+            'x_min': 1,
+            'y_min': 1,
+            'y_max': 128,
+            'points': [] # Required for polygon more on this coming soon
+            'number': 0  # A number is optional, and only relates to video instances
+        }
+
+
+        """
+
+        packet = {'media': {}}
+        packet['media']['url'] = url
+        packet['media']['type'] = media_type
+
+        # Existing Instances
+        packet['frame_packet_map'] = frame_packet_map
+        packet['instance_list'] = instance_list
+
+        if job:
+            packet["job_id"] = job.id
+        else:
+            packet["job_id"] = job_id
+
+        if video_split_duration:
+            packet["video_split_duration"] = video_split_duration
+
+        self.from_packet(packet = packet)
+
+        return True
+
+    def format_packet():
+        raise NotImplementedError
+
+    @staticmethod
+    def __media_packet_sanity_checks(packet) -> None:
+        """
+        Relevant to new media, ie not existing media
+        """
+
+        if type(packet) != dict:
+            raise Exception("packet is not a dict")
+
+        if "media" not in packet:
+            raise Exception(" 'media' key is not defined in packet.")
+
+        if "url" not in packet["media"]:
+            raise Exception(" 'url' key is not defined in packet['media'] .")
+
+        media_type = packet["media"].get("type", None)
+        if not media_type:
+            raise Exception(" 'type' key is not defined in packet['media'] use one of ['image', 'video']")
+
+    def __validate_existing_instances():
+        pass
+
+    def from_packet(
+            self,
+            packet,
+            job = None,
+            convert_names_to_label_files = True,
+            assume_new_instances_machine_made = True
+    ):
+        """
+        Import single packet of data of the form:
+
+        image_packet_example
+        {'instance_list' :
+            [instance_alpha,    # Array of instance dicts as defined below
+                instance_bravo,
+                ... n instances],
+        'media' : {
+            'url' : "https://something",
+            'type' : 'image'   # ['image', 'video']
+            }
+        }
+
+        video_packet_example
+        {'frame_packet_map' : {
+            0 : instance_list,
+        # Where the key is the integer of the frame of the video, 0 indexed.
+            6 : instance_list,
+            9 : instance_list
+        },
+        'media' : {
+            'url' : "https://something",
+            'type' : 'video'
+            }
+        }
+
+        instance_example
+        {  'type': 'box', # options ['tag', 'box', 'polygon']
+            label_file_id:, Integer   # Project label_file id.
+                                accessible through diffgram.get_label_file_dict() See sample
+            'x_max': 128, Integer
+            'x_min': 1,
+            'y_min': 1,
+            'y_max': 128,
+            'points': [] # Required for polygon more on this coming soon
+            'number': 0  # A number is optional, and only relates to video instances
+        }
+
+
+        Validates basics of packet form
+        and makes request to /input/packet endpoint.
+
+        """
+        file_id = packet.get('file_id')
+        if not file_id:
+            FileConstructor.__media_packet_sanity_checks(packet = packet)
+
+        instance = None
+
+        if packet.get("instance_list"):
+            packet['instance_list'] = self.__validate_and_format_instance_list(
+                instance_list = packet.get('instance_list'),
+                assume_new_instances_machine_made = assume_new_instances_machine_made,
+                convert_names_to_label_files = convert_names_to_label_files
+            )
+
+        if packet.get("frame_packet_map"):
+            packet['frame_packet_map'] = self.__validate_and_format_frame_packet_map(
+                frame_packet_map = packet['frame_packet_map'],
+                assume_new_instances_machine_made = assume_new_instances_machine_made,
+                convert_names_to_label_files = convert_names_to_label_files
+            )
+
+        # Test one of the instances
+        # QUESTION Should we be testing all? User option maybe?
+        # (Otherwise invalid ones get discarded when it hits API)
+
+        # TODO due to changes, this no longer tests anything , choose new way to sample
+        # instance list / packets here.
+
+        if instance:
+            instance_type = instance.get("type", None)
+            if not instance_type:
+                raise Exception(" type is not defined in the first instance \
+									of instance_list. Options are 'tag', 'box', 'polygon'.")
+
+            if instance_type not in ['tag', 'box', 'polygon']:
+                raise Exception(" invalid instance type. Options are 'tag', 'box', 'polygon'.")
+
+            if "label_file_id" not in instance:
+                raise Exception(" label_file_id is not defined in the first instance \
+									of instance_list. ")
 
-	def __validate_and_format_frame_packet_map(
-			self, 
-			frame_packet_map: dict,
-			assume_new_instances_machine_made: bool = True,
-			convert_names_to_label_files: bool = True):
-		"""
-		Warning: Mutates packet map
-		"""
+        if job:
+            packet["job_id"] = job.id
+            packet["mode"] = "attach_to_job"
 
-		if type(frame_packet_map) != dict:
-			raise Exception("frame_packet_map is not a dict")
+        endpoint = "/api/walrus/v1/project/" + \
+                   self.client.project_string_id + "/input/packet"
 
-		for frame, instance_list in frame_packet_map.items():
-					
-			if type(frame) != int:
-				raise Exception("frame is not a integer. The key should be the integer frame number.")
+        response = self.client.session.post(
+            self.client.host + endpoint,
+            json = packet)
 
-			if type(instance_list) != list:
-				raise Exception("instance_list is not a list. The value of the frame should be a list of instance dicts.")
+        self.client.handle_errors(response)
 
-			frame_packet_map[frame] = self.__validate_and_format_instance_list(
-				instance_list = instance_list,
-				assume_new_instances_machine_made = assume_new_instances_machine_made,
-				convert_names_to_label_files = convert_names_to_label_files
-				)
+        data = response.json()
 
-		return frame_packet_map
+        # TODO better handling input vs file
 
+        if data["log"]["success"] is True:
+            return True
 
-	def __validate_and_format_instance_list(
-			self,
-			instance_list: list,
-			assume_new_instances_machine_made: bool,
-			convert_names_to_label_files: bool):
+        # TODO return file data here if in immediate mode
+        # else return input class? / handle this properly
 
+    # file = self.file_from_response(file_dict = data['file'])
+    # return file
 
-		FileConstructor.sanity_check_instance_list(instance_list)
+    def __validate_and_format_frame_packet_map(
+            self,
+            frame_packet_map: dict,
+            assume_new_instances_machine_made: bool = True,
+            convert_names_to_label_files: bool = True):
+        """
+        Warning: Mutates packet map
+        """
 
-		instance_list = FileConstructor.format_assumptions(
-			instance_list = instance_list,
-			assume_new_instances_machine_made = assume_new_instances_machine_made)
+        if type(frame_packet_map) != dict:
+            raise Exception("frame_packet_map is not a dict")
 
-		if convert_names_to_label_files is True:
-			instance_list = self.instance_list_label_strings_to_ids(
-				instance_list = instance_list
-				)
+        for frame, instance_list in frame_packet_map.items():
 
-		return instance_list
+            if type(frame) != int:
+                raise Exception("frame is not a integer. The key should be the integer frame number.")
 
+            if type(instance_list) != list:
+                raise Exception(
+                    "instance_list is not a list. The value of the frame should be a list of instance dicts.")
 
-	def instance_list_label_strings_to_ids(self, instance_list: list):
+            frame_packet_map[frame] = self.__validate_and_format_instance_list(
+                instance_list = instance_list,
+                assume_new_instances_machine_made = assume_new_instances_machine_made,
+                convert_names_to_label_files = convert_names_to_label_files
+            )
 
-		# Convert "name" label (ie == "cat") to Project label_file id
-		for index, instance in enumerate(instance_list):
-	
-			instance = convert_label(self, instance)
-			instance_list[index] = instance
+        return frame_packet_map
 
-		return instance_list
+    def __validate_and_format_instance_list(
+            self,
+            instance_list: list,
+            assume_new_instances_machine_made: bool,
+            convert_names_to_label_files: bool):
 
-	@staticmethod
-	def __check_for_duplicates_on_instance_list(instance_list):
-		id_list = []
-		duplicates = []
-		for elm in instance_list:
-			if elm.get('id'):
-				if elm.get('id') not in id_list:
-					id_list.append(elm.get('id'))
-				else:
-					duplicates.append(elm.get('id'))
-		if len(duplicates) > 0:
-			raise Exception('Instance list must not have duplicate IDs. \n Duplicate IDs are: {}'.format(str(duplicates)))
+        FileConstructor.sanity_check_instance_list(instance_list)
 
-	@staticmethod
-	def sanity_check_instance_list(instance_list: list):
+        instance_list = FileConstructor.format_assumptions(
+            instance_list = instance_list,
+            assume_new_instances_machine_made = assume_new_instances_machine_made)
 
-		if type(instance_list) != list:
-			raise Exception("instance_list is not array like")
+        if convert_names_to_label_files is True:
+            instance_list = self.instance_list_label_strings_to_ids(
+                instance_list = instance_list
+            )
 
-		if len(instance_list) == 0:
-			raise Warning("'instance_list' is empty")
+        return instance_list
 
-		FileConstructor.__check_for_duplicates_on_instance_list(instance_list)
+    def instance_list_label_strings_to_ids(self, instance_list: list):
 
-		return
+        # Convert "name" label (ie == "cat") to Project label_file id
+        for index, instance in enumerate(instance_list):
+            instance = convert_label(self, instance)
+            instance_list[index] = instance
 
+        return instance_list
 
-	@staticmethod
-	def format_assumptions(
-			instance_list: list,
-			assume_new_instances_machine_made: bool):
+    @staticmethod
+    def __check_for_duplicates_on_instance_list(instance_list):
+        id_list = []
+        duplicates = []
+        for elm in instance_list:
+            if elm.get('id'):
+                if elm.get('id') not in id_list:
+                    id_list.append(elm.get('id'))
+                else:
+                    duplicates.append(elm.get('id'))
+        if len(duplicates) > 0:
+            raise Exception(
+                'Instance list must not have duplicate IDs. \n Duplicate IDs are: {}'.format(str(duplicates)))
 
-		if assume_new_instances_machine_made is True:
-			for i in range(len(instance_list)):
-				instance_list[i]['machine_made'] = True
+    @staticmethod
+    def sanity_check_instance_list(instance_list: list):
 
-		return instance_list
+        if type(instance_list) != list:
+            raise Exception("instance_list is not array like")
 
+        if len(instance_list) == 0:
+            raise Warning("'instance_list' is empty")
 
+        FileConstructor.__check_for_duplicates_on_instance_list(instance_list)
 
-	def import_bulk():
-		"""
-		Import multiple packets
-		FUTURE	
-			Accept a dict of packets
-			Each packet is defined as
-			{ packet_id : { packet }}
+        return
 
-		"""
-		pass
+    @staticmethod
+    def format_assumptions(
+            instance_list: list,
+            assume_new_instances_machine_made: bool):
 
+        if assume_new_instances_machine_made is True:
+            for i in range(len(instance_list)):
+                instance_list[i]['machine_made'] = True
 
-	def get_by_id(self, 
-				  id: int,
-				  with_instances: bool = False):
-		"""
-		returns Diffgram File object
-		"""
+        return instance_list
+
+    def import_bulk:
+        """
+        Import multiple packets
+        FUTURE
+            Accept a dict of packets
+            Each packet is defined as
+            { packet_id : { packet }}
+
+        """
+        pass
 
-		if not with_instances:
-			endpoint = "/api/v1/file/view"
+    def get_file_list(self, id_list: list, with_instances: bool = False):
+        """
+        returns Diffgram File object
+        """
 
-			spec_dict = {
-				'file_id': id,
-				'project_string_id': self.client.project_string_id,
-				}
+        raise NotImplementedError
 
+    def file_list_exists(self, id_list):
+        """
+            Verifies that the given ID list exists inside the project.
+        :param id_list:
+        :return: Boolean
+        """
+        url = '/api/v1/project/{}/file/exists'.format(
+            self.client.project_string_id
+        )
+        spec_dict = {
+            'file_id_list': id_list
+        }
+        response = self.client.session.post(
+            self.client.host + url,
+            json = spec_dict)
 
-			file_response_key = 'file'
+        self.client.handle_errors(response)
+
+        response_json = response.json()
 
-		else:
-			endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id)
-			spec_dict = {
-				'directory_id': self.client.directory_id
-			}
-			file_response_key = 'file_serialized'
+        if response_json.get('result'):
+            return response_json.get('result').get('exists')
 
-		response = self.client.session.post(
-			self.client.host + endpoint,
-			json = spec_dict)
 
-		self.client.handle_errors(response)
 
-		response_json = response.json()
-		file_data = response_json.get(file_response_key)
+    def get_by_id(self,
+                  id: int,
+                  with_instances: bool = False):
+        """
+        returns Diffgram File object
+        """
+
+        if not with_instances:
+            endpoint = "/api/v1/file/view"
 
-		return File.new(
-			client = self.client,
-			file_json = file_data)
+            spec_dict = {
+                'file_id': id,
+                'project_string_id': self.client.project_string_id,
+            }
+
+            file_response_key = 'file'
+
+        else:
+            endpoint = "/api/project/{}/file/{}/annotation/list".format(self.client.project_string_id, id)
+            spec_dict = {
+                'directory_id': self.client.directory_id
+            }
+            file_response_key = 'file_serialized'
+
+        response = self.client.session.post(
+            self.client.host + endpoint,
+            json = spec_dict)
 
+        self.client.handle_errors(response)
 
+        response_json = response.json()
+        file_data = response_json.get(file_response_key)
 
+        return File.new(
+            client = self.client,
+            file_json = file_data)
diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
index 200230f..71a06db 100644
--- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -7,7 +7,7 @@
 
 class DiffgramPytorchDataset(Dataset):
 
-    def __init__(self, project, diffgram_file_id_list, transform = None):
+    def __init__(self, project, diffgram_file_id_list = None, transform = None):
         """
 
         :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
@@ -15,10 +15,17 @@ def __init__(self, project, diffgram_file_id_list, transform = None):
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
         self.diffgram_file_id_list = diffgram_file_id_list
+        self.__validate_file_ids()
         self.project = project
         self.transform = transform
+        self._internal_file_list = []
 
-    def __process_instance(self, instance):
+
+    def __validate_file_ids(self):
+        url = '/api/'
+        raise NotImplementedError
+
+    def __extract_bbox_values(self, instance_list):
         """
             Creates a pytorch tensor based on the instance type.
             For now we are assuming shapes here, but we can extend it
@@ -26,15 +33,27 @@ def __process_instance(self, instance):
         :param instance:
         :return:
         """
-        if instance['type'] == 'box':
-            result = np.array([instance['x_min'], instance['y_min'], instance['x_max'], instance['y_max']])
-            result = torch.tensor(result)
+        x_min_list = []
+        x_max_list = []
+        y_min_list = []
+        y_max_list = []
+
+        for inst in instance_list:
+            if inst['type'] != 'box':
+                continue
+            x_min_list.append(inst['x_min'])
+            x_max_list.append(inst['x_max'])
+            y_min_list.append(inst['y_min'])
+            y_max_list.append(inst['y_max'])
 
-        return result
+        return x_min_list, x_max_list, y_min_list, y_max_list
 
     def __len__(self):
         return len(self.diffgram_file_id_list)
 
+    def __get_next_page_of_data(self):
+        raise NotImplementedError
+
     def __getitem__(self, idx):
         if torch.is_tensor(idx):
             idx = idx.tolist()
@@ -46,15 +65,20 @@ def __getitem__(self, idx):
             raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
 
         instance_list = diffgram_file.instance_list
-
+        instance_types_in_file = set([x['type'] for x in instance_list])
         # Process the instances of each file
         processed_instance_list = []
-        for instance in instance_list:
-            instnace_tensor = self.__process_instance(instance)
-            processed_instance_list.append(instnace_tensor)
-        sample = {'image': image, 'instance_list': instance_list}
+
+        sample = {'image': image}
+        if 'box' in instance_types_in_file:
+            x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list)
+            sample['x_min_list'] = torch.Tensor(x_min_list)
+            sample['x_max_list'] = torch.Tensor(x_max_list)
+            sample['y_min_list'] = torch.Tensor(y_min_list)
+            sample['y_max_list'] = torch.Tensor(y_max_list)
+        if 'polygon' in instance_types_in_file:
 
         if self.transform:
             sample = self.transform(sample)
 
-        return sample
+        return sample
\ No newline at end of file
diff --git a/sdk/tests/__init__.py b/sdk/tests/__init__.py
new file mode 100644
index 0000000..e69de29

From 1d00f6bc6429413ee8746be4c9a0e35bc928222e Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 4 Aug 2021 18:03:16 -0600
Subject: [PATCH 06/17] wip: slice class and segmentation mask

---
 pytorch_test.py                               |  33 +++++++++++
 sdk/diffgram/core/directory.py                |  29 +++++++++-
 sdk/diffgram/core/sliced_directory.py         |  37 ++++++++++++
 sdk/diffgram/file/file_constructor.py         |   4 +-
 .../__pycache__/__init__.cpython-38.pyc       | Bin 172 -> 172 bytes
 .../diffgram_pytorch_dataset.cpython-38.pyc   | Bin 2370 -> 4466 bytes
 .../diffgram_pytorch_dataset.py               |  54 ++++++++++++------
 7 files changed, 136 insertions(+), 21 deletions(-)
 create mode 100644 pytorch_test.py
 create mode 100644 sdk/diffgram/core/sliced_directory.py

diff --git a/pytorch_test.py b/pytorch_test.py
new file mode 100644
index 0000000..d7c55e2
--- /dev/null
+++ b/pytorch_test.py
@@ -0,0 +1,33 @@
+# Manual smoke test: builds a DiffgramPytorchDataset for one file and plots its polygon masks.
+# Credentials are read from the environment so that API secrets never land in version control.
+import os
+
+import diffgram
+from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+
+project = diffgram.Project(project_string_id = "voc-test",
+                  client_id = os.environ["DIFFGRAM_CLIENT_ID"],
+                  client_secret = os.environ["DIFFGRAM_CLIENT_SECRET"],
+                  debug = True)
+
+file = project.file.get_by_id(1554, with_instances = True)
+
+diffgram_dataset = DiffgramPytorchDataset(
+    project = project,
+    diffgram_file_id_list = [1554]
+)
+
+# Draw
+import matplotlib.pyplot as plt
+from PIL import Image, ImageDraw
+img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0)
+mask1 = diffgram_dataset[0]['polygon_mask_list'][0]
+mask2 = diffgram_dataset[0]['polygon_mask_list'][1]
+print(mask1)
+for x in mask1:
+    print(x)
+plt.figure()
+plt.subplot(1,2,1)
+# plt.imshow(img, 'gray', interpolation='none')
+plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7)
+plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7)
\ No newline at end of file
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index fc247fd..901bda4 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -92,13 +92,35 @@ def all_files(self):
 			result = result + diffgram_files
 		return result
 
+	def all_file_ids(self):
+		"""Gather the results of list_files ('ids_only' mode, 1000 per page) across every page."""
+		gathered, next_page = [], 1
+		while next_page is not None:
+			page_items = self.list_files(limit = 1000, page_num = next_page, file_view_mode = 'ids_only')
+			next_page = self.file_list_metadata['next_page']
+			gathered = gathered + page_items
+		return gathered
+
+	def slice(self, query):
+		"""
+			Builds a SlicedDirectory over this directory for the given query.
+		:param query: query string, forwarded unchanged to SlicedDirectory
+		:return: SlicedDirectory
+		"""
+		from diffgram.core.sliced_directory import SlicedDirectory
+		# NOTE(review): the result was never used; call kept as-is, presumably an early access check — confirm.
+		self.list_files(limit = 25, page_num = 1, file_view_mode = 'ids_only')
+		# Pass the client explicitly so the slice talks to the same project as this directory.
+		sliced_dataset = SlicedDirectory(client = self.client, query = query, original_directory = self)
+		return sliced_dataset
+
 	def to_pytorch(self, transform = None):
 		"""
 			Transforms the file list inside the dataset into a pytorch dataset.
 		:return:
 		"""
-		dataset_files = self.all_files()
-		file_id_list = [file.id for file in dataset_files]
+		from diffgram.core.sliced_directory import SlicedDirectory
+		file_id_list = self.all_file_ids()
 		pytorch_dataset = DiffgramPytorchDataset(
 			project = self.client,
 			diffgram_file_id_list = file_id_list,
@@ -162,7 +184,8 @@ def list_files(
 			page_num=1,
 			limit=100,
 			search_term: str =None,
-			file_view_mode: str = 'annotation'):
+			file_view_mode: str = 'annotation',
+			query: str = None):
 		"""
 		Get a list of files in directory (from Diffgram service). 
 	
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
new file mode 100644
index 0000000..187b0bb
--- /dev/null
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -0,0 +1,37 @@
+from diffgram.core.directory import Directory
+from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+
+class SlicedDirectory(Directory):
+    """A view over `original_directory` restricted to the files matching `query`."""
+
+    def __init__(self, client = None, original_directory: Directory = None, query: str = None):
+        # `client` may be omitted by callers; fall back to the parent directory's client.
+        self.client = client if client is not None else original_directory.client
+        self.original_directory = original_directory
+        self.query = query
+        # Directory.__init__ is not called here, so mirror the state it would set up.
+        self.id = getattr(original_directory, 'id', None)
+        self.file_list_metadata = {}
+
+    def all_file_ids(self):
+        """Page through list_files(query = self.query) in 'ids_only' mode; return all results."""
+        page_num = 1
+        result = []
+        while page_num is not None:
+            result.extend(self.list_files(
+                limit = 1000, page_num = page_num, file_view_mode = 'ids_only', query = self.query))
+            # file_list_metadata is read right after the request; a next_page of None ends the loop.
+            page_num = self.file_list_metadata['next_page']
+        return result
+
+    def to_pytorch(self, transform = None):
+        """
+            Transforms the sliced file list into a pytorch dataset.
+        :param transform: optional callable forwarded to DiffgramPytorchDataset
+        :return: DiffgramPytorchDataset built from all_file_ids()
+        """
+        return DiffgramPytorchDataset(
+            project = self.client,
+            diffgram_file_id_list = self.all_file_ids(),
+            transform = transform)
+
diff --git a/sdk/diffgram/file/file_constructor.py b/sdk/diffgram/file/file_constructor.py
index 0c4c930..2a14f00 100644
--- a/sdk/diffgram/file/file_constructor.py
+++ b/sdk/diffgram/file/file_constructor.py
@@ -383,7 +383,7 @@ def format_assumptions(
 
         return instance_list
 
-    def import_bulk:
+    def import_bulk(self):
         """
         Import multiple packets
         FUTURE
@@ -392,7 +392,7 @@ def import_bulk:
             { packet_id : { packet }}
 
         """
-        pass
+        raise NotImplementedError
 
     def get_file_list(self, id_list: list, with_instances: bool = False):
         """
diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc
index 8fdac839a07c00c36bcf5d3fb8ad798cc4d22242..956dd7bfa87685db1edb53c6b09a34f90b73a6a5 100644
GIT binary patch
delta 20
acmZ3(xQ3A@l$V!_0SGQ@aU@RUSpWbmDg=W7

delta 20
acmZ3(xQ3A@l$V!_0SLCAzmqVLX8`~+WCh;<

diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc
index 6ad21c32779815367fa42401b4528230c5be8060..7cde116072e495af96c8f60b8f6b8e7fede4b1a9 100644
GIT binary patch
literal 4466
zcmbVP%WvGq8Rzh|+|}w~`KiQk*$0xW>qth7_TZ|n11m}07>WU>7a{>m?QkW^+$B9T
z8)*eDP*lG9P@u;iB%l|c`#%)@8|K<xa&Lj6m-hD!clA~SlmtJ{d%pP|zweu0&(5|i
zJb(WFO!Ain%laEtPChOwkMYUhfN+bm-0HK4_3g-JRJU^{a?Ib2T>PEf>-&*E>1#xR
z>1#$U{M~$}-;Ub-*=UwopIO}F{%ec-Dcg6VIc~qOI*o6!i`B7pV=YmMOqFg?;aQRJ
zRIGQH_Orf76OIwy(|)p<>ZbXviDaisQ%^n)Dv$BWUxK7o#JCmN{0g_Z^O}KamwT@*
zFulkdJV1-bo4kb<pU?0%{tZ6M=kO2sJYT@S$r)I@s&A}iz22rs`p-wI5Z$c_tLt5o
z)%2mVjqeFQ`6H0ZuB_Oq7$~b8P<zt?Md`pjv>-29+)i#W%WAd4$@8#GuuoWuVms}s
z@B_(TE_Vx&*5rBJ+<6#&98St%v0+B`M9~k`RvJ!u4PUH%es)<tIY->f@-)sfi5-a2
zf`c#-8<`S`7=?pje<KxP(F<u{_;l@k8JZ2Ii!YyD8S@6RSBQT2K{v_sWFt@Sg+-~d
zVvyum{)<U5JQ^rNRfHR9n3QFnr93RaKq!-bnWsmKuXj9s{WQ}VH+<{HL`1rIv|@BM
z&IT$K*zx#)`ffaB`n2}q_?slpc%ssJaK>fF)viqQo@hdSA|OJ(^41cw`29Os*(&<!
zN{Pc2D{G=CH;UbrDR(R7NNp8^523IX`cAf!&BR9y(?X0Hbx&>i%jHN1F^-U_IBsJ@
zX&klT+diXr?|W+(n&I+vP8XJR^pGi%KNxciQK+nKYnvUwD7T$MHf9x*x0TD;KKmW~
zgiLXL@2eL$;8f=_<V83r6qJ+~gH2Kw+1Etw%i;4pg~L+)n~bN>Up_KZC#FJUq0Zw8
z9nZ*ByJS<*!jTEa=`KuA8hPvHC%fI$*h<G1Bn0h?R1R}xR8W&SG(*?t_4f`sYN4?O
z<}#bv|Jl2HsnE`;Uy%Gx`7|-$<1<416AG2R4ZmHg|Mn6be(a9DO>68|Y|Z-mx8p|D
z;LZ||@1-N|zq(T+9@-j9)|$1?#(`qnHV-Pd@_IJG&l_jlRL+50HKCN2^7erYRcqU?
z0)P@=>5aoI)i?;K|DefdF=|G6d``8hnSHhoOs@QcnQG>cG3(Wx%7-H7*R{8kakZuW
ztu))*QrcCcGKI?&`Dn8koU;=TCwqZU-UcC%`uZB#2e)ugM#t^hzqenxWFwmGYBn-@
zK*e6ZuUUCUHM+G*n9?o!<)im#RYFD|qT2iHQeC9G@ZzqD`-yxhV}hD^!tC;DUI0<#
zBNV2qMt)hq)1_w5jLtfacKZcS;l0^js=aQJ7vfveCIyyasPb%pPFP$XHF|lGs0SZm
z2KMT-wU<#?5hMfc4ALDz3Xfb0Fxo2x_7OLz&mNT8ON2;9+E1hrsFSAyRLU|P@J_on
z*V$|!RWj(-@TzBzyxmy+OxiEih%9%QV#Qs2Bi$rUYE%oRv$ad-*G|^o)Gh}v;)h`3
zjHIL6F`VZz$%U9;G-|OM@P>f3;Sr1WHP!-{EU*Q84lR2R{tumW30S5N!Q_#T`~eks
zgM-h-ckCI$cm_;~+whF{UR|r41FQBG_!Q&rlJ%}dHe9);-7`LRvfVT7{<(JFw8Otn
zuR}YKmsNg`3^&;O=TTIlQ2PW)g-T_ZgcB4FRXUJ`2seOfCKwTZO|=n{x{qTXE$}JE
z4vL*{2N6u*lXBS4fRb`6DY5QWDi8oShKdGslR*eQ!AinRonix+nsf>9!tPKiMDWRU
zS*G2rM`3YeWKee~Q*q3|BukjG_wZ=Vhay!&F*t4z*P#*ZAW~^>HwK5|Dm73!LDPtu
z%rDsm;QknWf^wHmV60i(qlOP@M$62ut(3~i?xbv%I4)1yU1IL6y<R`I*A1{#`+@uj
z>&sgpR^Y-=@!GYq-eUB=WAEMjP9r_B+$ne?5ID-pD4r?`ga~>stha0(N4|?T6MEjV
z`p6XOYrZMY=tdmF%j5VdDw0;R*xsE>*)c=UF+fZ_q50%1%Bth7_ysCRaIdZx>M3PC
z{S?Uy|3rvFh{xzVE4-U=yqT(a085HXKvrDzVx%w0GmM>bwtOjP7f$V(17SWrbZ~-d
z!|A_JP}TvWz#$Wh74n8d3o!w?!?wHa;g5g^XJ*`fV~=gssE}jt+u~Cdzzv%RE$$vd
z$YW;^s2T22j)C^}*j2OK=Z)8Ja|(wTQA31-`N1FUaf3H2@1-yPiX?ds?rR^~U%BI8
z+#I(M_N;^XDmY|-jn*o0H%~zkwV6q+Woqq74IvSIv#S=LGi&}GYcAAlj@wlmee<hi
z3|GWI&~v3~AhaTU11uKSzl37ub~3evbgYJRY5XE;4MiTyY;%zEz4b{#8Il%ob&+)r
zB!^{L2o;*RA}`OAVq~0kR$Y%gWj3T3Qici3*HAEFxjhZblW@$8dmBT6DKBA|xJl%+
z4U2ct7U4UN#6SICc9N;IAIBF3UTi|>U=Tt4gh~&I{EWydk)IQxa2Z{h#BUSQ4Lo!f
zwPzCEsF}&QhFk5cg2H;8apO8_=_Xm~#t71>Gs4DJA`_*AAaR)OwSytz3RMZ8v_sd%
zdzYs9(+GJ2A|n4wgZBEtcg{MWk(fI{XSNRaPR4g@aTvi6Khce;&BPQoBuvQMFru4C
zmAk1#d^xhJ8aMs=+H;{W@6$0)>r>AdB&gN&1^OjzYay-&_ASswJHVA;(RQhYU1M{T
zmq0Z@>~Gn7KlpC_<351<cb#9g=wdQJG9Smf701Z6hPXe*x*f-_hDl!ch<RdlhREkc
zensSCkZAc*DPM?6Vx49lpV^cMj70;4KwMHSuv>G1-&zcA22S9ZzRrBKU`%XzsIpux
zlVb{UCE*ZR1JQONqlWRgtS~mEJzRIok$4}ykte$u?qRkpwe$SxGm{pYjL@uq5mP!?
Ut>y8EVBT=H05R9X4%mY8Uzp6SzW@LL

delta 1187
zcmZuv&5IOA6t9o|sOkA|oS2ETMiUm7*eoPB&4-APl?VY5l6YtgW$#q&Y@3;$Np(qP
zq=tp;9)ggNrLQ@RM{fcD1Fs%(F9gB=Ai3xz#8*AL8&MnjS5>dxd-dM${r*{f)M<U!
zY(@yaU;cV>>oVJKy^pW%uV3hXg@h;kJtTaIcWF`+<Qo(R`@es*|9Iv6$KWqff(1&5
zhy)RI4<~dPka!{x;T}qS!9)$#Kwt>1+iv-{*Db%sr@CDTH^d;03<ku80+NUK@EXRn
z{FAJoiaqNUa=4{Ja8-c=2Jir>sJuWY$6dvRGovuT0lxbI<nC=|d8Rq<6K6m8D6B5W
zm+%)C0Sp$nhXnb7s82OoU^vsBvw9W!6$U*t@b2R|(PTjlaXaJs56c(q`HuDUY$6l7
zIi6a7%12pl{n>JXQwl~nrBmpE<P+<sN~JUF>#^!@CxMoEF;+2EH7LfiEH8AL_oWS#
zEba`oa-A)q4!fko`9zKTvMBhW>pSnlMd9c`tbtc?gzv_#{r2mQ<+JGACo#Sj6HA9O
zuV{>M_3&%Y*pPGZ;C!|GtiHbTDuJ#{5B`e4-^z21GdsYbAiok|ConMjU7{}Q(2(0~
zQ4_%0E}qkz=}7py_-73E`rI3!xo^A;^!>?sV9=sr0^l?>{-SBZ`xv7iZ-53sIDe2i
zGw60p-2<<dVc^9KbIf1+HpWTor%Z1jKVqV=el|)6vb@y1xf2uBgF0>VkS9LO3LPI)
zC%~H2vVt$Ew}BqJn@U>N+scMAsSTv&n=_sX8*ZmXs<l$C>19R=x3+d=+LsfZjdQi)
zo{9NJwzV}->FD~*jkf&HV>@pS%LeIWB6Cs6H?A$GPBSr0n&KdyyArCGs?5oI2XmC-
zsL`qw<>sCobM576Ummqnq@xKW0O5S`igIk7bErGha>V%>L>6ZN5U7Qb4q%5c5>O8_
z*f(%QO5Bb;&`MaIZ?A1Mz*T+d;G~1I4*pkWwR7@RK7dnVB}Yc0I%_goS>vdD+CGg>
il>fF*f8IIteOnUCE{?|H&QM;$E|W{%hO!ux#pp}dULR}#

diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
index 71a06db..e912a6c 100644
--- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -3,6 +3,8 @@
 import os
 from imageio import imread
 import numpy as np
+import scipy as sp
+from PIL import Image, ImageDraw
 
 
 class DiffgramPytorchDataset(Dataset):
@@ -15,17 +17,37 @@ def __init__(self, project, diffgram_file_id_list = None, transform = None):
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
         self.diffgram_file_id_list = diffgram_file_id_list
-        self.__validate_file_ids()
+
         self.project = project
         self.transform = transform
         self._internal_file_list = []
-
+        self.__validate_file_ids()
 
     def __validate_file_ids(self):
-        url = '/api/'
-        raise NotImplementedError
-
-    def __extract_bbox_values(self, instance_list):
+        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
+        if not result:
+            raise Exception(
+                'Some file IDs do not belong to the project. Please provide only files from the same project.')
+
+    def __extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0):
+        nx, ny = diffgram_file.image['width'], diffgram_file.image['height']
+        mask_list = []
+        for instance in instance_list:
+            if instance['type'] != 'polygon':
+                continue
+            poly = [(p['x'], p['y']) for p in instance['points']]
+
+            img = Image.new(mode = 'L', size = (nx, ny), color = 0)  # mode L = 8-bit pixels, black and white
+            draw = ImageDraw.Draw(img)
+            print()
+            draw.polygon(poly, outline = 1, fill = 1)
+            mask = np.array(img).astype('float32')
+            # mask[np.where(mask == 0)] = empty_value
+            print('mask', len(mask))
+            mask_list.append(mask)
+        return mask_list
+
+    def __extract_bbox_values(self, instance_list, diffgram_file):
         """
             Creates a pytorch tensor based on the instance type.
             For now we are assuming shapes here, but we can extend it
@@ -41,10 +63,10 @@ def __extract_bbox_values(self, instance_list):
         for inst in instance_list:
             if inst['type'] != 'box':
                 continue
-            x_min_list.append(inst['x_min'])
-            x_max_list.append(inst['x_max'])
-            y_min_list.append(inst['y_min'])
-            y_max_list.append(inst['y_max'])
+            x_min_list.append(inst['x_min'] / diffgram_file.image['width'])
+            x_max_list.append(inst['x_max'] / diffgram_file.image['width'])
+            y_min_list.append(inst['y_min'] / diffgram_file.image['width'])
+            y_max_list.append(inst['y_max'] / diffgram_file.image['width'])
 
         return x_min_list, x_max_list, y_min_list, y_max_list
 
@@ -58,7 +80,7 @@ def __getitem__(self, idx):
         if torch.is_tensor(idx):
             idx = idx.tolist()
 
-        diffgram_file = self.project.file.get_by_id(idx, with_instances = True)
+        diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True)
         if hasattr(diffgram_file, 'image'):
             image = imread(diffgram_file.image.get('url_signed'))
         else:
@@ -68,17 +90,17 @@ def __getitem__(self, idx):
         instance_types_in_file = set([x['type'] for x in instance_list])
         # Process the instances of each file
         processed_instance_list = []
-
-        sample = {'image': image}
+        sample = {'image': image, 'diffgram_file': diffgram_file}
         if 'box' in instance_types_in_file:
-            x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list)
+            x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list, diffgram_file)
             sample['x_min_list'] = torch.Tensor(x_min_list)
             sample['x_max_list'] = torch.Tensor(x_max_list)
             sample['y_min_list'] = torch.Tensor(y_min_list)
             sample['y_max_list'] = torch.Tensor(y_max_list)
         if 'polygon' in instance_types_in_file:
-
+            mask_list = self.__extract_masks_from_polygon(instance_list, diffgram_file)
+            sample['polygon_mask_list'] = mask_list
         if self.transform:
             sample = self.transform(sample)
 
-        return sample
\ No newline at end of file
+        return sample

From c9e953cbbb556fb05d5dcaf88de5205ac0b8bc49 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 9 Aug 2021 14:21:58 -0600
Subject: [PATCH 07/17] wip: adding dataset iterator class

---
 pytorch_test.py                               |  14 +-
 .../core/diffgram_dataset_iterator.py         | 129 ++++++++++++++++++
 sdk/diffgram/core/directory.py                |   1 +
 .../diffgram_pytorch_dataset.cpython-38.pyc   | Bin 4466 -> 4425 bytes
 .../diffgram_pytorch_dataset.py               |  94 ++++---------
 sdk/diffgram/tensorflow_diffgram/__init__.py  |   0
 .../diffgram_tensorflow_dataset.py            |  80 +++++++++++
 7 files changed, 243 insertions(+), 75 deletions(-)
 create mode 100644 sdk/diffgram/core/diffgram_dataset_iterator.py
 create mode 100644 sdk/diffgram/tensorflow_diffgram/__init__.py
 create mode 100644 sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py

diff --git a/pytorch_test.py b/pytorch_test.py
index d7c55e2..83fe139 100644
--- a/pytorch_test.py
+++ b/pytorch_test.py
@@ -23,11 +23,17 @@
 img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0)
 mask1 = diffgram_dataset[0]['polygon_mask_list'][0]
 mask2 = diffgram_dataset[0]['polygon_mask_list'][1]
-print(mask1)
-for x in mask1:
-    print(x)
 plt.figure()
 plt.subplot(1,2,1)
 # plt.imshow(img, 'gray', interpolation='none')
 plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7)
-plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7)
\ No newline at end of file
+plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7)
+plt.show()
+
+
+# Dataset Example
+
+dataset = project.directory.get('Default')
+
+sliced_dataset = dataset.slice(query = 'labels.sheep  > 0 or labels.sofa > 0')
+
diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py
new file mode 100644
index 0000000..53e831f
--- /dev/null
+++ b/sdk/diffgram/core/diffgram_dataset_iterator.py
@@ -0,0 +1,129 @@
+from PIL import Image, ImageDraw
+from imageio import imread
+
+
+class DiffgramDatasetIterator:
+
+    def __init__(self, project, diffgram_file_id_list):
+        """
+
+        :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
+        :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
+        """
+        self.diffgram_file_id_list = diffgram_file_id_list
+
+        self.project = project
+        self._internal_file_list = []
+        self.__validate_file_ids()
+        self.current_file_index = 0
+
+    def __iter__(self):
+        self.current_file_index = 0
+        return self
+
+    def __next__(self):
+        file_id = self.diffgram_file_id_list[self.current_file_index]
+        diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
+        instance_data = self.get_file_instances(diffgram_file)
+        self.current_file_index += 1
+        return instance_data
+
+    def __validate_file_ids(self):
+        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
+        if not result:
+            raise Exception(
+                'Some file IDs do not belong to the project. Please provide only files from the same project.')
+
+    def get_image_data(self, diffgram_file):
+        if hasattr(diffgram_file, 'image'):
+            image = imread(diffgram_file.image.get('url_signed'))
+            return image
+        else:
+            raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
+
+    def get_file_instances(self, diffgram_file):
+        if diffgram_file['type'] not in ['image', 'frame']:
+            raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file['type']))
+
+        image = self.get_image_data(diffgram_file)
+        instance_list = diffgram_file.instance_list
+        instance_types_in_file = set([x['type'] for x in instance_list])
+        # Process the instances of each file
+        sample = {'image': image, 'diffgram_file': diffgram_file}
+        has_boxes = False
+        has_poly = False
+        if 'box' in instance_types_in_file:
+            has_boxes = True
+            x_min_list, x_max_list, y_min_list, y_max_list = self.extract_bbox_values(instance_list, diffgram_file)
+            sample['x_min_list'] = x_min_list
+            sample['x_max_list'] = x_max_list
+            sample['y_min_list'] = y_min_list
+            sample['y_max_list'] = y_max_list
+
+        if 'polygon' in instance_types_in_file:
+            has_poly = True
+            mask_list = self.extract_masks_from_polygon(instance_list, diffgram_file)
+            sample['polygon_mask_list'] = mask_list
+
+        if len(instance_types_in_file) > 2 and has_boxes and has_boxes:
+            raise NotImplementedError(
+                'SDK only supports boxes and polygon types currently. If you want a new instance type to be supported please contact us!'
+            )
+
+        label_id_list, label_name_list = self.extract_labels(instance_list)
+        sample['label_id_list'] = label_id_list
+        sample['label_name_list'] = label_name_list
+
+        return sample
+
+    def extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0):
+        nx, ny = diffgram_file.image['width'], diffgram_file.image['height']
+        mask_list = []
+        for instance in instance_list:
+            if instance['type'] != 'polygon':
+                continue
+            poly = [(p['x'], p['y']) for p in instance['points']]
+
+            img = Image.new(mode = 'L', size = (nx, ny), color = 0)  # mode L = 8-bit pixels, black and white
+            draw = ImageDraw.Draw(img)
+            draw.polygon(poly, outline = 1, fill = 1)
+            mask = np.array(img).astype('float32')
+            # mask[np.where(mask == 0)] = empty_value
+            mask_list.append(mask)
+        return mask_list
+
+    def extract_labels(self, instance_list, allowed_instance_types = None):
+        label_file_id_list = []
+        label_names_list = []
+
+        for inst in instance_list:
+            if allowed_instance_types and inst['type'] in allowed_instance_types:
+                continue
+
+            label_file_id_list.append(inst['label_file']['id'])
+            label_names_list.append(inst['label_file']['label']['name'])
+
+        return label_file_id_list, label_names_list
+
+    def extract_bbox_values(self, instance_list, diffgram_file):
+        """
+            Creates a pytorch tensor based on the instance type.
+            For now we are assuming shapes here, but we can extend it
+            to accept custom shapes specified by the user.
+        :param instance:
+        :return:
+        """
+        x_min_list = []
+        x_max_list = []
+        y_min_list = []
+        y_max_list = []
+
+        for inst in instance_list:
+            if inst['type'] != 'box':
+                continue
+            x_min_list.append(inst['x_min'] / diffgram_file.image['width'])
+            x_max_list.append(inst['x_max'] / diffgram_file.image['width'])
+            y_min_list.append(inst['y_min'] / diffgram_file.image['width'])
+            y_max_list.append(inst['y_max'] / diffgram_file.image['width'])
+
+        return x_min_list, x_max_list, y_min_list, y_max_list
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 901bda4..e737e07 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -109,6 +109,7 @@ def slice(self, query):
 			file_view_mode = 'ids_only'
 		)
 		sliced_dataset = SlicedDirectory(
+			client = self.client,
 			query = query,
 			original_directory = self
 		)
diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc
index 7cde116072e495af96c8f60b8f6b8e7fede4b1a9..afdb50b38f9e7da81a9ef35d57cfffae642e6b76 100644
GIT binary patch
delta 417
zcmZ9GO-lk%6o%)F&WCfo;}!d)4?znW!-#TGi4+w?K~O~0DjP_OGO??hYFf4mAvpd3
zT}4ZA?GI=bwVYkh52#kP2)fs0=fK0^@V@7~Ux}v#-KEr0M4hch-CB#LHMqyDmVhE2
zXiGDmb*g#h5fL4(1wg6l4Z{mHp<y1;CfpEDZ-hM31W+zUJ$*-U?Do4h2C8ErGlmCv
zpSbL+rLwQNoV%C4?pCUo<pORQC%PXzcPnSuF=k)@KaAcW1E#jA$<+P{VdF^w$K+u-
zI}$0(Sb3Pnx^->)fnuS=O|gCi9>!KAhSS7^HJm5gki$DN1dI4UCh3NdR8#>4uBPxa
zIN1Is%K%Bt(hlt74_%APM?57lD={ZAFR>tz#k_sUq{a^%7pgoemkvJLx1{Uox*g0%
ya=rhZQkfRn;;@M?kvfrC_`1XrmRUyMlq1DA8p96j0fw(E1uOW=MqnI=qI-Y3<zbTm

delta 530
zcmZ9I&1(};6vgLG=1r#aGLuPcQ|ebR1|(@B($X%h#R~PKDlH-+WT6gZh_Ri?cr&F$
zM-hYI#?^J<G64~FrDW|^aM!KxUvOpr0^iHh_u$9j+;g~ae^rhu&OOJmHTfoIxc~FJ
z(`4|G)>s8z(F3+|En!0qwA6lLrdFHFpL>@9Ey<3obY={rnYJ^A7d&BYH?dPbgrP3e
z=7^_!44^$dotk&GT7GiKs&GRXq2J%F<pk@7ekVXCw~9;@9H1#x%h*K!K(H9QQGz8k
z)1z20AJHEO)9)vPI1rh=AfkHqF~hSmK_9Y9m6K9vPyz!i$bk+;eHLsm!2?fsrSoi=
z{u&GP+B`lvVf}{rMH$DcM9spS(hY?sar+@G(XRcRVO=`($F9N_dHl|)%PI`w*mf`3
z3gf*X45B1xUq$R8ZqQ4<2)8F+c?Y0MMdt^!=+L=dRvWOXu%d8LVNKzZ!aA+Gk35wT
znNE;kL75Hu<i6q4&a~a2)%^P0|4o&=KyUM9*rCt)_uegK+*G(C(K=7x+yx4}5^PhU
mFi+&oL6N?A_41}13%%EGWvBP=_Rvrci}u5KfamDQyZ;ZsdVYEU

diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
index e912a6c..14ccb95 100644
--- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -1,13 +1,12 @@
-from torch.utils.data import Dataset, DataLoader
-import torch
 import os
-from imageio import imread
+
 import numpy as np
 import scipy as sp
-from PIL import Image, ImageDraw
+
+from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
 
 
-class DiffgramPytorchDataset(Dataset):
+class DiffgramPytorchDataset(DiffgramDatasetIterator, Dataset):
 
     def __init__(self, project, diffgram_file_id_list = None, transform = None):
         """
@@ -16,60 +15,21 @@ def __init__(self, project, diffgram_file_id_list = None, transform = None):
         :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
+        super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list)
+        global torch, Dataset, DataLoader
+        try:
+            import torch as torch  # type: ignore
+            from torch.utils.data import Dataset, DataLoader
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                "'torch' module should be installed to convert the Dataset into pytorch format"
+            )
         self.diffgram_file_id_list = diffgram_file_id_list
 
         self.project = project
         self.transform = transform
-        self._internal_file_list = []
         self.__validate_file_ids()
 
-    def __validate_file_ids(self):
-        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
-        if not result:
-            raise Exception(
-                'Some file IDs do not belong to the project. Please provide only files from the same project.')
-
-    def __extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0):
-        nx, ny = diffgram_file.image['width'], diffgram_file.image['height']
-        mask_list = []
-        for instance in instance_list:
-            if instance['type'] != 'polygon':
-                continue
-            poly = [(p['x'], p['y']) for p in instance['points']]
-
-            img = Image.new(mode = 'L', size = (nx, ny), color = 0)  # mode L = 8-bit pixels, black and white
-            draw = ImageDraw.Draw(img)
-            print()
-            draw.polygon(poly, outline = 1, fill = 1)
-            mask = np.array(img).astype('float32')
-            # mask[np.where(mask == 0)] = empty_value
-            print('mask', len(mask))
-            mask_list.append(mask)
-        return mask_list
-
-    def __extract_bbox_values(self, instance_list, diffgram_file):
-        """
-            Creates a pytorch tensor based on the instance type.
-            For now we are assuming shapes here, but we can extend it
-            to accept custom shapes specified by the user.
-        :param instance:
-        :return:
-        """
-        x_min_list = []
-        x_max_list = []
-        y_min_list = []
-        y_max_list = []
-
-        for inst in instance_list:
-            if inst['type'] != 'box':
-                continue
-            x_min_list.append(inst['x_min'] / diffgram_file.image['width'])
-            x_max_list.append(inst['x_max'] / diffgram_file.image['width'])
-            y_min_list.append(inst['y_min'] / diffgram_file.image['width'])
-            y_max_list.append(inst['y_max'] / diffgram_file.image['width'])
-
-        return x_min_list, x_max_list, y_min_list, y_max_list
-
     def __len__(self):
         return len(self.diffgram_file_id_list)
 
@@ -81,25 +41,17 @@ def __getitem__(self, idx):
             idx = idx.tolist()
 
         diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True)
-        if hasattr(diffgram_file, 'image'):
-            image = imread(diffgram_file.image.get('url_signed'))
-        else:
-            raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
 
-        instance_list = diffgram_file.instance_list
-        instance_types_in_file = set([x['type'] for x in instance_list])
-        # Process the instances of each file
-        processed_instance_list = []
-        sample = {'image': image, 'diffgram_file': diffgram_file}
-        if 'box' in instance_types_in_file:
-            x_min_list, x_max_list, y_min_list, y_max_list = self.__extract_bbox_values(instance_list, diffgram_file)
-            sample['x_min_list'] = torch.Tensor(x_min_list)
-            sample['x_max_list'] = torch.Tensor(x_max_list)
-            sample['y_min_list'] = torch.Tensor(y_min_list)
-            sample['y_max_list'] = torch.Tensor(y_max_list)
-        if 'polygon' in instance_types_in_file:
-            mask_list = self.__extract_masks_from_polygon(instance_list, diffgram_file)
-            sample['polygon_mask_list'] = mask_list
+        sample = self.get_file_instances(diffgram_file)
+        if 'x_min_list' in sample:
+            sample['x_min_list'] = torch.Tensor(sample['x_min_list'])
+        if 'x_max_list' in sample:
+            sample['x_max_list'] = torch.Tensor(sample['x_max_list'])
+        if 'y_min_list' in sample:
+            sample['y_min_list'] = torch.Tensor(sample['y_min_list'])
+        if 'y_max_list' in sample:
+            sample['y_max_list'] = torch.Tensor(sample['y_max_list'])
+
         if self.transform:
             sample = self.transform(sample)
 
diff --git a/sdk/diffgram/tensorflow_diffgram/__init__.py b/sdk/diffgram/tensorflow_diffgram/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
new file mode 100644
index 0000000..36a9dc4
--- /dev/null
+++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
@@ -0,0 +1,80 @@
+from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
+import os
+
+
+class DiffgramTensorflowDataset(DiffgramDatasetIterator):
+
+    def __init__(self, project, diffgram_file_id_list = None):
+        """
+
+        :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
+        :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
+        :param transform (callable, optional): Optional transforms to be applied on a sample
+        """
+        super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list)
+        global tf
+        try:
+            import tensorflow as tf  # type: ignore
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
+            )
+        self.diffgram_file_id_list = diffgram_file_id_list
+
+        self.project = project
+        self.__validate_file_ids()
+
+    def int64_feature(self, value):
+        return tf.train.Feature(int64_list = tf.train.Int64List(value = [value]))
+
+    def int64_list_feature(self, value):
+        return tf.train.Feature(int64_list = tf.train.Int64List(value = value))
+
+    def bytes_feature(self, value):
+        return tf.train.Feature(bytes_list = tf.train.BytesList(value = [value]))
+
+    def bytes_list_feature(self, value):
+        return tf.train.Feature(bytes_list = tf.train.BytesList(value = value))
+
+    def float_feature(self, value):
+        return tf.train.Feature(float_list = tf.train.FloatList(value = [value]))
+
+    def float_list_feature(self, value):
+        return tf.train.Feature(float_list = tf.train.FloatList(value = value))
+
+    def __validate_file_ids(self):
+        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
+        if not result:
+            raise Exception(
+                'Some file IDs do not belong to the project. Please provide only files from the same project.')
+
+    def __iter__(self):
+        self.current_file_index = 0
+        return self
+
+    def __next__(self):
+        file_id = self.diffgram_file_id_list[self.current_file_index]
+        diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
+        instance_data = self.get_file_instances(diffgram_file)
+        filename, file_extension = os.path.splitext(instance_data['diffgram_file']['image']['original_filename'])
+        print('instance_data', instance_data)
+        tf_example_dict = {
+            'image/height': self.int64_feature(instance_data['diffgram_file']['height']),
+            'image/width': self.int64_feature(instance_data['diffgram_file']['width']),
+            'image/filename': self.bytes_feature(filename),
+            'image/source_id': self.bytes_feature(filename),
+            'image/encoded': self.bytes_feature(instance_data['image']),
+            'image/format': self.bytes_feature(file_extension),
+            'image/object/bbox/xmin': self.float_list_feature(instance_data['x_min_list']),
+            'image/object/bbox/xmax': self.float_list_feature(instance_data['x_max_list']),
+            'image/object/bbox/ymin': self.float_list_feature(instance_data['y_min_list']),
+            'image/object/bbox/ymax': self.float_list_feature(instance_data['y_max_list']),
+            'image/object/class/text': self.bytes_list_feature(instance_data['label_name_list']),
+            'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']),
+        }
+        tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict))
+        self.current_file_index += 1
+        return tf_example
+
+    def get_dataset_obj(self):
+        return tf.data.Dataset.from_generator(self.__iter__)

From 8b907cd6efa6fe9acffe2f5d7bdd9d4a4903f6b7 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 9 Aug 2021 14:27:41 -0600
Subject: [PATCH 08/17] fix: remove file

---
 sdk/add_file_id_to_json.py                    | 46 -------------------
 .../tensorflow_diffgram/pytorch_test.py       |  0
 2 files changed, 46 deletions(-)
 delete mode 100644 sdk/add_file_id_to_json.py
 rename pytorch_test.py => sdk/diffgram/tensorflow_diffgram/pytorch_test.py (100%)

diff --git a/sdk/add_file_id_to_json.py b/sdk/add_file_id_to_json.py
deleted file mode 100644
index bfcbfbf..0000000
--- a/sdk/add_file_id_to_json.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from diffgram.core.core import Project
-import json
-
-project = Project(project_string_id = "coco-dataset",
-                  debug = True,
-                  client_id = "LIVE__rj6whqkwxkups7oczqis",
-                  client_secret = "fr5vy64v2096qad9av0dgw3fr0kjavt4c156soiwx51ntyv9qswpuxkhg0lf")
-
-
-def find_file(file_list, name):
-    for f in file_list:
-        if f.original_filename == name:
-            return f
-    return None
-
-
-with open('/home/pablo/Downloads/coco2017.json') as json_file:
-    data = json.load(json_file)
-
-    dataset_default = project.directory.get(name = "Default")
-
-    page_num = 1
-    all_files = []
-    print('start')
-    while page_num != None:
-        print('Current page', page_num)
-        diffgram_files = dataset_default.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base')
-        page_num = dataset_default.file_list_metadata['next_page']
-        print('{} of {}'.format(page_num, dataset_default.file_list_metadata['total_pages']))
-        all_files = all_files + diffgram_files
-
-    print('')
-    print('Files fetched: ', len(all_files))
-    result = []
-    for elm in data:
-        file = find_file(all_files, name = elm['image_name'])
-        if file:
-            print('Adding file ID {} to {}'.format(file.id, elm['image_name']))
-            elm['file_id'] = file.id
-            result.append(elm)
-        else:
-            print(elm['image_name'], 'not found.')
-
-    s = json.dumps(result).
-    f = open('/home/pablo/Downloads/coco2017_with_ids.json', 'w')
-    f.write(s)
diff --git a/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
similarity index 100%
rename from pytorch_test.py
rename to sdk/diffgram/tensorflow_diffgram/pytorch_test.py

From 060f2eb91f6d25303b71bffdc756788c606f137a Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 9 Aug 2021 14:30:41 -0600
Subject: [PATCH 09/17] ignore pyc

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 3b05b95..0a12786 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 .vs/PythonSettings.json
 .vs/VSWorkspaceState.json
 
+*.pyc
 .idea/
 
 sdk/diffgram/__pycache__/

From 4ece9b6e352aa90af6b106eed3d2f3339ea55fbf Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 9 Aug 2021 14:31:43 -0600
Subject: [PATCH 10/17] fix: remove pyc

---
 .../__pycache__/__init__.cpython-38.pyc          | Bin 172 -> 0 bytes
 .../diffgram_pytorch_dataset.cpython-38.pyc      | Bin 4425 -> 0 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc
 delete mode 100644 sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc

diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 956dd7bfa87685db1edb53c6b09a34f90b73a6a5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 172
zcmWIL<>g`kg3DSQi6Hthh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o10SKO;XkRlguH
zDJNgwr6|83DZfHLB{MB8y(lqPzo4=tBR@~KI3-&jgptMbi;^?q;R5mTnR%Hd@$q^E
VmA5!-a`RJ4b5iX<R(%Fy1^|w^E6V@?

diff --git a/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc b/sdk/diffgram/pytorch_diffgram/__pycache__/diffgram_pytorch_dataset.cpython-38.pyc
deleted file mode 100644
index afdb50b38f9e7da81a9ef35d57cfffae642e6b76..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4425
zcmbVPTW{OQ73Po>MahzHxq3H9iC(r*khOybDB9FbQ^dPz+SCn@El?1<U}+6y)1gRZ
zX0)~p>ZQqRfEMUuAMD5clm3QzZD01i1&Y43-x*5sy#Y#M4(HA@=X~cfziqdh7M@od
zOYyr!%laENPChOgk5T0BLAb?PX7yRf`gUkDYTKC;I_B$!F1}7y>wBR$8LNlB8Eb@1
zeBErW-wIp(cGzatQ;XNQ_txTG!rnOHJhxw3o%%QUi`B7peIr(}Oq6a?<#`eFL~M4L
z_R_vcVvZTLXZ?6P(GBz25b<7@mY#eZG#;bKUw|Z5$hZ~Se2LrKdCS1G%WH2fFuly{
z+(%E1H+U009-reaeCxc;=kfLV0$;?p!5LV*qOWhHz23Hn`!7bS5Z#>#tD9Yt)%2md
zjq(IVeix*)ODnQU2FgkY)ZVs0Q9E>xEXa#Cx8s}4vYO3c@;n^G_)jnp#ctA7!5zt8
zt#%8MROCg~-FX=NJebsjV$00ziJ~8<og|p@8ob>2)!A+N<Q!2i&5|ffCH_E^Hq3)q
zY^6%XVie@V{#GJ_q8HG_;MvCaWng|V-F)@z&RCbrULpFyoo<|E@m7}H3yOhCi#*P-
z{ck4KU^Y<(stC4{ARY{|G~q!31_BxP2U#-Pe6v&2*G@B?a>KW-Peh~}vmL`LQJSkn
z;E%@#)OVvP)2Fo;MPJ8R%43yOlT$7`u6AXT^+W^e6Fw2LmG_ot<MZ!$ZKvobYXcmv
zSlbZAV5`_)n{u}{7^$5i{}c*aqjIvJFcZx>rj-aQs*wu$tAmmDBOD=BQPjc@r7_gH
zXM2p^gKuqI2*cIsnl6wudkBhT4+f3_Dy6k+?Xp9Fa@RRxV^%VGOSzoAVSj*~5EM5L
zzI=%TPIWE=UIcleprowGw@F=uuZi4OgBMu>gQfP@DNmrkY-E;Bph9z@&f^80nvtt^
z2~*+X41!Uz4+u&lZ{7H8znd7WbZkLF(4I)-FjGba6`4mjbbVfbA7P*p8e3#8vzh&$
zgS(ds?VS1r$?ud+69XQK5#k?FDeYa@?Mn5vS77jCcU;@H#$L%btgo_hy{vO*1)lGf
zBksSxU3on8)mN+y>kS+GitXCmFWs`%v&sFubH)wj9J*x#N@<qO-CF6x9l;^JbC{*P
z!#a%|`n-+txzamqlygUnS+8%G9+Wn}scU;FS3BC<Nz&~drCl`|BruO68*LZ)IZV`W
zikB$z76>_{udWgnxP?P8I$F>Ell|HyJZQGB*~rKo4SV&zW`i?o(9L!7AKjuqc=QSF
zN?y=|SPq_ED)(d`7TZ-(KbEg#L~bRTFuS^*74RPN5h~MNLvK*Ps-<Snjea_gcKZcS
zV5#XrqHEnEE5tXXISL}hP-SV3L7*!O>%FXq)q@{&n!1)!@DOx*?c~XxAhm`rg%NR`
zI_-R*Yq1dVNPDp~GW6h@lAL#16%gBLE>)a&E61tZGdnhTo=f@zH6lz76KuARGSUq?
zW96^>>1yrLX|<E~x3$ahf1C%i&ImTV6~Qhp6I1|><xLQaT?f#8)&kg<?W?Q_;4j)O
zcJSc;5Pz5a#q=TXH-rA4(E!LD6c^<f@WKEdsNpsM|H<pCrE_RiCIC}m++DFgwg|4J
zYx--(bWZlyOuu)o-!uI%o74NyAINx0uSXd55B@m|78F6BAXiX{4B}woTmzNlvJk-*
zT$PD2<d&wY2uRJxp^P^89CPzxFW5t15-3s*`zf5C+=&O+cP9~uYg<D_6S{F8Ku3T`
zkg8K`z*WXw@?$}FC=~+pWVsw9-Lyy1Zfj(m=};!(n1M;~pR)IGw&z2UsG-P@JH$0;
zL^}vTy0#yILve*Vs2-zhL|x_+@57JY!I=0CNXIu&79HxiODmdYbp=tX$NQ7I+2Xi9
z?RSZ}vsk@$jMXi$R2hN1kNqo<`Yu3<*RFtilhON;eQ@uCAU(0&DNjcBK1;SJOe%_R
zh-ELW_iPiMz{!%_M3nceIx<D@if;-Zx*kQC5k*hYkhGJ<4sKt{j+uIn0b=3_ttVSi
zRux9YPtic$dS$av&nTJcC&)?oXF?Q0{0w7fg?A&0wi6ZQKvFb-kBW+3gsdd~CG9-n
zZ1qykE?n9*%dnmvIyiY{!|A_JQPv?G{t*+)CDMT-3+^8&!LEzQW9=f2!I&Ah-`QhZ
z)k~z#Z*1|o@?nOJ!zOo+Amp)=`)ZEYD0M)8YwW5v_jvs+%$%YeW>hZ1!FvBM_PEX)
zW$l$G{)*gp9_DKw*<ZS2f7}>1$8+Fkq4bX!+(vVqxLcrzht}Mr)ikZvq=o2*vG%&f
z=gppf$DWJTp5s>8!q~z(fniDf10zdi9dQ#;8O~yH^9v|uelJx!$g(P5E{$D;&7sI5
znQrF^KiHh)i2-Q=HxgN;HgY%^6hZ|ikjTr^p9qO$l`K~?Pr-&1L&`7__$n$U0=K3S
zcoKP;F>eDDSnmp^i5o;tV_1BQzTcr72i+h3FPx+*=||B;F%~}|?_fNFctEuu5&1EZ
zheUotgu-OFGzrxvdK>r9S=Kd^$A*nmMwPqOo+>DUSBW%knU-!4QnyBsPL=4@cVZbU
zB?O7X{9ZelB9^F0{z*G@8@zXE_CAe{CtO76ePO)4e(=Fn=aUbU>UY{ztannrUx~vA
zhWH7uPcaiw*pPsbxj2M3kO6lSiTE-@RpmFm>NazsFh8YZp4O+HGoGMQ)2}fu?}1o|
z>ArmvblLWC6Iixg>S0&e{NyF4>f?UUv=6@hVf`b`$F;gjrJ8i#$dRK*k#0s2(yAe@
zg^_MW(d%KHRU=}7Se+w6E=fEi@)#ssy;RB<dL=<;I&o%mdUD314nj^`Qp>lS^S;+y
z^l$i%@0hX9LbzxkwmMX4CRfQY1(}j?h>&N~b|J&MvADD_n9?=eVh1DfJ&cAm*-df1
nvgJTKFP=R&d7w!F&HfiLrTz6v9*@Y)8_pIX=1SN;TXg;l+qk2n


From 15241d42012a4d6a4101fff90888f06dd4d029aa Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Mon, 9 Aug 2021 16:24:22 -0600
Subject: [PATCH 11/17] wip: added sliced dataset to pytorch functionality

---
 .../core/diffgram_dataset_iterator.py         |  4 +--
 sdk/diffgram/core/directory.py                | 34 +++++++++++--------
 sdk/diffgram/core/sliced_directory.py         |  6 ++--
 .../diffgram_pytorch_dataset.py               | 19 +++--------
 .../tensorflow_diffgram/pytorch_test.py       | 26 ++++++++------
 5 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py
index 53e831f..15dc9a9 100644
--- a/sdk/diffgram/core/diffgram_dataset_iterator.py
+++ b/sdk/diffgram/core/diffgram_dataset_iterator.py
@@ -1,6 +1,6 @@
 from PIL import Image, ImageDraw
 from imageio import imread
-
+import numpy as np
 
 class DiffgramDatasetIterator:
 
@@ -42,7 +42,7 @@ def get_image_data(self, diffgram_file):
             raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
 
     def get_file_instances(self, diffgram_file):
-        if diffgram_file['type'] not in ['image', 'frame']:
+        if diffgram_file.type not in ['image', 'frame']:
             raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file['type']))
 
         image = self.get_image_data(diffgram_file)
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index e737e07..14c2d80 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -96,17 +96,19 @@ def all_file_ids(self):
 		page_num = 1
 		result = []
 		while page_num is not None:
-			diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only')
+			diffgram_ids = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only')
 			page_num = self.file_list_metadata['next_page']
-			result = result + diffgram_files
+			result = result + diffgram_ids
 		return result
 
 	def slice(self, query):
 		from diffgram.core.sliced_directory import SlicedDirectory
-		result = self.list_files(
+		# Get the first page to validate syntax.
+		self.list_files(
 			limit = 25,
 			page_num = 1,
-			file_view_mode = 'ids_only'
+			file_view_mode = 'ids_only',
+			query = query,
 		)
 		sliced_dataset = SlicedDirectory(
 			client = self.client,
@@ -120,7 +122,6 @@ def to_pytorch(self, transform = None):
 			Transforms the file list inside the dataset into a pytorch dataset.
 		:return:
 		"""
-		from diffgram.core.sliced_directory import SlicedDirectory
 		file_id_list = self.all_file_ids()
 		pytorch_dataset = DiffgramPytorchDataset(
 			project = self.client,
@@ -211,7 +212,6 @@ def list_files(
 		else:
 			logging.info("Using Default Dataset ID " + str(self.client.directory_id))
 			directory_id = self.client.directory_id
-		#print("directory_id", directory_id)
 
 		metadata = {'metadata' :
 			{
@@ -222,7 +222,8 @@ def list_files(
 				'media_type': "All",
 				'page': page_num,
 				'file_view_mode': file_view_mode,
-				'search_term': search_term
+				'search_term': search_term,
+				'query': query
 			}
 		}
 
@@ -245,14 +246,17 @@ def list_files(
 		self.file_list_metadata = data.get('metadata')
 		# TODO would like this to perhaps be a seperate function
 		# ie part of File_Constructor perhaps
-		file_list = []
-		for file_json in file_list_json:
-			file = File.new(
-				client = self.client,
-				file_json = file_json)
-			file_list.append(file)
-
-		return file_list
+		if file_view_mode == 'ids_only':
+			return file_list_json
+		else:
+			file_list = []
+			for file_json in file_list_json:
+				file = File.new(
+					client = self.client,
+					file_json = file_json)
+				file_list.append(file)
+
+			return file_list
 
 
 	def get(self, 
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index 187b0bb..cb84d3c 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -1,17 +1,21 @@
 from diffgram.core.directory import Directory
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
 
+
 class SlicedDirectory(Directory):
 
     def __init__(self, client, original_directory: Directory, query: str):
         self.original_directory = original_directory
         self.query = query
         self.client = client
+        # Share the same ID from the original directory as this is just an in-memory construct for better semantics.
+        self.id = original_directory.id
 
     def all_file_ids(self):
         page_num = 1
         result = []
         while page_num is not None:
+            print('slcied query', self.query)
             diffgram_files = self.list_files(limit = 1000,
                                              page_num = page_num,
                                              file_view_mode = 'ids_only',
@@ -20,7 +24,6 @@ def all_file_ids(self):
             result = result + diffgram_files
         return result
 
-
     def to_pytorch(self, transform = None):
         """
             Transforms the file list inside the dataset into a pytorch dataset.
@@ -34,4 +37,3 @@ def to_pytorch(self, transform = None):
 
         )
         return pytorch_dataset
-
diff --git a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
index 14ccb95..4239f51 100644
--- a/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
+++ b/sdk/diffgram/pytorch_diffgram/diffgram_pytorch_dataset.py
@@ -1,8 +1,5 @@
-import os
-
-import numpy as np
-import scipy as sp
-
+from torch.utils.data import Dataset, DataLoader
+import torch as torch  # type: ignore
 from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
 
 
@@ -15,20 +12,12 @@ def __init__(self, project, diffgram_file_id_list = None, transform = None):
         :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
-        super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list)
-        global torch, Dataset, DataLoader
-        try:
-            import torch as torch  # type: ignore
-            from torch.utils.data import Dataset, DataLoader
-        except ModuleNotFoundError:
-            raise ModuleNotFoundError(
-                "'torch' module should be installed to convert the Dataset into pytorch format"
-            )
+        super(DiffgramPytorchDataset, self).__init__(project, diffgram_file_id_list)
+
         self.diffgram_file_id_list = diffgram_file_id_list
 
         self.project = project
         self.transform = transform
-        self.__validate_file_ids()
 
     def __len__(self):
         return len(self.diffgram_file_id_list)
diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
index 83fe139..616ccad 100644
--- a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
+++ b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
@@ -18,22 +18,26 @@
 
 
 # Draw
-import matplotlib.pyplot as plt
-from PIL import Image, ImageDraw
-img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'], diffgram_dataset[0]['diffgram_file'].image['height']], 0)
-mask1 = diffgram_dataset[0]['polygon_mask_list'][0]
-mask2 = diffgram_dataset[0]['polygon_mask_list'][1]
-plt.figure()
-plt.subplot(1,2,1)
-# plt.imshow(img, 'gray', interpolation='none')
-plt.imshow(mask1, 'jet', interpolation='none', alpha=0.7)
-plt.imshow(mask2, 'Oranges', interpolation='none', alpha=0.7)
-plt.show()
+def display_masks():
+    import matplotlib.pyplot as plt
+    from PIL import Image, ImageDraw
+    img = Image.new("L", [diffgram_dataset[0]['diffgram_file'].image['width'],
+                          diffgram_dataset[0]['diffgram_file'].image['height']], 0)
+    mask1 = diffgram_dataset[0]['polygon_mask_list'][0]
+    mask2 = diffgram_dataset[0]['polygon_mask_list'][1]
+    plt.figure()
+    plt.subplot(1, 2, 1)
+    # plt.imshow(img, 'gray', interpolation='none')
+    plt.imshow(mask1, 'jet', interpolation = 'none', alpha = 0.7)
+    plt.imshow(mask2, 'Oranges', interpolation = 'none', alpha = 0.7)
+    plt.show()
 
 
 # Dataset Example
 
 dataset = project.directory.get('Default')
 
+pytorch_dataset = dataset.to_pytorch()
+
 sliced_dataset = dataset.slice(query = 'labels.sheep  > 0 or labels.sofa > 0')
 

From 04edb30d0ffbd7ad898820dc6406916890a44fa3 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Tue, 24 Aug 2021 08:18:54 -0600
Subject: [PATCH 12/17] wip: pytorch tensorflow

---
 sdk/diffgram/core/directory.py                | 10 +++++
 sdk/diffgram/core/sliced_directory.py         | 11 ++++-
 .../diffgram_tensorflow_dataset.py            | 44 ++++++++++---------
 .../tensorflow_diffgram/pytorch_test.py       |  2 +
 4 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 14c2d80..7478644 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -2,6 +2,7 @@
 from ..regular.regular import refresh_from_dict
 import logging
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
 
 def get_directory_list(self):
 	"""
@@ -131,6 +132,15 @@ def to_pytorch(self, transform = None):
 		)
 		return pytorch_dataset
 
+	def to_tensorflow(self):
+		file_id_list = self.all_file_ids()
+		diffgram_tensorflow_dataset = DiffgramTensorflowDataset(
+			project = self.client,
+			diffgram_file_id_list = file_id_list
+		)
+		tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj()
+		return tf_dataset
+
 	def new(self, name: str):
 		"""
 		Create a new directory and update directory list.
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index cb84d3c..be36c27 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -1,5 +1,6 @@
 from diffgram.core.directory import Directory
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
 
 
 class SlicedDirectory(Directory):
@@ -15,7 +16,6 @@ def all_file_ids(self):
         page_num = 1
         result = []
         while page_num is not None:
-            print('slcied query', self.query)
             diffgram_files = self.list_files(limit = 1000,
                                              page_num = page_num,
                                              file_view_mode = 'ids_only',
@@ -37,3 +37,12 @@ def to_pytorch(self, transform = None):
 
         )
         return pytorch_dataset
+
+    def to_tensorflow(self):
+        file_id_list = self.all_file_ids()
+        diffgram_tensorflow_dataset = DiffgramTensorflowDataset(
+            project = self.client,
+            diffgram_file_id_list = file_id_list
+        )
+        tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj()
+        return tf_dataset
diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
index 36a9dc4..3109d6c 100644
--- a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
+++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
@@ -1,24 +1,23 @@
 from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
 import os
-
+try:
+    import tensorflow as tf  # type: ignore
+except ModuleNotFoundError:
+    raise ModuleNotFoundError(
+        "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
+    )
 
 class DiffgramTensorflowDataset(DiffgramDatasetIterator):
 
-    def __init__(self, project, diffgram_file_id_list = None):
+    def __init__(self, project, diffgram_file_id_list):
         """
 
         :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
         :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
-        super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list)
-        global tf
-        try:
-            import tensorflow as tf  # type: ignore
-        except ModuleNotFoundError:
-            raise ModuleNotFoundError(
-                "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
-            )
+        super(DiffgramTensorflowDataset, self).__init__(project, diffgram_file_id_list)
+
         self.diffgram_file_id_list = diffgram_file_id_list
 
         self.project = project
@@ -52,24 +51,29 @@ def __iter__(self):
         self.current_file_index = 0
         return self
 
+    def get_next_elm(self):
+        yield self.__next__()
+
     def __next__(self):
         file_id = self.diffgram_file_id_list[self.current_file_index]
         diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
+        print('AAA', diffgram_file.id)
+        image = self.get_image_data(diffgram_file)
         instance_data = self.get_file_instances(diffgram_file)
-        filename, file_extension = os.path.splitext(instance_data['diffgram_file']['image']['original_filename'])
-        print('instance_data', instance_data)
+        filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename'])
+        label_names_bytes = [x.encode() for x in instance_data['label_name_list']]
         tf_example_dict = {
-            'image/height': self.int64_feature(instance_data['diffgram_file']['height']),
-            'image/width': self.int64_feature(instance_data['diffgram_file']['width']),
-            'image/filename': self.bytes_feature(filename),
-            'image/source_id': self.bytes_feature(filename),
-            'image/encoded': self.bytes_feature(instance_data['image']),
-            'image/format': self.bytes_feature(file_extension),
+            'image/height': self.int64_feature(instance_data['diffgram_file'].image['height']),
+            'image/width': self.int64_feature(instance_data['diffgram_file'].image['width']),
+            'image/filename': self.bytes_feature(filename.encode()),
+            'image/source_id': self.bytes_feature(filename.encode()),
+            'image/encoded': self.bytes_feature(image.tobytes()),
+            'image/format': self.bytes_feature(file_extension.encode()),
             'image/object/bbox/xmin': self.float_list_feature(instance_data['x_min_list']),
             'image/object/bbox/xmax': self.float_list_feature(instance_data['x_max_list']),
             'image/object/bbox/ymin': self.float_list_feature(instance_data['y_min_list']),
             'image/object/bbox/ymax': self.float_list_feature(instance_data['y_max_list']),
-            'image/object/class/text': self.bytes_list_feature(instance_data['label_name_list']),
+            'image/object/class/text': self.bytes_list_feature(label_names_bytes),
             'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']),
         }
         tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict))
@@ -77,4 +81,4 @@ def __next__(self):
         return tf_example
 
     def get_dataset_obj(self):
-        return tf.data.Dataset.from_generator(self.__iter__)
+        return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape=(1,)))
diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
index 616ccad..ab5987b 100644
--- a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
+++ b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
@@ -38,6 +38,8 @@ def display_masks():
 dataset = project.directory.get('Default')
 
 pytorch_dataset = dataset.to_pytorch()
+tf_dataset = dataset.to_tensorflow()
+
 
 sliced_dataset = dataset.slice(query = 'labels.sheep  > 0 or labels.sofa > 0')
 

From 4d61f77b67e06760805dfa0fe383a4c94c2ecea6 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Tue, 24 Aug 2021 15:12:01 -0600
Subject: [PATCH 13/17] wip: implemented len and getitem on iterators

---
 sdk/diffgram/core/diffgram_dataset_iterator.py |  8 ++++++++
 sdk/diffgram/core/directory.py                 | 13 ++++++++++---
 sdk/diffgram/core/sliced_directory.py          |  6 ++++--
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/sdk/diffgram/core/diffgram_dataset_iterator.py b/sdk/diffgram/core/diffgram_dataset_iterator.py
index 15dc9a9..81fd7af 100644
--- a/sdk/diffgram/core/diffgram_dataset_iterator.py
+++ b/sdk/diffgram/core/diffgram_dataset_iterator.py
@@ -21,6 +21,14 @@ def __iter__(self):
         self.current_file_index = 0
         return self
 
+    def __len__(self):
+        return len(self.diffgram_file_id_list)
+
+    def __getitem__(self, idx):
+        diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True)
+        instance_data = self.get_file_instances(diffgram_file)
+        return instance_data
+
     def __next__(self):
         file_id = self.diffgram_file_id_list[self.current_file_index]
         diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 7478644..4ddc57f 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -3,6 +3,8 @@
 import logging
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
 from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
+from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
+
 
 def get_directory_list(self):
 	"""
@@ -70,14 +72,19 @@ def set_directory_by_name(self, name):
 				  str(names_attempted))
 
 
-class Directory():
+class Directory(DiffgramDatasetIterator):
 
-	def __init__(self,
-			     client):
+	def __init__(self, client, file_id_list_sliced = None):
 
 		self.client = client
 		self.id = None
 		self.file_list_metadata = {}
+		
+		if file_id_list_sliced is None:
+			self.file_id_list = self.all_file_ids()
+		else:
+			self.file_id_list = file_id_list_sliced
+		super(Directory, self).__init__(self.client, self.file_id_list)
 
 	def all_files(self):
 		"""
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index be36c27..514a204 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -11,6 +11,8 @@ def __init__(self, client, original_directory: Directory, query: str):
         self.client = client
         # Share the same ID from the original directory as this is just an in-memory construct for better semantics.
         self.id = original_directory.id
+        self.file_id_list = self.all_file_ids()
+        super(Directory, self).__init__(self.client, self.file_id_list)
 
     def all_file_ids(self):
         page_num = 1
@@ -29,10 +31,10 @@ def to_pytorch(self, transform = None):
             Transforms the file list inside the dataset into a pytorch dataset.
         :return:
         """
-        file_id_list = self.all_file_ids()
+
         pytorch_dataset = DiffgramPytorchDataset(
             project = self.client,
-            diffgram_file_id_list = file_id_list,
+            diffgram_file_id_list = self.file_id_list,
             transform = transform
 
         )

From 066f10dc53e27fd653d2243952d9069f22318801 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Tue, 24 Aug 2021 15:38:16 -0600
Subject: [PATCH 14/17] fix: tf dataset get item

---
 sdk/diffgram/core/directory.py                |  5 ++--
 sdk/diffgram/core/sliced_directory.py         |  3 +--
 .../diffgram_tensorflow_dataset.py            | 24 ++++++++++---------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 4ddc57f..057563b 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -79,7 +79,7 @@ def __init__(self, client, file_id_list_sliced = None):
 		self.client = client
 		self.id = None
 		self.file_list_metadata = {}
-		
+
 		if file_id_list_sliced is None:
 			self.file_id_list = self.all_file_ids()
 		else:
@@ -145,8 +145,7 @@ def to_tensorflow(self):
 			project = self.client,
 			diffgram_file_id_list = file_id_list
 		)
-		tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj()
-		return tf_dataset
+		return diffgram_tensorflow_dataset
 
 	def new(self, name: str):
 		"""
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index 514a204..629f945 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -46,5 +46,4 @@ def to_tensorflow(self):
             project = self.client,
             diffgram_file_id_list = file_id_list
         )
-        tf_dataset = diffgram_tensorflow_dataset.get_dataset_obj()
-        return tf_dataset
+        return diffgram_tensorflow_dataset
diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
index 3109d6c..7c9bf01 100644
--- a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
+++ b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
@@ -1,5 +1,6 @@
 from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
 import os
+
 try:
     import tensorflow as tf  # type: ignore
 except ModuleNotFoundError:
@@ -7,6 +8,7 @@
         "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
     )
 
+
 class DiffgramTensorflowDataset(DiffgramDatasetIterator):
 
     def __init__(self, project, diffgram_file_id_list):
@@ -47,17 +49,13 @@ def __validate_file_ids(self):
             raise Exception(
                 'Some file IDs do not belong to the project. Please provide only files from the same project.')
 
-    def __iter__(self):
-        self.current_file_index = 0
-        return self
-
-    def get_next_elm(self):
-        yield self.__next__()
+    def __getitem__(self, idx):
+        tf_example = self.get_tf_train_example(idx)
+        return tf_example
 
-    def __next__(self):
-        file_id = self.diffgram_file_id_list[self.current_file_index]
+    def get_tf_train_example(self, idx):
+        file_id = self.diffgram_file_id_list[idx]
         diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
-        print('AAA', diffgram_file.id)
         image = self.get_image_data(diffgram_file)
         instance_data = self.get_file_instances(diffgram_file)
         filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename'])
@@ -77,8 +75,12 @@ def __next__(self):
             'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']),
         }
         tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict))
+        return tf_example
+
+    def __next__(self):
+        tf_example = self.get_tf_train_example(self.current_file_index)
         self.current_file_index += 1
         return tf_example
 
-    def get_dataset_obj(self):
-        return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape=(1,)))
+    # def get_dataset_obj(self):
+    #     return tf.data.Dataset.from_generator(self.get_next_elm, output_signature = tf.TensorSpec(shape = (1,)))

From 6c3be64e7d43ca907f626916e7e4a339ac8dc0a3 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 25 Aug 2021 09:31:48 -0600
Subject: [PATCH 15/17] feat: add explore() function

---
 sdk/diffgram/core/directory.py        |  9 +++++++++
 sdk/diffgram/core/sliced_directory.py | 17 ++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index 057563b..a1f8d17 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -109,6 +109,15 @@ def all_file_ids(self):
 			result = result + diffgram_ids
 		return result
 
+	def explore(self):
+		message = '{}/studio/annotate/{}/explorer?dataset_id={}'.format(
+			self.client.host,
+			self.project.project_string_id,
+			self.id
+		)
+		print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m')
+		print('\033[96m' + message + '\033[0m')
+
 	def slice(self, query):
 		from diffgram.core.sliced_directory import SlicedDirectory
 		# Get the first page to validate syntax.
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index 629f945..5dd64a1 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -1,7 +1,7 @@
 from diffgram.core.directory import Directory
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
 from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
-
+import urllib
 
 class SlicedDirectory(Directory):
 
@@ -26,6 +26,21 @@ def all_file_ids(self):
             result = result + diffgram_files
         return result
 
+    def explore(self):
+
+
+        payload = {'dataset_id': self.original_directory.id, 'query': self.query}
+        params = urllib.parse.urlencode(payload, quote_via = urllib.parse.quote)
+
+        message = '{}/studio/annotate/{}/explorer?{}'.format(
+            self.client.host,
+            self.project.project_string_id,
+            params
+
+        )
+        print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m')
+        print('\033[96m' + message +  '\033[0m')
+
     def to_pytorch(self, transform = None):
         """
             Transforms the file list inside the dataset into a pytorch dataset.

From c6aa1042211dbf34604ae0c5f97d3b90ea780bbb Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 25 Aug 2021 09:31:48 -0600
Subject: [PATCH 16/17] Revert "feat: add explore() function"

This reverts commit 6c3be64e7d43ca907f626916e7e4a339ac8dc0a3.
---
 sdk/diffgram/core/directory.py        |  9 ---------
 sdk/diffgram/core/sliced_directory.py | 17 +----------------
 2 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
index a1f8d17..057563b 100644
--- a/sdk/diffgram/core/directory.py
+++ b/sdk/diffgram/core/directory.py
@@ -109,15 +109,6 @@ def all_file_ids(self):
 			result = result + diffgram_ids
 		return result
 
-	def explore(self):
-		message = '{}/studio/annotate/{}/explorer?dataset_id={}'.format(
-			self.client.host,
-			self.project.project_string_id,
-			self.id
-		)
-		print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m')
-		print('\033[96m' + message + '\033[0m')
-
 	def slice(self, query):
 		from diffgram.core.sliced_directory import SlicedDirectory
 		# Get the first page to validate syntax.
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
index 5dd64a1..629f945 100644
--- a/sdk/diffgram/core/sliced_directory.py
+++ b/sdk/diffgram/core/sliced_directory.py
@@ -1,7 +1,7 @@
 from diffgram.core.directory import Directory
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
 from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
-import urllib
+
 
 class SlicedDirectory(Directory):
 
@@ -26,21 +26,6 @@ def all_file_ids(self):
             result = result + diffgram_files
         return result
 
-    def explore(self):
-
-
-        payload = {'dataset_id': self.original_directory.id, 'query': self.query}
-        params = urllib.parse.urlencode(payload, quote_via = urllib.parse.quote)
-
-        message = '{}/studio/annotate/{}/explorer?{}'.format(
-            self.client.host,
-            self.project.project_string_id,
-            params
-
-        )
-        print('\033[92m' + 'To Explore your dataset visit:' + '\033[0m')
-        print('\033[96m' + message +  '\033[0m')
-
     def to_pytorch(self, transform = None):
         """
             Transforms the file list inside the dataset into a pytorch dataset.

From 68fedf85a4211cc64c138099a3bf0a055a942f27 Mon Sep 17 00:00:00 2001
From: Pablo <pjestradac@gmail.com>
Date: Wed, 25 Aug 2021 10:19:45 -0600
Subject: [PATCH 17/17] feat: add overwrite flag

---
 sdk/diffgram/file/file.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdk/diffgram/file/file.py b/sdk/diffgram/file/file.py
index 6df65b4..4fedf11 100644
--- a/sdk/diffgram/file/file.py
+++ b/sdk/diffgram/file/file.py
@@ -62,7 +62,8 @@ def update(
         packet['instance_list'] = instance_list
 
         # Current default server side is to not overwrite
-        # packet['overwrite'] = overwrite
+        if overwrite:
+            packet['mode'] = "update_with_existing"
 
         self.client.file.from_packet(packet=packet)