Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
.vs/PythonSettings.json
.vs/VSWorkspaceState.json

*.pyc
.idea/

sdk/diffgram/__pycache__/
Expand Down
1 change: 0 additions & 1 deletion sdk/diffgram/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,6 @@ def set_default_directory(self,
self.directory_id = self.default_directory['id']

self.directory_list = data["directory_list"]

self.session.headers.update(
{'directory_id': str(self.directory_id)})

Expand Down
137 changes: 137 additions & 0 deletions sdk/diffgram/core/diffgram_dataset_iterator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from PIL import Image, ImageDraw
from imageio import imread
import numpy as np

class DiffgramDatasetIterator:
    """
    Iterable / indexable view over a list of Diffgram file IDs.

    Every item is fetched on demand through ``project.file.get_by_id`` and
    converted by ``get_file_instances`` into a ``dict`` holding the image
    and the annotation data found on the file.
    """

    def __init__(self, project, diffgram_file_id_list):
        """

        :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
        :param diffgram_file_id_list (list): An arbitrary number of file ID's from Diffgram.
        :raises Exception: if ``project.file.file_list_exists`` reports that
            the IDs do not all belong to the project.
        """
        self.diffgram_file_id_list = diffgram_file_id_list

        self.project = project
        self._internal_file_list = []
        self.__validate_file_ids()
        self.current_file_index = 0

    def __iter__(self):
        """Restart iteration from the first file and return ``self``."""
        self.current_file_index = 0
        return self

    def __len__(self):
        """Number of file IDs held by this iterator."""
        return len(self.diffgram_file_id_list)

    def __getitem__(self, idx):
        """Fetch the file at position ``idx`` (with instances) and return its sample dict."""
        diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True)
        instance_data = self.get_file_instances(diffgram_file)
        return instance_data

    def __next__(self):
        """
        Return the sample for the next file.

        :raises StopIteration: once every file ID has been visited.
        """
        # The iterator protocol requires StopIteration at the end; indexing
        # past the list would surface as an IndexError inside ``for`` loops.
        if self.current_file_index >= len(self.diffgram_file_id_list):
            raise StopIteration
        file_id = self.diffgram_file_id_list[self.current_file_index]
        diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
        instance_data = self.get_file_instances(diffgram_file)
        self.current_file_index += 1
        return instance_data

    def __validate_file_ids(self):
        """Raise if the project reports that some of the file IDs are not its own."""
        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
        if not result:
            raise Exception(
                'Some file IDs do not belong to the project. Please provide only files from the same project.')

    def get_image_data(self, diffgram_file):
        """
        Read the image referenced by the ``url_signed`` entry of ``diffgram_file.image``.

        :param diffgram_file: file object; must expose an ``image`` attribute.
        :return: whatever ``imread`` returns for that URL.
        :raises Exception: if the file has no ``image`` attribute.
        """
        if hasattr(diffgram_file, 'image'):
            image = imread(diffgram_file.image.get('url_signed'))
            return image
        else:
            raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')

    def get_file_instances(self, diffgram_file):
        """
        Build the sample dict for one file.

        The returned dict always contains ``image``, ``diffgram_file``,
        ``label_id_list`` and ``label_name_list``. When the file has box
        instances it also contains ``x_min_list``, ``x_max_list``,
        ``y_min_list`` and ``y_max_list``; when it has polygon instances it
        also contains ``polygon_mask_list``.

        :param diffgram_file: file object fetched with its instances.
        :return: dict as described above.
        :raises NotImplementedError: if the file type is not ``image`` or
            ``frame``, or if boxes and polygons are mixed with further
            instance types.
        """
        if diffgram_file.type not in ['image', 'frame']:
            # Attribute access, like the check above; subscripting the file
            # object here would hide the intended error behind another one.
            raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file.type))

        image = self.get_image_data(diffgram_file)
        instance_list = diffgram_file.instance_list
        instance_types_in_file = {x['type'] for x in instance_list}
        # Process the instances of each file
        sample = {'image': image, 'diffgram_file': diffgram_file}
        has_boxes = False
        has_poly = False
        if 'box' in instance_types_in_file:
            has_boxes = True
            x_min_list, x_max_list, y_min_list, y_max_list = self.extract_bbox_values(instance_list, diffgram_file)
            sample['x_min_list'] = x_min_list
            sample['x_max_list'] = x_max_list
            sample['y_min_list'] = y_min_list
            sample['y_max_list'] = y_max_list

        if 'polygon' in instance_types_in_file:
            has_poly = True
            mask_list = self.extract_masks_from_polygon(instance_list, diffgram_file)
            sample['polygon_mask_list'] = mask_list

        # More than two types while both supported ones are present means at
        # least one unsupported type is on the file.
        # NOTE(review): files holding an unsupported type without both boxes
        # and polygons still pass this check - confirm that is intended.
        if len(instance_types_in_file) > 2 and has_boxes and has_poly:
            raise NotImplementedError(
                'SDK only supports boxes and polygon types currently. If you want a new instance type to be supported please contact us!'
            )

        label_id_list, label_name_list = self.extract_labels(instance_list)
        sample['label_id_list'] = label_id_list
        sample['label_name_list'] = label_name_list

        return sample

    def extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0):
        """
        Rasterise every polygon instance into its own mask.

        :param instance_list: instance dicts; only ``type == 'polygon'`` is used.
        :param diffgram_file: file object; ``image['width']`` and
            ``image['height']`` give the mask size.
        :param empty_value: currently unused; background pixels are always 0.
        :return: list of float32 arrays (one per polygon) holding 1 on the
            polygon outline and interior and 0 elsewhere.
        """
        nx, ny = diffgram_file.image['width'], diffgram_file.image['height']
        mask_list = []
        for instance in instance_list:
            if instance['type'] != 'polygon':
                continue
            poly = [(p['x'], p['y']) for p in instance['points']]

            img = Image.new(mode = 'L', size = (nx, ny), color = 0)  # mode L = 8-bit pixels, black and white
            draw = ImageDraw.Draw(img)
            draw.polygon(poly, outline = 1, fill = 1)
            mask = np.array(img).astype('float32')
            mask_list.append(mask)
        return mask_list

    def extract_labels(self, instance_list, allowed_instance_types = None):
        """
        Collect the label file ID and label name of each instance.

        :param instance_list: instance dicts with a ``label_file`` entry.
        :param allowed_instance_types: optional collection of instance types
            to keep; when empty or None every instance is kept.
        :return: tuple ``(label_file_id_list, label_names_list)``.
        """
        label_file_id_list = []
        label_names_list = []

        for inst in instance_list:
            # Skip the instances whose type is NOT in the allowed collection.
            if allowed_instance_types and inst['type'] not in allowed_instance_types:
                continue

            label_file_id_list.append(inst['label_file']['id'])
            label_names_list.append(inst['label_file']['label']['name'])

        return label_file_id_list, label_names_list

    def extract_bbox_values(self, instance_list, diffgram_file):
        """
        Extract the box coordinates normalised by the image size.

        x values are divided by ``image['width']`` and y values by
        ``image['height']``.

        :param instance_list: instance dicts; only ``type == 'box'`` is used.
        :param diffgram_file: file object exposing ``image['width']`` and
            ``image['height']``.
        :return: tuple ``(x_min_list, x_max_list, y_min_list, y_max_list)``
            with one entry per box, in instance order.
        """
        x_min_list = []
        x_max_list = []
        y_min_list = []
        y_max_list = []

        for inst in instance_list:
            if inst['type'] != 'box':
                continue
            width = diffgram_file.image['width']
            height = diffgram_file.image['height']
            x_min_list.append(inst['x_min'] / width)
            x_max_list.append(inst['x_max'] / width)
            # Vertical coordinates are relative to the image height.
            y_min_list.append(inst['y_min'] / height)
            y_max_list.append(inst['y_max'] / height)

        return x_min_list, x_max_list, y_min_list, y_max_list
114 changes: 94 additions & 20 deletions sdk/diffgram/core/directory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from diffgram.file.file import File
from ..regular.regular import refresh_from_dict
import logging
from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator


def get_directory_list(self):
Expand Down Expand Up @@ -71,14 +74,80 @@ def set_directory_by_name(self, name):
str(names_attempted))


class Directory():
class Directory(DiffgramDatasetIterator):

def __init__(self,
client):
def __init__(self, client, file_id_list_sliced = None):
    """
    Set up the directory state and the inherited dataset iterator.

    :param client: stored as ``self.client`` and handed to the iterator
        base class as its project.
    :param file_id_list_sliced: optional list of file IDs to iterate over;
        when None the IDs are loaded with ``all_file_ids()``.
    """
    self.client = client
    self.id = None
    self.file_list_metadata = {}

    # Without an explicit ID list, fall back to every file ID reported by
    # all_file_ids(); the attributes above are set first because that call
    # goes through list_files().
    self.file_id_list = self.all_file_ids() if file_id_list_sliced is None else file_id_list_sliced
    super(Directory, self).__init__(self.client, self.file_id_list)

def all_files(self):
    """
    Get all the files of the directory.

    Calls ``list_files`` with ``limit = 1000`` and
    ``file_view_mode = 'base'`` starting at page 1, and keeps requesting
    the page named by ``self.file_list_metadata['next_page']`` until that
    value is None.
    Warning! This can be an expensive function and take a long time.
    :return: list with the results of every page, in page order.
    """
    page_num = 1
    result = []
    while page_num is not None:
        diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base')
        page_num = self.file_list_metadata['next_page']
        # Extend in place: ``result = result + page`` copies everything
        # gathered so far again on every page.
        result.extend(diffgram_files)
    return result

def all_file_ids(self):
    """
    Get the IDs of all the files of the directory.

    Calls ``list_files`` with ``limit = 1000`` and
    ``file_view_mode = 'ids_only'`` starting at page 1, and keeps
    requesting the page named by ``self.file_list_metadata['next_page']``
    until that value is None.
    :return: list with the results of every page, in page order.
    """
    page_num = 1
    result = []
    while page_num is not None:
        diffgram_ids = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only')
        page_num = self.file_list_metadata['next_page']
        # Extend in place: ``result = result + page`` copies everything
        # gathered so far again on every page.
        result.extend(diffgram_ids)
    return result

def slice(self, query):
    """
    Create a SlicedDirectory over this directory for ``query``.

    :param query: query string forwarded to ``list_files`` and to the
        SlicedDirectory constructor.
    :return: the new SlicedDirectory, built with this directory as its
        ``original_directory``.
    """
    # Imported inside the method: sliced_directory imports this module.
    from diffgram.core.sliced_directory import SlicedDirectory

    # Request a single small page first so the query syntax is validated
    # before the sliced directory is built.
    self.list_files(page_num = 1, limit = 25, file_view_mode = 'ids_only', query = query)

    return SlicedDirectory(client = self.client, original_directory = self, query = query)

def to_pytorch(self, transform = None):
    """
    Build a DiffgramPytorchDataset from all the file IDs of this directory.

    :param transform: passed through unchanged to DiffgramPytorchDataset.
    :return: the new DiffgramPytorchDataset.
    """
    ids = self.all_file_ids()
    return DiffgramPytorchDataset(project = self.client, diffgram_file_id_list = ids, transform = transform)

def to_tensorflow(self):
    """
    Build a DiffgramTensorflowDataset from all the file IDs of this directory.

    :return: the new DiffgramTensorflowDataset.
    """
    ids = self.all_file_ids()
    return DiffgramTensorflowDataset(project = self.client, diffgram_file_id_list = ids)

def new(self, name: str):
"""
Expand Down Expand Up @@ -131,9 +200,12 @@ def new(self, name: str):


def list_files(
self,
limit=None,
search_term: str =None):
self,
page_num=1,
limit=100,
search_term: str =None,
file_view_mode: str = 'annotation',
query: str = None):
"""
Get a list of files in directory (from Diffgram service).

Expand All @@ -158,7 +230,6 @@ def list_files(
else:
logging.info("Using Default Dataset ID " + str(self.client.directory_id))
directory_id = self.client.directory_id
#print("directory_id", directory_id)

metadata = {'metadata' :
{
Expand All @@ -167,10 +238,10 @@ def list_files(
'annotation_status': "All",
'limit': limit,
'media_type': "All",
'request_next_page': False,
'request_previous_page': False,
'file_view_mode': "annotation",
'search_term': search_term
'page': page_num,
'file_view_mode': file_view_mode,
'search_term': search_term,
'query': query
}
}

Expand All @@ -190,17 +261,20 @@ def list_files(
# Success
data = response.json()
file_list_json = data.get('file_list')

self.file_list_metadata = data.get('metadata')
# TODO would like this to perhaps be a seperate function
# ie part of File_Constructor perhaps
file_list = []
for file_json in file_list_json:
file = File.new(
client = self.client,
file_json = file_json)
file_list.append(file)

return file_list
if file_view_mode == 'ids_only':
return file_list_json
else:
file_list = []
for file_json in file_list_json:
file = File.new(
client = self.client,
file_json = file_json)
file_list.append(file)

return file_list


def get(self,
Expand Down
49 changes: 49 additions & 0 deletions sdk/diffgram/core/sliced_directory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from diffgram.core.directory import Directory
from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset


class SlicedDirectory(Directory):
    """
    View of a Directory restricted to the files that match a query.

    The matching file IDs are loaded once in ``__init__`` and kept in
    ``self.file_id_list``.
    """

    def __init__(self, client, original_directory: Directory, query: str):
        """
        :param client: stored as ``self.client`` and handed to the iterator
            base class as its project.
        :param original_directory: the Directory this slice was taken from.
        :param query: query string sent with every ``list_files`` request.
        """
        self.original_directory = original_directory
        self.query = query
        self.client = client
        # Share the same ID from the original directory as this is just an in-memory construct for better semantics.
        self.id = original_directory.id
        self.file_id_list = self.all_file_ids()
        # super(Directory, self) starts the lookup after Directory, so
        # Directory.__init__ is not run here.
        # NOTE(review): presumably intentional, to keep the id and the
        # sliced ID list set above - confirm.
        super(Directory, self).__init__(self.client, self.file_id_list)

    def all_file_ids(self):
        """
        Get the IDs of all the files matching ``self.query``.

        Calls ``list_files`` with ``limit = 1000``,
        ``file_view_mode = 'ids_only'`` and the query, starting at page 1,
        until ``self.file_list_metadata['next_page']`` is None.
        :return: list with the results of every page, in page order.
        """
        page_num = 1
        result = []
        while page_num is not None:
            diffgram_files = self.list_files(limit = 1000,
                                             page_num = page_num,
                                             file_view_mode = 'ids_only',
                                             query = self.query)
            page_num = self.file_list_metadata['next_page']
            # Extend in place: ``result = result + page`` copies everything
            # gathered so far again on every page.
            result.extend(diffgram_files)
        return result

    def to_pytorch(self, transform = None):
        """
        Transforms the file list inside the dataset into a pytorch dataset.

        Uses the IDs already stored in ``self.file_id_list``.
        :param transform: passed through unchanged to DiffgramPytorchDataset.
        :return: the new DiffgramPytorchDataset.
        """

        pytorch_dataset = DiffgramPytorchDataset(
            project = self.client,
            diffgram_file_id_list = self.file_id_list,
            transform = transform

        )
        return pytorch_dataset

    def to_tensorflow(self):
        """
        Transforms the files matching the query into a tensorflow dataset.

        Unlike ``to_pytorch`` this requests the matching IDs again through
        ``all_file_ids()`` instead of reusing ``self.file_id_list``.
        :return: the new DiffgramTensorflowDataset.
        """
        file_id_list = self.all_file_ids()
        diffgram_tensorflow_dataset = DiffgramTensorflowDataset(
            project = self.client,
            diffgram_file_id_list = file_id_list
        )
        return diffgram_tensorflow_dataset
9 changes: 5 additions & 4 deletions sdk/diffgram/file/file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from ..regular.regular import refresh_from_dict


class File():
"""
file literal object
Expand All @@ -11,11 +10,12 @@ class File():

def __init__(
self,
id=None,
client=None):
id = None,
client = None):
self.id = id
self.client = client

@staticmethod
def new(
client,
file_json):
Expand Down Expand Up @@ -62,7 +62,8 @@ def update(
packet['instance_list'] = instance_list

# Current default server side is to not overwrite
# packet['overwrite'] = overwrite
if overwrite:
packet['mode'] = "update_with_existing"

self.client.file.from_packet(packet=packet)

Expand Down
Loading