Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactoring dataprep to use dask and adding sample script #1470

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions docs/notebooks/dataprep_dask.py
@@ -0,0 +1,24 @@
from gdsfactory.generic_tech.layer_map import LAYER as l
import gdsfactory.dataprep as dp
import gdsfactory as gf
import dask

# Run the task graph on dask's threaded scheduler.
dask.config.set(scheduler='threads')

if __name__ == '__main__':
    # Write a small reference component to disk so the dataprep layout has
    # something to load.
    c = gf.c.coupler_ring(cross_section="strip")
    c.write_gds("src.gds")

    d = dp.Layout(filepath="src.gds", layermap=dict(l))

    # A handful of chained derivations, purely to produce a more interesting
    # task graph. They are not meant to make physical sense.
    d.SLAB150 = d.WG + 3
    d.SHALLOW_ETCH = d.SLAB150 - d.WG
    d.DEEP_ETCH = d.WG + 2
    d.M1 = d.DEEP_ETCH + 1
    d.M2 = d.DEEP_ETCH - d.SHALLOW_ETCH

    # Visualize the task graph; the output name passed to dask is 'tasks'.
    d.visualize("tasks")

    # Evaluation of the task graph is lazy: nothing is computed until
    # calculate() is called.
    d.calculate()
    c = d.write("dst.gds")
    # c
68 changes: 57 additions & 11 deletions gdsfactory/dataprep.py
@@ -1,29 +1,50 @@
import kfactory as kf
from kfactory import kdb
from dask.delayed import delayed, Delayed
from dask.distributed import Future
import dask

@delayed
def size(region: kdb.Region, offset: float):
    """Lazily grow or shrink a copy of ``region`` by ``offset``.

    Because of the ``@delayed`` decorator, calling this function only records
    a task in the dask graph; the sizing runs when the graph is computed.

    Args:
        region: Region to resize. It is duplicated with ``dup()`` first, so
            the sizing is applied to the copy.
        offset: Sizing amount. It is scaled by 1e3 and converted to an
            integer before being passed to ``kdb.Region.size`` (presumably a
            micron -> 1 nm database-unit conversion; TODO confirm). Negative
            values shrink the region.

    Returns:
        Whatever ``kdb.Region.size`` returns for the duplicated region.
    """
    # round() instead of int(): int() truncates toward zero, so floating-point
    # noise (e.g. 1.001 * 1e3 evaluating to 1000.9999999999999) would silently
    # drop one database unit from the requested offset.
    return region.dup().size(round(offset * 1e3))

@delayed
def boolean_or(region1: kdb.Region, region2: kdb.Region):
    """Lazily combine two regions with ``region1.__or__(region2)``.

    The ``@delayed`` decorator turns a call into a dask task, so the boolean
    operation is only executed when the task graph is computed.

    Args:
        region1: Left operand; its ``__or__`` implementation is used.
        region2: Right operand.

    Returns:
        The result of ``region1.__or__(region2)``.
    """
    union = region1.__or__(region2)
    return union

@delayed
def boolean_not(region1: kdb.Region, region2: kdb.Region):
    """Lazily subtract ``region2`` from ``region1``.

    The ``@delayed`` decorator turns a call into a dask task, so the boolean
    operation is only executed when the task graph is computed.

    Args:
        region1: Region to subtract from.
        region2: Region to subtract.

    Returns:
        The result of ``kdb.Region.__sub__(region1, region2)``.
    """
    # Deliberately call the base-class implementation rather than using the
    # ``-`` operator: the Region subclass in this module overrides __sub__,
    # so the operator would presumably schedule another task instead of
    # performing the subtraction.
    base_subtract = kdb.Region.__sub__
    return base_subtract(region1, region2)

@delayed
def copy(region: kdb.Region):
    """Lazily duplicate ``region``.

    The ``@delayed`` decorator turns a call into a dask task, so the
    duplication only happens when the task graph is computed.

    Args:
        region: Region to duplicate.

    Returns:
        The result of ``region.dup()``.
    """
    duplicate = region.dup()
    return duplicate


class Region(kdb.Region):
    """``kdb.Region`` whose arithmetic operators build lazy dask tasks.

    Adding or subtracting a number schedules a sizing task; adding or
    subtracting another region (or an already-delayed region) schedules a
    boolean task. The operators therefore return dask ``Delayed`` objects
    rather than regions.
    """

    def __iadd__(self, offset):
        """Schedule growing this region by ``offset``.

        Returns a ``Delayed`` task, so ``r += x`` rebinds ``r`` to that task
        instead of modifying the region in place.
        """
        return size(self, offset)

    def __isub__(self, offset):
        """Schedule shrinking this region by ``offset``.

        Returns a ``Delayed`` task, so ``r -= x`` rebinds ``r`` to that task
        instead of modifying the region in place.
        """
        # The offset must be negated; passing it through unchanged would make
        # ``-=`` behave exactly like ``+=``.
        return size(self, -offset)

    def __add__(self, element):
        """Schedule ``self + element``.

        Args:
            element: A number (sizing offset), or a ``kdb.Region`` /
                ``Delayed`` to combine with this region via ``boolean_or``.

        Returns:
            A ``Delayed`` task.

        Raises:
            ValueError: If ``element`` is of an unsupported type.
        """
        if isinstance(element, (float, int)):
            return size(self, element)
        if isinstance(element, (kdb.Region, Delayed)):
            return boolean_or(self, element)
        raise ValueError(f'Cannot add type {type(element)} to region')

    def __sub__(self, element):
        """Schedule ``self - element``.

        Args:
            element: A number (sizing offset, applied negated), or a
                ``kdb.Region`` / ``Delayed`` to remove from this region via
                ``boolean_not``.

        Returns:
            A ``Delayed`` task.

        Raises:
            ValueError: If ``element`` is of an unsupported type.
        """
        if isinstance(element, (float, int)):
            return size(self, -element)
        if isinstance(element, (kdb.Region, Delayed)):
            return boolean_not(self, element)
        # Mirror __add__: fail loudly instead of silently returning None.
        raise ValueError(f'Cannot subtract type {type(element)} from region')

    def copy(self):
        """Return a duplicate of this region (``self.dup()``)."""
        return self.dup()
Expand All @@ -45,12 +66,37 @@ def __init__(self, layermap, filepath=None):
self.layermap = layermap
self.lib = lib

def calculate(self):
    """Evaluate every layer listed in ``self.layermap`` and store the results.

    The current attribute value of each layer is collected into one mapping
    and handed to ``dask.compute`` in a single call; each computed value is
    then written back to the attribute of the same name.
    """
    pending = {}
    for name in self.layermap:
        pending[name] = getattr(self, name)

    # dask.compute returns a tuple with one entry per positional argument;
    # the single mapping passed in comes back as element 0.
    computed = dask.compute(pending)[0]
    for name, value in computed.items():
        setattr(self, name, value)

def visualize(self, filename):
    """Render the task graph of all not-yet-computed layers.

    Only layers whose current value is a dask ``Delayed`` or a distributed
    ``Future`` are included in the graph; other layers are skipped.

    Args:
        filename: Passed through to ``dask.visualize`` as ``filename``.
    """
    print("visualizing task graph...")
    named_tasks = {}
    for layername in self.layermap:
        region = getattr(self, layername)
        if isinstance(region, (Delayed, Future)):
            named_tasks[layername] = region
    dask.visualize(named_tasks, filename=filename)

def write(self, filename, cellname: str = "out") -> kf.KCell:
    """Compute all layers, insert them into a new cell and write it to disk.

    Args:
        filename: Path handed to the cell's ``write`` method.
        cellname: Name of the ``kf.KCell`` that receives the shapes.

    Returns:
        The newly created cell containing one shape collection per layer in
        ``self.layermap``.

    Raises:
        ValueError: If inserting a layer's value raises ``TypeError``, i.e.
            the value is not something the shapes container accepts.
    """
    self.calculate()
    c = kf.KCell(cellname, self.lib)

    for layername, layer in self.layermap.items():
        region = getattr(self, layername)
        # calculate() should already have resolved every task; this guards
        # against a layer that is still lazy at this point.
        if isinstance(region, Delayed):
            region = region.compute()
        try:
            # Each region is inserted exactly once, inside the guarded call.
            c.shapes(self.lib.layer(layer[0], layer[1])).insert(region)
        except TypeError as err:
            raise ValueError(f'Unexpected type for region {layername}: {type(region)}') from err
    c.write(filename)
    return c

Expand Down