In [53]:
import yt
import inspect
import ipywidgets
import traitlets
from yt.utilities.io_handler import BaseIOHandler
from IPython.display import Markdown, display
import textwrap
import collections

In [54]:
all_subclasses = []
def subclasses_assemble(parent):
    for child in parent.__subclasses__(): 
        all_subclasses.append(child)  # add the name        
        subclasses_assemble(child)  # continue downward... 
    
subclasses_assemble(BaseIOHandler)

class_dropdown = ipywidgets.Dropdown(options=[(_.__name__, _) for _ in all_subclasses])
func_dropdown = ipywidgets.Dropdown(options=[_ for _ in dir(BaseIOHandler) if not _.startswith("__")])
defined_at = ipywidgets.HTML()
source = ipywidgets.Output(layout=ipywidgets.Layout(width="50%", height="50em"))


def update_class(event):
    current_func = func_dropdown.value
    func_dropdown.options = [_ for _ in dir(class_dropdown.value) if not _.startswith("__")]
    if current_func in func_dropdown.options:
        func_dropdown.value = current_func

update_class(None)
class_dropdown.observe(update_class, ["value"])
    
def update_source(event):
    cls = class_dropdown.value
    f = getattr(cls, func_dropdown.value)
    
    source.clear_output()
    if not isinstance(f, collections.abc.Callable): return
    defined_at.value = f"<tt>{inspect.getsourcefile(f)}:{inspect.getsourcelines(f)[1]}</tt>"
    with source:
        display(
            Markdown(
                data="```python\n"
                + textwrap.dedent(inspect.getsource(f))
                + "\n```"
            )
        )

func_dropdown.observe(update_source, ["value"])
class_dropdown.observe(update_class, ["value"])
update_source(None)
display(ipywidgets.VBox([class_dropdown, func_dropdown, defined_at, source]))



VBox(children=(Dropdown(options=(('BaseParticleIOHandler', <class 'yt.utilities.io_handler.BaseParticleIOHandl…

On only display those children with functions that differ from the base handler?

In [47]:
def source_check(obj, func_name):
    f = getattr(obj, func_name)
    if isinstance(f, collections.abc.Callable):         
        return f"{inspect.getsourcefile(f)}:{inspect.getsourcelines(f)[1]}"
    return
    
def child_overrides_func(func_name, child, parent):
    c_func = source_check(child, func_name)
    p_func = source_check(parent, func_name)
    return c_func != p_func

def get_children_classes(base_class, func_name=None):
    # recursively builds a list of all children that override func_name of base_class
    
    all_subclasses = []     
    def subclasses_assemble(parent):
        for child in parent.__subclasses__(): 
            if func_name is None or child_overrides_func(func_name, child, base_class):
                all_subclasses.append(child)  # add the name        
            subclasses_assemble(child)  # continue downward... 

    subclasses_assemble(base_class) # all the children
    return all_subclasses


subclasses = get_children_classes(BaseIOHandler, '_read_particle_selection')
subclasses

[yt.frontends.gadget_fof.io.IOHandlerGadgetFOFHaloHDF5,
 yt.frontends.halo_catalog.io.IOHandlerYTHalo,
 yt.frontends.open_pmd.io.IOHandlerOpenPMDHDF5]

In [49]:
base_funcs = [_ for _ in dir(BaseIOHandler) if not _.startswith("__")]

subclasses_that_override = {}
for f in base_funcs:
    subclasses_that_override[f] = get_children_classes(BaseIOHandler, f)


In [52]:
subclasses_that_override['_read_particle_selection']

[yt.frontends.gadget_fof.io.IOHandlerGadgetFOFHaloHDF5,
 yt.frontends.halo_catalog.io.IOHandlerYTHalo,
 yt.frontends.open_pmd.io.IOHandlerOpenPMDHDF5]

In [66]:

func_dropdown = ipywidgets.Dropdown(options=base_funcs, value='_read_particle_selection')
class_dropdown = ipywidgets.Dropdown(options=subclasses_that_override[func_dropdown.value])
defined_at = ipywidgets.HTML()
source = ipywidgets.Output(layout=ipywidgets.Layout(width="100%", height="50em"))


def update_classlist(event):
    # function value has changed, update the class
    current_func = func_dropdown.value
    class_dropdown.options = subclasses_that_override[current_func]

update_classlist(None)
func_dropdown.observe(update_classlist, ["value"])

   
def update_source(event):
    cls = class_dropdown.value
    if cls is None: 
        source.clear_output()
        defined_at.value = ""
        return
    f = getattr(cls, func_dropdown.value)
    
    source.clear_output()
    if not isinstance(f, collections.abc.Callable): return
    defined_at.value = f"<tt>{inspect.getsourcefile(f)}:{inspect.getsourcelines(f)[1]}</tt>"
    with source:
        display(
            Markdown(
                data="```python\n"
                + textwrap.dedent(inspect.getsource(f))
                + "\n```"
            )
        )

func_dropdown.observe(update_classlist, ["value"])  # if selected function changes, update the class list
class_dropdown.observe(update_source, ["value"])  # if selected class changes, update the source
update_source(None)
display(ipywidgets.VBox([func_dropdown, class_dropdown, defined_at, source]))


VBox(children=(Dropdown(index=18, options=('_cache_on', '_count_particles_chunks', '_count_selected_particles'…

In [91]:
import difflib
from pprint import pprint

basefunc = getattr(BaseIOHandler, "_read_particle_selection")
func_src = inspect.getsource(basefunc).split("\n")

cfunc = getattr(yt.frontends.gadget_fof.IOHandlerGadgetFOFHaloHDF5, "_read_particle_selection")
f_c = inspect.getsource(cfunc).split("\n")
d = difflib.Differ()
result = list(d.compare(func_src, f_c))
diffresult = "\n".join(result)

Markdown(
                data="```python\n"
                + textwrap.dedent(diffresult)
                + "\n```"
            )


```python
-     def _read_particle_selection(
+     def _read_particle_selection(self, dobj, fields):
?                                  ++++++++++++++++++++

+         rv = {}
+         ind = {}
-         self, chunks, selector, fields: List[Tuple[str, str]]
-     ) -> Mapping[Tuple[str, str], np.ndarray]:
-         rv = {}  # the return dictionary
-         ind = {}  # holds the most recent max index of the return arrays by field
- 
-         # Initialize containers for tracking particle, field information
-         # ptf (particle field types) maps particle type to list of on-disk fields to read
-         # psize maps particle type to on-disk size across chunks
-         # fsize maps particle type to size of return values
-         # field_maps stores fields, accounting for field unions
-         ptf: DefaultDict[str, List[str]] = defaultdict(list)
-         psize: DefaultDict[str, int] = defaultdict(lambda: 0)
-         fsize: DefaultDict[Tuple[str, str], int] = defaultdict(lambda: 0)
-         field_maps: DefaultDict[Tuple[str, str], List[Tuple[str, str]]] = defaultdict(
-             list
-         )
- 
          # We first need a set of masks for each particle type
-         chunks = list(chunks)
+         ptf = defaultdict(list)  # ON-DISK TO READ
+         fsize = defaultdict(lambda: 0)  # COUNT RV
+         field_maps = defaultdict(list)  # ptypes -> fields
          unions = self.ds.particle_unions
          # What we need is a mapping from particle types to return types
          for field in fields:
              ftype, fname = field
              fsize[field] = 0
              # We should add a check for p.fparticle_unions or something here
              if ftype in unions:
                  for pt in unions[ftype]:
                      ptf[pt].append(fname)
                      field_maps[pt, fname].append(field)
              else:
                  ptf[ftype].append(fname)
                  field_maps[field].append(field)
-         # Now we have our full listing
- 
-         # Now we add particle counts across chunks to psize
-         self._count_particles_chunks(psize, chunks, ptf, selector)

          # Now we allocate
+         psize = {dobj.ptype: dobj.particle_number}
          for field in fields:
              if field[0] in unions:
                  for pt in unions[field[0]]:
                      fsize[field] += psize.get(pt, 0)
              else:
                  fsize[field] += psize.get(field[0], 0)
-         shape: Tuple[int, ...]
          for field in fields:
              if field[1] in self._vector_fields:
-                 vsize = self._vector_fields[field[1]]  # type:ignore
-                 # note: the above line causes a mypy failure due to how we
-                 # convert _vector_fields from a tuple to dict in __init__. mypy
-                 # is expecting a tuple here. And since _vector_fields is used in
-                 # many places, just ignoring for now...
-                 shape = (fsize[field], vsize)
?                                           -

+                 shape = (fsize[field], self._vector_fields[field[1]])
?                                        ++++++ +++++++++++ ++  ++++++

              elif field[1] in self._array_fields:
                  shape = (fsize[field],) + self._array_fields[field[1]]
+             elif field in self.ds.scalar_field_list:
+                 shape = (1,)
              else:
                  shape = (fsize[field],)
              rv[field] = np.empty(shape, dtype="float64")
              ind[field] = 0
          # Now we read.
-         for field_r, vals in self._read_particle_fields(chunks, ptf, selector):
?                                                         ^^^^^^     ----------

+         for field_r, vals in self._read_particle_fields(dobj, ptf):
?                                                         ^^^^

              # Note that we now need to check the mappings
              for field_f in field_maps[field_r]:
                  my_ind = ind[field_f]
-                 # mylog.debug("Filling %s from %s to %s with %s",
-                 #    field_f, my_ind, my_ind+vals.shape[0], field_r)
                  rv[field_f][my_ind : my_ind + vals.shape[0], ...] = vals
                  ind[field_f] += vals.shape[0]
          # Now we need to truncate all our fields, since we allow for
          # over-estimating.
          for field_f in ind:
              rv[field_f] = rv[field_f][: ind[field_f]]
          return rv

```

In [89]:


pprint(result)

['-     def _read_particle_selection(',
 '+     def _read_particle_selection(self, dobj, fields):',
 '?                                  ++++++++++++++++++++\n',
 '+         rv = {}',
 '+         ind = {}',
 '-         self, chunks, selector, fields: List[Tuple[str, str]]',
 '-     ) -> Mapping[Tuple[str, str], np.ndarray]:',
 '-         rv = {}  # the return dictionary',
 '-         ind = {}  # holds the most recent max index of the return arrays '
 'by field',
 '- ',
 '-         # Initialize containers for tracking particle, field information',
 '-         # ptf (particle field types) maps particle type to list of on-disk '
 'fields to read',
 '-         # psize maps particle type to on-disk size across chunks',
 '-         # fsize maps particle type to size of return values',
 '-         # field_maps stores fields, accounting for field unions',
 '-         ptf: DefaultDict[str, List[str]] = defaultdict(list)',
 '-         psize: DefaultDict[str, int] = defaultdict(lambda: 0)',
 '-  

```python
-     def _read_particle_selection(
+     def _read_particle_selection(self, dobj, fields):
?                                  ++++++++++++++++++++

+         rv = {}
+         ind = {}
-         self, chunks, selector, fields: List[Tuple[str, str]]
-     ) -> Mapping[Tuple[str, str], np.ndarray]:
-         rv = {}  # the return dictionary
-         ind = {}  # holds the most recent max index of the return arrays by field
- 
-         # Initialize containers for tracking particle, field information
-         # ptf (particle field types) maps particle type to list of on-disk fields to read
-         # psize maps particle type to on-disk size across chunks
-         # fsize maps particle type to size of return values
-         # field_maps stores fields, accounting for field unions
-         ptf: DefaultDict[str, List[str]] = defaultdict(list)
-         psize: DefaultDict[str, int] = defaultdict(lambda: 0)
-         fsize: DefaultDict[Tuple[str, str], int] = defaultdict(lambda: 0)
-         field_maps: DefaultDict[Tuple[str, str], List[Tuple[str, str]]] = defaultdict(
-             list
-         )
- 
          # We first need a set of masks for each particle type
-         chunks = list(chunks)
+         ptf = defaultdict(list)  # ON-DISK TO READ
+         fsize = defaultdict(lambda: 0)  # COUNT RV
+         field_maps = defaultdict(list)  # ptypes -> fields
          unions = self.ds.particle_unions
          # What we need is a mapping from particle types to return types
          for field in fields:
              ftype, fname = field
              fsize[field] = 0
              # We should add a check for p.fparticle_unions or something here
              if ftype in unions:
                  for pt in unions[ftype]:
                      ptf[pt].append(fname)
                      field_maps[pt, fname].append(field)
              else:
                  ptf[ftype].append(fname)
                  field_maps[field].append(field)
-         # Now we have our full listing
- 
-         # Now we add particle counts across chunks to psize
-         self._count_particles_chunks(psize, chunks, ptf, selector)

          # Now we allocate
+         psize = {dobj.ptype: dobj.particle_number}
          for field in fields:
              if field[0] in unions:
                  for pt in unions[field[0]]:
                      fsize[field] += psize.get(pt, 0)
              else:
                  fsize[field] += psize.get(field[0], 0)
-         shape: Tuple[int, ...]
          for field in fields:
              if field[1] in self._vector_fields:
-                 vsize = self._vector_fields[field[1]]  # type:ignore
-                 # note: the above line causes a mypy failure due to how we
-                 # convert _vector_fields from a tuple to dict in __init__. mypy
-                 # is expecting a tuple here. And since _vector_fields is used in
-                 # many places, just ignoring for now...
-                 shape = (fsize[field], vsize)
?                                           -

+                 shape = (fsize[field], self._vector_fields[field[1]])
?                                        ++++++ +++++++++++ ++  ++++++

              elif field[1] in self._array_fields:
                  shape = (fsize[field],) + self._array_fields[field[1]]
+             elif field in self.ds.scalar_field_list:
+                 shape = (1,)
              else:
                  shape = (fsize[field],)
              rv[field] = np.empty(shape, dtype="float64")
              ind[field] = 0
          # Now we read.
-         for field_r, vals in self._read_particle_fields(chunks, ptf, selector):
?                                                         ^^^^^^     ----------

+         for field_r, vals in self._read_particle_fields(dobj, ptf):
?                                                         ^^^^

              # Note that we now need to check the mappings
              for field_f in field_maps[field_r]:
                  my_ind = ind[field_f]
-                 # mylog.debug("Filling %s from %s to %s with %s",
-                 #    field_f, my_ind, my_ind+vals.shape[0], field_r)
                  rv[field_f][my_ind : my_ind + vals.shape[0], ...] = vals
                  ind[field_f] += vals.shape[0]
          # Now we need to truncate all our fields, since we allow for
          # over-estimating.
          for field_f in ind:
              rv[field_f] = rv[field_f][: ind[field_f]]
          return rv

```