# CytoTable object graph analysis

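This notebook uses [objgraph](https://objgraph.readthedocs.io/) to explore how Python objects are created and retained when `cytotable.convert` is run repeatedly on the same input. The work is related to [CytoTable#75](https://github.com/cytomining/CytoTable/issues/75).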

In [1]:
import shutil
import tempfile

import cytotable
import objgraph
from IPython.display import Image

In [2]:
# SQLite source files for this analysis
list_of_sqlite_files = ["./examples/data/all_cellprofiler.sqlite"]

# output image path handed to objgraph.show_refs
graph_img = "cytotable-object-graph.png"

In [3]:
# First conversion run: CellProfiler SQLite -> Parquet.
# The source is read from list_of_sqlite_files (defined in the config cell)
# instead of repeating the path literal, so the input has one source of truth.
cytotable.convert(
    source_path=list_of_sqlite_files[0],
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

PosixPath('/Users/dabu5788/Documents/work/CytoTable-benchmarks-d33bs/notebooks/examples/data/test-result.parquet')

In [4]:
# Draw the reference graph that starts at the cytotable module, annotated with
# reference counts, and save it to the configured image file.
objgraph.show_refs(
    cytotable,
    filename=graph_img,
    refcounts=True,
)

Graph written to /var/folders/m0/31lvvzb50sl0d_l7bnb74cdr0000gq/T/objgraph-2kiknz8x.dot (27 nodes)
Image generated as cytotable-object-graph.png


In [5]:
# Repeat the same conversion so the next cell can measure object growth.
# The source is read from list_of_sqlite_files (defined in the config cell)
# instead of repeating the path literal, so the input has one source of truth.
cytotable.convert(
    source_path=list_of_sqlite_files[0],
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

Reusing previously loaded Parsl configuration.


PosixPath('/Users/dabu5788/Documents/work/CytoTable-benchmarks-d33bs/notebooks/examples/data/test-result.parquet')

In [6]:
# show_growth() writes its table to stdout itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
# This is the first show_growth() call in the notebook, so its counts act as
# the baseline for the later calls.
objgraph.show_growth(limit=3)

function    30989    +30989
tuple       22084    +22084
dict        18528    +18528
None



In [7]:
# Repeat the same conversion so the next cell can measure object growth.
# The source is read from list_of_sqlite_files (defined in the config cell)
# instead of repeating the path literal, so the input has one source of truth.
cytotable.convert(
    source_path=list_of_sqlite_files[0],
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

Reusing previously loaded Parsl configuration.


PosixPath('/Users/dabu5788/Documents/work/CytoTable-benchmarks-d33bs/notebooks/examples/data/test-result.parquet')

In [8]:
# show_growth() writes its table to stdout itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
# Reports the three types whose counts grew most since the previous call.
objgraph.show_growth(limit=3)

dict    18585       +57
list     8765       +39
cell    11044       +14
None



In [9]:
# Repeat the same conversion so the next cell can measure object growth.
# The source is read from list_of_sqlite_files (defined in the config cell)
# instead of repeating the path literal, so the input has one source of truth.
cytotable.convert(
    source_path=list_of_sqlite_files[0],
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

Reusing previously loaded Parsl configuration.


PosixPath('/Users/dabu5788/Documents/work/CytoTable-benchmarks-d33bs/notebooks/examples/data/test-result.parquet')

In [10]:
# show_growth() writes its table to stdout itself and returns None, so it is
# called directly; wrapping it in print() only adds a stray "None" line.
# Reports the three types whose counts grew most since the previous call.
objgraph.show_growth(limit=3)

dict    18644       +59
list     8801       +36
set      1398       +12
None



In [11]:
# Redraw the cytotable reference graph after the repeated conversions; this
# writes to the same image file as the earlier show_refs call.
objgraph.show_refs(
    cytotable,
    filename=graph_img,
    refcounts=True,
)

Graph written to /var/folders/m0/31lvvzb50sl0d_l7bnb74cdr0000gq/T/objgraph-m6n5ciyf.dot (27 nodes)
Image generated as cytotable-object-graph.png


In [12]:
# Gather the objects objgraph flags as potentially leaking (see the
# get_leaking_objects documentation for the exact criterion), then graph the
# references held by the first three of them.
roots = objgraph.get_leaking_objects()
objgraph.show_refs(
    roots[:3],
    filename="roots.png",
    refcounts=True,
)

Graph written to /var/folders/m0/31lvvzb50sl0d_l7bnb74cdr0000gq/T/objgraph-e95ozh1s.dot (60 nodes)
Image generated as roots.png


In [13]:
# Tally the most frequent types among the collected roots, using
# module-qualified type names.
objgraph.show_most_common_types(
    shortnames=False,
    objects=roots,
)

builtins.dict                         2467
builtins.weakref                      1988
builtins.tuple                        724
_frozen_importlib_external.FileFinder 182
builtins.list                         160
_cffi_backend.CType                   63
logging.PlaceHolder                   59
builtins.slice                        59
builtins.module                       52
builtins.function                     50


In [14]:
# Print at most the first 20 dict instances found among the collected roots.
print(
    objgraph.by_type(objects=roots, typename="dict")[:20]
)

[{140684002326528: <weakref at 0x1442c9130; to 'type' at 0x7ff38bfbf800 (StataNonCatValueLabel)>}, {140684002330864: <weakref at 0x1442c9400; to 'ABCMeta' at 0x7ff38bfc08f0 (StataReader)>, 140684002333696: <weakref at 0x1442c9450; to 'type' at 0x7ff38bfc1400 (StataWriter)>}, {140684002338016: <weakref at 0x1442c94f0; to 'type' at 0x7ff38bfc24e0 (StataWriter117)>}, {140684002339536: <weakref at 0x1442c9590; to 'type' at 0x7ff38bfc2ad0 (StataWriterUTF8)>}, {140684002340480: <weakref at 0x1442e0680; to 'type' at 0x7ff38bfc2e80 (_EtreeFrameParser)>, 140684002341424: <weakref at 0x1442e06d0; to 'type' at 0x7ff38bfc3230 (_LxmlFrameParser)>}, {'Py_Repr': []}, {'__new__': <built-in method __new__ of type object at 0x144454b48>, '__doc__': 'File-level encryption properties for the low-level API', '__pyx_vtable__': <capsule object NULL at 0x144375090>}, {'__repr__': <slot wrapper '__repr__' of 'pyarrow._parquet.ParquetSchema' objects>, '__lt__': <slot wrapper '__lt__' of 'pyarrow._parquet.Parque