# CytoTable object graph analysis

This notebook uses `objgraph` and Python's `gc` module to inspect the objects CytoTable creates and retains across repeated `cytotable.convert` calls. The work is related to [CytoTable#75](https://github.com/cytomining/CytoTable/issues/75).

In [None]:
import gc
import shutil
import tempfile

import cytotable
import objgraph
from IPython.display import Image

# DEBUG_LEAK combines DEBUG_COLLECTABLE, DEBUG_UNCOLLECTABLE and DEBUG_SAVEALL:
# the collector prints information about the objects it finds and saves every
# unreachable object in gc.garbage instead of freeing it, so gc.garbage can be
# inspected later in this notebook.
gc.set_debug(gc.DEBUG_LEAK)

In [None]:
# File name that the objgraph reference-graph images are written to.
graph_img = "cytotable-object-graph.png"

# SQLite source files to reference in this analysis.
list_of_sqlite_files = ["./examples/data/all_cellprofiler.sqlite"]

In [None]:
# First (baseline) run: convert the example SQLite file to Parquet with the
# `cellprofiler_sqlite_pycytominer` preset so that CytoTable's objects exist
# before the reference graph is drawn in the next cell.
cytotable.convert(
    source_path="./examples/data/all_cellprofiler.sqlite",
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

In [None]:
# Write a graph of the objects referenced by the `cytotable` module, annotated
# with reference counts, to the image file named by `graph_img`.
objgraph.show_refs(cytotable, refcounts=True, filename=graph_img)

In [None]:
# Second run of the identical conversion, so the following
# `objgraph.show_growth` cell can report object-count increases.
cytotable.convert(
    source_path="./examples/data/all_cellprofiler.sqlite",
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

In [None]:
# Report the three object types with the largest count increases.
# show_growth() prints the table itself and returns None, so it is called
# directly rather than wrapped in print(), which would add a stray "None" line.
objgraph.show_growth(limit=3)

In [None]:
# Third run of the identical conversion; the following
# `objgraph.show_growth` cell reports what grew since its previous call.
cytotable.convert(
    source_path="./examples/data/all_cellprofiler.sqlite",
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

In [None]:
# Report the three object types whose counts grew the most since the previous
# show_growth() call. show_growth() prints the table itself and returns None,
# so it is called directly rather than wrapped in print(), which would add a
# stray "None" line.
objgraph.show_growth(limit=3)

In [None]:
# Fourth run of the identical conversion; the following
# `objgraph.show_growth` cell reports what grew since its previous call.
cytotable.convert(
    source_path="./examples/data/all_cellprofiler.sqlite",
    dest_path="./examples/data/test-result.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
)

In [None]:
# Report the three object types whose counts grew the most since the previous
# show_growth() call. show_growth() prints the table itself and returns None,
# so it is called directly rather than wrapped in print(), which would add a
# stray "None" line.
objgraph.show_growth(limit=3)

In [None]:
# Re-render the `cytotable` reference graph after the repeated conversions.
# NOTE: this writes to the same `graph_img` path as the earlier show_refs
# cell, so that earlier image is overwritten.
objgraph.show_refs(cytotable, refcounts=True, filename=graph_img)

In [None]:
# Ask objgraph for the objects it considers candidate leaks, then write a
# reference graph (with reference counts) for the first three to roots.png.
roots = objgraph.get_leaking_objects()
objgraph.show_refs(roots[:3], refcounts=True, filename="roots.png")

In [None]:
# Tally the candidate leak objects in `roots` by type; shortnames=False asks
# objgraph for module-qualified type names.
objgraph.show_most_common_types(objects=roots, shortnames=False)

In [None]:
# Print up to five of the objects in `roots` whose type name is "dict".
print(objgraph.by_type(typename="dict", objects=roots)[:5])

In [None]:
# Print up to five of the objects in `roots` whose type name is "cell".
print(objgraph.by_type(typename="cell", objects=roots)[:5])

In [None]:
# Explicitly collect garbage and tabulate whatever ended up in gc.garbage.
import sys

import pandas as pd

# Force a full collection; `collected` is the number of unreachable objects found.
collected = gc.collect()
if gc.garbage:
    # NOTE: gc.DEBUG_LEAK (set at the top of this notebook) includes
    # DEBUG_SAVEALL, so gc.garbage holds every unreachable object the collector
    # found, not only uncollectable ones.
    print(f"Memory leak detected: {len(gc.garbage)} objects")

# Build the frame unconditionally, with explicit columns, so `df` always exists
# (as an empty frame when gc.garbage is empty) and the cells that use it do not
# fail with a NameError.
df = pd.DataFrame(
    [
        {"type": type(obj), "refcount": sys.getrefcount(obj), "repr": repr(obj)}
        for obj in gc.garbage
    ],
    columns=["type", "refcount", "repr"],
)

df.head()

In [None]:
# Order the collected objects by reference count, highest first, then drop
# rows that are exact duplicates.
(
    df.sort_values(by="refcount", ascending=False)
    .drop_duplicates()
)