In [None]:
import sys
import os
import polars as pl
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
from cytodataparser import CytoGateParser

# Structures/Core Testing

## GateNode Testing

In [None]:
from cytodataparser.structures import GateNode

In [None]:
root = GateNode("Root")
child = GateNode("Root/Child", parent=root)
root.add_child(child)

assert root.is_root()
assert not root.is_leaf()
assert child.is_leaf()
assert child.depth() == 1
assert root.height() == 1
assert child.parent == root
assert root.children == [child]

## GateTree Testing

In [None]:
from cytodataparser.structures import GateTree
import polars as pl

In [None]:
row = pl.DataFrame({
    "Cells | Count": [1000, 10],
    "Cells/CD4 | Count": [600, 1],
    "Cells/CD4/Living | Count": [500, 1],
    "Cells/CD4/Living/Next | Count": [300, 1],
    "Cells/CD4/Living/Next/No | Count": [300, 1],
    "Cells/CD8 | Count": [400, 20],
}).row(0, named=True)
tree = GateTree(row)
assert tree.root.name == "Cells"
assert "Cells/CD4" in tree.nodes
assert tree.get_nodes("Cells/CD8")[0].measures["Count"] == 400
assert tree.get_nodes("Cells/CD8")[0].measures["pct_parent"] == 400 / 1000 * 100
assert tree.max_depth() == 4
assert tree.get_nodes("Cells/CD4/Ungated")[0].measures["Count"] == 100


## Test Utils

In [None]:
from cytodataparser.utils.predicates import parse_string_condition, from_range, matches_regex

In [None]:
cond = parse_string_condition("> 10 and < 20")
assert cond(15)
assert not cond(25)

cond = parse_string_condition("== 10")
assert cond(10)
assert not cond(15)

cond = parse_string_condition("<= 10")
assert cond(9)
assert not cond(15)

cond = parse_string_condition(">= 10")
assert not cond(9)
assert cond(15)

cond = parse_string_condition("!= 10")
assert cond(9)
assert not cond(10)

cond = parse_string_condition("!= Hello")
assert not cond("Hello")
assert cond("Goodbye")

cond = from_range(range(10, 20))
assert cond(15)
assert not cond(25)

cond = matches_regex(r"^AB\d{3}Z$")
assert cond("AB123Z")
assert not cond("AB12Z")

## Test Core

In [1]:
from cytodataparser import CytoGateParser
import cytodataparser.plotting as plt
import flowkit as fk

In [2]:
cgp = CytoGateParser.from_file(r"C:\Users\brend\OneDrive\Individual Research\CytoDataParser\examples\2025-06-20 Skin Tx Characterization\23-Jun-2025.wsp", fcs_files=r"C:\Users\brend\OneDrive\Individual Research\CytoDataParser\examples\2025-06-20 Skin Tx Characterization\Unmixed")



In [5]:
plt.categorical_plot(cgp, x=["MouseID", "Tissue"], y="relative_percent", node_terms="CD45.1+ CD4+")

Running ANOVA



Using first node: root/Cells/Singlets/CD45.1+ CD4+
Nodes found: ['root/Cells/Singlets/CD45.1+ CD4+']



Color column not found in DataFrame



In [6]:
directories = [r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Characterization\Unmixed\Control",
               r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Characterization\Unmixed\Experimental"]
sample_paths = [directories[0] + r"\1 Spl.fcs", directories[0] + r"\1 LN.fcs", directories[0] + r"\2 Spl.fcs", directories[0] + r"\2 LN.fcs", directories[0] + r"\3 Spl.fcs", directories[0] + r"\3 LN.fcs",
                directories[1] + r"\1 Spl.fcs", directories[1] + r"\1 LN.fcs", directories[1] + r"\2 Spl.fcs", directories[1] + r"\2 LN.fcs", directories[1] + r"\3 Spl.fcs", directories[1] + r"\3 LN.fcs"]

In [60]:
ws = fk.Workspace(r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Characterization\23-Jun-2025.wsp", fcs_samples=sample_paths)

In [63]:
sample = ws.get_samples()[0]

In [64]:
sample

Sample(v3.1, 1 LN.fcs, 14 channels, 36344 events)

In [36]:
ws.summary()

Unnamed: 0_level_0,samples,loaded_samples,gates,max_gate_depth
group_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All Samples,12,12,18,6
Control,6,6,18,6
Experimental,6,6,18,6
LN,6,6,18,6
Spleen,6,6,18,6


In [None]:
ws.get_gate_hierarchy("1 LN.fcs")

In [76]:
ws.analyze_samples(group_name="Control", sample_id="2 LN.fcs")

In [None]:
ws.get_gating_results("2 LN.fcs")

<flowkit._models.gating_results.GatingResults at 0x296de8139b0>

In [82]:
result['full_path'] = result.apply(
    lambda row: "/".join(row['gate_path'] + (row['gate_name'],)),
    axis=1
)

In [83]:
result

Unnamed: 0,sample,gate_path,gate_name,gate_type,quadrant_parent,parent,count,absolute_percent,relative_percent,level,full_path
0,2 LN.fcs,"(root,)",Cells,PolygonGate,,root,89741,97.61247,97.61247,0,root/Cells
1,2 LN.fcs,"(root, Cells)",Singlets,PolygonGate,,Cells,86038,93.584668,95.873681,1,root/Cells/Singlets
2,2 LN.fcs,"(root, Cells, Singlets)",CD45.1+ CD4+,PolygonGate,,Singlets,578,0.628698,0.671796,2,root/Cells/Singlets/CD45.1+ CD4+
3,2 LN.fcs,"(root, Cells, Singlets)",CD45.1- CD4+,PolygonGate,,Singlets,68487,74.494213,79.600874,2,root/Cells/Singlets/CD45.1- CD4+
4,2 LN.fcs,"(root, Cells, Singlets, CD45.1+ CD4+)",CD4+ CD3+,PolygonGate,,CD45.1+ CD4+,514,0.559085,88.927336,3,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+
5,2 LN.fcs,"(root, Cells, Singlets, CD45.1- CD4+)",CD4+ CD3+,PolygonGate,,CD45.1- CD4+,68467,74.472459,99.970797,3,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+
6,2 LN.fcs,"(root, Cells, Singlets, CD45.1+ CD4+, CD4+ CD3+)",Naive,PolygonGate,,CD4+ CD3+,182,0.197964,35.40856,4,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Naive
7,2 LN.fcs,"(root, Cells, Singlets, CD45.1- CD4+, CD4+ CD3+)",Naive,PolygonGate,,CD4+ CD3+,44800,48.729551,65.432982,4,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive
8,2 LN.fcs,"(root, Cells, Singlets, CD45.1+ CD4+, CD4+ CD3+)",Tmem,PolygonGate,,CD4+ CD3+,46,0.050035,8.949416,4,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem
9,2 LN.fcs,"(root, Cells, Singlets, CD45.1- CD4+, CD4+ CD3+)",Tmem,PolygonGate,,CD4+ CD3+,2963,3.222894,4.327632,4,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem


In [79]:
result = ws.get_analysis_report()

In [86]:
summary = result.pivot(index = "sample", columns="full_path", values=["count", "absolute_percent", "relative_percent"])

summary.columns = [f"{col[1]} | {col[0]}" for col in summary.columns]

# Optional: reset index if you want 'sample' as a column
summary = summary.reset_index()

In [93]:
import polars as pl

In [97]:
summary = pl.DataFrame(summary)

In [101]:
metadata = pl.DataFrame([ws.get_keywords(sample.id) for sample in ws.get_samples()])

In [102]:
summary.join(metadata, left_on="sample", right_on = "$FIL")

sample,root/Cells | count,root/Cells/Singlets | count,root/Cells/Singlets/CD45.1+ CD4+ | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+ | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Naive | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Naive/Anergic | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem/Anergic | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Treg | count,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Treg/Anergic | count,root/Cells/Singlets/CD45.1- CD4+ | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+ | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive/Anergic | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem/Anergic | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Treg | count,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Treg/Anergic | count,root/Cells | absolute_percent,root/Cells/Singlets | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+ | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+ | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Naive | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Naive/Anergic | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem/Anergic | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Treg | absolute_percent,root/Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Treg/Anergic | absolute_percent,root/Cells/Singlets/CD45.1- CD4+ | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+ | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive/Anergic | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem/Anergic | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Treg | absolute_percent,root/Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Treg/Anergic | absolute_percent,…,$P2S,$P3S,$P4S,$P5S,$P6S,$P7S,FILENAME,$P15N,$P15S,$P15R,$P16N,$P16S,$P16R,$P17N,$P17S,$P17R,$P18N,$P18S,$P18R,$P19N,$P19S,$P19R,$P20N,$P20S,$P20R,$P21N,$P21S,$P21R,Char. Date,MouseID,Sex,DoB,CageID,Group,Tissue,Transplant Date,Transplant Donor Strain
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""2 LN.fcs""",89741.0,86038.0,578.0,514.0,182.0,0.0,46.0,2.0,24.0,18.0,68487.0,68467.0,44800.0,54.0,2963.0,269.0,5368.0,2653.0,97.61247,93.584668,0.628698,0.559085,0.197964,0.0,0.050035,0.002175,0.026105,0.019579,74.494213,74.472459,48.729551,0.058737,3.222894,0.292595,5.838844,2.885703,…,"""""","""""","""""","""""","""""","""""","""""","""Comp-eFluor 450-A""","""FoxP3""","""4194304""","""Comp-BUV 805-A""","""CD44""","""4194304""","""Comp-Alexa Fluor 488-A""","""CD4""","""4194304""","""Comp-PE-A""","""CD73""","""4194304""","""Comp-PE-Cy7-A""","""CD45.1""","""4194304""","""Comp-APC-A""","""FR4""","""4194304""","""Comp-APC-eFluor 780-A""","""CD3""","""4194304""","""2025-06-20""","""2""","""Male""","""2025-03-19""","""04024""","""Control""","""LN""","""2025-05-30""","""Balb/c"""


In [88]:
ws.get_keywords()

TypeError: Workspace.get_keywords() missing 1 required positional argument: 'sample_id'

# Plotting Testing

In [None]:
from cytodataparser.plotting import categorical_plot
from cytodataparser import CytoGateParser
from cytodataparser.utils import helpers
from cytodataparser import save_to_json
import polars as pl

In [None]:
cgp = CytoGateParser.from_file(r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Summary.xlsx")

In [None]:
cgp.add_samples(path = r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Summary.xlsx")

In [None]:
cgp.find_samples(criteria={"MouseID": "== 2"})

In [None]:
cgp.get_nodes(terms=["Anergic"])

In [None]:
save_to_json(cgp, path="./Test.json")

In [None]:
categorical_plot(cgp, node=["Anergic"], sample_criteria=None, x="Group", color="node", plot_type="box", show_points=True, title="Percent Anergic Cells in Donor and Recipient Populations")

In [None]:
#TODO: Points in this case are outside of the violins
categorical_plot(cgp, node=["CD4+"], sample_criteria=None, x="MouseID", color="Lobe", plot_type="violin", show_points=True)

# Stats Testing

In [None]:
from cytodataparser.analysis import summary, testing

In [None]:
df = pl.read_excel(r"C:\Users\bfoley3\OneDrive - Johns Hopkins\Hybrid Thymus\Experimental Data\2025-05-14 Thymocyte Characterization\2025-05-14 Thymocyte Characterization.xlsx")
cgp = CytoGateParser(df)

In [None]:
summary.describe_metric(cgp, node=["Recipient", "CD4+"], sample_criteria={"Strain": "== B6"}, groupby="Lobe")

In [None]:
testing.run_ttest(cgp, node=["Donor", "CD4+", "5"], groupby="Strain")

In [None]:
testing.run_anova(cgp, node=["Donor", "CD4+", "5"], sample_criteria={"Strain": "== B6"}, groupby="MouseID")

In [None]:
testing.run_chi2_test(cgp, row_field="Strain", col_field="Lobe")

In [None]:
testing.run_correlation(cgp, node_a=["Donor", "CD4+", "5"], node_b=["Recipient", "CD4+", "5"])

In [None]:
testing.run_correlation(cgp, node_a=["Donor", "CD4+", "5"], metric=["Count", "pct_parent"])