In [7]:
import sys
import os
import polars as pl
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
from cytodataparser import CytoGateParser

# Structures/Core Testing

## GateNode Testing

In [8]:
from cytodataparser.structures import GateNode

In [9]:
root = GateNode("Root")
child = GateNode("Root/Child", parent=root)
root.add_child(child)

assert root.is_root()
assert not root.is_leaf()
assert child.is_leaf()
assert child.depth() == 1
assert root.height() == 1
assert child.parent == root
assert root.children == [child]

## GateTree Testing

In [10]:
from cytodataparser.structures import GateTree
import polars as pl

In [11]:
row = pl.DataFrame({
    "Cells | Count": [1000, 10],
    "Cells/CD4 | Count": [600, 1],
    "Cells/CD4/Living | Count": [500, 1],
    "Cells/CD4/Living/Next | Count": [300, 1],
    "Cells/CD4/Living/Next/No | Count": [300, 1],
    "Cells/CD8 | Count": [400, 20],
}).row(0, named=True)
tree = GateTree(row)
assert tree.root.name == "Cells"
assert "Cells/CD4" in tree.nodes
assert tree.get_nodes("Cells/CD8")[0].measures["Count"] == 400
assert tree.get_nodes("Cells/CD8")[0].measures["pct_parent"] == 400 / 1000 * 100
assert tree.max_depth() == 4
assert tree.get_nodes("Cells/CD4/Ungated")[0].measures["Count"] == 100


## Test Utils

In [12]:
from cytodataparser.utils.predicates import parse_string_condition, from_range, matches_regex

In [13]:
cond = parse_string_condition("> 10 and < 20")
assert cond(15)
assert not cond(25)

cond = parse_string_condition("== 10")
assert cond(10)
assert not cond(15)

cond = parse_string_condition("<= 10")
assert cond(9)
assert not cond(15)

cond = parse_string_condition(">= 10")
assert not cond(9)
assert cond(15)

cond = parse_string_condition("!= 10")
assert cond(9)
assert not cond(10)

cond = parse_string_condition("!= Hello")
assert not cond("Hello")
assert cond("Goodbye")

cond = from_range(range(10, 20))
assert cond(15)
assert not cond(25)

cond = matches_regex(r"^AB\d{3}Z$")
assert cond("AB123Z")
assert not cond("AB12Z")

## Test Core

In [14]:
from cytodataparser import CytoGateParser

In [17]:
parser.samples

MouseID,Strain,Cells | Count,Cells/CD4 | Count
str,str,i64,i64
"""A""","""B6""",1000,500
"""B""","""F1""",900,300


In [18]:
df = pl.DataFrame({
    "MouseID": ["A", "B"],
    "Strain": ["B6", "F1"],
    "Cells | Count": [1000, 900],
    "Cells/CD4 | Count": [500, 300],
})
parser = CytoGateParser.from_xlsx(df)
assert len(parser) == 2
assert parser.get_metadata(0)["Strain"] == "B6"
match = parser.find_samples({"Strain": "== F1"})
assert len(match) == 1

TypeError: object of type 'NoneType' has no len()

# Plotting Testing

In [1]:
from cytodataparser.plotting import categorical_plot
from cytodataparser import CytoGateParser
from cytodataparser.utils import helpers
from cytodataparser import load_from_xlsx, save_to_json, load_from_json
import polars as pl

In [5]:
df = pl.read_excel(r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Summary.xlsx")

In [2]:
cgp = load_from_xlsx(r"C:\Users\brend\Downloads\2025-06-20 Skin Tx Summary.xlsx")

In [2]:
cgp= load_from_json("./Test.json")

In [8]:
cgp.find_samples(criteria={"MouseID": "== 2"})

[<cytodataparser.structures.sample.Sample at 0x1ae5ff32090>,
 <cytodataparser.structures.sample.Sample at 0x1ae5fe4f8b0>,
 <cytodataparser.structures.sample.Sample at 0x1ae5fe4fa10>,
 <cytodataparser.structures.sample.Sample at 0x1ae2cc531b0>]

In [9]:
cgp.get_nodes(terms=["Anergic"])

[{'metadata': {'Char. Date': '2025-06-20',
   'MouseID': 1,
   'Sex': 'Male',
   'DoB': '2025-03-19',
   'CageID': 4023,
   'Group': 'Experimental',
   'Tissue': 'LN',
   'Transplant Date': '2025-05-30',
   'Transplant Donor Strain': 'Balb/c'},
  'nodes': [Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Th/Anergic,
   Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive/Anergic,
   Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem/Anergic,
   Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem/Anergic]},
 {'metadata': {'Char. Date': '2025-06-20',
   'MouseID': 1,
   'Sex': 'Male',
   'DoB': '2025-03-19',
   'CageID': 4024,
   'Group': 'Control',
   'Tissue': 'LN',
   'Transplant Date': '2025-05-30',
   'Transplant Donor Strain': 'Balb/c'},
  'nodes': [Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Th/Anergic,
   Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Naive/Anergic,
   Cells/Singlets/CD45.1+ CD4+/CD4+ CD3+/Tmem/Anergic,
   Cells/Singlets/CD45.1- CD4+/CD4+ CD3+/Tmem/Anergic]},
 {'metadata': {'Char. Date': '2025-06-20',
   'MouseID'

In [6]:
save_to_json(cgp, path="./Test.json")

In [None]:
categorical_plot(cgp, node=["Anergic"], sample_criteria=None, x="Group", color="node", plot_type="box", show_points=True, title="Percent Anergic Cells in Donor and Recipient Populations")

Running t-test


In [None]:
#TODO: Points in this case are outside of the violins
categorical_plot(cgp, node=["CD4+"], sample_criteria=None, x="MouseID", color="Lobe", plot_type="violin", show_points=True)





# Stats Testing

In [2]:
from cytodataparser.analysis import summary, testing

In [3]:
df = pl.read_excel(r"C:\Users\bfoley3\OneDrive - Johns Hopkins\Hybrid Thymus\Experimental Data\2025-05-14 Thymocyte Characterization\2025-05-14 Thymocyte Characterization.xlsx")
cgp = CytoGateParser(df)

In [4]:
summary.describe_metric(cgp, node=["Recipient", "CD4+"], sample_criteria={"Strain": "== B6"}, groupby="Lobe")

{'gates': ['Cells/Singlets/Recipient/CD4+'],
 'metric': 'pct_parent',
 'n_total': 8,
 'groupby': 'Lobe',
 'n_per_group': {('R',): 4, ('L',): 4},
 'group_stats': {('R',): {'mean': 49.555813857222375,
   'std': 39.66454157939692,
   'median': 45.165742394900754,
   'q25': 16.25884726201142,
   'q75': 78.46270899011171},
  ('L',): {'mean': 61.707266364848934,
   'std': 36.49329105164335,
   'median': 71.02791332203867,
   'q25': 44.98847061526951,
   'q75': 87.74670907161811}}}

In [6]:
testing.run_ttest(cgp, node=["Donor", "CD4+", "5"], groupby="Strain")

{'test': 'welch_t_test',
 'metric': 'pct_parent',
 'paired': False,
 'groupby': 'Strain',
 'groups': ['F1', 'B6'],
 'n_per_group': {'F1': 4, 'B6': 8},
 'statistic': -8.682978980913258,
 'p_value': 5.862985332299878e-06,
 'group_means': {'F1': 4.26308769531631, 'B6': 37.99310460333008},
 'group_stds': {'F1': 4.4574801494159475, 'B6': 8.999084091562345},
 'significant': True}

In [7]:
testing.run_anova(cgp, node=["Donor", "CD4+", "5"], sample_criteria={"Strain": "== B6"}, groupby="MouseID")



{'test': 'kruskal',
 'gate': ['Cells/Singlets/Donor/CD4+/5'],
 'metric': 'pct_parent',
 'groupby': 'MouseID',
 'groups': ['L', 'LL', 'LR', 'R'],
 'n_per_group': {'L': 2, 'LL': 2, 'LR': 2, 'R': 2},
 'statistic': 0.8333333333333321,
 'p_value': 0.8414786391315312,
 'group_means': {'L': 40.06297214390288,
  'LL': 32.80146663488259,
  'LR': 37.80006942919051,
  'R': 41.30791020534437},
 'group_stds': {'L': 16.137657904664685,
  'LL': 1.7461284996842408,
  'LR': 13.185657428446303,
  'R': 6.710049316230868},
 'assumption_check': {'normality': True, 'equal_variance': False},
 'posthoc': {'test': 'dunn',
  'comparisons': [{'group1': 'L',
    'group2': 'LL',
    'p_value': 1.0,
    'mean_diff': 7.261505509020296},
   {'group1': 'L',
    'group2': 'LR',
    'p_value': 1.0,
    'mean_diff': 2.262902714712375},
   {'group1': 'L',
    'group2': 'R',
    'p_value': 1.0,
    'mean_diff': -1.2449380614414878},
   {'group1': 'LL',
    'group2': 'LR',
    'p_value': 1.0,
    'mean_diff': -4.99860279430

In [8]:
testing.run_chi2_test(cgp, row_field="Strain", col_field="Lobe")



{'test': 'chi2',
 'row_field': 'Strain',
 'col_field': 'Lobe',
 'row_labels': ['B6', 'F1'],
 'col_labels': ['L', 'R'],
 'chi2': 0.0,
 'p_value': 1.0,
 'degrees_of_freedom': 1,
 'observed': [[4, 4], [2, 2]],
 'expected': [[4.0, 4.0], [2.0, 2.0]]}

In [9]:
testing.run_correlation(cgp, node_a=["Donor", "CD4+", "5"], node_b=["Recipient", "CD4+", "5"])

{'test': 'correlation',
 'method': 'pearson',
 'gates': ['Donor/CD4+/5', 'Recipient/CD4+/5'],
 'metrics': 'Count',
 'n': 12,
 'correlation': -0.143888658994571,
 'p_value': 0.6554936985187627}

In [10]:
testing.run_correlation(cgp, node_a=["Donor", "CD4+", "5"], metric=["Count", "pct_parent"])

{'test': 'correlation',
 'method': 'pearson',
 'gates': ['Donor/CD4+/5:Count', 'Donor/CD4+/5:pct_parent'],
 'metrics': 'Count vs pct_parent',
 'n': 12,
 'correlation': 0.6029104493145697,
 'p_value': 0.03797428767841035}