# 2. Generating Aggregate Profiles

This notebook transforms single-cell morphological profiles from the CFReT pilot dataset into summary representations for downstream analysis. Aggregation reduces noise and enables robust comparisons between experimental conditions by collapsing hundreds or thousands of single-cell measurements into representative profiles.

Two levels of aggregation are generated:
1. **Replicate-level profiles**: Aggregate cells by well position, heart number, cell type, and treatment to create technical replicate profiles
2. **Consensus profiles**: Further aggregate replicates by heart type and treatment to generate condition-level consensus signatures

Here we use `pycytominer.aggregate()` with median aggregation to generate the two profile levels described above. The resulting profiles are then saved as parquet files.

In [1]:
import sys
import pathlib

import polars as pl
from pycytominer import aggregate

sys.path.append("../../")
from utils.data_utils import split_meta_and_features

Setting input and output paths 

In [2]:
# input: feature-selected single-cell profiles for the cfret-pilot dataset
# (strict resolution fails right away if the file is missing)
cfret_profiles_path = pathlib.Path(
    "../0.download-data/data/sc-profiles/cfret/localhost230405150001_sc_feature_selected.parquet"
)
cfret_profiles_path = cfret_profiles_path.resolve(strict=True)

# output: top-level results directory and the aggregate profile sub-directory
results_dir = pathlib.Path("./results").resolve()
aggregate_profiles_dir = results_dir / "aggregate_profiles"

# create the parent first, then the child; both calls are no-ops when the
# directory already exists
for out_dir in (results_dir, aggregate_profiles_dir):
    out_dir.mkdir(exist_ok=True)

In [3]:
# read the cfret-pilot single-cell profiles into a polars DataFrame
cfret_df = pl.read_parquet(cfret_profiles_path)

# build a "<cell type>_<treatment>" label (e.g. "failing_DMSO") for every cell
heart_treatment_label = pl.concat_str(
    [pl.col("Metadata_cell_type"), pl.col("Metadata_treatment")],
    separator="_",
).alias("Metadata_heart_treatment")
cfret_df = cfret_df.with_columns(heart_treatment_label)

# separate metadata columns from morphology feature columns
# (presumably returns two lists of column names -- confirm in utils.data_utils)
cfret_meta, cfret_feats = split_meta_and_features(cfret_df)

# report (rows, columns) and preview the first rows
print(cfret_df.shape)
cfret_df.head()

(15793, 678)


Metadata_cell_id,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_cell_type,Metadata_heart_failure_type,Metadata_treatment,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cells_Location_Center_X,Metadata_Cells_Location_Center_Y,Metadata_Image_Count_Cells,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Nuclei_Number_Object_Number,Metadata_Site,Cytoplasm_AreaShape_BoundingBoxMinimum_X,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_Eccentricity,Cytoplasm_AreaShape_Extent,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MeanRadius,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Perimeter,Cytoplasm_AreaShape_Solidity,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,Cytoplasm_AreaShape_Zernike_2_0,Cytoplasm_AreaShape_Zernike_2_2,Cytoplasm_AreaShape_Zernike_3_1,Cytoplasm_AreaShape_Zernike_4_0,Cytoplasm_AreaShape_Zernike_4_2,…,Nuclei_Texture_DifferenceVariance_Actin_3_01_256,Nuclei_Texture_DifferenceVariance_Mitochondria_3_03_256,Nuclei_Texture_DifferenceVariance_PM_3_03_256,Nuclei_Texture_InfoMeas1_ER_3_00_256,Nuclei_Texture_InfoMeas1_ER_3_01_256,Nuclei_Texture_InfoMeas1_ER_3_02_256,Nuclei_Texture_InfoMeas1_ER_3_03_256,Nuclei_Texture_InfoMeas1_Hoechst_3_00_256,Nuclei_Texture_InfoMeas1_Hoechst_3_01_256,Nuclei_Texture_InfoMeas1_Hoechst_3_02_256,Nuclei_Texture_InfoMeas1_Hoechst_3_03_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_00_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_01_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_02_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_03_256,Nuclei_Texture_InfoMeas1_PM_3_00_256,Nuclei_Texture_InfoMeas1_PM_3_01_256,Nuclei_Texture_InfoMeas1_PM_3_02_256,Nuclei_Texture_InfoMeas1_PM_3_03_256,Nuclei_Texture_InfoMeas2_ER_3_01_256,Nuclei_Texture_InfoMeas2_ER_3_03_256,Nuclei_Texture_InfoMeas2_Hoechst_3_01_256,Nuclei_Texture_InfoMeas2_Ho
echst_3_03_256,Nuclei_Texture_InfoMeas2_PM_3_01_256,Nuclei_Texture_InfoMeas2_PM_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Actin_3_02_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_01_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Mitochondria_3_03_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_03_256,Nuclei_Texture_SumEntropy_PM_3_01_256,Nuclei_Texture_SumVariance_ER_3_03_256,Nuclei_Texture_SumVariance_Hoechst_3_03_256,Nuclei_Texture_SumVariance_Mitochondria_3_01_256,Nuclei_Texture_SumVariance_PM_3_01_256,Metadata_heart_treatment
str,str,i64,i64,str,str,str,f64,f64,f64,f64,i64,i64,str,str,i64,i64,i64,i64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
"""210e4376efde9d557a5c60029bdda6…","""B""",2,9,"""failing""","""rejected""","""DMSO""",221.046761,137.115493,246.6028,109.285755,40,1,"""localhost230405150001""","""B02""",1,1,6,6,"""f00""",-1.35494,0.841229,0.648883,-0.850138,-1.045214,1.298358,0.376165,0.935101,1.530228,-0.983617,-0.261031,-0.299817,-0.721977,0.944725,0.161074,0.532329,1.845864,…,-0.052719,0.797095,0.359081,-0.173336,0.300041,0.217945,-0.039774,0.488531,0.472164,0.28659,0.464359,0.501649,0.507623,1.076663,0.741941,-0.696022,-0.178762,0.186741,0.158222,0.341595,0.50487,-0.440604,-0.426966,0.194372,-0.035117,0.400021,-0.619206,-0.393448,0.961214,0.406068,0.374039,-0.280532,-0.158967,-0.344804,-0.263653,-0.305486,"""failing_DMSO"""
"""cef18f209640ef8ae98ec110cfdcb6…","""B""",2,9,"""failing""","""rejected""","""DMSO""",690.596142,183.067828,716.170091,177.132195,40,1,"""localhost230405150001""","""B02""",2,2,7,7,"""f00""",0.657107,-0.850399,-0.584931,2.090925,1.263259,-0.021031,1.627957,0.944161,-0.085511,1.475345,2.164761,-0.688462,1.215015,1.499086,-0.770667,1.012721,0.6791,…,-0.318777,-1.154168,-0.66473,0.134835,0.263514,-0.124309,0.634517,0.968512,0.859562,0.351144,0.914468,-2.508508,-2.389124,-1.80698,-2.121536,-0.231231,-0.763949,-1.055166,-0.258152,0.282319,0.048807,-0.981164,-1.0743,0.612996,0.290339,0.030854,-0.421502,-0.61852,-0.050925,0.424753,0.323462,-0.096856,-0.218001,-0.359297,2.621455,-0.175679,"""failing_DMSO"""
"""cca07fa581da808fdefe80f9c0542d…","""B""",2,9,"""failing""","""rejected""","""DMSO""",626.56149,206.923698,623.94374,199.90644,40,1,"""localhost230405150001""","""B02""",3,3,8,8,"""f00""",0.384287,-0.727344,0.399813,0.699568,0.778991,-0.192578,-0.166121,-0.185078,-0.620564,0.385325,0.41953,-1.35377,-0.189027,1.88019,-0.198823,0.77826,2.084304,…,-0.437225,0.097014,0.148712,-0.126239,0.315114,-0.682006,-0.952994,0.534521,0.448969,-0.512213,0.68761,-0.333052,-1.116806,-0.671374,-0.085583,0.565659,0.117809,-0.035232,0.340022,0.392109,0.906171,-0.637012,-0.912759,-0.139719,-0.319312,-0.119514,-0.62708,-0.213998,0.492022,0.783465,0.531513,-0.515924,-0.090464,-0.381751,-0.23489,-0.312005,"""failing_DMSO"""
"""c076728ed2ebba7c01e6adb4244b02…","""B""",2,9,"""failing""","""rejected""","""DMSO""",559.448583,220.68816,528.646623,196.955552,40,1,"""localhost230405150001""","""B02""",4,4,9,9,"""f00""",-0.08178,-0.31057,-1.984463,0.923396,-0.152527,-0.454748,0.485672,0.978143,0.075853,0.333035,1.036702,2.124015,-0.11271,-1.276017,0.663499,1.351768,-2.07981,…,-0.180273,0.154455,0.355861,-0.285138,0.187411,-0.401472,-1.323716,0.216479,0.694455,0.22334,0.272893,-1.610123,-1.983535,-1.990444,-1.759351,-0.667021,-1.511134,-1.70973,-1.025608,0.38988,0.970785,-0.723812,-0.240465,1.02861,0.817875,0.731123,-0.410279,0.066951,0.233985,0.697668,0.3868,0.216837,-0.078625,-0.345897,-0.148249,-0.205381,"""failing_DMSO"""
"""2e8f5f11d29d8f82baa39f573e2e51…","""B""",2,9,"""failing""","""rejected""","""DMSO""",909.019946,247.69434,897.965996,253.621836,40,1,"""localhost230405150001""","""B02""",5,5,10,10,"""f00""",1.384627,-0.236857,0.651571,-0.525561,-0.256208,-0.352022,-0.51073,-0.650514,-0.61187,-0.390602,-0.915644,0.274757,-0.807468,-0.263914,1.012877,0.333081,0.457026,…,-0.235359,-0.874322,1.036752,0.560328,0.087048,0.500935,1.024688,0.682356,0.703425,-0.559919,0.535412,-0.446346,-0.250839,-0.325067,-0.220781,0.135176,-0.068065,-1.328074,-0.471597,-0.313553,-1.011855,-0.921082,-0.718369,-0.1701,0.076669,0.151063,0.78411,0.796587,-0.833035,0.971781,0.96971,-0.859995,-0.437968,-0.375427,0.054053,-0.346036,"""failing_DMSO"""


Generating aggregate profiles at the replicate level 

In [4]:
# grouping columns that define one replicate-level profile
replicate_strata = [
    "Metadata_heart_treatment",
    "Metadata_WellRow",
    "Metadata_WellCol",
    "Metadata_heart_number",
    "Metadata_cell_type",
    "Metadata_treatment",
]

# destination of the replicate-level parquet file
replicate_profiles_path = (
    aggregate_profiles_dir / "cfret_replicate_profiles.parquet"
).resolve()

# take the per-group median of every feature and write the result to disk;
# pycytominer expects a pandas DataFrame, hence the conversion from polars.
# The call is left as the last expression so the written path is displayed.
aggregate(
    population_df=cfret_df.to_pandas(),
    strata=replicate_strata,
    features=cfret_feats,
    operation="median",
    output_file=replicate_profiles_path,
    output_type="parquet",
)

PosixPath('/home/erikserrano/Projects/buscar/notebooks/2.cfret-analysis/results/aggregate_profiles/cfret_replicate_profiles.parquet')

Generating consensus profiles for each heart type and treatment combination

In [5]:
# aggregating profiles by heart and treatment
aggregate(
    population_df=cfret_df.to_pandas(),
    strata=["Metadata_heart_treatment", "Metadata_cell_type", "Metadata_treatment"],
    features=cfret_feats,
    operation="median",
    output_type="parquet",
    output_file=(aggregate_profiles_dir / "cfret_consensus_profiles.parquet").resolve(),
)

PosixPath('/home/erikserrano/Projects/buscar/notebooks/2.cfret-analysis/results/aggregate_profiles/cfret_consensus_profiles.parquet')