## 3 Generating centroid profiles
In this notebook, we identify the centroid for each cluster found in the single-cell profiles after running the buscar clustering module.

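The centroid is a representative profile that summarizes the distribution of cells within a cluster. It is computed from the cluster's cells (see the aggregation step below) and does not necessarily correspond to any single observed cell.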

In [1]:
import sys
import pathlib

import polars as pl
from pycytominer import aggregate

sys.path.append("../../")
from utils.data_utils import split_meta_and_features

Setting input and output paths

In [2]:
# --- inputs -----------------------------------------------------------------
# feature-selected single-cell profiles of the cfret-pilot plate;
# strict=True makes the cell fail immediately if the file is missing
cfret_profiles_path = pathlib.Path(
    "../0.download-data/data/sc-profiles/cfret/localhost230405150001_sc_feature_selected.parquet"
).resolve(strict=True)

# per-cell cluster labels (must already exist on disk)
cluster_labels_path = pathlib.Path(
    "./results/clusters/cfret_pilot_cluster_labels.parquet"
).resolve(strict=True)

# --- outputs ----------------------------------------------------------------
# top-level results directory and the sub-directory that will hold centroids
results_dir = pathlib.Path("./results").resolve()
centroids_dir = results_dir.joinpath("centroids").resolve()

# create the output directories (parent first); no error if they already exist
results_dir.mkdir(exist_ok=True)
centroids_dir.mkdir(exist_ok=True)

Loading profiles

In [3]:
# Build the working dataframe in a single chain:
#   1. read the single-cell profiles,
#   2. attach the cluster labels (inner join keeps only cells present in both),
#   3. add a combined "<cell type>_<treatment>" label column.
cfret_df = (
    pl.read_parquet(cfret_profiles_path)
    .join(
        pl.read_parquet(cluster_labels_path),
        on="Metadata_cell_id",
        how="inner",
    )
    .with_columns(
        Metadata_heart_treatment=pl.concat_str(
            ["Metadata_cell_type", "Metadata_treatment"],
            separator="_",
        )
    )
)

# separate metadata column names from feature column names
cfret_meta, cfret_feats = split_meta_and_features(cfret_df)

# report the dataframe dimensions
print(cfret_df.shape)

# preview the first rows with metadata columns ordered before feature columns
cfret_df.select([*cfret_meta, *cfret_feats]).head()

(15793, 682)


Metadata_cell_id,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_cell_type,Metadata_heart_failure_type,Metadata_treatment,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cells_Location_Center_X,Metadata_Cells_Location_Center_Y,Metadata_Image_Count_Cells,Metadata_ImageNumber,Metadata_Plate,Metadata_Well,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Nuclei_Number_Object_Number,Metadata_Site,Metadata_cluster_id,Metadata_cluster_n_cells,Metadata_treatment_n_cells,Metadata_cluster_ratio,Metadata_heart_treatment,Cytoplasm_AreaShape_BoundingBoxMinimum_X,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_Eccentricity,Cytoplasm_AreaShape_Extent,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MeanRadius,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Perimeter,Cytoplasm_AreaShape_Solidity,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,…,Nuclei_Texture_DifferenceEntropy_PM_3_00_256,Nuclei_Texture_DifferenceVariance_Actin_3_01_256,Nuclei_Texture_DifferenceVariance_Mitochondria_3_03_256,Nuclei_Texture_DifferenceVariance_PM_3_03_256,Nuclei_Texture_InfoMeas1_ER_3_00_256,Nuclei_Texture_InfoMeas1_ER_3_01_256,Nuclei_Texture_InfoMeas1_ER_3_02_256,Nuclei_Texture_InfoMeas1_ER_3_03_256,Nuclei_Texture_InfoMeas1_Hoechst_3_00_256,Nuclei_Texture_InfoMeas1_Hoechst_3_01_256,Nuclei_Texture_InfoMeas1_Hoechst_3_02_256,Nuclei_Texture_InfoMeas1_Hoechst_3_03_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_00_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_01_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_02_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_03_256,Nuclei_Texture_InfoMeas1_PM_3_00_256,Nuclei_Texture_InfoMeas1_PM_3_01_256,Nuclei_Texture_InfoMeas1_PM_3_02_256,Nuclei_Texture_InfoMeas1_PM_3_03_256,Nuclei_Texture_InfoMeas2_ER_3_01_256,Nuclei_Texture_InfoMeas2_ER_3_03_256,Nuclei_Texture_InfoMeas2_Hoechst_3_01_256,Nuclei_Texture_InfoMea
s2_Hoechst_3_03_256,Nuclei_Texture_InfoMeas2_PM_3_01_256,Nuclei_Texture_InfoMeas2_PM_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Actin_3_02_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_01_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Mitochondria_3_03_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_03_256,Nuclei_Texture_SumEntropy_PM_3_01_256,Nuclei_Texture_SumVariance_ER_3_03_256,Nuclei_Texture_SumVariance_Hoechst_3_03_256,Nuclei_Texture_SumVariance_Mitochondria_3_01_256,Nuclei_Texture_SumVariance_PM_3_01_256
str,str,i64,i64,str,str,str,f64,f64,f64,f64,i64,i64,str,str,i64,i64,i64,i64,str,cat,u32,u32,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""210e4376efde9d557a5c60029bdda6…","""B""",2,9,"""failing""","""rejected""","""DMSO""",221.046761,137.115493,246.6028,109.285755,40,1,"""localhost230405150001""","""B02""",1,1,6,6,"""f00""","""DMSO_heart_9_louvain_0""",9142,9153,0.998798,"""failing_DMSO""",-1.35494,0.841229,0.648883,-0.850138,-1.045214,1.298358,0.376165,0.935101,1.530228,-0.983617,-0.261031,-0.299817,…,-0.740763,-0.052719,0.797095,0.359081,-0.173336,0.300041,0.217945,-0.039774,0.488531,0.472164,0.28659,0.464359,0.501649,0.507623,1.076663,0.741941,-0.696022,-0.178762,0.186741,0.158222,0.341595,0.50487,-0.440604,-0.426966,0.194372,-0.035117,0.400021,-0.619206,-0.393448,0.961214,0.406068,0.374039,-0.280532,-0.158967,-0.344804,-0.263653,-0.305486
"""cef18f209640ef8ae98ec110cfdcb6…","""B""",2,9,"""failing""","""rejected""","""DMSO""",690.596142,183.067828,716.170091,177.132195,40,1,"""localhost230405150001""","""B02""",2,2,7,7,"""f00""","""DMSO_heart_9_louvain_0""",9142,9153,0.998798,"""failing_DMSO""",0.657107,-0.850399,-0.584931,2.090925,1.263259,-0.021031,1.627957,0.944161,-0.085511,1.475345,2.164761,-0.688462,…,0.037684,-0.318777,-1.154168,-0.66473,0.134835,0.263514,-0.124309,0.634517,0.968512,0.859562,0.351144,0.914468,-2.508508,-2.389124,-1.80698,-2.121536,-0.231231,-0.763949,-1.055166,-0.258152,0.282319,0.048807,-0.981164,-1.0743,0.612996,0.290339,0.030854,-0.421502,-0.61852,-0.050925,0.424753,0.323462,-0.096856,-0.218001,-0.359297,2.621455,-0.175679
"""cca07fa581da808fdefe80f9c0542d…","""B""",2,9,"""failing""","""rejected""","""DMSO""",626.56149,206.923698,623.94374,199.90644,40,1,"""localhost230405150001""","""B02""",3,3,8,8,"""f00""","""DMSO_heart_9_louvain_0""",9142,9153,0.998798,"""failing_DMSO""",0.384287,-0.727344,0.399813,0.699568,0.778991,-0.192578,-0.166121,-0.185078,-0.620564,0.385325,0.41953,-1.35377,…,-0.439591,-0.437225,0.097014,0.148712,-0.126239,0.315114,-0.682006,-0.952994,0.534521,0.448969,-0.512213,0.68761,-0.333052,-1.116806,-0.671374,-0.085583,0.565659,0.117809,-0.035232,0.340022,0.392109,0.906171,-0.637012,-0.912759,-0.139719,-0.319312,-0.119514,-0.62708,-0.213998,0.492022,0.783465,0.531513,-0.515924,-0.090464,-0.381751,-0.23489,-0.312005
"""c076728ed2ebba7c01e6adb4244b02…","""B""",2,9,"""failing""","""rejected""","""DMSO""",559.448583,220.68816,528.646623,196.955552,40,1,"""localhost230405150001""","""B02""",4,4,9,9,"""f00""","""DMSO_heart_9_louvain_0""",9142,9153,0.998798,"""failing_DMSO""",-0.08178,-0.31057,-1.984463,0.923396,-0.152527,-0.454748,0.485672,0.978143,0.075853,0.333035,1.036702,2.124015,…,-0.370192,-0.180273,0.154455,0.355861,-0.285138,0.187411,-0.401472,-1.323716,0.216479,0.694455,0.22334,0.272893,-1.610123,-1.983535,-1.990444,-1.759351,-0.667021,-1.511134,-1.70973,-1.025608,0.38988,0.970785,-0.723812,-0.240465,1.02861,0.817875,0.731123,-0.410279,0.066951,0.233985,0.697668,0.3868,0.216837,-0.078625,-0.345897,-0.148249,-0.205381
"""2e8f5f11d29d8f82baa39f573e2e51…","""B""",2,9,"""failing""","""rejected""","""DMSO""",909.019946,247.69434,897.965996,253.621836,40,1,"""localhost230405150001""","""B02""",5,5,10,10,"""f00""","""DMSO_heart_9_louvain_0""",9142,9153,0.998798,"""failing_DMSO""",1.384627,-0.236857,0.651571,-0.525561,-0.256208,-0.352022,-0.51073,-0.650514,-0.61187,-0.390602,-0.915644,0.274757,…,-0.784562,-0.235359,-0.874322,1.036752,0.560328,0.087048,0.500935,1.024688,0.682356,0.703425,-0.559919,0.535412,-0.446346,-0.250839,-0.325067,-0.220781,0.135176,-0.068065,-1.328074,-0.471597,-0.313553,-1.011855,-0.921082,-0.718369,-0.1701,0.076669,0.151063,0.78411,0.796587,-0.833035,0.971781,0.96971,-0.859995,-0.437968,-0.375427,0.054053,-0.346036


We use **median aggregation** to generate centroid profiles for each cluster. For each cluster, we calculate the component-wise (per-feature) median across all cells to create a synthetic representative profile that captures the central tendency. This approach is robust to outliers, consistent with replicate and consensus profile generation workflows, and works well for high-dimensional morphological features.

In [4]:
# recompute the metadata / feature column split from the current dataframe
cfret_meta, cfret_feats = split_meta_and_features(cfret_df)

# summary of what goes into the aggregation: cells, features, and clusters
print(
    f"Total cells: {cfret_df.height}",
    f"Number of features: {len(cfret_feats)}",
    f"Unique clusters: {cfret_df.get_column('Metadata_cluster_id').n_unique()}",
    sep="\n",
)

Total cells: 15793
Number of features: 657
Unique clusters: 9


Generate and save centroid profiles

In [5]:
# Centroid profiles: one row per cluster holding the per-feature median.
# pycytominer's aggregate is given a pandas frame here, so the profiles are
# converted on the way in and the result is converted back to polars.
centroids_df = pl.from_pandas(
    aggregate(
        population_df=cfret_df.to_pandas(),
        strata=["Metadata_cluster_id"],
        features=cfret_feats,
        operation="median",
    )
)

# sanity report: number of centroids and resulting table dimensions
print(f"Total centroids generated: {centroids_df.height}")
print(f"Centroid shape: {centroids_df.shape}")
centroids_df

  population_df = population_df.groupby(strata, dropna=False)  # type: ignore[assignment]


Total centroids generated: 9
Centroid shape: (9, 658)


Metadata_cluster_id,Cytoplasm_AreaShape_BoundingBoxMinimum_X,Cytoplasm_AreaShape_Compactness,Cytoplasm_AreaShape_Eccentricity,Cytoplasm_AreaShape_Extent,Cytoplasm_AreaShape_FormFactor,Cytoplasm_AreaShape_MajorAxisLength,Cytoplasm_AreaShape_MeanRadius,Cytoplasm_AreaShape_MinorAxisLength,Cytoplasm_AreaShape_Perimeter,Cytoplasm_AreaShape_Solidity,Cytoplasm_AreaShape_Zernike_0_0,Cytoplasm_AreaShape_Zernike_1_1,Cytoplasm_AreaShape_Zernike_2_0,Cytoplasm_AreaShape_Zernike_2_2,Cytoplasm_AreaShape_Zernike_3_1,Cytoplasm_AreaShape_Zernike_4_0,Cytoplasm_AreaShape_Zernike_4_2,Cytoplasm_AreaShape_Zernike_5_1,Cytoplasm_AreaShape_Zernike_5_3,Cytoplasm_AreaShape_Zernike_6_0,Cytoplasm_AreaShape_Zernike_6_2,Cytoplasm_AreaShape_Zernike_6_4,Cytoplasm_AreaShape_Zernike_7_1,Cytoplasm_AreaShape_Zernike_7_3,Cytoplasm_AreaShape_Zernike_7_5,Cytoplasm_AreaShape_Zernike_8_0,Cytoplasm_AreaShape_Zernike_8_2,Cytoplasm_AreaShape_Zernike_8_4,Cytoplasm_AreaShape_Zernike_8_6,Cytoplasm_AreaShape_Zernike_9_1,Cytoplasm_AreaShape_Zernike_9_3,Cytoplasm_AreaShape_Zernike_9_5,Cytoplasm_AreaShape_Zernike_9_7,Cytoplasm_Correlation_Correlation_Actin_Hoechst,Cytoplasm_Correlation_Correlation_Actin_Mitochondria,Cytoplasm_Correlation_Correlation_Actin_PM,…,Nuclei_Texture_DifferenceEntropy_PM_3_00_256,Nuclei_Texture_DifferenceVariance_Actin_3_01_256,Nuclei_Texture_DifferenceVariance_Mitochondria_3_03_256,Nuclei_Texture_DifferenceVariance_PM_3_03_256,Nuclei_Texture_InfoMeas1_ER_3_00_256,Nuclei_Texture_InfoMeas1_ER_3_01_256,Nuclei_Texture_InfoMeas1_ER_3_02_256,Nuclei_Texture_InfoMeas1_ER_3_03_256,Nuclei_Texture_InfoMeas1_Hoechst_3_00_256,Nuclei_Texture_InfoMeas1_Hoechst_3_01_256,Nuclei_Texture_InfoMeas1_Hoechst_3_02_256,Nuclei_Texture_InfoMeas1_Hoechst_3_03_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_00_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_01_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_02_256,Nuclei_Texture_InfoMeas1_Mitochondria_3_03_256,Nuclei_Texture_InfoMeas1_PM_3_00_256,Nuclei_Texture_InfoMeas1_PM_3_01_25
6,Nuclei_Texture_InfoMeas1_PM_3_02_256,Nuclei_Texture_InfoMeas1_PM_3_03_256,Nuclei_Texture_InfoMeas2_ER_3_01_256,Nuclei_Texture_InfoMeas2_ER_3_03_256,Nuclei_Texture_InfoMeas2_Hoechst_3_01_256,Nuclei_Texture_InfoMeas2_Hoechst_3_03_256,Nuclei_Texture_InfoMeas2_PM_3_01_256,Nuclei_Texture_InfoMeas2_PM_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Actin_3_02_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_01_256,Nuclei_Texture_InverseDifferenceMoment_ER_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Mitochondria_3_03_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_03_256,Nuclei_Texture_SumEntropy_PM_3_01_256,Nuclei_Texture_SumVariance_ER_3_03_256,Nuclei_Texture_SumVariance_Hoechst_3_03_256,Nuclei_Texture_SumVariance_Mitochondria_3_01_256,Nuclei_Texture_SumVariance_PM_3_01_256
cat,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""DMSO_heart_9_louvain_0""",-0.017365,-0.375948,0.028875,0.200863,-0.04977,-0.228398,0.046875,0.063641,-0.251773,0.202299,0.216303,0.032826,0.18301,0.03706,0.0231,-0.16592,-0.007158,0.028768,-0.048361,-0.142402,-0.033751,-0.093717,-0.005448,-0.063685,-0.063426,-0.149138,-0.033556,-0.129697,-0.116366,-0.05216,-0.084436,-0.103767,-0.092716,-0.06537,0.482194,0.064856,…,-0.02033,-0.471746,-0.436971,-0.282468,-0.135781,-0.088347,-0.131908,-0.059839,0.184521,0.321721,0.178624,0.3202,-0.084185,-0.041163,-0.064363,-0.021267,-0.345281,-0.249803,-0.31299,-0.232883,0.493085,0.481681,-0.209778,-0.211974,0.442342,0.437698,-0.508997,-0.369874,-0.374387,-0.143125,0.106185,0.091377,0.125246,-0.14294,-0.321629,-0.179205,-0.172281
"""DMSO_heart_9_louvain_1""",0.395654,2.378332,-0.985671,-2.826533,-1.423156,-1.207654,-2.215587,-0.621809,-1.44509,-3.881097,-2.090087,-1.561002,-1.840888,-1.807643,-1.559165,-1.006071,-0.817123,-1.440496,-1.233745,-0.89177,-1.22465,-1.198378,-1.218086,-1.310631,-1.194103,-0.893521,-1.153585,-0.154594,-1.371785,-1.10184,-0.817372,-0.981738,-0.956727,-2.309949,0.034426,-2.595523,…,-0.081897,-0.263418,0.51143,-0.438628,-0.221524,-0.549879,-0.411148,-0.03664,-0.080766,0.043208,-0.553022,-0.102619,0.457555,0.136955,0.292389,0.493841,-1.226464,-1.13966,-1.176091,-0.838615,0.474993,0.2453,0.186422,0.098895,1.088297,0.968301,0.341102,0.084556,-0.04919,0.648268,0.28435,0.251995,0.241521,-0.231621,-0.320909,-0.257407,-0.016748
"""TGFRi_heart_9_leiden_0""",0.05084,-0.512712,0.122043,0.21409,0.206682,-0.534524,-0.363768,-0.438155,-0.590974,0.305995,0.184902,0.022715,0.101176,0.063108,0.007036,-0.166722,-0.037873,0.000546,-0.074703,-0.159134,-0.037196,-0.107186,-0.073931,-0.043619,-0.065998,-0.190028,-0.022159,-0.039249,-0.094816,-0.06538,-0.027334,-0.023891,-0.121467,0.111665,0.073403,0.191201,…,0.119815,-0.400729,-0.582538,-0.489821,-0.043447,0.000504,-0.038286,-0.000653,0.01419,0.03898,0.015632,0.066566,-0.094978,-0.032138,-0.027835,-0.012133,0.043169,0.061452,0.083777,0.110605,0.369934,0.366713,0.209091,0.197681,0.280064,0.272913,-0.137388,-0.20213,-0.220531,-0.354877,-0.222919,-0.215813,0.213626,-0.224205,-0.255745,-0.15432,-0.180122
"""TGFRi_heart_9_leiden_1""",0.069786,-0.469497,-0.131615,0.175795,0.118696,-0.845099,-0.725856,-0.570358,-0.836988,0.056,0.246278,-0.048875,-0.330748,0.230854,-0.017678,0.372895,0.004617,0.342574,-0.063696,0.096023,0.243625,0.027136,0.049719,0.040431,-0.057737,-0.050785,0.091417,0.025771,-0.067395,0.005561,0.161793,0.046476,-0.160186,0.124813,0.048712,0.128658,…,1.17069,-0.473631,-0.984541,-1.258119,-0.189699,-0.127408,-0.101198,-0.195698,-1.802566,-1.904054,-1.782106,-1.955272,-0.416038,-0.266277,-0.287373,-0.351814,-0.401621,-0.40268,-0.301852,-0.472552,0.714848,0.76774,1.747478,1.761503,1.029449,1.058242,-0.533105,-0.889939,-0.853182,-1.13172,-1.359421,-1.329124,1.571369,0.218984,1.479355,0.128419,0.631976
"""TGFRi_heart_9_leiden_2""",-0.203034,-0.180182,0.178097,0.186032,-0.327188,0.355701,0.520199,0.679824,0.836699,0.287773,0.316485,0.093614,0.651911,-0.134109,-7.8e-05,-0.390455,0.306842,-0.355661,0.165521,-0.29452,-0.282744,-0.094311,-0.484958,-0.589681,-0.163747,-0.462171,-0.155039,-0.064892,-0.003906,-0.194986,-0.552613,-0.397471,-0.016963,0.1438,0.108294,0.136585,…,-0.809491,-0.380828,-0.08169,0.370777,-0.09104,-0.220186,-0.187921,-0.186919,1.203534,1.167895,1.360482,1.094222,0.04329,0.140568,-0.143015,0.061989,0.140047,0.027957,0.18681,0.072304,0.052684,0.158956,-1.79827,-1.617563,-0.042937,-0.130666,-0.108145,0.339422,0.410049,0.234164,0.689465,0.738672,-0.430098,-0.318539,-0.397507,-0.206243,-0.323603
"""DMSO_heart_11_louvain_0""",-0.089359,-0.048628,0.77182,-0.552878,-0.475073,0.780666,-0.039416,-0.100798,0.300961,-0.320947,-0.787942,-0.462016,-0.488334,-0.381523,-0.378915,-0.481662,-0.214635,-0.450028,-0.388782,-0.465964,-0.414834,-0.095099,-0.419727,-0.344907,-0.26114,-0.423281,-0.337739,-0.149691,0.007461,-0.36725,-0.289816,-0.204504,-0.195897,0.107897,-0.979987,-0.248679,…,-0.748233,2.689132,0.438459,0.808148,0.0239,0.079622,0.002143,0.108313,0.57544,0.451501,0.536075,0.458506,0.071866,0.142473,-0.026739,0.160575,0.628621,0.679068,0.576462,0.661105,-0.148037,-0.125223,-0.252605,-0.258211,-0.773978,-0.773082,1.819476,0.727629,0.701551,0.809275,0.711707,0.681499,-0.914648,-0.409342,-0.294257,-0.251505,-0.381779
"""DMSO_heart_11_louvain_1""",-0.185982,-0.704415,-0.06179,0.406181,0.704153,-1.013965,-0.709926,-0.77931,-1.024892,0.560955,0.394207,0.082476,0.034459,0.219723,0.118281,0.23893,-0.066812,0.406346,-0.180088,0.164706,0.241641,-0.125471,0.140757,0.156625,-0.14521,0.124241,0.175212,0.02932,-0.167871,0.0212,0.089025,0.059247,-0.112633,0.99881,0.325866,1.094447,…,0.907871,0.00493,-0.662954,-0.927316,-0.128128,-0.217484,-0.173287,-0.425216,-0.64718,-0.742306,-0.754983,-0.931694,-0.294991,-0.292186,-0.488872,-0.607545,0.160981,-0.092709,-0.061026,-0.306624,0.812925,0.887689,1.14267,1.216825,0.664334,0.820288,0.862601,-1.21605,-1.147641,-0.438408,-1.078519,-0.800369,0.818534,0.452429,0.200085,-0.109316,0.069745
"""TGFRi_heart_11_louvain_0""",-0.08178,-0.133647,0.832756,-0.547976,-0.383368,0.618722,-0.262382,-0.330129,0.084263,-0.241623,-0.797622,-0.530191,-0.451588,-0.281318,-0.49391,-0.427594,-0.210232,-0.469392,-0.350128,-0.410353,-0.413905,0.022236,-0.391863,-0.292197,-0.294512,-0.361586,-0.331971,-0.188904,0.06988,-0.22934,-0.283059,-0.250591,-0.265338,0.238552,-0.709329,0.001229,…,-0.697265,2.748062,0.552421,0.615608,-0.238093,-0.176665,-0.172067,-0.160116,0.345957,0.228507,0.380141,0.251612,-0.00681,0.070788,0.077272,0.118142,0.465219,0.511664,0.52855,0.586091,0.11978,0.100735,0.062799,0.075708,-0.541789,-0.608047,1.830536,0.568123,0.552762,0.801993,0.546521,0.517282,-0.75612,-0.380266,-0.258423,-0.254312,-0.367058
"""TGFRi_heart_11_louvain_1""",-0.149985,-0.784473,-0.199155,0.669808,0.984705,-0.845036,-0.318641,-0.515862,-0.910999,0.85737,0.690863,0.189486,0.44886,0.202892,-0.047881,0.144906,-0.006814,0.272627,-0.14271,0.077734,0.033925,-0.119625,0.347716,0.173774,-0.033673,-0.025059,0.185187,-0.089088,-0.046491,0.19027,0.297102,-0.075725,-0.117452,0.486094,0.061934,0.462864,…,0.876119,0.227659,-0.390094,-1.080284,-0.041793,-0.116102,-0.006852,-0.014854,-0.980267,-1.209736,-1.012096,-1.132649,-0.337989,-0.408025,-0.347322,-0.202361,0.192599,-0.021131,0.124581,-0.124379,0.687418,0.648347,1.439643,1.38272,0.650656,0.710379,1.094288,-0.880076,-0.921128,-0.136533,-1.218487,-1.187144,0.940808,0.123705,0.395675,-0.169375,0.131136


In [6]:
# create a mapping of cluster_id to heart_treatment (expected: one row per cluster)
cluster_treatment_mapping = cfret_df.select(
    ["Metadata_cluster_id", "Metadata_heart_treatment"]
).unique()

# enforce the "one label per cluster" assumption: if a cluster carried more than
# one label, the left join below would silently duplicate its centroid row
if (
    cluster_treatment_mapping.height
    != cluster_treatment_mapping["Metadata_cluster_id"].n_unique()
):
    raise ValueError(
        "Each cluster must map to exactly one Metadata_heart_treatment value; "
        "found clusters with multiple labels, which would duplicate centroids."
    )

# join centroids with the treatment metadata; any label column left over from a
# previous run of this cell is dropped first so re-running does not append a
# suffixed duplicate of it
centroids_df = centroids_df.select(
    [col for col in centroids_df.columns if col != "Metadata_heart_treatment"]
).join(cluster_treatment_mapping, on="Metadata_cluster_id", how="left")

# save centroids to parquet file
centroids_output_path = centroids_dir / "cfret_pilot_centroids.parquet"
centroids_df.write_parquet(centroids_output_path)

print(f"Centroids saved to: {centroids_output_path}")
print(f"Final centroid shape: {centroids_df.shape}")

Centroids saved to: /home/erikserrano/Projects/buscar/notebooks/2.cfret-analysis/results/centroids/cfret_pilot_centroids.parquet
Final centroid shape: (9, 659)
