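# Aggregate feature-selected profiles

This notebook aggregates the feature-selected profiles (CP and scDINO features) to one median profile per well, time point, and dose, and writes the result to a parquet file.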

## Import libraries

In [1]:
import pathlib

import pandas as pd
from pycytominer import aggregate

## Set paths and variables

In [2]:
# Input: feature-selected profiles. Output: the aggregated profiles.
# NOTE: despite the `_dir` suffix, both variables hold *file* paths.
fs_profile_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs.parquet"
)
aggregated_profile_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs_aggregated.parquet"
)

# strict=True makes the notebook fail right here if the input file is missing
input_profile_dir = fs_profile_path.resolve(strict=True)
# the output file may not exist yet, so it is resolved non-strictly
output_profile_dir = aggregated_profile_path.resolve()

# load the profiles and preview the first rows
fs_df = pd.read_parquet(input_profile_dir)
fs_df.head()

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1,C-10,150,Staurosporine,78.13,test,11,3,10.0,1.0,...,0.455522,0.39468,1.649389,0.010468,0.426432,-0.339073,1.587193,-0.362701,0.812067,1.215035
1,1,C-10,150,Staurosporine,78.13,test,11,3,10.0,15.0,...,1.055508,0.703437,-1.417191,0.261842,1.097767,-1.700814,0.564109,0.238833,0.718067,0.768177
2,1,C-10,150,Staurosporine,78.13,test,11,3,10.0,16.0,...,-0.834625,0.309778,-0.677732,1.00843,-0.144541,-1.514545,-0.609423,0.447713,1.357439,1.000067
3,1,C-10,150,Staurosporine,78.13,test,11,3,10.0,26.0,...,-1.519487,1.510775,-0.939313,-0.584098,2.944786,-0.227158,0.076995,-1.431423,-0.097869,0.970456
4,1,C-10,150,Staurosporine,78.13,test,11,3,10.0,37.0,...,-1.171937,0.214552,2.372796,-0.591718,-0.306193,0.506419,-0.360647,1.294736,1.949604,0.107606


## Perform aggregation

In [4]:
# Split the columns into metadata and features using a single mask.
is_metadata_col = fs_df.columns.str.contains("Metadata")
metadata_cols = fs_df.columns[is_metadata_col].to_list()
# Metadata_number_of_singlecells is aggregated together with the features
# (median per stratum), so it is prepended to the feature list.
feature_cols = ["Metadata_number_of_singlecells"] + fs_df.columns[
    ~is_metadata_col
].to_list()

# Columns that define one aggregated profile. Defined once so that the
# aggregation strata and the merge keys below cannot drift apart.
# NOTE(review): Metadata_plate is not part of the strata; if the same well,
# time and dose occur on more than one plate, those rows are pooled by the
# aggregation and then repeated once per plate by the merge. Confirm that
# this is intended for multi-plate inputs.
strata_cols = ["Metadata_Well", "Metadata_Time", "Metadata_dose"]

# Metadata carried over onto the aggregated profiles (includes the strata).
selected_metadata_cols = [
    "Metadata_Well",
    "Metadata_plate",
    "Metadata_compound",
    "Metadata_dose",
    "Metadata_control",
    "Metadata_Time",
]

aggregated_df = aggregate(
    fs_df,
    features=feature_cols,
    strata=strata_cols,
    operation="median",
)

# Deduplicate the metadata BEFORE merging. Merging against the full input
# frame would copy every aggregated row once per matching input row (across
# all feature columns) only for drop_duplicates to collapse it again.
# The resulting rows and their order are unchanged.
metadata_df = fs_df[selected_metadata_cols].drop_duplicates(ignore_index=True)
aggregated_df = pd.merge(
    aggregated_df,
    metadata_df,
    how="left",
    on=strata_cols,
).drop_duplicates(ignore_index=True)

# rearrange the columns such that the metadata columns are first
# (relative order within the metadata and the feature columns is preserved)
is_metadata_out = aggregated_df.columns.str.startswith("Metadata_")
aggregated_df = aggregated_df[
    aggregated_df.columns[is_metadata_out].to_list()
    + aggregated_df.columns[~is_metadata_out].to_list()
]

print(aggregated_df.shape)
aggregated_df.to_parquet(output_profile_dir)
aggregated_df.head()

(389, 2396)


Unnamed: 0,Metadata_Well,Metadata_Time,Metadata_dose,Metadata_number_of_singlecells,Metadata_plate,Metadata_compound,Metadata_control,Cells_AreaShape_Compactness_CP,Cells_AreaShape_Eccentricity_CP,Cells_AreaShape_Extent_CP,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,C-02,0.0,0.0,183.0,1,Staurosporine,negative,-0.07711,0.475957,0.091597,...,0.229725,-0.149357,0.096118,-0.092986,0.045031,0.164684,0.055407,0.004974,-0.242772,0.102413
1,C-02,1.0,0.0,180.0,1,Staurosporine,negative,-0.296513,0.28499,0.226765,...,-0.073892,0.133531,-0.0378,0.005971,-0.243315,0.357192,-0.009844,0.177669,-0.066157,-0.072011
2,C-02,10.0,0.0,173.0,1,Staurosporine,negative,-0.004368,0.455522,-0.063581,...,-0.026653,-0.161838,0.110146,0.150975,-0.567474,0.259897,-0.010044,0.20663,-0.001741,-0.023353
3,C-02,11.0,0.0,174.0,1,Staurosporine,negative,0.187565,0.547188,-0.132059,...,-0.092784,0.05755,0.197094,0.240283,-0.608368,0.200364,0.065202,0.29587,0.237916,-0.137402
4,C-02,12.0,0.0,154.0,1,Staurosporine,negative,0.035033,0.554286,-0.355493,...,-0.093603,-0.027693,0.373045,-0.008521,-0.774369,0.443792,0.342298,0.417925,0.019141,-0.034194
