# Aggregate feature selected profiles

## Import libraries

In [1]:
import pathlib

import pandas as pd
from pycytominer import aggregate

## Set paths and variables

In [2]:
# set paths
input_profile_dir = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs.parquet"
).resolve(strict=True)
output_profile_dir = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs_aggregated.parquet"
).resolve()
fs_df = pd.read_parquet(input_profile_dir)

## Perform aggregation

In [3]:
metadata_cols = fs_df.columns[fs_df.columns.str.contains("Metadata")].to_list()
feature_cols = fs_df.columns[~fs_df.columns.str.contains("Metadata")].to_list()

aggregated_df = aggregate(
    fs_df,
    features=feature_cols,
    strata=["Metadata_Well", "Metadata_Time", "Metadata_dose"],
    operation="median",
)
aggregated_df = pd.merge(
    aggregated_df,
    fs_df[metadata_cols],
    how="left",
    on=["Metadata_Well", "Metadata_Time"],
)
# rearrange the columns such that the metadata columns are first
for col in reversed(aggregated_df.columns):
    if col.startswith("Metadata_"):
        tmp_pop = aggregated_df.pop(col)
        aggregated_df.insert(0, col, tmp_pop)

print(aggregated_df.shape)
aggregated_df.to_parquet(output_profile_dir)
aggregated_df.head()

(145489, 2445)


Unnamed: 0,Metadata_Well,Metadata_Time,Metadata_dose_x,Metadata_plate,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose_y,Metadata_control,Metadata_ImageNumber,Metadata_FOV,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,C-02,0.0,0.0,1,178,Staurosporine,0.0,negative,1,1,...,0.186208,-0.140797,0.04191,-0.008297,0.035978,0.121764,-0.021929,-0.020082,-0.140129,0.142675
1,C-02,0.0,0.0,1,178,Staurosporine,0.0,negative,1,1,...,0.186208,-0.140797,0.04191,-0.008297,0.035978,0.121764,-0.021929,-0.020082,-0.140129,0.142675
2,C-02,0.0,0.0,1,178,Staurosporine,0.0,negative,1,1,...,0.186208,-0.140797,0.04191,-0.008297,0.035978,0.121764,-0.021929,-0.020082,-0.140129,0.142675
3,C-02,0.0,0.0,1,178,Staurosporine,0.0,negative,1,1,...,0.186208,-0.140797,0.04191,-0.008297,0.035978,0.121764,-0.021929,-0.020082,-0.140129,0.142675
4,C-02,0.0,0.0,1,178,Staurosporine,0.0,negative,1,1,...,0.186208,-0.140797,0.04191,-0.008297,0.035978,0.121764,-0.021929,-0.020082,-0.140129,0.142675
