# Applying metrics

In [1]:
# imports
import sys
import pathlib
import pandas as pd
from pycytominer.cyto_utils import load_profiles

# Add "../../utils" to the module search path so the project-local
# `utils.data_utils` helpers can be imported below. The entry is relative, so
# it is interpreted against the current working directory of the kernel.
sys.path.append("../../utils")
from utils import data_utils

In [2]:
# Input and output directories. Both are resolved strictly so that running the
# notebook from the wrong working directory fails right here with a
# FileNotFoundError instead of later with a less specific message.
data_dir = pathlib.Path("../data").resolve(strict=True)
results_dir = pathlib.Path("./results").resolve(strict=True)

# Path to metadata_w_clusters.csv inside the results "cluster" folder; the
# strict resolve requires the file to exist already.
metadata_cluster_path = (results_dir / "cluster/metadata_w_clusters.csv").resolve(strict=True)

# Single-cell feature-selected profile paths. The glob results are sorted
# because directory listing order is not guaranteed, and the order of this list
# decides the row order of anything built from it.
profile_paths = sorted(data_dir.glob("*sc_feature_selected.parquet"))
if not profile_paths:
    raise FileNotFoundError("Profiles were not found at the given directory")


In [3]:
# features shared across the profiles, as reported by the data_utils helper
shared_features = data_utils.find_shared_features(profile_paths)

# read every single-cell profile and keep only the shared columns
loaded_profiles_df = [
    load_profiles(single_cell_path)[shared_features]
    for single_cell_path in profile_paths
]

# stack the per-file profiles row-wise into one dataframe
all_profiles_df = pd.concat(loaded_profiles_df, axis=0)

print(all_profiles_df.shape)
all_profiles_df.head()

(54610, 494)


Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_heart_number,Metadata_cell_type,Metadata_heart_failure_type,Metadata_treatment,Metadata_Pathway,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Cells_Location_Center_X,...,Nuclei_Texture_InfoMeas2_PM_3_03_256,Nuclei_Texture_InverseDifferenceMoment_Hoechst_3_00_256,Nuclei_Texture_InverseDifferenceMoment_Hoechst_3_01_256,Nuclei_Texture_InverseDifferenceMoment_Hoechst_3_02_256,Nuclei_Texture_InverseDifferenceMoment_Hoechst_3_03_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_00_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_01_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_02_256,Nuclei_Texture_InverseDifferenceMoment_PM_3_03_256,Nuclei_Texture_SumEntropy_PM_3_01_256
0,B,2,7,healthy,,DMSO,,870.048176,222.975912,883.760337,...,0.153167,-1.314356,-0.527268,-0.28336,-0.966427,-0.028467,0.025132,0.531559,0.161083,-0.084311
1,B,2,7,healthy,,DMSO,,372.665138,78.150612,422.940605,...,0.875659,-1.281228,-0.035844,-1.641539,-1.781835,-0.67462,-0.054664,-0.974624,-1.157279,1.004183
2,B,2,7,healthy,,DMSO,,691.469799,396.812081,683.988473,...,-0.630529,1.253008,0.978559,1.724513,1.741098,0.204027,0.415166,0.695386,0.509317,-0.669122
3,B,2,7,healthy,,DMSO,,658.817385,176.3645,656.476395,...,0.01472,-0.793306,-0.84018,-0.947567,-0.750173,-0.856654,-0.524341,-0.36156,0.09598,-0.099079
4,B,2,7,healthy,,DMSO,,1031.773316,87.448834,1023.158705,...,-2.189919,0.371659,-0.508734,-1.278283,-1.529378,-2.088097,-0.929627,-2.14462,-2.443222,1.224159


In [4]:
# parameters
# name of the metadata column that stores the treatment labels
metadata_treatments = "Metadata_treatment"
# not used in this cell
profile = None
# treatment label that defines the target (reference) group; it is left unset
# here, so the validation below fails fast until a real label is provided
target_name = None


# split the metadata and morphology feature
meta_cols, feat_cols = data_utils.split_meta_and_features(all_profiles_df)

# the treatment column has to be one of the metadata columns
if metadata_treatments not in meta_cols:
    raise ValueError(
        f"{metadata_treatments} is a metadata column that does not exist"
    )

# The treatment column has to actually contain the target label. An
# element-wise comparison against None (or an unknown label) is False for
# every row, which would otherwise silently yield an empty target group.
is_target = all_profiles_df[metadata_treatments] == target_name
if not is_target.any():
    available_treatments = all_profiles_df[metadata_treatments].dropna().unique().tolist()
    raise ValueError(
        f"target_name={target_name!r} was not found in {metadata_treatments}; "
        f"available values: {available_treatments}"
    )

# rows matching the target label vs. all remaining rows (same mask for both,
# so the two subsets never overlap)
target_df = all_profiles_df.loc[is_target]
treated_df = all_profiles_df.loc[~is_target]