In [1]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

In [2]:
# Destination for the normalized profiles. resolve() is non-strict here because
# this file is created later in the notebook and need not exist yet.
normalized_data_output_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm.parquet"
)
normalized_data_output_path = normalized_data_output_path.resolve()

# Source file holding the combined single-cell features.
# strict=True makes resolve() raise immediately if the input file is missing,
# instead of failing later inside the parquet reader.
combined_data_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_data.parquet"
)
combined_data_path = combined_data_path.resolve(strict=True)

# Read the combined profiles, report (rows, columns), then preview the first rows.
combined_data = pd.read_parquet(path=combined_data_path)
print(combined_data.shape)
combined_data.head()

(145489, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_90_scDINO,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO
0,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,98.0,...,0.035068,0.000629,-0.034905,0.056939,-0.077406,0.033168,0.005295,0.015653,0.036244,-0.013741
1,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,108.0,...,-0.005525,-0.032507,-0.043178,0.01319,-0.073641,0.029824,0.032743,-0.017366,0.053009,-0.006046
2,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,123.0,...,0.015511,0.027061,0.002058,0.039717,-0.118989,0.010754,0.00684,0.048121,0.046048,0.018789
3,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,127.0,...,-0.009289,-0.016169,-0.035088,0.037067,-0.101932,-0.030957,0.027611,0.001648,0.09071,-0.000729
4,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,15.0,...,0.026018,-0.003725,0.002089,0.032937,-0.053131,0.023977,-0.018216,0.041922,0.045374,-0.033786


In [3]:
# Split the columns into metadata and features by name.
all_columns = combined_data.columns

# Any column whose name contains the substring "Metadata" is treated as metadata.
is_metadata_column = all_columns.str.contains("Metadata")
metadata_features = all_columns[is_metadata_column].tolist()

# Everything else is a feature column.
# Note: Index.difference returns a sorted index, so the features come back in
# lexicographic order rather than in their original column order.
feature_columns = all_columns.difference(metadata_features).to_list()

In [4]:
# Normalize the single-cell features against a single reference population.
#
# This is NOT a per-time-point normalization: `normalize` is called once on the
# whole frame, and the reference samples are restricted to time point '0.0'.

# Reference (negative control) cells: Staurosporine at dose 0.0 and time 0.0.
# The query compares dose and time against string literals, so it presumably
# relies on those metadata columns being stored as strings -- TODO confirm
# against the input schema.
reference_samples_query = "Metadata_compound == 'Staurosporine' and Metadata_dose == '0.0' and Metadata_Time == '0.0'"

# Fail early with a clear message if the query selects no rows (e.g. because of a
# dtype mismatch), rather than failing later inside the normalization call.
reference_mask = combined_data.eval(reference_samples_query)
if not reference_mask.any():
    raise ValueError(
        "No rows match the normalization reference query: "
        f"{reference_samples_query!r}. Check the values and dtypes of "
        "Metadata_compound, Metadata_dose and Metadata_Time."
    )

# normalize annotated data
normalized_df = normalize(
    # df with annotated raw merged single cell features
    profiles=combined_data,
    features=feature_columns,
    meta_features=metadata_features,
    # specify samples used as normalization reference (negative control)
    samples=reference_samples_query,
    # normalization method used
    method="standardize",
)

# Write the normalized profiles to the parquet output path.
output(
    normalized_df,
    output_filename=normalized_data_output_path,
    output_type="parquet",
)
print("Single cells have been normalized!")
# check to see if the features have been normalized
print(normalized_df.shape)
normalized_df.head()

Single cells have been normalized!
(145489, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,98.0,...,0.187026,-0.248392,1.60337,0.312717,1.018808,-0.202042,0.147753,-0.357406,-0.140129,1.510029
1,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,108.0,...,-1.215989,-0.610512,-1.242749,0.410329,0.872185,1.059767,-1.079649,0.219456,0.469064,-0.973322
2,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,123.0,...,1.306198,1.369489,0.48301,-0.765297,0.035978,-0.131003,1.354683,-0.020082,2.435365,3.352972
3,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,127.0,...,-0.52422,-0.256391,0.310584,-0.323113,-1.792923,0.823824,-0.372862,1.516754,0.890084,0.934903
4,1.0,C-02,178.0,Staurosporine,0.0,negative,1,1,0.0,15.0,...,0.002674,1.370834,0.04191,0.942032,0.615786,-1.282878,1.124272,-0.04327,-1.727197,0.671195
