In [1]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

In [2]:
# Input: parquet file holding the combined single-cell profiles.
# Resolving with strict=True makes this cell raise immediately if the file
# does not exist, instead of failing later at read time.
combined_data_path = pathlib.Path(
    "../data/20231017ChromaLive_6hr_4ch_MaxIP_combined_data.parquet"
).resolve(strict=True)

# Output: where the normalized profiles are written later in the notebook.
# The file need not exist yet, so the path is resolved non-strictly.
normalized_data_output_path = pathlib.Path(
    "../data/20231017ChromaLive_6hr_4ch_MaxIP_normalized_data.parquet"
).resolve(strict=False)

# Read the combined profiles, report (rows, columns), and preview the top rows.
combined_data = pd.read_parquet(path=combined_data_path)
print(combined_data.shape)
combined_data.head()

(240048, 3476)


Unnamed: 0,Metadata_Well,Metadata_FOV,Metadata_Time,Metadata_ImageNumber,Metadata_Nuclei_Number_Object_Number,Metadata_plate,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,...,channel_DNA_cls_feature_90,channel_DNA_cls_feature_91,channel_DNA_cls_feature_92,channel_DNA_cls_feature_93,channel_DNA_cls_feature_94,channel_DNA_cls_feature_95,channel_DNA_cls_feature_96,channel_DNA_cls_feature_97,channel_DNA_cls_feature_98,channel_DNA_cls_feature_99
0,C-02,1,10,10,10,1,7914,Staurosporine,0.0,negative,...,-0.026835,-0.025143,0.022814,0.044236,-0.048172,0.003977,0.005565,0.033877,0.082223,0.009103
1,C-02,1,10,10,100,1,7914,Staurosporine,0.0,negative,...,0.02401,0.002705,-0.059467,0.032855,-0.05753,0.031927,0.017482,0.051654,0.024463,-0.034733
2,C-02,1,10,10,101,1,7914,Staurosporine,0.0,negative,...,-0.007732,0.024938,0.027292,0.034904,-0.127702,-0.014732,0.033218,0.008977,0.031269,-0.031651
3,C-02,1,10,10,102,1,7914,Staurosporine,0.0,negative,...,-0.041826,-0.028302,-0.034485,0.032456,-0.054537,0.021049,0.028299,-0.006374,0.101494,-0.018018
4,C-02,1,10,10,103,1,7914,Staurosporine,0.0,negative,...,-0.029698,-0.017408,-0.011866,0.045469,-0.077298,0.000747,0.076386,-0.007291,0.015712,-0.011359


In [3]:
# Prefix every column whose name contains "TrackObjects" with "Metadata_" so
# that it is matched by the "Metadata" name filter used later in the notebook.
# Columns that already start with "Metadata_" are left untouched: this keeps
# the cell idempotent, so re-running it does not yield "Metadata_Metadata_..."
# column names.
combined_data.columns = [
    f"Metadata_{column}"
    if "TrackObjects" in column and not column.startswith("Metadata_")
    else column
    for column in combined_data.columns
]

In [4]:
# Metadata columns: every column whose name contains the substring "Metadata",
# kept in the frame's column order.
metadata_features = [
    column for column in combined_data.columns if "Metadata" in column
]

# Feature columns: all remaining columns.
# Index.difference attempts to sort its result, so this list may be in sorted
# order rather than in the frame's original column order.
feature_columns = combined_data.columns.difference(metadata_features).tolist()

In [5]:
# Rows used as the normalization reference (negative control): Staurosporine
# at dose 0.0, restricted to the time point labelled '0001'.
# NOTE(review): the preview of combined_data renders Metadata_Time as 10 (not
# zero-padded) — confirm the column really holds strings such as '0001'.
reference_samples_query = (
    "Metadata_compound == 'Staurosporine' and Metadata_dose == 0.0 and Metadata_Time == '0001'"
)

# Fail fast with a readable message when the query selects no rows; otherwise
# the reference statistics would be derived from an empty selection.
reference_mask = combined_data.eval(reference_samples_query)
if not reference_mask.any():
    raise ValueError(
        f"No rows match the normalization reference query: {reference_samples_query!r}"
    )

# normalize annotated data
normalized_df = normalize(
    # df with annotated raw merged single cell features
    profiles=combined_data,
    features=feature_columns,
    meta_features=metadata_features,
    # specify samples used as normalization reference (negative control)
    samples=reference_samples_query,
    # normalization method used
    method="standardize",
)

# Write the normalized profiles to the parquet output path.
output(
    normalized_df,
    output_filename=normalized_data_output_path,
    output_type="parquet",
)
# Plain string: there are no placeholders, so no f-string is needed.
print("Single cells have been normalized!")
# check to see if the features have been normalized
print(normalized_df.shape)
normalized_df.head()

Single cells have been normalized!
(240048, 3476)


Unnamed: 0,Metadata_Well,Metadata_FOV,Metadata_Time,Metadata_ImageNumber,Metadata_Nuclei_Number_Object_Number,Metadata_plate,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,...,channel_DNA_cls_feature_90,channel_DNA_cls_feature_91,channel_DNA_cls_feature_92,channel_DNA_cls_feature_93,channel_DNA_cls_feature_94,channel_DNA_cls_feature_95,channel_DNA_cls_feature_96,channel_DNA_cls_feature_97,channel_DNA_cls_feature_98,channel_DNA_cls_feature_99
0,C-02,1,10,10,10,1,7914,Staurosporine,0.0,negative,...,-0.710485,-0.662466,2.233475,0.756831,1.42457,-0.269752,-0.33276,0.890643,1.308059,1.560015
1,C-02,1,10,10,100,1,7914,Staurosporine,0.0,negative,...,1.457101,0.426141,-1.391238,0.003681,1.144096,0.921586,0.210286,1.524538,-0.778636,-1.817696
2,C-02,1,10,10,101,1,7914,Staurosporine,0.0,negative,...,0.103895,1.295239,2.430735,0.139302,-0.958964,-1.067174,0.927418,0.002792,-0.532758,-1.580211
3,C-02,1,10,10,102,1,7914,Staurosporine,0.0,negative,...,-1.349562,-0.785948,-0.290699,-0.022681,1.233795,0.457936,0.703224,-0.54456,2.004282,-0.529761
4,C-02,1,10,10,103,1,7914,Staurosporine,0.0,negative,...,-0.832564,-0.360085,0.70572,0.838482,0.551665,-0.407396,2.894587,-0.577285,-1.094789,-0.016602
