In [1]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

In [2]:
# set path to data

combined_data_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_data.parquet"
).resolve(strict=True)

# set output path
normalized_data_output_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm.parquet"
).resolve()

# load data
combined_data = pd.read_parquet(combined_data_path)
print(combined_data.shape)
combined_data.head()

(182804, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_90_scDINO,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO
0,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,101,...,0.034462,-0.005022,-0.032672,0.056519,-0.081751,0.035719,0.00865,0.017762,0.038345,-0.015309
1,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,111,...,0.008132,-0.048781,-0.029867,0.013783,-0.089631,0.029462,0.013753,-0.041825,0.045902,-0.025322
2,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,11,...,0.016777,-0.02297,-0.012937,0.031086,-0.063206,0.017341,0.006725,0.058906,0.051218,-0.021713
3,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,128,...,0.016239,0.026569,9.4e-05,0.040027,-0.120004,0.01214,0.008501,0.04885,0.048612,0.017343
4,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,132,...,-0.009832,-0.015505,-0.03555,0.037506,-0.102835,-0.030543,0.026912,-0.000418,0.092182,-0.001539


In [3]:
# Get columns that contain "Metadata"
metadata_features = combined_data.columns[
    combined_data.columns.str.contains("Metadata")
].tolist()

# get the feature columns
feature_columns = combined_data.columns.difference(metadata_features).to_list()

In [4]:
# Normalize the single cell data per time point

# get the unique time points
time_points = combined_data.Metadata_Time.unique()

output_dict_of_normalized_dfs = {}

# normalize annotated data
normalized_df = normalize(
    # df with annotated raw merged single cell features
    profiles=combined_data,
    features=feature_columns,
    meta_features=metadata_features,
    # specify samples used as normalization reference (negative control)
    samples="Metadata_compound == 'Staurosporine' and Metadata_dose == '0.0' and Metadata_Time == '0.0'",
    # normalization method used
    method="standardize",
)

output(
    normalized_df,
    output_filename=normalized_data_output_path,
    output_type="parquet",
)
print("Single cells have been normalized!")
# check to see if the features have been normalized
print(normalized_df.shape)
normalized_df.head()

Single cells have been normalized!
(182804, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,101,...,0.018387,-0.178214,1.589703,0.313944,1.126927,-0.143103,0.241127,-0.293259,-0.283715,1.434163
1,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,111,...,-1.811176,-0.059895,-1.208776,0.10275,0.845704,0.08393,-1.990931,-0.030848,-1.033722,-0.942127
2,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,11,...,-0.731998,0.654253,-0.075728,0.810937,0.30094,-0.22878,1.782329,0.153739,-0.763335,0.725093
3,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,128,...,1.339222,1.203907,0.509754,-0.711263,0.067196,-0.149771,1.40565,0.063245,2.16211,3.187469
4,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,132,...,-0.41992,-0.29961,0.344723,-0.25113,-1.851114,0.669517,-0.439855,1.576201,0.747753,0.895601
