In [1]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

In [2]:
# Input table: strict=True makes a missing file fail here rather than at read time
input_parquet = pathlib.Path("../data/CP_scDINO_features/combined_data.parquet")
combined_data_path = input_parquet.resolve(strict=True)

# Destination for the normalized table (resolved without strict, so the file
# does not need to exist yet)
output_parquet = pathlib.Path("../data/CP_scDINO_features/combined_norm_data.parquet")
normalized_data_output_path = output_parquet.resolve()

# Read the table, report its dimensions, then preview the first rows
combined_data = pd.read_parquet(combined_data_path)
print(combined_data.shape)
combined_data.head()

(184115, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_90_scDINO,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO
0,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,11,...,0.016777,-0.02297,-0.012937,0.031086,-0.063206,0.017341,0.006725,0.058906,0.051218,-0.021713
1,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,15,...,0.013826,-0.016597,-0.044203,0.030201,-0.03528,0.011751,0.000847,0.01861,0.040537,-0.014075
2,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,14,...,-0.024187,0.035309,-0.032714,0.032556,-0.092098,-0.004486,-0.032725,0.007328,0.033674,-0.004059
3,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,26,...,-0.012532,0.024189,-0.061641,0.044441,-0.067678,0.001005,0.004018,-0.056446,0.074201,-0.009546
4,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,35,...,-0.010496,-0.050797,-0.022384,0.036842,-0.092964,0.020148,-0.002594,0.074108,0.002868,-0.000477


In [3]:
# Split the column names into annotation (metadata) columns and feature columns.

# Any column whose name contains "Metadata" is treated as metadata
is_metadata_column = combined_data.columns.str.contains("Metadata")
metadata_features = combined_data.columns[is_metadata_column].tolist()

# Every remaining column is a feature. Index.difference returns the names
# sorted, so this list is in lexicographic order rather than in the column
# order of combined_data.
non_metadata_columns = combined_data.columns.difference(metadata_features)
feature_columns = non_metadata_columns.tolist()

In [5]:
# Normalize the single cell features against one reference population and
# write the result to parquet.
# NOTE(review): normalization happens in a single call over the whole table;
# nothing in this cell iterates over time points.

# unique time points present in the data
# NOTE(review): not used anywhere else in this cell
time_points = combined_data["Metadata_Time"].unique()

# NOTE(review): never filled in this cell
output_dict_of_normalized_dfs = {}

# Rows used as the normalization reference (negative control).
# NOTE(review): dose and time are compared against the string '0.0';
# presumably those columns hold strings - confirm against the input table.
reference_samples_query = "Metadata_compound == 'Staurosporine' and Metadata_dose == '0.0' and Metadata_Time == '0.0'"

# normalize the annotated raw merged single cell features
normalized_df = normalize(
    profiles=combined_data,
    features=feature_columns,
    meta_features=metadata_features,
    samples=reference_samples_query,
    # normalization method used
    method="standardize",
)

# persist the normalized profiles
output(
    normalized_df,
    output_filename=normalized_data_output_path,
    output_type="parquet",
)
print("Single cells have been normalized!")

# sanity check: dimensions and a preview of the normalized table
print(normalized_df.shape)
normalized_df.head()

Single cells have been normalized!
(184115, 3869)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,11,...,-0.734818,0.654381,-0.070655,0.813982,0.296032,-0.227765,1.785438,0.158278,-0.762728,0.725202
1,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,15,...,-0.468204,-0.666395,-0.12843,1.562595,0.045222,-0.489645,0.273701,-0.211752,-0.189933,0.283274
2,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,14,...,1.703526,-0.181057,0.025402,0.039498,-0.683365,-1.985459,-0.149541,-0.449522,0.561193,0.198248
3,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,26,...,1.238245,-1.402979,0.80188,0.694113,-0.43696,-0.348364,-2.542127,0.954522,0.149704,-0.263583
4,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,35,...,-1.899088,0.255286,0.305438,0.016286,0.42201,-0.642979,2.355769,-1.516771,0.829755,2.041394
