# Normalize annotated single cells using negative control (DMSO 0.025% and DMSO 0.100%)

## Import libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize
from pycytominer.cyto_utils import output

## Set paths and variables

In [2]:
# directory where the combined parquet files are located
data_dir = pathlib.Path("./data/combined_data")

# directory where the normalized parquet file is saved to;
# parents=True so the cell also works when ./data does not exist yet
# (e.g. fresh checkout), exist_ok=True keeps re-runs idempotent
output_dir = pathlib.Path("./data/normalized_data")
output_dir.mkdir(parents=True, exist_ok=True)

# input path for the combined annotated parquet file
# (kept as a str because later cells pass it on as a plain string)
combined_file_path = str(data_dir / "PBMC_sc.parquet")

# output path for the normalized parquet file
normalized_output_file = str(output_dir / "PBMC_sc_norm.parquet")

## Normalize annotated data with the standardize method, using the negative control as reference

In [3]:
# load the combined, annotated single-cell profiles from the parquet file
combined_df = pd.read_parquet(combined_file_path)
print("Normalizing annotated merged single cells!")

# query selecting the rows used as the normalization reference (negative control):
# DMSO as inhibitor at concentration 0.025, with DMSO as inducer1
# NOTE(review): the notebook title also mentions DMSO 0.100% -- confirm whether
# those wells should be part of the reference as well
negative_control_query = "Metadata_inhibitor == 'DMSO' and Metadata_inhibitor_concentration == 0.025 and Metadata_inducer1 == 'DMSO'"

# normalize the raw merged single-cell features against the negative control
normalized_df = normalize(
    profiles=combined_df,
    samples=negative_control_query,
    method="standardize",
)

# write the normalized profiles to disk in parquet format
output(df=normalized_df, output_filename=normalized_output_file, output_type="parquet")
print(f"Single cells have been normalized for PBMC cells and saved to {pathlib.Path(normalized_output_file).name} !")

Normalizing annotated merged single cells!
Single cells have been normalized for SH-SY5Y cells and saved to SHSY5Y_sc_norm.parquet !


In [4]:
# sanity check: print the (rows, columns) of the normalized frame,
# then preview its first five rows as the cell output
print(f"{normalized_df.shape}")
normalized_df.head(n=5)

(597902, 2926)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,0.025742,0.016018,0.294581,0.313403,0.340904,0.304771,0.077819,0.076181,0.088,0.085886
1,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.040428,-0.038309,1.540182,1.422456,1.459884,1.458574,0.014013,0.010482,0.008044,0.008553
2,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.049183,-0.052954,0.002619,0.02267,0.01416,-0.006548,0.019567,0.035026,0.028281,0.011552
3,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.057027,-0.058113,-0.018521,-0.023614,-0.013422,-0.010141,-0.005182,-0.002528,0.003988,-0.004412
4,SH-SY5Y,B13,3765,6,Media ctr,,,media ctr,,,...,-0.029618,-0.0241,-0.052448,-0.061118,-0.0535,-0.041029,0.010896,0.002573,0.008965,0.018884
