# Normalize annotated single cells using negative control (DMSO 0.025% and DMSO 0.100%)

## Import libraries

In [1]:
import pathlib
import pandas as pd

from pycytominer import normalize
from pycytominer.cyto_utils import output

## Set paths and variables

In [2]:
# directory where the combined parquet files are located
data_dir = pathlib.Path("./data/combined_data")

# directory where the normalized parquet file is saved to
output_dir = pathlib.Path("./data/normalized_data")
# parents=True so a fresh checkout (no ./data directory yet) does not raise
# FileNotFoundError; exist_ok=True keeps re-running this cell harmless
output_dir.mkdir(parents=True, exist_ok=True)

# define input path for combined annotated parquet file
# (kept as str, as in the original cell, because downstream cells consume a string path)
combined_file_path = str(data_dir / "SHSY5Y_sc.parquet")

# define output path for normalized parquet file
normalized_output_file = str(output_dir / "SHSY5Y_sc_norm.parquet")

## Normalize with standardize method with negative control on annotated data

In [3]:
# load the annotated, merged single-cell profiles from the combined parquet file
combined_df = pd.read_parquet(combined_file_path)
print("Normalizing annotated merged single cells!")

# pandas query selecting the negative-control cells used as the normalization reference
# NOTE(review): only the inhibitor concentration (0.025) is constrained here; the
# notebook title also mentions DMSO 0.100% -- confirm whether the inducer
# concentration should be part of this filter.
negative_control_query = (
    "Metadata_inhibitor == 'DMSO'"
    " and Metadata_inhibitor_concentration == 0.025"
    " and Metadata_inducer1 == 'DMSO'"
)

# standardize the profiles against the negative-control reference samples;
# features/metadata arguments are left at pycytominer's defaults
normalized_df = normalize(
    profiles=combined_df,
    samples=negative_control_query,
    method="standardize",
)

# write the normalized profiles out as a parquet file
output(
    df=normalized_df,
    output_filename=normalized_output_file,
    output_type="parquet",
)
print(f"Single cells have been normalized for SHSY5Y cells and saved to {pathlib.Path(normalized_output_file).name} !")

Normalizing annotated merged single cells!
Single cells have been normalized for SHSY5Y cells and saved to SHSY5Y_sc_norm.parquet !


In [4]:
# sanity check: report (rows, columns) of the normalized frame, then preview its first rows
normalized_shape = normalized_df.shape
print(normalized_shape)
normalized_df.head(n=5)

(600816, 2926)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,B13,3780,6,Media ctr,,,media ctr,,,...,0.025641,0.015906,0.294803,0.313566,0.341171,0.304999,0.078009,0.07637,0.088215,0.086094
1,SH-SY5Y,B13,3780,6,Media ctr,,,media ctr,,,...,-0.040634,-0.03851,1.542641,1.424443,1.462092,1.46085,0.014053,0.010517,0.008071,0.00858
2,SH-SY5Y,B13,3780,6,Media ctr,,,media ctr,,,...,-0.049402,-0.053179,0.002317,0.022355,0.013859,-0.006872,0.01962,0.035118,0.028355,0.011586
3,SH-SY5Y,B13,3780,6,Media ctr,,,media ctr,,,...,-0.059513,-0.059833,-0.07601,-0.079124,-0.072404,-0.071611,-0.031191,-0.030959,-0.028872,-0.029995
4,SH-SY5Y,B13,3780,6,Media ctr,,,media ctr,,,...,-0.055622,-0.057129,0.021522,0.019906,0.017246,0.007136,-0.059518,-0.056822,-0.060165,-0.059395
