# Normalize annotated single cells using negative control (DMSO 0.025% and DMSO 0.100%)

## Import libraries

In [1]:
import sys
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

sys.path.append("../../utils")
import sc_extraction_utils as sc_utils

## Set paths and variables

In [2]:
# directory the normalized parquet files are written to
output_dir = pathlib.Path("./data/")

# per-run settings, keyed by run name; "annotated_path" is the parquet file
# produced by the annotate function for that run
run_info_dictionary = {
    run_name: {"annotated_path": str(pathlib.Path(annotated_parquet))}
    for run_name, annotated_parquet in (
        ("SHSY5Y_first_run", "./data/SHSY5Y_first_run_sc.parquet"),
        ("SHSY5Y_second_run", "./data/SHSY5Y_second_run_sc.parquet"),
    )
}

## Normalize annotated data with the standardize method, using the negative control as reference

In [3]:
# query string selecting the cells used as the normalization reference (negative control)
# NOTE(review): the notebook title mentions DMSO at 0.025% and 0.100%, but this query
# only matches an inhibitor concentration of 0.025 — confirm which is intended
negative_control_query = "Metadata_inhibitor == 'DMSO' and Metadata_inhibitor_concentration == 0.025 and Metadata_inducer1 == 'DMSO'"

# normalize every run on its own and save one parquet file per run
for SHSY5Y_run, run_entry in run_info_dictionary.items():
    # destination of the normalized profiles for this run
    normalized_path = str(pathlib.Path(f"{output_dir}/{SHSY5Y_run}_sc_norm.parquet"))

    # annotated raw merged single cell features for this run
    annotated_sc_df = pd.read_parquet(run_entry["annotated_path"])
    print(f"Normalizing annotated merged single cells for {SHSY5Y_run}!")

    # apply the "standardize" method using the negative-control cells as reference;
    # normalized_df is reassigned on every iteration, so after the loop it holds
    # the last run only
    normalized_df = normalize(
        profiles=annotated_sc_df,
        samples=negative_control_query,
        method="standardize",
    )

    # write the normalized profiles as a parquet file
    output(
        df=normalized_df,
        output_filename=normalized_path,
        output_type="parquet",
    )
    print(f"Single cells have been normalized for {SHSY5Y_run} and saved!")

Normalizing annotated merged single cells for SHSY5Y_first_run!
Normalizing annotated merged single cells for SHSY5Y_second_run!


In [4]:
# normalized_df is whatever the normalization loop assigned last, so this
# shape and preview describe only the final run that was processed
print(normalized_df.shape)
normalized_df.head(n=5)

(290878, 2926)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,-0.071099,-0.068807,-0.049819,-0.059617,-0.058303,-0.047504,-0.057179,-0.058488,-0.057185,-0.053613
1,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,-0.028482,-0.026126,0.080149,0.071267,0.071292,0.077668,0.026606,0.013372,0.011421,0.013514
2,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,-0.069882,-0.069519,-0.122258,-0.129815,-0.131801,-0.12785,-0.063946,-0.063002,-0.063857,-0.062186
3,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,-0.065439,-0.065071,-0.10021,-0.108524,-0.112492,-0.107776,-0.053867,-0.053819,-0.054397,-0.054698
4,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,-0.061002,-0.06455,-0.084493,-0.085454,-0.091895,-0.089405,-0.024934,-0.021652,-0.024195,-0.025522
