## Normalize the merged single cells of each plate using the `standardize` method

## Import libraries

In [1]:
import sys
import pathlib
import os

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

sys.path.append("../utils")
import extraction_utils as sc_utils

In [2]:
# directory that receives the normalized parquet files
output_dir = pathlib.Path("./data/normalized_data")
# create the directory (and any missing parents); an already existing directory is not an error
output_dir.mkdir(parents=True, exist_ok=True)

# map every plate name to the parquet file produced for it by the annotate function
plate_info_dictionary = {
    plate_name: {
        "annotated_path": str(
            pathlib.Path(f"./data/annotated_data/{plate_name}_sc.parquet")
        )
    }
    for plate_name in ("Plate_1", "Plate_2", "Plate_3", "Plate_3_prime")
}

In [3]:
# run the normalization separately for every plate in the dictionary
for plate, info in plate_info_dictionary.items():
    # annotated single-cell profiles for this plate
    annotated_df = pd.read_parquet(info["annotated_path"])
    # destination of the normalized profiles inside the output directory
    output_file = str(output_dir / f"{plate}_sc_norm.parquet")
    print(f"Normalizing annotated merged single cells for {plate}!")

    # apply the "standardize" method; every other normalize() option keeps its default
    normalized_df = normalize(profiles=annotated_df, method="standardize")

    # write the normalized profiles to disk in parquet format
    output(df=normalized_df, output_filename=output_file, output_type="parquet")
    print(f"Single cells have been normalized for {plate} and saved!")

Normalizing annotated merged single cells for Plate_1!
Single cells have been normalized for Plate_1 and saved!
Normalizing annotated merged single cells for Plate_2!
Single cells have been normalized for Plate_2 and saved!
Normalizing annotated merged single cells for Plate_3!
Single cells have been normalized for Plate_3 and saved!
Normalizing annotated merged single cells for Plate_3_prime!
Single cells have been normalized for Plate_3_prime and saved!


In [4]:
# sanity check on the last plate processed by the loop: report the frame's
# dimensions, then preview its first five rows to see whether the values look normalized
print(normalized_df.shape)
normalized_df.head(n=5)

(4098, 1596)


Unnamed: 0,Metadata_WellRow,Metadata_Well,Metadata_number_of_singlecells,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_seed_density,Metadata_ImageNumber,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,...,Nuclei_Texture_Variance_DAPI_3_02_256,Nuclei_Texture_Variance_DAPI_3_03_256,Nuclei_Texture_Variance_GFP_3_00_256,Nuclei_Texture_Variance_GFP_3_01_256,Nuclei_Texture_Variance_GFP_3_02_256,Nuclei_Texture_Variance_GFP_3_03_256,Nuclei_Texture_Variance_RFP_3_00_256,Nuclei_Texture_Variance_RFP_3_01_256,Nuclei_Texture_Variance_RFP_3_02_256,Nuclei_Texture_Variance_RFP_3_03_256
0,G,G4,388,4,NF1,WT,4000,1651,1,1,...,-0.021347,-0.123105,-0.344965,-0.359278,-0.350487,-0.347132,-0.411257,-0.405723,-0.417758,-0.415427
1,G,G4,388,4,NF1,WT,4000,1651,2,2,...,0.191027,0.174751,-0.422557,-0.415937,-0.407025,-0.416471,-0.50768,-0.501187,-0.512384,-0.514265
2,G,G4,388,4,NF1,WT,4000,1651,3,3,...,-0.439765,-0.483017,-0.463172,-0.459738,-0.457248,-0.460143,-0.515535,-0.514713,-0.510398,-0.504958
3,G,G4,388,4,NF1,WT,4000,1651,4,4,...,-0.641173,-0.663041,-0.480264,-0.478937,-0.478272,-0.477565,-0.442685,-0.416561,-0.430885,-0.455491
4,G,G4,388,4,NF1,WT,4000,1651,5,5,...,-0.342317,-0.365235,-0.409814,-0.382842,-0.410921,-0.410867,-0.472747,-0.461819,-0.472807,-0.485525
