# Normalize annotated single cells using negative control

## Import libraries

In [1]:
import pathlib
import pprint

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

## Set paths and variables

In [2]:
# path to the combined single-cell parquet file that will be normalized
data_file_path = pathlib.Path("../data/3.combined_data/combined_data.parquet")

# path of the normalized parquet file to write
# NOTE: despite its name, `output_dir` holds a file path, not a directory
output_dir = pathlib.Path("../data/4.normalized_data/normalized_profile.parquet")
# create the folder that will contain the output file (no error if it exists)
output_dir.parent.mkdir(parents=True, exist_ok=True)

## Define dict of paths

## Normalize the annotated data with the standardize method, using the negative control as reference

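The normalization reference is the negative control at the first time point (`NMF_compound == 'Staurosporine'`, `NMF_dose == 0.0`, `NMF_Time == 0.0`).
The code cells below normalize all time steps together in a single `normalize` call against that reference; the data is not split by time step.
Note: if normalization per time step is required instead, the data would need to be split by `NMF_Time`, each time step normalized separately, and the results concatenated back together.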

In [3]:
# load the combined single-cell profiles and replace the stored index with a fresh 0..n-1 index
annotated_df = pd.read_parquet(data_file_path).reset_index(drop=True)
# columns whose name contains "NMF" are collected as metadata columns
nmfs = [column for column in annotated_df.columns if "NMF" in column]
# every remaining column is treated as a feature to be normalized
features = [column for column in annotated_df.columns if "NMF" not in column]

In [4]:
# normalize the annotated single-cell profiles against the negative control
normalized_df = normalize(
    # annotated single-cell dataframe (metadata columns + raw features)
    profiles=annotated_df,
    # columns to normalize
    features=features,
    # metadata columns (every column whose name contains "NMF")
    meta_features=nmfs,
    # query selecting the reference samples: Staurosporine at dose 0.0 and time 0.0
    samples="NMF_compound == 'Staurosporine' and NMF_dose == 0.0 and NMF_Time == 0.0",
    # normalization method
    method="standardize",
)


# write the normalized profiles to the parquet path stored in `output_dir`
output(
    normalized_df,
    output_type="parquet",
    output_filename=output_dir,
)
# report the table dimensions and preview the first rows of the normalized profiles
print(normalized_df.shape)
normalized_df.head()

(1700, 2332)


Unnamed: 0,NMF_plate,NMF_Well,NMF_number_of_singlecells,NMF_compound,NMF_dose,NMF_control,NMF_ImageNumber,NMF_FOV,NMF_Time,NMF_Cells_Number_Object_Number,...,Nuclei_Texture_Variance_CL_488_2_3_02_256,Nuclei_Texture_Variance_CL_488_2_3_03_256,Nuclei_Texture_Variance_CL_561_3_00_256,Nuclei_Texture_Variance_CL_561_3_01_256,Nuclei_Texture_Variance_CL_561_3_02_256,Nuclei_Texture_Variance_CL_561_3_03_256,Nuclei_Texture_Variance_DNA_3_00_256,Nuclei_Texture_Variance_DNA_3_01_256,Nuclei_Texture_Variance_DNA_3_02_256,Nuclei_Texture_Variance_DNA_3_03_256
0,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,6,...,-0.245708,-0.205495,-0.791148,-0.721678,-0.59314,-0.601398,-0.237431,-0.235982,-0.235295,-0.23878
1,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,7,...,-0.245708,-0.205495,-0.511054,-0.721678,-0.808483,-0.286473,-0.237431,-0.235982,-0.235295,-0.23878
2,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,9,...,-0.245708,-0.205495,-0.791148,-0.721678,-0.808483,-0.737941,-0.237431,-0.235982,-0.235295,-0.23878
3,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,11,...,-0.005226,-0.205495,1.808734,1.726902,1.525513,1.515847,-0.237431,-0.235982,-0.235295,-0.23878
4,1,C-02,180,Staurosporine,0.0,negative,1,1,0.0,12,...,-0.203297,-0.16741,-0.547609,-0.460408,-0.531437,-0.520646,-0.237431,-0.235982,-0.235295,-0.23878
