# Normalize annotated single cells using negative control (DMSO 0.025% and DMSO 0.100%)

## Import libraries

In [None]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

## Set paths and variables

In [None]:
# directory where the combined parquet files are located
data_dir = pathlib.Path("../data/annotated_data")

# directory where the normalized parquet files are saved to
# (a sibling of data_dir under ../data)
output_dir = pathlib.Path("../data/normalized_data")
# parents=True so the cell also works when ../data does not exist yet
output_dir.mkdir(parents=True, exist_ok=True)

## Define dict of paths

In [None]:
# analysis file name for each run, keyed by the run's folder name under data_dir
# NOTE(review): these inputs are .sqlite files although data_dir is described as
# holding parquet files — confirm the expected input format.
run_analysis_files = {
    "20230920ChromaLiveTL_24hr4ch_MaxIP": "timelapse_4ch_analysis.sqlite",
    "20231017ChromaLive_6hr_4ch_MaxIP": "timelapse_4ch_analysis.sqlite",
    "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP": "timelapse_2ch_analysis.sqlite",
}

# dictionary with each run for the cell type:
#   "run_<folder>" -> {"annotated_file_path": input file, "output_file_path": normalized parquet}
# both paths are resolved to absolute paths
dict_of_inputs = {
    f"run_{run_folder}": {
        "annotated_file_path": (data_dir / run_folder / analysis_file).resolve(),
        "output_file_path": (
            output_dir / f"run_{run_folder}_norm.parquet"
        ).resolve(),
    }
    for run_folder, analysis_file in run_analysis_files.items()
}

## Normalize with standardize method with negative control on annotated data

In [None]:
# show the configured paths for every run (one line per run)
for run_name, run_paths in dict_of_inputs.items():
    print(f"{run_name}: {run_paths}")

# TODO: read each run's annotated file into a dataframe and normalize it per run,
# using the same normalize(...) arguments as the single-file cell below:
# samples = DMSO negative-control query, method = "standardize".

In [3]:
# NOTE(review): combined_file_path and normalized_output_file are not assigned in any
# cell shown in this notebook — confirm they are defined before running this cell.

# load the annotated single-cell profiles from parquet
combined_df = pd.read_parquet(combined_file_path)
print("Normalizing annotated merged single cells!")

# query selecting the samples used as the normalization reference (negative control)
# NOTE(review): the notebook title also lists DMSO 0.100% as a negative control, but
# this query only selects concentration 0.025 — confirm which is intended.
negative_control_query = (
    "Metadata_inhibitor == 'DMSO' "
    "and Metadata_inhibitor_concentration == 0.025 "
    "and Metadata_inducer1 == 'DMSO'"
)

# normalize the annotated profiles against the negative-control samples
normalized_df = normalize(
    profiles=combined_df,
    samples=negative_control_query,
    method="standardize",
)

# write the normalized profiles out as a parquet file
output(
    df=normalized_df,
    output_filename=normalized_output_file,
    output_type="parquet",
)

# report only the file name, not the full path
saved_file_name = pathlib.Path(normalized_output_file).name
print(
    f"Single cells have been normalized for PBMC cells and saved to {saved_file_name} !"
)

In [None]:
# sanity check on the normalized output: print the dataframe's shape,
# then preview the first rows (displayed as the cell's last expression)
print(normalized_df.shape)
normalized_df.head()