# Annotate merged single cells with metadata from platemap file

## Import libraries

In [1]:
import sys
import pathlib

import pandas as pd
from pycytominer import annotate
from pycytominer.cyto_utils import output

## Set paths and variables

In [2]:
# read the platemap (plate layout metadata) CSV into a pandas dataframe
platemap_path = pathlib.Path("../../../metadata") / "Interstellar_plate2_platemap.csv"
platemap_df = pd.read_csv(platemap_path)

# input directory: holds the converted parquet files, one per run
data_dir = pathlib.Path("data") / "converted_data"

# destination directory for the annotated parquet files;
# created here when missing (its parent directory must already exist)
output_dir = pathlib.Path("data") / "annotated_data"
output_dir.mkdir(exist_ok=True)

In [3]:
# Map each batch name ("batch_1" ... "batch_7") to the information needed to
# annotate it. Every entry has the same shape and differs only in the batch
# number, so the dictionary is generated instead of being written out by hand.
run_info_dictionary = {
    f"batch_{batch_number}": {
        # path to outputted parquet file, stored as a string
        "single_cell_path": str(data_dir / f"PBMC_batch_{batch_number}.parquet"),
    }
    # range(1, 8) yields the seven batches 1 through 7, in order
    for batch_number in range(1, 8)
}

## Annotate merged single cells

In [4]:
# Annotate every run in run_info_dictionary and write one parquet file per run
for PBMC_run, info in run_info_dictionary.items():
    # read this run's converted single-cell profiles into a dataframe
    single_cell_df = pd.read_parquet(info["single_cell_path"])
    output_file = str(output_dir / f"{PBMC_run}_sc.parquet")
    print(f"Adding annotations to merged single cells for {PBMC_run}!")

    # merge the platemap metadata onto the extracted single-cell features
    annotated_df = annotate(
        profiles=single_cell_df,
        platemap=platemap_df,
        join_on=["Metadata_well_id", "Image_Metadata_Well"],
    )

    # columns moved next to the front of the dataframe (for easy visualization):
    # (target position, current column name, column name after the move)
    front_columns = [
        (1, "Metadata_Well", "Metadata_Well"),
        (2, "Metadata_number_of_singlecells", "Metadata_number_of_singlecells"),
        (3, "Image_Metadata_Site", "Metadata_Site"),
    ]
    # take all three columns out first, so that the insert positions below
    # are counted against the dataframe without any of them
    popped_columns = [
        (position, new_name, annotated_df.pop(current_name))
        for position, current_name, new_name in front_columns
    ]
    # put them back at positions 1-3 (the site column gets its new name here)
    for position, new_name, column_values in popped_columns:
        annotated_df.insert(position, new_name, column_values)

    # write the annotated dataframe to a parquet file
    output(df=annotated_df, output_filename=output_file, output_type="parquet")
    print(f"Annotations have been added to {PBMC_run} and saved!")

Adding annotations to merged single cells for SHSY5Y_first_run!
Annotations have been added to SHSY5Y_first_run and saved!
Adding annotations to merged single cells for SHSY5Y_second_run!
Annotations have been added to SHSY5Y_second_run and saved!


In [5]:
# sanity check on the result of the annotation loop: annotated_df is reassigned
# on every iteration, so only the last run processed is inspected here.
# Print its (rows, columns) shape, then display its first rows as the cell output.
print(annotated_df.shape)
annotated_df.head()

(290878, 2926)


Unnamed: 0,Metadata_cell_type,Metadata_Well,Metadata_number_of_singlecells,Metadata_incubation inducer (h),Metadata_inhibitor,Metadata_inhibitor_concentration,Metadata_inhibitor_concentration_unit,Metadata_inducer1,Metadata_inducer1_concentration,Metadata_inducer1_concentration_unit,...,Nuclei_Texture_Variance_CorrGasdermin_3_02_256,Nuclei_Texture_Variance_CorrGasdermin_3_03_256,Nuclei_Texture_Variance_CorrMito_3_00_256,Nuclei_Texture_Variance_CorrMito_3_01_256,Nuclei_Texture_Variance_CorrMito_3_02_256,Nuclei_Texture_Variance_CorrMito_3_03_256,Nuclei_Texture_Variance_CorrPM_3_00_256,Nuclei_Texture_Variance_CorrPM_3_01_256,Nuclei_Texture_Variance_CorrPM_3_02_256,Nuclei_Texture_Variance_CorrPM_3_03_256
0,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,0.675652,0.734189,2.200685,1.985866,2.078268,2.242578,0.375111,0.334665,0.375993,0.409973
1,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,2.452565,2.508087,5.378878,4.929226,4.997064,5.105212,1.846014,1.585358,1.575559,1.594405
2,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,0.726404,0.704592,0.429291,0.407217,0.422924,0.405093,0.256297,0.256092,0.259332,0.258697
3,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,0.911628,0.889477,0.968423,0.886025,0.857794,0.864168,0.433241,0.415915,0.42474,0.390825
4,SH-SY5Y,I13,3803,6,Media ctr,,,media ctr,,,...,1.096619,0.911121,1.352762,1.404816,1.321704,1.284312,0.941187,0.975778,0.952819,0.905625
