# Annotate merged single cells with metadata from platemap file

## Import libraries

In [1]:
import argparse
import pathlib
import sys

import lancedb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pycytominer import annotate
from pycytominer.cyto_utils import output

try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

  from .autonotebook import tqdm as notebook_tqdm


## Set paths and variables

In [2]:
# load in platemap file as a pandas dataframe
platemap_path = pathlib.Path("../../data/").resolve()

# directory where parquet files are located
data_dir = pathlib.Path("../data/0.converted_data").resolve()

# directory where the annotated parquet files are saved to
output_dir = pathlib.Path("../data/1.annotated_data")
output_dir.mkdir(exist_ok=True)

if not in_notebook:
    print("Running as script")
    # set up arg parser
    parser = argparse.ArgumentParser(description="Single cell extraction")

    parser.add_argument(
        "--well_fov",
        type=str,
        help="Path to the input directory containing the tiff images",
    )

    args = parser.parse_args()
    well_fov = args.well_fov
else:
    print("Running in a notebook")
    well_fov = "C-02_F0003"

Running in a notebook


In [3]:
# dictionary with each run for the cell type
dict_of_inputs = {
    "run_20231017ChromaLive_6hr_4ch_MaxIP": {
        "source_path": pathlib.Path(f"{data_dir}/timelapse/{well_fov}.parquet").resolve(
            strict=True
        ),
        "platemap_path": pathlib.Path(f"{platemap_path}/platemap_6hr_4ch.csv").resolve(
            strict=True
        ),
        "output_file": pathlib.Path(
            f"{output_dir}/timelapse/{well_fov}_sc.parquet"
        ).resolve(),
    },
    "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{data_dir}/endpoint/{well_fov}.parquet"
        ).resolve(),
        "platemap_path": pathlib.Path(
            f"{platemap_path}/platemap_AnnexinV_2ch.csv"
        ).resolve(strict=True),
        "output_file": pathlib.Path(
            f"{output_dir}/endpoint/{well_fov}_sc.parquet"
        ).resolve(),
    },
}

## Annotate merged single cells

In [4]:
single_cell_df = pd.read_parquet(f"{data_dir}/timelapse/{well_fov}.parquet")
single_cell_df.head()

Unnamed: 0,Metadata_ImageNumber,Image_Metadata_FOV,Metadata_number_of_singlecells,Image_Metadata_Time,Image_Metadata_Well,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_ImageNumber_1,Metadata_ImageNumber_2,...,Nuclei_Texture_Variance_CL_488_2_3_02_256,Nuclei_Texture_Variance_CL_488_2_3_03_256,Nuclei_Texture_Variance_CL_561_3_00_256,Nuclei_Texture_Variance_CL_561_3_01_256,Nuclei_Texture_Variance_CL_561_3_02_256,Nuclei_Texture_Variance_CL_561_3_03_256,Nuclei_Texture_Variance_DNA_3_00_256,Nuclei_Texture_Variance_DNA_3_01_256,Nuclei_Texture_Variance_DNA_3_02_256,Nuclei_Texture_Variance_DNA_3_03_256
0,1,3,186,1,C-02,1,1,1,1,1,...,0.0,0.0,0.029385,0.018512,0.029509,0.038697,0.0,0.0,0.0,0.0
1,1,3,186,1,C-02,2,2,2,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,3,186,1,C-02,3,3,3,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,3,186,1,C-02,4,4,4,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,3,186,1,C-02,5,5,9,1,1,...,0.0,0.0,0.03392,0.029622,0.027755,0.023243,0.0,0.0,0.0,0.0


In [5]:
for data_run, info in dict_of_inputs.items():
    # load in converted parquet file as df to use in annotate function
    single_cell_df = pd.read_parquet(info["source_path"])
    print(single_cell_df.shape)
    single_cell_df = single_cell_df.rename(
        columns={
            "Image_Metadata_FOV": "Metadata_FOV",
            "Image_Metadata_Time": "Metadata_Time",
        },
    )
    platemap_df = pd.read_csv(info["platemap_path"])

    print(f"Adding annotations to merged single cells for {data_run}!")

    # add metadata from platemap file to extracted single cell features
    annotated_df = annotate(
        profiles=single_cell_df,
        platemap=platemap_df,
        join_on=["Metadata_well", "Image_Metadata_Well"],
    )
    print(annotated_df.shape)

    # move metadata well and single cell count to the front of the df (for easy visualization in python)
    well_column = annotated_df.pop("Metadata_Well")
    singlecell_column = annotated_df.pop("Metadata_number_of_singlecells")
    # insert the column as the second index column in the dataframe
    annotated_df.insert(1, "Metadata_Well", well_column)
    annotated_df.insert(2, "Metadata_number_of_singlecells", singlecell_column)

    # rename metadata columns to match the expected column names
    columns_to_rename = {
        "Nuclei_Location_Center_Y": "Metadata_Nuclei_Location_Center_Y",
        "Nuclei_Location_Center_X": "Metadata_Nuclei_Location_Center_X",
    }
    # Image_FileName cols
    for col in annotated_df.columns:
        if "Image_FileName" in col:
            columns_to_rename[col] = f"Metadata_{col}"
        elif "Image_PathName" in col:
            columns_to_rename[col] = f"Metadata_{col}"
        elif "TrackObjects" in col:
            columns_to_rename[col] = f"Metadata_{col}"
    # rename metadata columns
    annotated_df.rename(columns=columns_to_rename, inplace=True)

    info["output_file"].parent.mkdir(exist_ok=True, parents=True)

    # save annotated df as parquet file
    output(
        df=annotated_df,
        output_filename=info["output_file"],
        output_type="parquet",
    )
    print(
        f"Annotations have been added to {data_run} and saved to {info['output_file']}"
    )
    # check last annotated df to see if it has been annotated correctly
    print(annotated_df.shape)
    annotated_df.head()
del annotated_df

(2426, 2318)
Adding annotations to merged single cells for run_20231017ChromaLive_6hr_4ch_MaxIP!
(2426, 2322)
Annotations have been added to run_20231017ChromaLive_6hr_4ch_MaxIP and saved to /home/lippincm/4TB_A/live_cell_timelapse_apoptosis/5.process_CP_features/data/1.annotated_data/timelapse/C-02_F0003_sc.parquet
(2426, 2322)
(138, 1202)
Adding annotations to merged single cells for 20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP!
(1656, 1206)
Annotations have been added to 20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP and saved to /home/lippincm/4TB_A/live_cell_timelapse_apoptosis/5.process_CP_features/data/1.annotated_data/endpoint/C-02_F0003_sc.parquet
(1656, 1206)
