In [1]:
import pathlib
import sys

import lancedb
import pandas as pd
from pycytominer.cyto_utils import output

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# load in platemap file as a pandas dataframe
platemap_path = pathlib.Path("../../data/").resolve()

# directory where the annotated parquet files are saved to
input_dir = pathlib.Path("../data/annotated_data")
input_dir.mkdir(exist_ok=True)

# directory for the output combined files
output_dir = pathlib.Path("../data/combined_terminal_data")
output_dir.mkdir(exist_ok=True)

In [3]:
# dictionary with each run for the cell type
dict_of_inputs = {
    "run_20231017ChromaLive_6hr_4ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{input_dir}/run_20231017ChromaLive_6hr_4ch_MaxIP_sc.parquet"
        ).resolve(strict=True),
        # same file name but different path
        "output_path": pathlib.Path(
            f"{output_dir}/20231017ChromaLive_6hr_4ch_MaxIP_sc.parquet"
        ).resolve(),
    },
    "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{input_dir}/20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP_sc.parquet"
        ).resolve(strict=True),
        "output_path": pathlib.Path(
            f"{output_dir}/20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP_sc.parquet"
        ).resolve(),
    },
}

### load the database into the memory

In [4]:
# set and connect to the db
# create the database object
uri = pathlib.Path("../../data/objects_db").resolve()
db = lancedb.connect(uri)
# get the db schema and tables
db.table_names()
# load table
table = db["1.masked_images"]
location_metadata_df = table.to_pandas()
print(location_metadata_df.shape)
location_metadata_df.head()
# change frame to Metadata_Time
location_metadata_df.rename(columns={"frame": "Metadata_Time"}, inplace=True)
# add 1 to Metadata_Time to match the timepoints in the single cell data
location_metadata_df["Metadata_Time"] = location_metadata_df["Metadata_Time"] + 1
# change formatting to leading 4 zeros
location_metadata_df["Metadata_Time"] = location_metadata_df["Metadata_Time"].apply(
    lambda x: f"{x:04}"
)
print(location_metadata_df.shape)
location_metadata_df.head()

(15557, 8)
(15557, 8)


Unnamed: 0,image_set_name,Metadata_Time,object_id,x,y,mask_path,mask_file_name,mask_file_path
0,E-11_F0002,1,1,120.634918,15.555555,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
1,E-11_F0002,1,2,293.525635,23.012821,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
2,E-11_F0002,1,3,1115.543457,20.380434,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
3,E-11_F0002,1,4,46.547619,38.988094,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
4,E-11_F0002,1,5,1820.597778,37.445652,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...


In [5]:
location_metadata_df["Metadata_Time"].unique()

array(['0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008',
       '0009', '0010', '0011', '0012', '0013', '0014'], dtype=object)

In [6]:
# split the dataframes by terminal time and non-terminal time
terminal_location_metadata_df = location_metadata_df.loc[
    location_metadata_df["Metadata_Time"] == "0014"
]
print(terminal_location_metadata_df.shape)

(675, 8)


### Merge the terminal and single cell data

In [7]:
main_df = pd.read_parquet(
    dict_of_inputs["run_20231017ChromaLive_6hr_4ch_MaxIP"]["source_path"]
)
terminal_df = pd.read_parquet(
    dict_of_inputs["20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP"]["source_path"]
)

print(main_df.shape)
print(terminal_df.shape)

(19382, 2325)
(5916, 2326)


In [12]:
main_df.head()
main_df["Metadata_object_id"].unique()

KeyError: 'Metadata_object_id'

In [8]:
terminal_df.head()

Unnamed: 0,Metadata_image_set_name,Metadata_Time,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,...,Nuclei_Texture_Variance_CL_488_2_3_02_256,Nuclei_Texture_Variance_CL_488_2_3_03_256,Nuclei_Texture_Variance_CL_561_3_00_256,Nuclei_Texture_Variance_CL_561_3_01_256,Nuclei_Texture_Variance_CL_561_3_02_256,Nuclei_Texture_Variance_CL_561_3_03_256,Nuclei_Texture_Variance_DNA_3_00_256,Nuclei_Texture_Variance_DNA_3_01_256,Nuclei_Texture_Variance_DNA_3_02_256,Nuclei_Texture_Variance_DNA_3_03_256
0,C-02_F0001,1,1304,30,1,C-02,162,Staurosporine,0.0,negative,...,0.0,0.0,0.279091,0.274927,0.275532,0.280752,0.0,0.0,0.0,0.0
1,C-02_F0001,1,1171,96,1,C-02,162,Staurosporine,0.0,negative,...,0.004525,0.007092,0.121901,0.120757,0.125966,0.121308,0.0,0.0,0.0,0.0
2,C-02_F0001,1,1679,95,1,C-02,162,Staurosporine,0.0,negative,...,0.0,0.0,0.004073,0.004016,0.004739,0.003484,0.0,0.0,0.0,0.0
3,C-02_F0001,1,553,87,1,C-02,162,Staurosporine,0.0,negative,...,3.050327,3.247508,0.002387,0.002353,0.018176,0.0,0.0,0.0,0.0,0.0
4,C-02_F0001,1,830,25,1,C-02,162,Staurosporine,0.0,negative,...,0.0,0.0,0.024375,0.028145,0.031217,0.014703,0.0,0.0,0.0,0.0
