In [1]:
import pathlib
import sys

import lancedb
import pandas as pd
from pycytominer.cyto_utils import output

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# resolved path of the top-level data directory (nothing is loaded here)
# NOTE(review): the name suggests the platemap file lives under it — confirm
platemap_path = pathlib.Path("../../data/").resolve()

# directory where the annotated parquet files are saved to
input_dir = pathlib.Path("../data/annotated_data")
# parents=True so a missing "../data" does not raise FileNotFoundError
input_dir.mkdir(parents=True, exist_ok=True)

# directory for the output combined files
output_dir = pathlib.Path("../data/combined_terminal_data")
output_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Maps each imaging run to the annotated single-cell parquet that is read
# ("source_path") and the location the result is written to ("output_path").
# Source paths are resolved with strict=True, so a missing input file raises
# as soon as this cell runs.
dict_of_inputs = {
    "run_20231017ChromaLive_6hr_4ch_MaxIP": {
        "source_path": (
            input_dir / "run_20231017ChromaLive_6hr_4ch_MaxIP_sc.parquet"
        ).resolve(strict=True),
        # written to the output directory; note this name has no "run_" prefix
        "output_path": (
            output_dir / "20231017ChromaLive_6hr_4ch_MaxIP_sc.parquet"
        ).resolve(),
    },
    "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP": {
        "source_path": (
            input_dir / "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP_sc.parquet"
        ).resolve(strict=True),
        # same file name as the source, different directory
        "output_path": (
            output_dir / "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP_sc.parquet"
        ).resolve(),
    },
}

### Load the database into memory

In [6]:
# connect to the LanceDB database stored under ../../data/objects_db
uri = pathlib.Path("../../data/objects_db").resolve()
db = lancedb.connect(uri)

# read the whole "1.masked_images" table into a DataFrame
table = db["1.masked_images"]
location_metadata_df = table.to_pandas()
print(location_metadata_df.shape)

# Rename `frame` to `Metadata_Time`, add 1 so the values match the timepoints
# in the single cell data, and zero-pad to width 4 (e.g. 1 -> "0001").
# Done as one non-mutating chain instead of an in-place rename followed by
# step-by-step reassignment.
location_metadata_df = location_metadata_df.rename(
    columns={"frame": "Metadata_Time"}
).assign(
    Metadata_Time=lambda frame_df: (frame_df["Metadata_Time"] + 1).map(
        "{:04}".format
    )
)
print(location_metadata_df.shape)
location_metadata_df.head()

(7223, 8)
(7223, 8)


Unnamed: 0,image_set_name,Metadata_Time,object_id,x,y,mask_path,mask_file_name,mask_file_path
0,E-11_F0002,1,1,211.914886,15.531915,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
1,E-11_F0002,1,2,1818.947388,30.526316,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
2,E-11_F0002,1,3,1477.800049,49.0,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
3,E-11_F0002,1,4,529.268311,59.756096,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...
4,E-11_F0002,1,5,1143.46936,88.67347,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...,0.png,/gpfs/alpine1/scratch/mlippincott@xsede.org/li...


In [8]:
# list the distinct (zero-padded) timepoints present in the location metadata
pd.unique(location_metadata_df["Metadata_Time"])

array(['0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008',
       '0009', '0010', '0011', '0012', '0013'], dtype=object)

In [7]:
# keep only the location rows whose timepoint label is "0014"
# NOTE(review): in the outputs shown, Metadata_Time only takes the values
# "0001"-"0013", so this selection is empty (0 rows) — confirm that "0014"
# is the intended terminal label for this table.
terminal_location_metadata_df = location_metadata_df[
    location_metadata_df["Metadata_Time"].eq("0014")
]
print(terminal_location_metadata_df.shape)

(0, 8)


### Merge the terminal and single-cell data

In [4]:
# read the annotated single-cell profiles for the 6hr run (main_df) and for
# the endpoint AnnexinV run (terminal_df), in that order
main_df, terminal_df = (
    pd.read_parquet(dict_of_inputs[run_name]["source_path"])
    for run_name in (
        "run_20231017ChromaLive_6hr_4ch_MaxIP",
        "20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP",
    )
)

# report (rows, columns) of each frame, one per line
print(main_df.shape, terminal_df.shape, sep="\n")

(19382, 2325)
(838, 1208)


In [5]:
# display the frame read from the endpoint AnnexinV parquet for a visual check
terminal_df

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,Nuclei_Texture_SumVariance_DNA_3_02_256,Nuclei_Texture_SumVariance_DNA_3_03_256,Nuclei_Texture_Variance_AnnexinV_3_00_256,Nuclei_Texture_Variance_AnnexinV_3_01_256,Nuclei_Texture_Variance_AnnexinV_3_02_256,Nuclei_Texture_Variance_AnnexinV_3_03_256,Nuclei_Texture_Variance_DNA_3_00_256,Nuclei_Texture_Variance_DNA_3_01_256,Nuclei_Texture_Variance_DNA_3_02_256,Nuclei_Texture_Variance_DNA_3_03_256
0,1,C-02,125,Staurosporine,0.00,negative,3,0003,0014,1,...,0.000000,0.000000,0.144326,0.109303,0.144046,0.145811,0.000000,0.000000,0.000000,0.000000
1,1,C-02,125,Staurosporine,0.00,negative,3,0003,0014,2,...,0.000000,0.000000,0.072704,0.079166,0.094317,0.070217,0.000000,0.000000,0.000000,0.000000
2,1,C-02,125,Staurosporine,0.00,negative,3,0003,0014,3,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,1,C-02,125,Staurosporine,0.00,negative,3,0003,0014,4,...,,,,,,,,,,
4,1,C-02,125,Staurosporine,0.00,negative,3,0003,0014,5,...,0.000000,0.000000,0.878675,0.923611,0.904514,0.953495,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
833,1,E-11,69,Staurosporine,156.25,test,8,0004,0014,65,...,,,,,,,,,,
834,1,E-11,69,Staurosporine,156.25,test,8,0004,0014,66,...,,,,,,,,,,
835,1,E-11,69,Staurosporine,156.25,test,8,0004,0014,67,...,0.845833,0.733715,0.124223,0.129822,0.133403,0.136181,0.321454,0.309158,0.313824,0.320213
836,1,E-11,69,Staurosporine,156.25,test,8,0004,0014,68,...,4.551870,3.668281,0.000000,0.000000,0.000000,0.000000,1.300164,1.291613,1.288552,1.257302
