### Import Libraries

In [1]:
import pathlib
import math
import uuid
import importlib
import itertools

import pandas as pd
from pycytominer.cyto_utils import DeepProfiler_processing

# "merge-utils" contains a hyphen, so it cannot be named in a plain `import` statement;
# importlib loads the module from its string name instead.
# NOTE(review): presumably a local merge-utils.py on the import path (e.g. next to this notebook) - confirm
merge_utils = importlib.import_module("merge-utils")

### Set Load/Save Paths

In [2]:
# Root of the data drive. Every load/save location below is derived from it, so
# pointing the notebook at a different machine or mount only requires editing this line.
data_root = pathlib.Path(
    "/media/roshankern/63af2010-c376-459e-a56e-576b170133b6/data"
)

# paths to load features/index from
# CP features: iterated entry-by-entry in the merge cell, one entry per plate
cp_features_save_path = data_root / "cell-health-nuc-CP"
# DP features and the DP index file, both handed to DeepProfilerData in the merge cell
dp_project_path = data_root / "cell-health-nuc-DP"
dp_features_save_path = dp_project_path / "outputs" / "efn_pretrained" / "features"
dp_index_path = dp_project_path / "inputs" / "metadata" / "index.csv"

# path to save merged features to (created up front so the per-plate saves cannot
# fail on a missing directory)
merged_features_save_path = data_root / "cell-health-nuc-merged"
merged_features_save_path.mkdir(parents=True, exist_ok=True)

### Merge Features

In [3]:
# Merge CP and DP single-cell features one plate at a time.
# Every entry inside cp_features_save_path is treated as one plate, named after the entry.
for cp_output_path in cp_features_save_path.iterdir():
    plate = cp_output_path.name
    print(f"Merging plate {plate} ...")

    # Output file for this plate; built once and shared by the skip check and the save below.
    # TODO (REMOVE LATER): save a compressed copy instead, i.e. name the file
    # f"{plate}-merged-single-cell.csv.gz" and pass compression="gzip" to to_csv.
    merged_plate_single_cells_save_path = (
        merged_features_save_path / f"{plate}-merged-single-cell.csv"
    )

    # REMOVE LATER: skip plates whose merged output already exists on disk
    if merged_plate_single_cells_save_path.is_file():
        continue

    # load single cell dataframe for CP data
    print("Loading CP features...")
    cp_plate_single_cells = merge_utils.load_cp_feature_data(cp_output_path, plate)

    # load single cell dataframe for DP data
    print("Loading DP features...")
    deep_data = DeepProfiler_processing.DeepProfilerData(
        dp_index_path, dp_features_save_path, filename_delimiter="/"
    )
    # get dp index for plate we are interested in; .copy() makes the subset an independent
    # frame so the column assignment below does not write to a slice of the full index
    # (which raises SettingWithCopyWarning)
    plate_index_df = deep_data.index_df.loc[
        deep_data.index_df["Metadata_Plate"] == plate
    ].copy()
    # convert site to int (instead of string beginning with 0) so DeepProfiler_processing can find output
    plate_index_df["Metadata_Site"] = pd.to_numeric(plate_index_df["Metadata_Site"])
    deep_data.index_df = plate_index_df

    deep_single_cell = DeepProfiler_processing.SingleCellDeepProfiler(deep_data)
    dp_plate_single_cells = deep_single_cell.get_single_cells(output=True)

    # iterate through each image in the plate (unique image for each plate, well, site combination)
    print("Merging features...")
    wells = dp_plate_single_cells["Metadata_Well"].unique()
    sites = dp_plate_single_cells["Metadata_Site"].unique()

    # one merged dataframe per image, concatenated once after the loop
    merged_image_frames = []
    for well, site in itertools.product(wells, sites):
        # find single cell data for the well, site combination
        cp_image_single_cells = cp_plate_single_cells.loc[
            (cp_plate_single_cells["Metadata_Well"] == well)
            & (cp_plate_single_cells["Metadata_Site"] == site)
        ]
        dp_image_single_cells = dp_plate_single_cells.loc[
            (dp_plate_single_cells["Metadata_Well"] == well)
            & (dp_plate_single_cells["Metadata_Site"] == site)
        ]

        # get the merged single-cell image data and add this to the merged plate data
        merged_image_frames.append(
            merge_utils.merge_CP_DP_image_data(cp_image_single_cells, dp_image_single_cells)
        )

    # combine all merged image data into one dataframe for the entire plate
    merged_plate_single_cells = pd.concat(merged_image_frames).reset_index(drop=True)

    # save merged single-cell data (uncompressed for now, see TODO above)
    print("Saving merged features...")
    merged_plate_single_cells.to_csv(merged_plate_single_cells_save_path, index=False)

    # NOTE(review): this stops after the first plate that is actually merged (plates skipped
    # above do not count). It looks like a temporary limit alongside the other REMOVE LATER
    # items; delete it to merge every plate.
    break

Merging plate SQ00014617 ...
Merging plate SQ00014615 ...
Loading CP features...
Loading DP features...
Merging features...
Saving merged features...


In [4]:
# Preview the merged dataframe left over from the merge cell above, i.e. the plate that
# was merged and saved right before the loop stopped.
# The name is only assigned when a plate is actually merged: if every plate was skipped
# because its output file already exists, a fresh kernel raises NameError here.
merged_plate_single_cells

Unnamed: 0,Location_Center_X,Location_Center_Y,Metadata_Site,Metadata_Well,Metadata_Plate,Metadata_Plate_Map_Name,Metadata_Reagent,CP__AreaShape_Area,CP__AreaShape_BoundingBoxArea,CP__AreaShape_BoundingBoxMaximum_X,...,DP__efficientnet_1270,DP__efficientnet_1271,DP__efficientnet_1272,DP__efficientnet_1273,DP__efficientnet_1274,DP__efficientnet_1275,DP__efficientnet_1276,DP__efficientnet_1277,DP__efficientnet_1278,DP__efficientnet_1279
0,1815.609682,43.198714,4,G18,SQ00014615,SQ00014615_G18_04,ARID1B-2,2954,3886,1851,...,-0.104212,-0.025121,-0.178093,0.240942,-0.232407,-0.144154,-0.090417,-0.113274,-0.150408,0.943930
1,203.655413,54.296624,4,G18,SQ00014615,SQ00014615_G18_04,ARID1B-2,3732,5175,240,...,-0.076232,-0.191903,-0.166096,0.445674,-0.180844,-0.029980,-0.027969,-0.106613,-0.169251,1.022603
2,1263.738986,75.333333,4,G18,SQ00014615,SQ00014615_G18_04,ARID1B-2,3609,4536,1295,...,-0.143050,-0.096522,-0.141671,0.928452,-0.189959,0.318413,-0.104093,-0.163004,-0.222072,0.986999
3,740.074125,68.358656,4,G18,SQ00014615,SQ00014615_G18_04,ARID1B-2,2172,2842,770,...,-0.100481,-0.215816,-0.183816,0.606042,-0.214819,0.286931,-0.057575,-0.159681,-0.200404,1.549036
4,286.182737,88.632970,4,G18,SQ00014615,SQ00014615_G18_04,ARID1B-2,2572,3551,312,...,-0.075819,-0.179028,-0.190360,0.491413,-0.192930,0.003588,-0.038575,-0.099177,-0.138818,0.908169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774088,416.805636,2085.631579,9,M10,SQ00014615,SQ00014615_M10_09,ERBB3-1,2413,3584,450,...,-0.103577,0.215974,-0.206824,0.096683,-0.187200,-0.125331,-0.102619,-0.116361,-0.188099,1.660888
774089,776.992817,2100.031657,9,M10,SQ00014615,SQ00014615_M10_09,ERBB3-1,3759,5040,809,...,-0.081855,0.362170,-0.197374,0.194708,-0.246326,0.200861,-0.074231,-0.163784,-0.129421,0.740021
774090,532.853734,2084.372594,9,M10,SQ00014615,SQ00014615_M10_09,ERBB3-1,2598,3337,569,...,-0.090097,-0.026653,-0.216974,0.221559,-0.211150,0.032164,-0.084775,-0.103802,-0.136444,1.042943
774091,1218.812360,2098.119436,9,M10,SQ00014615,SQ00014615_M10_09,ERBB3-1,3123,4221,1251,...,-0.035353,-0.097892,-0.194749,-0.106472,-0.195513,-0.163299,-0.066284,-0.166655,-0.145749,1.082021
