### Import Libraries

In [1]:
import pathlib
import math
import uuid
import importlib
import itertools

import pandas as pd
from pycytominer.cyto_utils import DeepProfiler_processing

# the helper module's name contains a hyphen ("merge-utils"), which a regular
# `import` statement cannot spell, so it is loaded by name through importlib
merge_utils = importlib.import_module("merge-utils")

### Set Load/Save Paths

In [2]:
# Root of the local data drive. Every load/save location below is derived from
# this single value, so pointing the notebook at another machine or mount only
# requires editing this one line.
data_root = pathlib.Path(
    "/media/roshankern/63af2010-c376-459e-a56e-576b170133b6/data"
)

# paths to load features/index from
cp_features_save_path = data_root / "cell-health-nuc-CP"
dp_features_save_path = (
    data_root / "cell-health-nuc-DP" / "outputs" / "efn_pretrained" / "features"
)
dp_index_path = data_root / "cell-health-nuc-DP" / "inputs" / "metadata" / "index.csv"

# path to save merged features to (created here, including parents, if missing)
merged_features_save_path = data_root / "cell-health-nuc-merged"
merged_features_save_path.mkdir(parents=True, exist_ok=True)

### Merge Features

In [3]:
# Merge the CP and DP single-cell features of each plate, image by image, and
# save one merged CSV per plate.

# columns that together identify one image within a plate
image_key_columns = ["Metadata_Well", "Metadata_Site"]

# NOTE: Path.iterdir() yields entries in arbitrary order, so while the
# temporary `break` at the bottom is in place, which plate gets processed
# depends on the filesystem.
for cp_output_path in cp_features_save_path.iterdir():
    plate = cp_output_path.name
    print(f"Merging plate {plate} ...")

    # list where all merged image data will be compiled
    merged_image_frames = []

    # load single cell dataframe for CP data
    print("Loading CP features...")
    cp_plate_single_cells = merge_utils.load_cp_feature_data(cp_output_path, plate)

    # load single cell dataframe for DP data
    print("Loading DP features...")
    deep_data = DeepProfiler_processing.DeepProfilerData(
        dp_index_path, dp_features_save_path, filename_delimiter="/"
    )
    # get dp index for plate we are interested in
    # (.copy() gives the filtered index its own data, so the column assignment
    # below does not write into a slice of the full index and does not raise
    # pandas' SettingWithCopyWarning)
    deep_data.index_df = deep_data.index_df.loc[
        deep_data.index_df["Metadata_Plate"] == plate
    ].copy()
    # convert site to int (instead of string beginning with 0) so DeepProfiler_processing can find output
    deep_data.index_df["Metadata_Site"] = pd.to_numeric(deep_data.index_df["Metadata_Site"])
    deep_single_cell = DeepProfiler_processing.SingleCellDeepProfiler(deep_data)
    dp_plate_single_cells = deep_single_cell.get_single_cells(output=True)

    # iterate through each image in the plate (unique image for each plate, well, site combination)
    print("Merging features...")
    wells = dp_plate_single_cells["Metadata_Well"].unique()
    sites = dp_plate_single_cells["Metadata_Site"].unique()

    # Row positions of every image's cells, computed once per dataframe.
    # Looking positions up per image replaces two boolean scans of the whole
    # plate for every (well, site) combination.
    cp_image_rows = cp_plate_single_cells.groupby(image_key_columns).indices
    dp_image_rows = dp_plate_single_cells.groupby(image_key_columns).indices
    no_rows = []

    for well, site in itertools.product(wells, sites):
        image_key = (well, site)

        # find single cell data for the well, site combination
        # (a combination with no cells selects an empty dataframe, exactly as
        # a boolean mask with no matches would)
        cp_image_single_cells = cp_plate_single_cells.iloc[
            cp_image_rows.get(image_key, no_rows)
        ]
        dp_image_single_cells = dp_plate_single_cells.iloc[
            dp_image_rows.get(image_key, no_rows)
        ]

        # get the merged single-cell image data and add this to the merged plate data
        merged_image_data = merge_utils.merge_CP_DP_image_data(cp_image_single_cells, dp_image_single_cells)
        merged_image_frames.append(merged_image_data)

    # combine all merged image data into one dataframe for the entire plate
    merged_plate_single_cells = pd.concat(merged_image_frames).reset_index(drop=True)

    # compress and save merged single-cell data
    # merged_plate_single_cells_save_path = pathlib.Path(f"{merged_features_save_path}/{plate}-merged-single-cell.csv.gz")
    # merged_plate_single_cells.to_csv(merged_plate_single_cells_save_path, compression="gzip", index=False)

    # save merged single-cell data REMOVE LATER AND USED COMPRESSED VERSION ABOVE
    merged_plate_single_cells_save_path = merged_features_save_path / f"{plate}-merged-single-cell.csv"
    merged_plate_single_cells.to_csv(merged_plate_single_cells_save_path, index=False)

    # stop after one plate REMOVE LATER
    break


Merging plate SQ00014617 ...
Loading CP features...
Loading DP features...


In [4]:
# preview the merged single-cell dataframe produced by the merge loop above
# (it holds the data of the last plate the loop processed)
merged_plate_single_cells

Unnamed: 0,Location_Center_X,Location_Center_Y,Metadata_Cell_UUID,Metadata_Plate,Metadata_Site,Metadata_Well,Metadata_Plate_Map_Name,Metadata_Reagent,CP__AreaShape_Area,CP__AreaShape_BoundingBoxArea,...,DP__efficientnet_1270,DP__efficientnet_1271,DP__efficientnet_1272,DP__efficientnet_1273,DP__efficientnet_1274,DP__efficientnet_1275,DP__efficientnet_1276,DP__efficientnet_1277,DP__efficientnet_1278,DP__efficientnet_1279
0,646.773973,4.075342,63b7aca5-b627-44f6-b3fc-6b44fff1e5b0,SQ00014617,4,G18,SQ00014617_G18_04,ARID1B-2,146,216,...,-0.040406,-0.043776,-0.198162,-0.100171,-0.038737,-0.119610,-0.087806,-0.246079,-0.113472,1.177017
1,141.984490,42.527520,054eb489-1013-4030-adaa-a005a5dc8e33,SQ00014617,4,G18,SQ00014617_G18_04,ARID1B-2,5287,7242,...,0.027649,-0.190173,-0.157589,-0.174300,-0.209957,-0.188459,-0.072498,-0.143237,-0.155022,0.704835
2,1661.244888,59.342876,a48e08d3-2d98-4ee3-859a-8e6c8e49cb17,SQ00014617,4,G18,SQ00014617_G18_04,ARID1B-2,6113,8424,...,-0.119423,-0.218592,-0.159793,0.054443,-0.252569,-0.057034,-0.112069,-0.121480,-0.201976,0.843459
3,1067.377054,73.602479,08ac8c53-461e-40cf-ab39-e3a612c29209,SQ00014617,4,G18,SQ00014617_G18_04,ARID1B-2,3469,4560,...,-0.121192,0.004875,-0.103759,0.232781,-0.113656,0.092030,-0.065040,-0.189433,-0.171040,1.347896
4,1305.941113,72.588061,e4cb3cbb-bde0-4b27-b3b6-acff8b48c246,SQ00014617,4,G18,SQ00014617_G18_04,ARID1B-2,3702,6156,...,-0.088266,-0.173519,-0.191016,-0.124935,-0.247238,0.367001,-0.080280,-0.129818,-0.130432,1.106908
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
881594,724.760215,2105.157465,c3fbcc4e-ebf4-40cf-a58a-6cffede03d3f,SQ00014617,9,M10,SQ00014617_M10_09,ERBB3-1,5017,6693,...,-0.078183,0.811622,-0.021348,-0.086247,-0.229680,-0.173440,-0.054408,-0.179359,-0.128939,1.731972
881595,1444.983078,2100.191460,6699f147-782f-46a5-b746-9912814d5c3d,SQ00014617,9,M10,SQ00014617_M10_09,ERBB3-1,5082,7802,...,-0.086288,0.613332,-0.050642,-0.089329,-0.238007,-0.178470,-0.050246,-0.186259,-0.062235,0.893341
881596,974.921581,2110.330797,282863f7-4883-4e28-a318-7c4cd8a3bb2b,SQ00014617,9,M10,SQ00014617_M10_09,ERBB3-1,4731,6160,...,-0.117888,0.452052,-0.175615,0.324218,-0.206157,0.002648,-0.090842,-0.115012,-0.172842,0.926185
881597,1317.891086,2113.210241,5140f1fb-1c1f-43d2-bbbe-fe471cb8ade6,SQ00014617,9,M10,SQ00014617_M10_09,ERBB3-1,3691,5214,...,-0.104560,0.750099,-0.029916,-0.034221,-0.084939,-0.175909,-0.079876,-0.174425,-0.055805,1.113946


In [6]:
# Write the merged single-cell dataframe to an uncompressed CSV named after the
# plate, inside the merged-features folder. Uses `plate` and
# `merged_plate_single_cells` as left behind by the merge loop.
merged_plate_single_cells_save_path = (
    merged_features_save_path / f"{plate}-merged-single-cell.csv"
)
merged_plate_single_cells.to_csv(
    merged_plate_single_cells_save_path,
    index=False,
)