### Import Libraries

In [1]:
import pathlib
import math
import uuid
import importlib
import itertools

import pandas as pd
from pycytominer.cyto_utils import DeepProfiler_processing

# the module name "merge-utils" contains a hyphen, which is not a valid Python identifier,
# so it cannot appear in a regular `import` statement; load it by its string name instead
merge_utils = importlib.import_module("merge-utils")

### Set Load/Save Paths

In [2]:
# root directory that holds every dataset used in this notebook;
# change this single line to point the notebook at a different data location
data_root = pathlib.Path(
    "/media/roshankern/63af2010-c376-459e-a56e-576b170133b6/data"
)

# paths to load features/index from
# CP features: iterated below, one entry per plate
cp_features_save_path = data_root / "cell-health-nuc-CP"
# DP features and the DP index file, both passed to DeepProfilerData below
dp_features_save_path = (
    data_root / "cell-health-nuc-DP" / "outputs" / "efn_pretrained" / "features"
)
dp_index_path = data_root / "cell-health-nuc-DP" / "inputs" / "metadata" / "index.csv"

# path to save merged features to (created here, including parents, if it does not exist yet)
merged_features_save_path = data_root / "cell-health-nuc-merged"
merged_features_save_path.mkdir(parents=True, exist_ok=True)

### Merge Features

In [3]:
# Merge CP and DP single-cell features plate by plate and save one CSV per plate.
# Each entry of cp_features_save_path is treated as one plate, named after the entry.
# sorted() makes the plate order reproducible: iterdir() yields entries in arbitrary order,
# which matters while the debug `break` at the bottom stops after the first merged plate.
for cp_output_path in sorted(cp_features_save_path.iterdir()):
    plate = cp_output_path.name
    print(f"Merging plate {plate} ...")

    # TODO (remove later): skip plates whose merged (uncompressed) CSV already exists
    merged_plate_single_cells_save_path = (
        merged_features_save_path / f"{plate}-merged-single-cell.csv"
    )
    if merged_plate_single_cells_save_path.is_file():
        continue

    # load single cell dataframe for CP data
    print("Loading CP features...")
    cp_plate_single_cells = merge_utils.load_cp_feature_data(cp_output_path, plate)

    # load single cell dataframe for DP data
    print("Loading DP features...")
    deep_data = DeepProfiler_processing.DeepProfilerData(
        dp_index_path, dp_features_save_path, filename_delimiter="/"
    )
    # get dp index for plate we are interested in;
    # .copy() gives an independent frame so the column assignment below does not
    # write into a slice of the full index (avoids SettingWithCopyWarning)
    plate_index_df = deep_data.index_df.loc[
        deep_data.index_df["Metadata_Plate"] == plate
    ].copy()
    # convert site to int (instead of string beginning with 0) so DeepProfiler_processing can find output
    plate_index_df["Metadata_Site"] = pd.to_numeric(plate_index_df["Metadata_Site"])
    deep_data.index_df = plate_index_df
    deep_single_cell = DeepProfiler_processing.SingleCellDeepProfiler(deep_data)
    dp_plate_single_cells = deep_single_cell.get_single_cells(output=True)

    # iterate through each image in the plate (unique image for each plate, well, site combination)
    print("Merging features...")
    wells = dp_plate_single_cells["Metadata_Well"].unique()
    sites = dp_plate_single_cells["Metadata_Site"].unique()

    # TODO (remove later): debug cap on the number of well/site combinations merged per plate
    max_debug_images = 6

    # list where all merged image data will be compiled
    merged_image_frames = []
    # NOTE(review): the product covers every well x site pairing, including pairings that may
    # have no cells in one or both frames -- confirm merge_CP_DP_image_data handles empty input
    for well, site in itertools.islice(itertools.product(wells, sites), max_debug_images):

        # find single cell data for the well, site combination
        cp_image_single_cells = cp_plate_single_cells.loc[
            (cp_plate_single_cells["Metadata_Well"] == well)
            & (cp_plate_single_cells["Metadata_Site"] == site)
        ]
        dp_image_single_cells = dp_plate_single_cells.loc[
            (dp_plate_single_cells["Metadata_Well"] == well)
            & (dp_plate_single_cells["Metadata_Site"] == site)
        ]

        # get the merged single-cell image data and add this to the merged plate data
        merged_image_data = merge_utils.merge_CP_DP_image_data(
            cp_image_single_cells, dp_image_single_cells
        )
        merged_image_frames.append(merged_image_data)

    # combine all merged image data into one dataframe for the entire plate
    merged_plate_single_cells = pd.concat(merged_image_frames).reset_index(drop=True)

    # save merged single-cell data
    # TODO (remove later): switch to the compressed output instead, i.e. save to
    # "{plate}-merged-single-cell.csv.gz" with to_csv(..., compression="gzip", index=False)
    print("Saving merged features...")
    merged_plate_single_cells.to_csv(merged_plate_single_cells_save_path, index=False)

    # TODO (remove later): debug stop after the first plate that was actually merged
    break

Merging plate SQ00014617 ...
Loading CP features...
Loading DP features...
Merging features...
Saving merged features...


In [4]:
# display the merged dataframe left by the loop above (the last plate that was merged)
# NOTE(review): if every plate is skipped in that loop, this name is never assigned --
# confirm this cell still runs after Restart Kernel -> Run All
merged_plate_single_cells

Unnamed: 0,Location_Center_X,Location_Center_Y,Metadata_Cell_UUID,Metadata_Site,Metadata_Well,Metadata_Plate,Metadata_Plate_Map_Name,Metadata_Reagent,CP__AreaShape_Area,CP__AreaShape_BoundingBoxArea,...,DP__efficientnet_1270,DP__efficientnet_1271,DP__efficientnet_1272,DP__efficientnet_1273,DP__efficientnet_1274,DP__efficientnet_1275,DP__efficientnet_1276,DP__efficientnet_1277,DP__efficientnet_1278,DP__efficientnet_1279
0,646.773973,4.075342,7e4c9500-6a62-44c0-94a8-627101437637,4,G18,SQ00014617,SQ00014617_G18_04,ARID1B-2,146,216,...,-0.040406,-0.043776,-0.198162,-0.100171,-0.038737,-0.119610,-0.087806,-0.246079,-0.113472,1.177017
1,141.984490,42.527520,983e4aed-5bde-44e6-99f4-88a7cfbbbc44,4,G18,SQ00014617,SQ00014617_G18_04,ARID1B-2,5287,7242,...,0.027649,-0.190173,-0.157589,-0.174300,-0.209957,-0.188459,-0.072498,-0.143237,-0.155022,0.704835
2,1661.244888,59.342876,3e14db6b-6bf8-4c75-a339-d5d28cbfdee2,4,G18,SQ00014617,SQ00014617_G18_04,ARID1B-2,6113,8424,...,-0.119423,-0.218592,-0.159793,0.054443,-0.252569,-0.057034,-0.112069,-0.121480,-0.201976,0.843459
3,1067.377054,73.602479,d525a18f-8a14-4925-b383-2adefabb0f93,4,G18,SQ00014617,SQ00014617_G18_04,ARID1B-2,3469,4560,...,-0.121192,0.004875,-0.103759,0.232781,-0.113656,0.092030,-0.065040,-0.189433,-0.171040,1.347896
4,1305.941113,72.588061,c9c4e64b-08cf-4b44-98a3-f174cc00c3e7,4,G18,SQ00014617,SQ00014617_G18_04,ARID1B-2,3702,6156,...,-0.088266,-0.173519,-0.191016,-0.124935,-0.247238,0.367001,-0.080280,-0.129818,-0.130432,1.106908
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1395,343.307203,2017.323319,24df339f-2923-40b0-85f7-68d5159b7bbf,5,G18,SQ00014617,SQ00014617_G18_05,ARID1B-2,5026,6723,...,-0.148674,-0.139006,-0.143114,0.425970,-0.200837,0.178944,-0.071196,-0.089703,-0.201842,0.394211
1396,690.239186,2030.182619,5ce5e5a5-a4cd-4dd8-aa4c-0d9f01e54ba0,5,G18,SQ00014617,SQ00014617_G18_05,ARID1B-2,5109,6720,...,-0.140892,-0.110732,-0.132020,0.532700,-0.198494,0.820284,-0.075696,-0.098761,-0.157622,0.914479
1397,1191.724266,2072.115130,a4484121-0065-4c19-92b4-01acf9152b6d,5,G18,SQ00014617,SQ00014617_G18_05,ARID1B-2,4838,6968,...,-0.111961,0.179153,-0.214572,0.170579,-0.221362,0.477103,-0.084367,-0.131585,-0.086671,1.367961
1398,615.209564,2103.762131,8aacb2a3-24e3-455b-af9a-9ddc48adc219,5,G18,SQ00014617,SQ00014617_G18_05,ARID1B-2,5688,7700,...,-0.076274,-0.096603,-0.189981,0.936524,-0.182274,0.761809,-0.057677,-0.121445,-0.177512,1.377887
