In [1]:
from faim_hcs.hcs.Experiment import Experiment
from faim_hcs.records.PlateRecord import PlateRecord
from faim_hcs.records.WellRecord import WellRecord
from faim_hcs.records.OrganoidRecord import OrganoidRecord

from glob import glob

import os
from os.path import join

from os.path import isdir, dirname, split, basename, splitext, exists
from tqdm.notebook import tqdm

import re

import pandas as pd

from skimage.measure import regionprops, label
from skimage.morphology import binary_erosion
import numpy as np
import math 
import copy

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## User input

In [10]:
# Load an existing faim-hcs Experiment from disk. 
exp = Experiment()
exp.load('/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R0/summary.csv')



## Merge organoid features into single df

In [14]:
#pool together regionprops_org files
exp.only_iterate_over_wells(False)
exp.reset_iterator()

#create list of dataframes from all well and organoids
org_feat_df_list =[] #length of list is #plates * # wells * #channels * # organoids/well

#add nuclear counts as well, if there is a nuc seg. this is only raw counts, before filtering or nuc/mem linking!
for organoid in exp:
    nuc_count = 0 #default is 0
    try:
        n = organoid.get_measurement('regionprops_nuc_C01')
    except Exception as e:
        print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, 
              "missing nuc seg", e) #usually exception is that no nuc were detected so csv is empty
    else: #if there is no error, count number of nuclei in organoid
        if n is not None:
            nuc_count = n.shape[0]
        
    for meas_name in [k for k,v in organoid.measurements.items() if k.startswith('regionprops_org')]:
        m = organoid.get_measurement(meas_name)
        m["nuc_count"] = nuc_count #add nuc count to measurement
        org_feat_df_list.append(m)

org_feat_df = pd.concat(org_feat_df_list, ignore_index=True, sort=False)


object_58 B03 day2p5 missing nuc seg No columns to parse from file
object_86 B03 day2p5 missing nuc seg No columns to parse from file
object_43 B03 day2p5 missing nuc seg No columns to parse from file
object_75 B03 day2p5 missing nuc seg No columns to parse from file
object_33 B03 day2p5 missing nuc seg No columns to parse from file
object_39 B03 day2p5 missing nuc seg No columns to parse from file
object_23 B03 day2p5 missing nuc seg No columns to parse from file
object_70 B03 day2p5 missing nuc seg No columns to parse from file
object_76 B03 day2p5 missing nuc seg No columns to parse from file
object_83 B03 day2p5 missing nuc seg No columns to parse from file
object_53 B03 day2p5 missing nuc seg No columns to parse from file
object_26 B03 day2p5 missing nuc seg No columns to parse from file
object_13 B05 day2p5 missing nuc seg No columns to parse from file
object_65 B05 day2p5 missing nuc seg No columns to parse from file
object_75 B05 day2p5 missing nuc seg No columns to parse from 

Measurment regionprops_nuc_C01 does not exist in object_13:[].
Measurment regionprops_nuc_C01 does not exist in object_12:[].
Measurment regionprops_nuc_C01 does not exist in object_57:[].
Measurment regionprops_nuc_C01 does not exist in object_49:[].
Measurment regionprops_nuc_C01 does not exist in object_60:[].
Measurment regionprops_nuc_C01 does not exist in object_35:[].
Measurment regionprops_nuc_C01 does not exist in object_54:[].
Measurment regionprops_nuc_C01 does not exist in object_46:[].
Measurment regionprops_nuc_C01 does not exist in object_58:[].
Measurment regionprops_nuc_C01 does not exist in object_17:[].
Measurment regionprops_nuc_C01 does not exist in object_43:[].
Measurment regionprops_nuc_C01 does not exist in object_32:[].
Measurment regionprops_nuc_C01 does not exist in object_11:[].
Measurment regionprops_nuc_C01 does not exist in object_47:[].
Measurment regionprops_nuc_C01 does not exist in object_55:[].
Measurment regionprops_nuc_C01 does not exist in object

object_7 B03 day3p5 missing nuc seg No columns to parse from file
object_65 B05 day3p5 missing nuc seg No columns to parse from file
object_30 B05 day3p5 missing nuc seg No columns to parse from file
object_80 B04 day3p5 missing nuc seg No columns to parse from file
object_33 B04 day3p5 missing nuc seg No columns to parse from file
object_31 B04 day3p5 missing nuc seg No columns to parse from file
object_71 B04 day3p5 missing nuc seg No columns to parse from file
object_15 B06 day3p5 missing nuc seg No columns to parse from file
object_13 B03 day2 missing nuc seg No columns to parse from file
object_12 B03 day2 missing nuc seg No columns to parse from file
object_27 B03 day2 missing nuc seg No columns to parse from file
object_69 B03 day2 missing nuc seg No columns to parse from file
object_99 B03 day2 missing nuc seg No columns to parse from file
object_48 B03 day2 missing nuc seg No columns to parse from file
object_37 B03 day2 missing nuc seg No columns to parse from file
object_55 

In [15]:
#pool together regionprops_ovr files
exp.only_iterate_over_wells(True)
exp.reset_iterator()

#create list of ovr dataframes from all wells
org_ovr_df_list =[] #length of list is # plates * # wells

for well in exp:
    for meas_name in [k for k,v in well.measurements.items() if k.startswith('regionprops_ovr')]:
        m = well.get_measurement(meas_name)
        org_ovr_df_list.append(m)

org_ovr_df = pd.concat(org_ovr_df_list, ignore_index=True, sort=False)

In [16]:
# merge all org and ovr features into single df, includes all plates, wells, and organoids

org_df = pd.merge(org_feat_df, org_ovr_df, how="left", on=["hcs_experiment", "plate_id", "well_id", "organoid_id" ])
org_df = org_df.sort_values(by=["hcs_experiment", "root_dir", "plate_id", "well_id", "org_label", "channel_id"])

#add .fillna('NaN') at the end?

org_df.to_csv(join(exp.get_experiment_dir(),"org_df.csv"), index=False) #saves csv


## Merge linking of organoids into single df - all rounds

In [19]:
# pool together organoid linking files
exp.only_iterate_over_wells(True)
exp.reset_iterator()

#create dictionary of lists containing linking files for each round
org_link_df_dict ={} #each key in dictionary is a round that was linked to R0

#initialize dictionary
for well in exp:
    linkMeasurements = [k for k,v in well.measurements.items() if k.startswith('linking_ovr')]
    
    # if no linking in folder, skip
    if not linkMeasurements:
        continue
    
    for meas_name in linkMeasurements:
        rnd = meas_name[-6:-4] # select the round name that was linked to R0
        org_link_df_dict[rnd] = [] #add to dictionary as empty list

#add linking files to dictionary
exp.only_iterate_over_wells(True)
exp.reset_iterator()
for well in exp:
    linkMeasurements = [k for k,v in well.measurements.items() if k.startswith('linking_ovr')]
    
    # if no linking in folder, skip
    if not linkMeasurements:
        continue
    
    for meas_name in linkMeasurements:
        m = well.get_measurement(meas_name)
        rnd = meas_name[-6:-4]
        org_link_df_dict[rnd].append(m)

# if there is organoid linking, concatenate and save csv
if not len(org_link_df_dict) == 0:
    for key in org_link_df_dict:
        link_df = pd.concat(org_link_df_dict[key], ignore_index=True, sort=False)
        link_df.to_csv(join(exp.get_experiment_dir(),("linking_org_" + key + "_df.csv")), index=False) #saves csv


## PlatyMatch linking into single df

In [11]:
# pool together organoid linking files
exp.only_iterate_over_wells(False)
exp.reset_iterator()

#create dictionary of lists containing linking files for each round
nuc_link_df_dict ={} #each key in dictionary is a round that was linked to R0

#initialize dictionary, nested dictionary for each round
for organoid in exp:
    linkMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('linking_nuc_ffd')]
    
    # if no linking in organoid, skip
    if not linkMeasurements:
        continue
    
    for meas_name in linkMeasurements:
        rnd = meas_name[-6:-4] # select the round name that was linked to R0
        nuc_link_df_dict[rnd] = [] #add to dictionary as empty list

#add linking files to dictionary
exp.only_iterate_over_wells(False)
exp.reset_iterator()
for organoid in exp:
    linkMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('linking_nuc_ffd')]
    
    # if no linking in organoid, skip
    if not linkMeasurements:
        continue
    
    for meas_name in linkMeasurements:
        m = organoid.get_measurement(meas_name)
        rnd = meas_name[-6:-4]
        nuc_link_df_dict[rnd].append(m)

# if there is nuc linking, concatenate and save csv
if not len(nuc_link_df_dict) == 0:
    for key in nuc_link_df_dict:
        nuc_link_df = pd.concat(nuc_link_df_dict[key], ignore_index=True, sort=False)
        nuc_link_df.to_csv(join(exp.get_experiment_dir(),("linking_nuc_" + key + "_df.csv")), index=False) #saves csv


## Merge nuclear features into single df

In [17]:
#pool together regionprops_nuc files
exp.only_iterate_over_wells(False)
exp.reset_iterator()

#create list of dataframes from all well and organoids
nuc_feat_df_list =[] #length of list is #plates * # wells * #channels * # organoids/well

for organoid in exp:
    nucMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_nuc')]
    
    # if no nuclear feature extraction in folder, skip
    if not nucMeasurements:
        continue
        
    for meas_name in nucMeasurements:
        try:
            m = organoid.get_measurement(meas_name)
        
        except Exception as e:
            print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
            continue 
        
        nuc_feat_df_list.append(m)


# if there are any nuclear features, concatenate and save csv
if not len(nuc_feat_df_list) == 0:
    nuc_df = pd.concat(nuc_feat_df_list, ignore_index=True, sort=False)
    nuc_df = nuc_df.sort_values(by=["hcs_experiment", "root_dir", "plate_id", "well_id", "org_label", "nuc_id", "channel_id"])
    nuc_df.to_csv(join(exp.get_experiment_dir(),"nuc_df.csv"), index=False) #saves csv


object_58 B03 day2p5 No columns to parse from file
object_58 B03 day2p5 No columns to parse from file
object_58 B03 day2p5 No columns to parse from file
object_58 B03 day2p5 No columns to parse from file
object_86 B03 day2p5 No columns to parse from file
object_86 B03 day2p5 No columns to parse from file
object_86 B03 day2p5 No columns to parse from file
object_86 B03 day2p5 No columns to parse from file
object_43 B03 day2p5 No columns to parse from file
object_43 B03 day2p5 No columns to parse from file
object_43 B03 day2p5 No columns to parse from file
object_43 B03 day2p5 No columns to parse from file
object_75 B03 day2p5 No columns to parse from file
object_75 B03 day2p5 No columns to parse from file
object_75 B03 day2p5 No columns to parse from file
object_75 B03 day2p5 No columns to parse from file
object_33 B03 day2p5 No columns to parse from file
object_33 B03 day2p5 No columns to parse from file
object_33 B03 day2p5 No columns to parse from file
object_33 B03 day2p5 No columns

## Merge membrane features into single df

In [89]:
#pool together regionprops_mem files
exp.only_iterate_over_wells(False)
exp.reset_iterator()

#create list of dataframes from all well and organoids
mem_feat_df_list =[] #length of list is #plates * # wells * #channels * # organoids/well

for organoid in exp:
    memMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_mem')]
    
    # if no nuclear feature extraction in folder, skip
    if not memMeasurements:
        continue
        
    for meas_name in memMeasurements:
        try:
            m = organoid.get_measurement(meas_name)
        
        except Exception as e:
            print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
            continue 
        
        mem_feat_df_list.append(m)

# if there are any nuclear features, concatenate and save csv
if not len(mem_feat_df_list) == 0:
    mem_df = pd.concat(mem_feat_df_list, ignore_index=True, sort=False)
    mem_df = mem_df.sort_values(by=["hcs_experiment", "root_dir", "plate_id", "well_id", "org_label", "mem_id", "channel_id"])
    mem_df.to_csv(join(exp.get_experiment_dir(),"mem_df.csv"), index=False) #saves csv

object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_60 B03 day1p5 No columns to parse from file
object_60 B03 day1p5 No columns to parse from file
object_60 B03 day1p5 No columns to parse from file
object_60 B03 day1p5 No columns to parse from file
object_21 B03 day1p5 No columns to parse from file
object_21 B03 day1p5 No columns to parse from file
object_21 B03 day1p5 No columns to parse from file
object_21 B03 day1p5 No columns

## Link together nuclei and cells

In [118]:
# #pool together regionprops_org files
# exp.only_iterate_over_wells(False)
# exp.reset_iterator()

# #create list of dataframes from all well and organoids
# cell_feat_df_list =[] #length of list is #plates * # wells * # organoids/well

# for organoid in exp:
#     nucMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_nuc')]
#     memMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_mem')]
#     linkMeasurement = [k for k,v in organoid.measurements.items() if k.startswith('linking_nuc_to_mem')]
    
#     # if no nuclear or membrane feature extraction in folder, skip. linking file must be unique
#     if not nucMeasurements:
#         continue
#     if not memMeasurements:
#         continue
#     if not len(linkMeasurement) == 1:
#         continue
    
#     linking = organoid.get_measurement('linking_nuc_to_mem')
#     linking_dict = linking.set_index('nuc_id').T.to_dict('index')['mem_id'] #nuc id is key, membrane id is value
        
#     for meas_name in nucMeasurements:
#         channel = meas_name[-3:]
        
#         try:
#             nuc = organoid.get_measurement(meas_name)
#         except Exception as e:
#             print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
#             continue 
        
#         try:
#             mem = organoid.get_measurement('regionprops_mem_' + channel)
#         except Exception as e:
#             print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
#             continue 
            
#         #select nuclei that are matched to a membrane
#         nuc_filt = nuc.loc[nuc['nuc_id'].isin(linking_dict.keys()), :].copy(deep=True)
#         #select membranes that are matched to a nucleus
#         mem_filt = mem.loc[mem['mem_id'].isin(linking_dict.values()), :]
#         #add column to nuc frame that includes matched membrane id
#         nuc_filt['mem_id'] = nuc_filt['nuc_id'].map(linking_dict)
        
#         #load matching cell file
#         cell = pd.merge(nuc_filt, mem_filt, how="left", on=["hcs_experiment", "root_dir", "plate_id", "well_id", "channel_id", "organoid_id", "org_label", "mem_id"], suffixes=('_nuc', '_mem'))
        
#         #append to list of dataframes
#         cell_feat_df_list.append(cell)
        
# # if there are cell features, concatenate and save csv
# if not len(cell_feat_df_list) == 0:
#     cell_df = pd.concat(cell_feat_df_list, ignore_index=True, sort=False)
#     cell_df = cell_df.sort_values(by=["hcs_experiment", "root_dir", "plate_id", "well_id", "org_label", "nuc_id", "channel_id"])
#     cell_df.to_csv(join(exp.get_experiment_dir(),"cell_df.csv"), index=False) #saves csv

object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns

KeyboardInterrupt: 

In [120]:
#pool together regionprops_org files
exp.only_iterate_over_wells(False)
exp.reset_iterator()

#create list of dataframes from all well and organoids
cell_feat_df_list =[] #length of list is #plates * # wells * # organoids/well

for organoid in exp:
    nucMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_nuc')]
    memMeasurements = [k for k,v in organoid.measurements.items() if k.startswith('regionprops_mem')]
    linkMeasurement = [k for k,v in organoid.measurements.items() if k.startswith('linking_nuc_to_mem')]
    
    # if no nuclear or membrane feature extraction in folder, skip. linking file must be unique
    if not nucMeasurements:
        continue
    if not memMeasurements:
        continue
    if not len(linkMeasurement) == 1:
        continue
    
    linking = organoid.get_measurement('linking_nuc_to_mem')
    linking_dict = linking.set_index('mem_id').T.to_dict('index')['nuc_id'] #mem id is key, nuc id is value
        
    for meas_name in nucMeasurements:
        channel = meas_name[-3:]
        
        try:
            nuc = organoid.get_measurement(meas_name)
        except Exception as e:
            print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
            continue 
        
        try:
            mem = organoid.get_measurement('regionprops_mem_' + channel)
        except Exception as e:
            print(organoid.organoid_id, organoid.well.well_id, organoid.well.plate.plate_id, e) #usually exception is that no nuc were detected so csv is empty. in this case, skip organoid
            continue 
            
        #select nuclei that are matched to a membrane
        nuc_filt = nuc.loc[nuc['nuc_id'].isin(linking_dict.values()), :].copy(deep=True)
        nuc_filt= nuc_filt.rename(columns={"segmentation_nuc": "segmentation"})
        #select membranes that are matched to a nucleus
        mem_filt = mem.loc[mem['mem_id'].isin(linking_dict.keys()), :].copy(deep=True)
        mem_filt= mem_filt.rename(columns={"segmentation_mem": "segmentation"})

        
        #add column to nuc frame that includes nuc id
        nuc_filt['nuc_id_linked'] = nuc_filt['nuc_id'].astype(int)
        
        #add column to mem frame that includes matched nuc id
        mem_filt['nuc_id_linked'] = mem_filt['mem_id'].map(linking_dict).astype(int)
        
        
        #load matching cell file
        cell = pd.concat([nuc_filt, mem_filt], ignore_index=True)
        #append to list of dataframes
        cell_feat_df_list.append(cell)
        
# if there are cell features, concatenate and save csv
if not len(cell_feat_df_list) == 0:
    cell_df = pd.concat(cell_feat_df_list, ignore_index=True, sort=False)
    
    cell_df = cell_df.reset_index(drop=True)
    cols_to_move = ["hcs_experiment", "root_dir","plate_id","well_id", "org_label", "nuc_id_linked", "channel_id", "nuc_id", "mem_id"]
    cell_df = cell_df[cols_to_move + [col for col in cell_df.columns if col not in cols_to_move]]
    cell_df = cell_df.sort_values(by=["hcs_experiment", "root_dir", "plate_id", "well_id", "org_label", "nuc_id_linked", "channel_id"])

    cell_df.to_csv(join(exp.get_experiment_dir(),"cell_df.csv"), index=False) #saves csv

object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_67 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_26 B04 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_66 B06 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_77 B03 day2p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns to parse from file
object_57 B03 day1p5 No columns

In [115]:
cell_df.head()

Unnamed: 0,hcs_experiment,root_dir,plate_id,well_id,org_label,nuc_id_linked,channel_id,nuc_id,mem_id,object_type,organoid_id,segmentation_nuc,intensity_img,x_pos_vox,y_pos_vox,z_pos_vox,volume_pix,mean_intensity,max_intensity,min_intensity,quartile25,quartile50,quartile75,quartile90,quartile95,quartile99,stdev,skew,kurtosis,segmentation_mem
0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,day1p5,B03,1,1.0,C01,1.0,,nucleus,object_1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,46.907661,52.16549,87.931539,63321,714.49576,1581.0,245.0,563.0,702.0,836.0,994.0,1120.7,1330.88,215.456444,0.592231,0.456316,
1,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,day1p5,B03,1,1.0,C01,,2.0,membrane,object_1,,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,47.293158,51.441142,80.86522,111114,491.6391,1581.0,91.0,218.0,451.0,714.0,881.0,999.15,1251.0,290.52713,0.596004,-0.466833,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...
2,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,day1p5,B03,1,1.0,C02,1.0,,nucleus,object_1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,46.907661,52.16549,87.931539,63321,628.599659,2135.0,281.0,499.0,597.0,719.0,860.0,958.7,1245.94,182.527876,1.432031,3.856099,
3,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,day1p5,B03,1,1.0,C02,,2.0,membrane,object_1,,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,47.293158,51.441142,80.86522,111114,581.864896,2527.0,197.0,443.0,541.0,675.0,832.0,948.0,1294.0,201.412917,1.674863,5.125393,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...
4,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,day1p5,B03,1,1.0,C03,1.0,,nucleus,object_1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,46.907661,52.16549,87.931539,63321,485.166675,1365.0,212.0,408.0,466.0,543.0,628.0,690.0,826.0,109.588436,1.169953,2.501605,


## Organoid linking over multiplexing rounds

## User input

In [28]:
#note all rounds must have the same plate names!
round_names = ['R0', 'R1', 'R2'] #ex. ['R0', 'R1', 'R2', 'R3'] always start with R0 and must be in sequential order

#must be in same order as round_names above
round_directories = ['/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R0/summary.csv',
                     '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R1/summary.csv',
                    '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R2/summary.csv']




##### Load the data

In [29]:
exp_list = []
df_list = []

for i in range(len(round_names)):
    e = Experiment()
    e.load(round_directories[i])
    exp_list.append(e)
    
    path = os.path.join(e.get_experiment_dir(), 'org_df.csv')
    isExist = os.path.exists(path)
    
    if not isExist:
        print("ERROR! Run org_df aggregation on round ", round_names[i])
        continue

    # load organoid dataframe
    df = pd.read_csv(path)
    df_list.append(df)
    
# create dictionary with keys round names, values experiment objects or org_dfs
exps = dict(zip(round_names, exp_list))
dfs = dict(zip(round_names, df_list))

#dfs['R0']

In [30]:
link_names = []
link_list = []

for i in range(len(round_names)):
    if i > 0:
        path = os.path.join(exps['R0'].get_experiment_dir(), ('linking_org_R'+str(i)+'_df.csv'))
        isExist = os.path.exists(path)

        if not isExist:
            print("ERROR! Run organoid linking and linking_org_df aggregation of round ", round_names[i])
            continue

        # load linking dataframe
        df = pd.read_csv(path)
        df["id_R0"] = df["plate_id"] + "_" + df["well_id"] + "_" + df["R0_label"].astype(str)
        df["id_RX"] = df["plate_id"] + "_" + df["well_id"] + "_" + df["RX_label"].astype(str)
        link_names.append(round_names[i])
        link_list.append(df)

# create dictionary with keys round names, values linking dfs
links = dict(zip(link_names, link_list))

#### Link all rounds to round 0 numbering

In [31]:
org_df = pd.DataFrame()
#select organoids that are matched to R1
for i in range(len(round_names)-1):
    #i is 0
    right = dfs[round_names[i+1]] #load next round's df
    #right id is the organoid numbering relative to itself, i.e. RX
    right["id"] = right["plate_id"] + "_" + right["well_id"] + "_" + right["organoid_id"].str.split('_').str[1] #right["org_label"].astype(str)
    link = links[round_names[i+1]] #load linking
    link_dict = link.set_index("id_RX").T.to_dict('index')["id_R0"] #RX id is key, R0 is value
    
    #select right organoids that are matched to an R0 organoid
    #right_filt = right.copy(deep=True) #if do not want filtering, uncomment
    right_filt = right.loc[right["id"].isin(link_dict.keys()), :].copy(deep=True)
    
    right_filt["round_id"] = round_names[i+1] #add column with the round id
    right_filt["organoid_id_linked"] = right_filt["id"].map(link_dict) #Link!
    
    if i >0: #need to also select organoids that are in the org_df (i.e. linked R0-R1 organoids)
        linked_organoids = (org_df["plate_id"]  + "_" + org_df["well_id"] + "_" + org_df["org_label_linked"].astype(str)).unique()
        print(linked_organoids)
        right_filt = right_filt.loc[right_filt["organoid_id_linked"].isin(linked_organoids), :].copy(deep=True)
    
    right_filt["organoid_id_linked"] = "object_" + right_filt["organoid_id_linked"].str.split('_').str[2]
    right_filt["org_label_linked"] = right_filt["organoid_id_linked"].str.split('_').str[1].astype(float).astype('Int64')
    

    if i == 0: #if linking R0-R1, use the R0 df
        left = dfs[round_names[0]]
        left["id"] = left["plate_id"] + "_" + left["well_id"] + "_" + left["organoid_id"].str.split('_').str[1]
        left["round_id"] = round_names[0]
        left["organoid_id_linked"] = left["organoid_id"]
        left["org_label_linked"] = left["organoid_id_linked"].str.split('_').str[1].astype(int)
        #select R0 organoids that are matched to the next round 
        #to toggle, uncomment next line and comment subsequent
        left_filt = left.loc[left["id"].isin(link_dict.values()), :].copy(deep=True)
        #left_filt = left.copy(deep=True)
        
        
#WARNING! DID NOT TEST MORE THAN TWO ROUNDS YET, CHECK THAT THIS WORKS
    else: #if previous round linking already exists, use that df 
        left = org_df.copy(deep=True)
        #update "id" to match R0 numbering
        left["id"] = left["plate_id"] + "_" + left["well_id"] + "_" + left["organoid_id_linked"].str.split('_').str[1]
        #filter organoids that missing matches in previous rounds; 
        #so only have organoids that are linked across all rounds
        left_filt = left.loc[left["id"].isin(link_dict.values()), :].copy(deep=True)
        #if want organoids linked in R0-R1 but missing a link R0-R2, etc, uncomment next line
        #left_filt = left.copy(deep=True)
        

    org_df = pd.concat([left_filt, right_filt], ignore_index=True)
    
    #update sorting
    org_df = org_df.reset_index(drop=True)
    cols_to_move = ["plate_id","well_id", "org_label_linked", "round_id"]
    org_df = org_df[cols_to_move + [col for col in org_df.columns if col not in cols_to_move]]
    org_df = org_df.sort_values(by=cols_to_move)
    #multi-indexing
    #org_df = org_df.set_index(["plate_id","well_id", "org_label_linked", "round_id"]).sort_index()

#Save to R0 directory
org_df.to_csv(join(exps['R0'].get_experiment_dir(),("org_df_linked_" + '-'.join(round_names) + ".csv")), index=False) #saves csv


['day1p5_B03_1' 'day1p5_B03_2' 'day1p5_B03_3' ... 'day4_B06_65'
 'day4_B06_66' 'day4_B06_67']


In [32]:
org_df.head(40)

Unnamed: 0,plate_id,well_id,org_label_linked,round_id,hcs_experiment,root_dir,channel_id,object_type,organoid_id,org_label,segmentation_org,intensity_img,x_pos_pix,y_pos_pix,x_pos_weighted_pix,y_pos_weighted_pix,x_massDisp_pix,y_massDisp_pix,mean_intensityMIP,max_intensity,min_intensity,abs_min,area_pix,eccentricity,majorAxisLength,minorAxisLength,axisRatio,eulerNumber,objectBoxRatio,perimeter,circularity,quartile25,quartile50,quartile75,quartile90,quartile95,quartile99,stdev,skew,kurtosis,nuc_without_mem,nuc_total,mem_without_nuc,mem_total,nuc_count,segmentation_ovr,flag_tile_border,x_pos_pix_global,y_pos_pix_global,area_pix_global,id,organoid_id_linked
0,day1p5,B03,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,organoid,object_1,1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,80.284971,72.781879,74.881082,76.24184,-5.403889,3.459961,539.219888,1898.0,116.0,108.0,10187.0,0.516293,123.174352,105.487968,0.856412,1,0.427307,379.019336,0.891114,147.0,521.0,856.0,1081.0,1227.7,1486.42,399.261889,0.512794,-0.894206,0.0,4.0,2.0,6.0,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/220509_092328_...,False,11862.284971,459.781879,10187.0,day1p5_B03_1,object_1
1,day1p5,B03,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,organoid,object_1,1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,80.284971,72.781879,77.985616,70.632944,-2.299355,-2.148935,792.685481,2897.0,244.0,155.0,10187.0,0.516293,123.174352,105.487968,0.856412,1,0.427307,379.019336,0.891114,598.0,770.0,947.0,1148.4,1312.7,1744.98,297.225611,1.037814,2.771971,0.0,4.0,2.0,6.0,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/220509_092328_...,False,11862.284971,459.781879,10187.0,day1p5_B03_1,object_1
2,day1p5,B03,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C03,organoid,object_1,1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,80.284971,72.781879,77.740654,70.364672,-2.544317,-2.417207,641.267007,1961.0,199.0,143.0,10187.0,0.516293,123.174352,105.487968,0.856412,1,0.427307,379.019336,0.891114,528.0,642.0,754.0,865.0,944.0,1190.0,194.596256,0.439194,1.577212,0.0,4.0,2.0,6.0,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/220509_092328_...,False,11862.284971,459.781879,10187.0,day1p5_B03_1,object_1
3,day1p5,B03,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C04,organoid,object_1,1,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_1/220509_092328...,80.284971,72.781879,76.558488,64.7561,-3.726483,-8.025779,3278.987631,16773.0,187.0,119.0,10187.0,0.516293,123.174352,105.487968,0.856412,1,0.427307,379.019336,0.891114,940.0,1982.0,4690.5,8626.8,10160.5,12409.56,3153.667412,1.294163,0.783935,0.0,4.0,2.0,6.0,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/220509_092328_...,False,11862.284971,459.781879,10187.0,day1p5_B03_1,object_1
4,day1p5,B03,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,81.449142,80.320939,78.237271,89.624118,-3.211871,9.30318,1447.239582,5370.0,136.0,112.0,11591.0,0.427287,127.886036,115.623824,0.904116,1,0.433698,405.060967,0.887749,217.0,947.0,2579.0,3256.0,3649.5,4428.4,1305.312483,0.500949,-1.112155,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11779.449142,557.320939,11591.0,day1p5_B03_1,object_1
5,day1p5,B03,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,81.449142,80.320939,80.654571,82.093364,-0.794571,1.772426,388.388146,831.0,152.0,134.0,11591.0,0.427287,127.886036,115.623824,0.904116,1,0.433698,405.060967,0.887749,352.0,412.0,458.0,496.0,517.0,556.0,100.687665,-0.744183,-0.282666,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11779.449142,557.320939,11591.0,day1p5_B03_1,object_1
6,day1p5,B03,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C03,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,81.449142,80.320939,80.21302,80.806564,-1.236122,0.485625,199.044086,360.0,118.0,108.0,11591.0,0.427287,127.886036,115.623824,0.904116,1,0.433698,405.060967,0.887749,177.0,202.0,222.0,241.0,251.0,274.0,34.425233,-0.137015,-0.341681,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11779.449142,557.320939,11591.0,day1p5_B03_1,object_1
7,day1p5,B03,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C04,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,81.449142,80.320939,78.871375,82.72588,-2.577767,2.404941,247.43042,401.0,118.0,111.0,11591.0,0.427287,127.886036,115.623824,0.904116,1,0.433698,405.060967,0.887749,203.0,266.0,293.0,312.0,323.0,345.0,60.71401,-0.608446,-0.826927,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11779.449142,557.320939,11591.0,day1p5_B03_1,object_1
12688,day1p5,B03,1,R2,20220507GCPLEX_R2,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,75.839043,83.842421,70.197152,88.944785,-5.641891,5.102364,469.378132,1489.0,112.0,106.0,10655.0,0.439405,123.151773,110.625867,0.898289,1,0.416618,381.688384,0.919063,143.0,441.0,742.5,934.0,1054.0,1250.46,334.925587,0.499184,-1.001783,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11796.839043,609.842421,10655.0,day1p5_B03_2,object_1
12689,day1p5,B03,1,R2,20220507GCPLEX_R2,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,organoid,object_2,2,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_2/AssayPlate_Gr...,75.839043,83.842421,74.044387,83.970509,-1.794656,0.128087,262.97175,421.0,141.0,125.0,10655.0,0.439405,123.151773,110.625867,0.898289,1,0.416618,381.688384,0.919063,247.0,271.0,291.0,308.0,319.0,342.46,42.878175,-0.839203,0.386565,,,,,4,day1p5/TIF_OVR_MIP_SEG/obj_v0.3/AssayPlate_Gre...,False,11796.839043,609.842421,10655.0,day1p5_B03_2,object_1


In [33]:
org_df.shape

(19028, 52)

## Nuclear linking over multiplexing rounds with PlatyMatch

In [12]:
#note all rounds must have the same plate names!
round_names = ['R0', 'R1', 'R2'] #ex. ['R0', 'R1', 'R2', 'R3'] always start with R0 and must be in sequential order

# #must be in same order as round_names above
# round_directories = ['/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220525_GCPLEX/20220507GCPLEX_R0/summary.csv',
#                      '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220525_GCPLEX/20220507GCPLEX_R1/summary.csv',
#                     '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220525_GCPLEX/20220507GCPLEX_R2/summary.csv']

#must be in same order as round_names above
round_directories = ['/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R0/summary.csv',
                     '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R1/summary.csv',
                    '/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R2/summary.csv']







##### Load the data

In [13]:
exp_list = []
df_list = []

for i in range(len(round_names)):
    e = Experiment()
    e.load(round_directories[i])
    exp_list.append(e)
    
    path = os.path.join(e.get_experiment_dir(), 'nuc_df.csv')
    isExist = os.path.exists(path)
    
    if not isExist:
        print("ERROR! Run nuc_df aggregation on round ", round_names[i])
        continue

    # load nuc dataframe
    df = pd.read_csv(path)
    df_list.append(df)
    
# create dictionary with keys round names, values experiment objects or nuc_dfs
exps = dict(zip(round_names, exp_list))
dfs = dict(zip(round_names, df_list))

#dfs['R0']

In [14]:
link_names = []
link_list = []

for i in range(len(round_names)):
    #skip R0
    if i > 0:
        path = os.path.join(exps['R0'].get_experiment_dir(), ('linking_nuc_R'+str(i)+'_df.csv'))
        print(path)
        isExist = os.path.exists(path)

        if not isExist:
            print("ERROR! Run organoid linking and linking_nuc_df aggregation of round ", round_names[i])
            continue

        # load linking dataframe
        df = pd.read_csv(path)
        df["id_R0"] = df["plate_id"] + "_" + df["well_id"] + "_" + df["R0_organoid_id"].str.split('_').str[1] + "_" + df["R0_nuc_id"].astype(int).astype(str)
        df["id_RX"] = df["plate_id"] + "_" + df["well_id"] + "_" + df["RX_organoid_id"].str.split('_').str[1] + "_" + df["RX_nuc_id"].astype(int).astype(str)
        link_names.append(round_names[i])
        link_list.append(df)

# create dictionary with keys round names, values linking dfs
links = dict(zip(link_names, link_list))

/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R0/linking_nuc_R1_df.csv
/tungstenfs/scratch/gliberal/Users/repinico/Microscopy/Analysis/20220528_GCPLEX_redo/20220507GCPLEX_R0/linking_nuc_R2_df.csv


In [15]:
link_list

[       R0_nuc_id  RX_nuc_id R0_organoid_id RX_organoid_id plate_id well_id  \
 0            1.0        1.0      object_12      object_11   day2p5     B04   
 1            2.0        2.0      object_12      object_11   day2p5     B04   
 2            3.0        5.0      object_12      object_11   day2p5     B04   
 3            4.0        4.0      object_12      object_11   day2p5     B04   
 4            5.0        3.0      object_12      object_11   day2p5     B04   
 ...          ...        ...            ...            ...      ...     ...   
 88704      274.0      301.0       object_4       object_5     day4     B03   
 88705      275.0      305.0       object_4       object_5     day4     B03   
 88706      276.0      303.0       object_4       object_5     day4     B03   
 88707      277.0      306.0       object_4       object_5     day4     B03   
 88708      278.0      307.0       object_4       object_5     day4     B03   
 
                  id_R0            id_RX  
 0     

In [16]:
pd.set_option("display.max_rows", 300)
pd.set_option("display.max_columns", 300)

#### Link all rounds to round 0 numbering

In [17]:
nuc_df = pd.DataFrame()
#nuc_id_linked and organoid_id_linked is always the R0 numbering

for i in range(len(round_names)-1):
    right = dfs[round_names[i+1]] #load next round's df
    right["id"] = right["plate_id"] + "_" + right["well_id"] + "_" + right["organoid_id"].str.split('_').str[1] + "_" + right["nuc_id"].astype(int).astype(str)
    link = links[round_names[i+1]] #load linking
    
    #discard all labels in RX that are not mapped to a single label in R0
    link["duplicated"] = link["id_RX"].duplicated() 
    #if want to keep first duplicate of label, remove keep=False
    link_filtered = link[link['duplicated'] == False]
    print("removed", len(link) - len(link_filtered), "duplicated RX nuclei")

    link_dict = link_filtered.set_index("id_RX").T.to_dict('index')["id_R0"] #RX id is key, R0 is value
    
    #select right organoids that are matched to R0 round
    #right_filt = right.copy(deep=True)
    right_filt = right.loc[right["id"].isin(link_dict.keys()), :].copy(deep=True)
    right_filt["round_id"] = round_names[i+1] #add column with the round id
    right_filt["nuc_id_linked"] = right_filt["id"].map(link_dict) #link!
    # add R0 organoid id and label that the RX nuc is linked to
    right_filt["organoid_id_linked"] = "object_" + right_filt["nuc_id_linked"].str.split('_').str[2]
    right_filt["org_label_linked"] = right_filt["organoid_id_linked"].str.split('_').str[1].astype(float).astype('Int64')
    
    if i >0: #need to also select nuclei that are already in the nuc_df (i.e. linked R0-R1 nuclei)
        linked_nuclei = (nuc_df["plate_id"]  + "_" + nuc_df["well_id"] + "_" + nuc_df["org_label_linked"].astype(int).astype(str) + "_" + nuc_df["nuc_id_linked"].astype(int).astype(str)).unique()
        right_filt = right_filt.loc[right_filt["nuc_id_linked"].isin(linked_nuclei), :].copy(deep=True)
    
    # add R0 nuclear label that the RX nuc is linked to
    right_filt["nuc_id_linked"] = right_filt["nuc_id_linked"].str.split('_').str[3].astype(float).astype('Int64')
    
    if i == 0: #if linking R0-R1, use the R0 df
        left = dfs[round_names[0]]
        left["id"] = left["plate_id"] + "_" + left["well_id"] + "_" + left["organoid_id"].str.split('_').str[1] + "_" + left["nuc_id"].astype(int).astype(str)
        left["round_id"] = round_names[0]
        left["nuc_id_linked"] = left["nuc_id"].astype(int)
        left["organoid_id_linked"] = left["organoid_id"]
        left["org_label_linked"] = left["organoid_id_linked"].str.split('_').str[1].astype(int)
        #select R0 nuclei that are matched to the next round (different from organoid linking default!) 
        #if want to change that all R0 nuc kept, comment next line and uncomment subsequent
        left_filt = left.loc[left["id"].isin(link_dict.values()), :].copy(deep=True)
        #left_filt = left.copy(deep=True)
               
    #WARNING! DID NOT TEST MORE THAN TWO ROUNDS YET, CHECK THAT THIS WORKS
    else: #if previous round linking already exists, use that df 
        left = nuc_df.copy(deep=True)
        #update "id" to match R0 numbering
        left["id"] = left["plate_id"] + "_" + left["well_id"] + "_" + left["organoid_id_linked"].str.split('_').str[1] + "_" + left["nuc_id_linked"].astype(int).astype(str)
        #select R0 nuclei that are matched to the RX round
        #if want to change that all R0 nuc kept, comment next line and uncomment subsequent
        #this means that output nuclei are successfully matched across all rounds
        left_filt = left.loc[left["id"].isin(link_dict.values()), :].copy(deep=True)
        #left_filt = left.copy(deep=True)


    nuc_df = pd.concat([left_filt, right_filt], ignore_index=True)
    
    #update sorting
    nuc_df = nuc_df.reset_index(drop=True)
    cols_to_move = ["plate_id","well_id", "org_label_linked", "nuc_id_linked", "round_id"]
    nuc_df = nuc_df[cols_to_move + [col for col in nuc_df.columns if col not in cols_to_move]]
    nuc_df = nuc_df.sort_values(by=cols_to_move)
    
#Save to R0 directory
nuc_df.to_csv(join(exps['R0'].get_experiment_dir(),("nuc_df_linked_" + '-'.join(round_names) + ".csv")), index=False) #saves csv

#multi-indexing
#nuc_df = nuc_df.set_index(["plate_id","well_id", "org_label_linked", "nuc_id_linked", "round_id"]).sort_index()


removed 0 duplicated RX nuclei
removed 0 duplicated RX nuclei


In [40]:
#link_dict

In [41]:
nuc_df = nuc_df.set_index(["plate_id","well_id", "org_label_linked", "nuc_id_linked", "round_id"]).sort_index()
nuc_df.head(n=40)
#print(left.shape[0], left_filt.shape[0])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,hcs_experiment,root_dir,channel_id,object_type,organoid_id,org_label,nuc_id,segmentation_nuc,intensity_img,x_pos_vox,y_pos_vox,z_pos_vox,volume_pix,mean_intensity,max_intensity,min_intensity,quartile25,quartile50,quartile75,quartile90,quartile95,quartile99,stdev,skew,kurtosis,id,organoid_id_linked
plate_id,well_id,org_label_linked,nuc_id_linked,round_id,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
day1p5,B03,2,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,nucleus,object_2,2,1,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,74.146953,113.478193,53.220794,61632,645.734862,1602.0,215.0,487.0,630.0,792.0,922.0,999.0,1165.0,206.660259,0.391605,-0.267346,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,nucleus,object_2,2,1,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,74.146953,113.478193,53.220794,61632,841.662091,4519.0,340.0,613.0,764.0,962.0,1230.0,1448.0,2278.13,357.419871,2.821364,14.13552,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C03,nucleus,object_2,2,1,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,74.146953,113.478193,53.220794,61632,611.913941,2878.0,246.0,468.0,559.0,695.0,851.0,988.0,1513.0,224.740186,2.685296,12.936765,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R0,20220507GCPLEX_R0,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C04,nucleus,object_2,2,1,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,day1p5/obj_v0.3_ROI/B03/object_2/220509_092328...,74.146953,113.478193,53.220794,61632,946.839223,5152.0,199.0,617.0,821.0,1134.25,1546.0,1893.0,2794.71,493.319733,1.977734,6.087785,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,50.365575,63.14114,58.410444,66126,1622.446239,3589.0,396.0,1240.0,1621.0,1967.0,2294.0,2509.0,2943.0,521.373487,0.275353,-0.194106,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,50.365575,63.14114,58.410444,66126,382.572271,890.0,170.0,341.0,377.0,417.0,464.0,502.0,603.0,69.020262,0.88955,2.747146,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C03,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,50.365575,63.14114,58.410444,66126,199.502949,461.0,104.0,172.0,193.0,218.0,250.0,277.0,339.0,41.034608,1.35396,3.293927,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R1,20220507GCPLEX_R1,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C04,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,50.365575,63.14114,58.410444,66126,280.484076,507.0,121.0,251.0,282.0,311.0,340.0,356.0,389.0,46.525293,-0.052579,0.10143,day1p5_B03_2_1,object_2
day1p5,B03,2,1,R2,20220507GCPLEX_R2,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C01,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,78.145512,117.085195,53.645867,59088,559.070065,1247.0,182.0,431.0,545.0,675.0,787.0,855.0,998.0,170.446973,0.425874,-0.118639,day1p5_B03_4_3,object_2
day1p5,B03,2,1,R2,20220507GCPLEX_R2,/tungstenfs/scratch/gliberal/Users/repinico/Yo...,C02,nucleus,object_4,4,3,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,day1p5/obj_v0.3_ROI/B03/object_4/AssayPlate_Gr...,78.145512,117.085195,53.645867,59088,243.28092,456.0,137.0,222.0,241.0,262.0,283.0,297.0,329.05,31.594104,0.556675,1.233884,day1p5_B03_4_3,object_2


In [None]:
#TODO make all nuc_id and cell_id integers

In [None]:
#to add marker labels (ex. DAPI, Lyz, Agr2) make a dictionary with 
#key plate_id + channel_id + round_id
#values markers
#add to pooled dataframes
