In [1]:
import sys
import pathlib

import imageio
import numpy as np
import pandas as pd

# Put the directory two levels above the current working directory on the
# import path so the `virtual_stain_flow` import below can be resolved, and
# echo it so the reader can see which location was used.
two_levels_up = str(pathlib.Path.cwd().parent.parent)
sys.path.append(two_levels_up)
print(two_levels_up)

from virtual_stain_flow.datasets.PatchDataset import PatchDataset

/home/weishanli/Waylab


In [2]:
## REPLACE WITH YOUR OWN PATHS
# Root of the analysis project that contains the load-data CSV directory.
analysis_home_path = pathlib.Path('/home/weishanli/Waylab/ALSF_pilot/ALSF_img2img_prototyping')
# Directory holding the `<plate>_sc_normalized.parquet` single-cell profiles.
sc_features_parquet_path = pathlib.Path(
    '/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pilot_data/preprocessed_profiles_SN0313537/single_cell_profiles'
)

loaddata_csv_path = analysis_home_path \
    / '0.data_analysis_and_preprocessing' / 'loaddata_csvs'

# Fail fast with a distinct error for each way the lookup can go wrong:
#   ValueError        -> the load-data directory itself is missing
#   FileNotFoundError -> the directory exists but holds no CSV
if not loaddata_csv_path.exists():
    raise ValueError("Incorrect loaddata csv path")

# Sort the matches so the selected CSV does not depend on filesystem listing
# order, and use a `next` default instead of a bare `except:` so unrelated
# errors (e.g. permission problems) surface unchanged rather than being
# reported as a missing CSV.
loaddata_csv = next(iter(sorted(loaddata_csv_path.glob('*.csv'))), None)
if loaddata_csv is None:
    raise FileNotFoundError("No loaddata csv found")

loaddata_df = pd.read_csv(loaddata_csv)
# subsample to reduce runtime
# Cap the sample size at the table length: DataFrame.sample raises ValueError
# when n exceeds the number of rows (sampling without replacement). For tables
# with at least 100 rows the selection is unchanged.
loaddata_df = loaddata_df.sample(n=min(100, len(loaddata_df)), random_state=42)

# Only the merge keys and the cell-centre coordinates are read from each
# per-plate parquet file.
sc_feature_columns = [
    'Metadata_Plate',
    'Metadata_Well',
    'Metadata_Site',
    'Metadata_Cells_Location_Center_X',
    'Metadata_Cells_Location_Center_Y',
]

# Collect one frame per plate and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on every
# iteration, and concatenating onto an empty DataFrame triggers a pandas
# FutureWarning about empty entries.
sc_feature_frames = []
for plate in loaddata_df['Metadata_Plate'].unique():
    sc_features_parquet = sc_features_parquet_path / f'{plate}_sc_normalized.parquet'
    if not sc_features_parquet.exists():
        print(f'{sc_features_parquet} does not exist, skipping...')
        continue
    sc_feature_frames.append(
        pd.read_parquet(sc_features_parquet, columns=sc_feature_columns)
    )

# pd.concat raises on an empty list, so fall back to an empty frame when no
# plate file was found (the same value the original loop would have left).
sc_features = pd.concat(sc_feature_frames) if sc_feature_frames else pd.DataFrame()

# Patch size handed to the dataset as `patch_size`.
PATCH_SIZE = 256

# All channel names used in this notebook; one of them is the input and the
# remaining ones are the targets.
channel_names = ["OrigBrightfield", "OrigDNA", "OrigER", "OrigMito", "OrigRNA", "OrigAGP"]
input_channel_name = "OrigBrightfield"

# Targets are every channel other than the input, in their listed order.
target_channel_names = list(
    filter(lambda name: name != input_channel_name, channel_names)
)

# Gather the constructor arguments first so the dataset configuration can be
# read (and tweaked) as plain data before the dataset is built.
patch_dataset_kwargs = dict(
    # Already-loaded dataframes are passed in directly.
    _loaddata_csv=loaddata_df,
    _sc_feature=sc_features,
    # Channel keys are left unset here; the export loop assigns them per
    # target channel via set_input_channel_keys / set_target_channel_keys.
    _input_channel_keys=None,
    _target_channel_keys=None,
    # No transforms are supplied.
    _input_transform=None,
    _target_transform=None,
    patch_size=PATCH_SIZE,
    verbose=True,
    # NOTE(review): "random_cell" presumably places patches around cell
    # centres from the single-cell table -- confirm against PatchDataset.
    patch_generation_method="random_cell",
    patch_generation_random_seed=42,
)

pds = PatchDataset(**patch_dataset_kwargs)

2025-02-20 00:15:46,794 - DEBUG - Dataframe supplied for loaddata_csv, using as is
2025-02-20 00:15:46,794 - DEBUG - Dataframe supplied for sc_feature, using as is
2025-02-20 00:15:46,795 - DEBUG - X and Y columns Metadata_Cells_Location_Center_X, Metadata_Cells_Location_Center_Y detected in sc_feature dataframe, using as the coordinates for cell centers
2025-02-20 00:15:46,795 - DEBUG - Both loaddata_csv and sc_feature supplied, inferring merge fields to associate the two dataframes
2025-02-20 00:15:46,795 - DEBUG - Merge fields inferred: ['Metadata_Site', 'Metadata_Plate', 'Metadata_Well']
2025-02-20 00:15:46,795 - DEBUG - Dataframe supplied for sc_feature, using as is
2025-02-20 00:15:46,816 - DEBUG - Inferring channel keys from loaddata csv
2025-02-20 00:15:46,817 - DEBUG - Channel keys: {'OrigBrightfield', 'OrigAGP', 'OrigDNA', 'OrigRNA', 'OrigMito', 'OrigER'} inferred from loaddata csv
2025-02-20 00:15:46,817 - DEBUG - Setting input channel(s) ...
2025-02-20 00:15:46,817 - DEBUG 

2025-02-20 00:15:46,848 - DEBUG - Image size inferred: 1080 for all images to force redetect image sizes for each view/site set consistent_img_size=False
2025-02-20 00:15:47,226 - DEBUG - Generated 461 patches for 93 site/view


OrigBrightfield/r06c12f02p01-ch1sk1fk1fl1_8_680.tiff

In [6]:
# Display the subsampled load-data table (one row per sampled site/view) as the cell output.
loaddata_df

Unnamed: 0,FileName_OrigBrightfield,PathName_OrigBrightfield,FileName_OrigER,PathName_OrigER,FileName_OrigAGP,PathName_OrigAGP,FileName_OrigMito,PathName_OrigMito,FileName_OrigDNA,PathName_OrigDNA,...,Metadata_AbsPositionZ,Metadata_ChannelID,Metadata_Col,Metadata_FieldID,Metadata_PlaneID,Metadata_PositionX,Metadata_PositionY,Metadata_PositionZ,Metadata_Row,Metadata_Reimaged
2079,r06c22f01p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c22f01p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c22f01p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c22f01p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c22f01p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134358,6,22,1,1,0.000000,0.000000,-0.000006,6,False
668,r05c09f03p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c09f03p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c09f03p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c09f03p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c09f03p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134405,6,9,3,1,0.000000,0.000646,-0.000006,5,False
2073,r05c22f04p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c22f04p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c22f04p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c22f04p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r05c22f04p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134366,6,22,4,1,0.000646,0.000646,-0.000006,5,False
1113,r06c13f07p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c13f07p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c13f07p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c13f07p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c13f07p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134347,6,13,7,1,-0.000646,-0.000646,-0.000006,6,False
788,r06c10f06p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c10f06p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c10f06p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c10f06p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r06c10f06p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134381,6,10,6,1,-0.000646,0.000000,-0.000006,6,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1730,r03c19f03p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c19f03p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c19f03p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c19f03p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c19f03p01-ch6sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134366,6,19,3,1,0.000000,0.000646,-0.000004,3,True
196,r12c04f08p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r12c04f08p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r12c04f08p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r12c04f08p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r12c04f08p01-ch6sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134491,6,4,8,1,0.000000,-0.000646,-0.000004,12,True
367,r07c06f08p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r07c06f08p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r07c06f08p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r07c06f08p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r07c06f08p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134447,6,6,8,1,0.000000,-0.000646,-0.000006,7,False
650,r03c09f03p01-ch1sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c09f03p01-ch2sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c09f03p01-ch3sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c09f03p01-ch4sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,r03c09f03p01-ch5sk1fk1fl1.tiff,/home/weishanli/Waylab/ALSF_pilot/data/ALSF_pi...,...,0.134428,6,9,3,1,0.000000,0.000646,-0.000006,3,False


In [4]:
# Exported patches go under `example_patch_data`, two directories above the
# current working directory, with a sub-directory for the input channel.
EXAMPLE_PATCH_DATA_EXPORT_PATH = pathlib.Path.cwd().parent.parent / 'example_patch_data'
INPUT_EXPORT_PATH = EXAMPLE_PATCH_DATA_EXPORT_PATH / input_channel_name

# Create the parent first, then the child; both calls are no-ops when the
# directory already exists.
for export_dir in (EXAMPLE_PATCH_DATA_EXPORT_PATH, INPUT_EXPORT_PATH):
    export_dir.mkdir(exist_ok=True)

In [5]:
def _patch_export_name(image_path, coord):
    """Return ``<image stem>_<coord[0]>_<coord[1]>.tiff`` for one exported patch."""
    return f'{image_path.stem}_{coord[0]}_{coord[1]}.tiff'


# Export every patch once per target channel. The input-channel patches are
# identical across targets, so they are written only during the first pass.
for j, channel_name in enumerate(target_channel_names):

    pds.set_input_channel_keys([input_channel_name])
    pds.set_target_channel_keys([channel_name])

    CHANNEL_EXPORT_PATH = EXAMPLE_PATCH_DATA_EXPORT_PATH / channel_name
    CHANNEL_EXPORT_PATH.mkdir(exist_ok=True)

    is_first_target = (j == 0)

    for i in range(len(pds)):
        # Named *_patch rather than `input`/`target` so the builtin `input`
        # is not shadowed in the kernel namespace for later cells.
        input_patch, target_patch = pds[i]
        # NOTE(review): these attributes are read right after indexing and
        # appear to describe the item just fetched -- confirm against
        # PatchDataset before reordering.
        input_name = pds.input_names
        target_name = pds.target_names
        patch_coord = pds.patch_coords

        if is_first_target:
            imageio.imwrite(
                INPUT_EXPORT_PATH / _patch_export_name(input_name[0], patch_coord),
                input_patch[0].numpy().astype(np.uint16))

        imageio.imwrite(
            CHANNEL_EXPORT_PATH / _patch_export_name(target_name[0], patch_coord),
            target_patch[0].numpy().astype(np.uint16))

2025-02-20 00:15:47,252 - DEBUG - Set input channel(s) as ['OrigBrightfield']
2025-02-20 00:15:47,252 - DEBUG - Set target channel(s) as ['OrigDNA']
2025-02-20 00:15:47,475 - DEBUG - Set input channel(s) as ['OrigBrightfield']
2025-02-20 00:15:47,475 - DEBUG - Set target channel(s) as ['OrigER']
2025-02-20 00:15:47,676 - DEBUG - Set input channel(s) as ['OrigBrightfield']
2025-02-20 00:15:47,677 - DEBUG - Set target channel(s) as ['OrigMito']
2025-02-20 00:15:47,850 - DEBUG - Set input channel(s) as ['OrigBrightfield']
2025-02-20 00:15:47,850 - DEBUG - Set target channel(s) as ['OrigRNA']
2025-02-20 00:15:48,034 - DEBUG - Set input channel(s) as ['OrigBrightfield']
2025-02-20 00:15:48,035 - DEBUG - Set target channel(s) as ['OrigAGP']
