In [54]:
%load_ext autoreload
%autoreload 2
%matplotlib ipympl

import mpl_interactions.ipyplot as iplt
import matplotlib.pyplot as plt
from bsccm import BSCCM
from skimage import transform
import zarr
import numpy as np
from tqdm import tqdm
from numcodecs import Blosc
from skimage import transform

# Which BSCCM variant to subsample. This single switch picks the source
# dataset, the export location and the subset size together, so the three
# settings cannot get out of sync with the flag that controls histology
# export further down.
COHERENT = True

if COHERENT:
    data_root = '/home/hpinkard_waller/2tb_ssd/BSCCM-coherent/'
    export_root = '/home/hpinkard_waller/2tb_ssd/BSCCM-coherent-tiny/'
    num_cells = 100
else:
    data_root = '/home/hpinkard_waller/2tb_ssd/BSCCM/'
    export_root = '/home/hpinkard_waller/2tb_ssd/BSCCM-tiny/'
    num_cells = 1000

# Open the full source dataset that the subset is drawn from.
bsccm = BSCCM(data_root)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Opening BSCCM (this may take a few seconds)...
Opened BSCCM-coherent


In [55]:
# Seed for the subset draw, so re-running this cell exports the same cells.
SUBSET_SEED = 0

# Output store for the images; mode='w' overwrites any existing store there.
new_file = zarr.open(export_root + 'BSCCM_images.zarr', mode='w')

# Draw num_cells distinct row positions from the index table and sort them so
# the exported rows keep their original relative order.
rng = np.random.default_rng(SUBSET_SEED)
random_subset = np.sort(rng.choice(bsccm.index_dataframe.index.size, size=num_cells, replace=False))

# Resave the index and surface-marker tables restricted to the subset.
# NOTE(review): the same row positions are applied to both tables, which
# assumes they are row-aligned -- confirm.
new_dframe = bsccm.index_dataframe.iloc[random_subset]
new_dframe.to_csv(export_root + 'BSCCM_index.csv', index=True)
bsccm.surface_marker_dataframe.iloc[random_subset].to_csv(export_root + 'BSCCM_surface_markers.csv', index=True)

# The compressor settings never change, so build the codec once instead of
# once per dataset.
compressor = Blosc(cname='zstd', clevel=9, shuffle=Blosc.SHUFFLE)

indices = new_dframe.index.to_numpy()
for i in tqdm(indices):
    entry = new_dframe.loc[i]
    # Histology is only exported for the non-coherent dataset, and only for
    # cells whose index row flags a matched histology cell.
    do_histology = not COHERENT and entry['has_matched_histology_cell']

    # Read everything for this cell before writing anything.
    fluor_data = np.array([bsccm.read_image(i, contrast_type='fluor', channel=c)
                           for c in bsccm.fluor_channel_names])
    led_array_data = np.array([bsccm.read_image(i, contrast_type='led_array', channel=c)
                               for c in bsccm.led_array_channel_names])
    dpc_data = bsccm.read_image(i, contrast_type='dpc')

    # (destination, data, chunk shape) for every dataset written for this cell.
    # The stacked fluor / led_array arrays get one chunk per leading-axis
    # slice; dpc is stored as a single chunk spanning its last two axes.
    base_path = entry['data_path']
    exports = [
        (base_path + '/fluor/cell_{}'.format(i), fluor_data, (1, *fluor_data.shape[-2:])),
        (base_path + '/led_array/cell_{}'.format(i), led_array_data, (1, *led_array_data.shape[-2:])),
        (base_path + '/dpc/cell_{}'.format(i), dpc_data, tuple(dpc_data.shape[-2:])),
    ]
    if do_histology:
        histology_data = bsccm.read_image(i, contrast_type='histology', convert_histology_rgb32=False)
        # chunks=None leaves the chunk layout up to zarr.
        exports.append((base_path + '/histology/cell_{}'.format(i), histology_data, None))

    for dest, data, chunks in exports:
        new_file.create_dataset(name=dest, compressor=compressor, data=data, chunks=chunks)
        

100%|██████████| 100/100 [03:49<00:00,  2.29s/it]


## Remove extra fluorescence columns from BSCCMNIST

In [25]:
import pandas as pd

# From here on the notebook works on the BSCCMNIST export, so data_root is
# re-pointed at it (replacing the value assigned earlier in the notebook).
data_root = '/home/hpinkard_waller/2tb_ssd/BSCCMNIST/'

# Load the surface-marker table, using its 'global_index' column as the index.
surface_markers_csv = data_root + 'BSCCM_surface_markers.csv'
fluor_dataframe = pd.read_csv(surface_markers_csv, index_col='global_index')

In [35]:
# Columns that survive the clean-up; every other column of the surface-marker
# table is removed below.
columns_to_keep = {
    'Fluor_690-_shading_corrected',
    'Fluor_627-673_shading_corrected',
    'Fluor_585-625_shading_corrected',
    'Fluor_550-570_shading_corrected',
    'Fluor_500-550_shading_corrected',
    'Fluor_426-446_shading_corrected',
    'CD123/HLA-DR/CD14_full_model_unmixed',
    'CD3/CD19/CD56_full_model_unmixed',
    'CD45_full_model_unmixed',
    'CD16_full_model_unmixed',
    'CD45_single_antibody_model_unmixed',
    'autofluor_single_antibody_model_unmixed',
    'CD123_single_antibody_model_unmixed',
    'CD19_single_antibody_model_unmixed',
    'CD56_single_antibody_model_unmixed',
    'CD14_single_antibody_model_unmixed',
    'CD16_single_antibody_model_unmixed',
    'HLA-DR_single_antibody_model_unmixed',
    'CD3_single_antibody_model_unmixed',
}

# Everything currently in the table that is not on the keep-list, in the
# table's own column order.
col_to_drop = [name for name in fluor_dataframe.columns if name not in columns_to_keep]

# Remove those columns from fluor_dataframe in place, in a single call.
fluor_dataframe.drop(columns=col_to_drop, inplace=True)

In [43]:
# Write the trimmed table out, index included. The destination is the same
# data_root + file name expression the table was read from earlier in the
# notebook, so an existing CSV at that path is overwritten.
surface_markers_out = data_root + 'BSCCM_surface_markers.csv'
fluor_dataframe.to_csv(surface_markers_out, index=True)