# Crop from 128 x 128 images to 96 x 96 images
Also make an MNIST-sized (28 x 28) version

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib ipympl

import mpl_interactions.ipyplot as iplt
import matplotlib.pyplot as plt
from bsccm import BSCCM
from skimage import transform
import zarr
import numpy as np
from tqdm import tqdm
from numcodecs import Blosc
from skimage import transform

# Root directory passed to BSCCM; the cropped zarr store is also written under it
data_root = '/home/hpinkard_waller/2tb_ssd/BSCCM/'
# Root directory under which the MNIST-sized zarr store is written
data_root_mnist = '/home/hpinkard_waller/2tb_ssd/BSCCMNIST/'
# Dataset handle used below to look up index entries and read per-cell images
bsccm = BSCCM(data_root)

In [None]:
# Crop every cell image by `crop_off` pixels per side (128 x 128 -> 96 x 96) and
# write the result to a new zarr store. When COHERENT is False, also write a
# 28 x 28 "MNIST-sized" copy of every image to a second store.
#
# NOTE: both stores are opened with mode='w', which overwrites existing content.
# NOTE(review): mnist_file is opened even when COHERENT is True, although
# nothing is written to it in that case -- confirm that is intended.
new_file = zarr.open(data_root + 'BSCCM_images_cropped.zarr', mode='w')
mnist_file = zarr.open(data_root_mnist + 'BSCCM_images.zarr', mode='w')
COHERENT = False

crop_off = 16                    # pixels removed from each side of the image
crop_dim = 128 - 2*crop_off      # side length of the cropped images
hist_crop = 75                   # pixels removed from each side of histology images
mnist_dim = 28                   # side length of the MNIST-sized images

# The compressor settings are identical for every dataset, so build them once
# instead of four to eight times per cell.
compressor = Blosc(cname='zstd', clevel=9, shuffle=Blosc.SHUFFLE)


def _center_crop(img):
    """Remove `crop_off` pixels from each side of the last two axes of `img`."""
    return img[..., crop_off:-crop_off, crop_off:-crop_off]


def _to_mnist_uint8(img, output_shape=(mnist_dim, mnist_dim)):
    """Divide `img` by 16, resize it to `output_shape`, and truncate to uint8."""
    resized = transform.resize(img / 16, output_shape=output_shape, anti_aliasing=True)
    return resized.astype(np.uint8)


# NOTE(review): `i` is used both as a positional row index (iloc) and as the
# index passed to bsccm.read_image -- this assumes the two coincide; confirm.
for i in tqdm(range(bsccm.index_dataframe.global_index.size)):

    entry = bsccm.index_dataframe.iloc[i]
    do_histology = not COHERENT and entry['has_matched_histology_cell']

    # Destination dataset names inside the zarr stores (same in both stores)
    base_path = entry['data_path']
    fluor_dest = base_path + '/fluor/cell_{}'.format(i)
    lf_dest = base_path + '/led_array/cell_{}'.format(i)
    dpc_dest = base_path + '/dpc/cell_{}'.format(i)
    hist_dest = base_path + '/histology/cell_{}'.format(i)

    # Read and crop; fluor and led_array are stacked with one leading channel axis
    fluor_data = np.array([
        _center_crop(bsccm.read_image(i, contrast_type='fluor', channel=c))
        for c in bsccm.fluor_channel_names
    ])
    led_array_data = np.array([
        _center_crop(bsccm.read_image(i, contrast_type='led_array', channel=c))
        for c in bsccm.led_array_channel_names
    ])
    dpc_data = _center_crop(bsccm.read_image(i, contrast_type='dpc'))
    if do_histology:
        # Histology is cropped on its first two axes with its own margin
        histology_data = bsccm.read_image(
            i, contrast_type='histology', convert_histology_rgb32=False
        )[hist_crop:-hist_crop, hist_crop:-hist_crop]

    # Cropped version: one chunk per channel image
    new_file.create_dataset(name=fluor_dest, compressor=compressor,
                            data=fluor_data, chunks=(1, crop_dim, crop_dim))
    new_file.create_dataset(name=lf_dest, compressor=compressor,
                            data=led_array_data, chunks=(1, crop_dim, crop_dim))
    new_file.create_dataset(name=dpc_dest, compressor=compressor,
                            data=dpc_data, chunks=(crop_dim, crop_dim))
    if do_histology:
        new_file.create_dataset(name=hist_dest, compressor=compressor,
                                data=histology_data, chunks=None)

    # MNIST version
    if not COHERENT:
        # Downsampled versions for mnist
        fluor_data_mnist = np.array([_to_mnist_uint8(img) for img in fluor_data])
        led_array_data_mnist = np.array([_to_mnist_uint8(img) for img in led_array_data])
        # DPC is resized only: no division by 16 and no integer cast
        dpc_data_mnist = transform.resize(dpc_data, output_shape=(mnist_dim, mnist_dim),
                                          anti_aliasing=True)

        # Chunks match the 28 x 28 images. The previous (crop_dim, crop_dim)
        # chunk shape was larger than the arrays being stored.
        mnist_file.create_dataset(name=fluor_dest, compressor=compressor,
                                  data=fluor_data_mnist, chunks=(1, mnist_dim, mnist_dim))
        # NOTE(review): values were already truncated to uint8 above; the uint16
        # storage dtype is kept so existing readers see the same dtype.
        mnist_file.create_dataset(name=lf_dest, compressor=compressor,
                                  data=led_array_data_mnist.astype(np.uint16),
                                  chunks=(1, mnist_dim, mnist_dim))
        mnist_file.create_dataset(name=dpc_dest, compressor=compressor,
                                  data=dpc_data_mnist, chunks=(mnist_dim, mnist_dim))

        if do_histology:
            histology_data_mnist = _to_mnist_uint8(
                histology_data, output_shape=(mnist_dim, mnist_dim, 3)
            )
            mnist_file.create_dataset(name=hist_dest, compressor=compressor,
                                      data=histology_data_mnist, chunks=None)


  4%|▎         | 15371/412941 [1:01:51<25:51:00,  4.27it/s]