In [None]:
%load_ext line_profiler

In [None]:
from segmentation.util.utils_pipeline import segment_video_using_config_2d
from DLC_for_WBFM.utils.projects.utils_project import load_config, safe_cd
from DLC_for_WBFM.utils.video_and_data_conversion.import_video_as_array import get_single_volume
from pathlib import Path
import zarr
import os

In [None]:
project_fname = r'Y:\shared_projects\wbfm\dlc_stacks\Charlie-test-2021_04_26\project_config.yaml'
project_cfg = load_config(project_fname)
segment_fname = project_cfg['subfolder_configs']['segmentation']
project_dir = Path(project_fname).parent
with safe_cd(project_dir):
    segment_cfg = load_config(segment_fname)

this_config = segment_cfg.copy()
this_config['dataset_params'] = project_cfg['dataset_params'].copy()

# Hypothesis 1: early vs. late frames

In [None]:
def benchmark_get_volumes(fname, start_volume, num_frames):
    for i in range(start_volume, start_volume + num_frames):
        _ = get_single_volume(fname, i, num_slices=33)

In [None]:
# fname = project_cfg['red_bigtiff_fname']
# fname = 'Y:/shared_projects/ForCharlie/27082020_freelyimmobilized_trial2_ome_tiff_1601990420/ZIM2051_trial_21_HEAD_mcherry_FULL.ome.tiff'
# LOCAL
fname = r'D:\freely_immobilized\ZIM2051_trial_21_HEAD_mcherry_FULL_bigtiff.btf'

%lprun -f benchmark_get_volumes benchmark_get_volumes(fname, 100, 110)

In [None]:
import tifffile
with tifffile.TiffFile(fname) as f:
    print(f"Number of series in this file: {len(f.series)}")

# Hypothesis 2: single reader object

In [None]:
fname = r'D:\freely_immobilized\ZIM2051_trial_21_HEAD_mcherry_FULL_bigtiff.btf'
import tifffile

def benchmark_get_volumes2(fname, start_volume, num_frames, num_slices=33):
    with tifffile.TiffFile(fname) as f:
        for which_vol in range(start_volume, start_volume + num_frames):
#             start_ind = num_slices*which_vol
#             key = range(start_ind, start_ind+num_slices)
#             tmp = [f.pages[i].asarray() for i in key]
#             dat = (f.pages[start_ind:start_ind+num_slices]).asarray()
            get_single_volume(f, which_vol, num_slices=33)
    return None

In [None]:
%lprun -f benchmark_get_volumes2 benchmark_get_volumes2(fname, 500, 510)

# Related: trying to write better bigtiffs

In [None]:
%load_ext autoreload
%autoreload 2
from video_conversions.bigtiff.ometiff2bigtiff import ometiff2bigtiff

In [None]:
fname = project_cfg['red_bigtiff_fname']

In [None]:
ometiff2bigtiff(fname, actually_write=True, num_slices=33)

In [None]:
# Re-benchmark

fname = 'Y:/shared_projects/ForCharlie/27082020_freelyimmobilized_trial2_ome_tiff_1601990420/ZIM2051_trial_21_HEAD_gcamp_FULL_bigtiff.btf'
%lprun -f benchmark_get_volumes benchmark_get_volumes(fname, 100, 110)

In [None]:
import tifffile

In [None]:
fname = 'Y:/shared_projects/ForCharlie/27082020_freelyimmobilized_trial2_ome_tiff_1601990420/ZIM2051_trial_21_HEAD_gcamp_FULL_bigtiff.btf'

with tifffile.TiffFile(fname) as f:
    print(len(f.series))

In [None]:
# OLD
fname = 'Y:/shared_projects/ForCharlie/27082020_freelyimmobilized_trial2_ome_tiff_1601990420/ZIM2051_trial_21_HEAD_mcherry_FULL.ome.tiff'

with tifffile.TiffFile(fname) as f:
    print(len(f.series))

# Write it as a zarr

In [54]:
fname_in = r'D:\freely_immobilized\ZIM2051_trial_21_HEAD_mcherry_FULL_bigtiff.btf'
# fname_out = r'D:\freely_immobilized\mcherry_test.zarr'
import tifffile


import zarr
import dbm
import numpy as np
import os
from DLC_for_WBFM.utils.video_and_data_conversion.import_video_as_array import get_single_volume
from numcodecs import Blosc

## Write several different formats

In [22]:
# Get some real data

num_frames = 50
with tifffile.TiffFile(fname_in) as f:
    dat = [get_single_volume(f, i, 33) for i in range(num_frames)]

In [37]:
dat = np.array(dat)
dat.shape

(50, 33, 133, 333)

In [42]:
dir_out = r'D:\zarr_tests'
chunks=(1, 33, 133, 333)

In [48]:

fname_out = os.path.join(dir_out, 'tiff_test.btf')
tifffile.imsave(fname_out, dat)

In [95]:
fname_out = os.path.join(dir_out, 'zip_test.zip')
zarr.save_array(fname_out, dat, chunks=chunks)
# with zarr.ZipStore(fname_out, mode='w') as store:
#     zarr.save_array(fname_out, dat, chunks=chunks)

In [100]:
fname_out = os.path.join(dir_out, 'zip_compressor_test.zip')
compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
zarr.save_array(fname_out, dat, chunks=chunks, compressor=compressor)

In [44]:
fname_out = os.path.join(dir_out, 'default_test.zarr')
zarr.save_array(fname_out, dat, chunks=chunks)

In [57]:

fname_out = os.path.join(dir_out, 'default_compressor_test.zarr')
compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
zarr.save_array(fname_out, dat, chunks=chunks, compressor=compressor)

In [104]:
fname_out = os.path.join(dir_out, 'dbm_test.db')
with zarr.DBMStore(fname_out, open=dbm.open) as store:

    z = zarr.zeros((num_frames, 33, 133, 333), chunks=chunks, store=store, overwrite=True)

    for i in range(num_frames):
        z[i,...] = dat[i,...]

In [105]:
fname_out = os.path.join(dir_out, 'dbm_compressor_test.db')
compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
with zarr.DBMStore(fname_out, open=dbm.open) as store:
#     store = zarr.storage.DBMStore(fname_out)

    z = zarr.zeros((num_frames, 33, 133, 333), chunks=chunks, store=store, compressor=compressor, overwrite=True)

    for i in range(num_frames):
        z[i,...] = dat[i,...]

In [52]:
fname_out = os.path.join(dir_out, 'sql_test.sqldb')
store = zarr.SQLiteStore(fname_out)

z = zarr.zeros((num_frames, 33, 133, 333), chunks=chunks, store=store, overwrite=True)
for i in range(num_frames):
    z[i,...] = dat[i,...]

store.close()

In [73]:
fname_out = os.path.join(dir_out, 'sql_compressor_test.sqldb')
store = zarr.SQLiteStore(fname_out)

compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
z = zarr.zeros((num_frames, 33, 133, 333), chunks=chunks, store=store, compressor=compressor, overwrite=True)
for i in range(num_frames):
    z[i,...] = dat[i,...]

store.close()

## Test full-file read speeds

In [62]:
%%timeit

fname_out = os.path.join(dir_out, 'tiff_test.btf')
tmp = np.array(tifffile.imread(fname_out))

124 ms ± 3.61 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [67]:
%%timeit

fname_out = os.path.join(dir_out, 'default_test.zarr')
tmp = np.array(zarr.open(fname_out))

166 ms ± 3.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [68]:
%%timeit

fname_out = os.path.join(dir_out, 'default_compressor_test.zarr')
tmp = np.array(zarr.open(fname_out))

195 ms ± 4.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [99]:
%%timeit

fname_out = os.path.join(dir_out, 'zip_test.zip')
tmp = np.array(zarr.load(fname_out))

235 ms ± 2.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [101]:
%%timeit

fname_out = os.path.join(dir_out, 'zip_compressor_test.zip')
tmp = np.array(zarr.load(fname_out))

252 ms ± 3.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [148]:
%%timeit

fname_out = os.path.join(dir_out, 'dbm_test.db')
store = zarr.DBMStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r'))
store.close()

872 ms ± 13.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [147]:
%%timeit

fname_out = os.path.join(dir_out, 'dbm_compressor_test.db')
store = zarr.DBMStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r'))
store.close()

804 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [144]:
%%timeit

fname_out = os.path.join(dir_out, 'sql_test.sqldb')
store = zarr.SQLiteStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r'))
store.close()

1.09 s ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [146]:
%%timeit

fname_out = os.path.join(dir_out, 'sql_compressor_test.sqldb')
store = zarr.SQLiteStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r'))
store.close()

888 ms ± 13 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Test just last frame

In [149]:
%%timeit

fname_out = os.path.join(dir_out, 'tiff_test.btf')
tmp = get_single_volume(fname_out, 49, 33)
# tmp = np.array(tifffile.imread(fname_out))

17.4 ms ± 1.07 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [150]:
%%timeit

fname_out = os.path.join(dir_out, 'default_test.zarr')
tmp = np.array(zarr.open(fname_out)[49,...])

4.6 ms ± 30.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [151]:
%%timeit

fname_out = os.path.join(dir_out, 'default_compressor_test.zarr')
tmp = np.array(zarr.open(fname_out)[49,...])

5.06 ms ± 28.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [152]:
%%timeit

fname_out = os.path.join(dir_out, 'zip_test.zip')
tmp = np.array(zarr.load(fname_out)[49,...])

190 ms ± 729 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [153]:
%%timeit

fname_out = os.path.join(dir_out, 'zip_compressor_test.zip')
tmp = np.array(zarr.load(fname_out)[49,...])

207 ms ± 4.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [154]:
%%timeit

fname_out = os.path.join(dir_out, 'dbm_test.db')
store = zarr.DBMStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r')[49,...])
store.close()

28.8 ms ± 704 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [155]:
%%timeit

fname_out = os.path.join(dir_out, 'dbm_compressor_test.db')
store = zarr.DBMStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r')[49,...])
store.close()

25.6 ms ± 604 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [156]:
%%timeit

fname_out = os.path.join(dir_out, 'sql_test.sqldb')
store = zarr.SQLiteStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r')[49,...])
store.close()

32.6 ms ± 716 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [157]:
%%timeit

fname_out = os.path.join(dir_out, 'sql_compressor_test.sqldb')
store = zarr.SQLiteStore(fname_out)
tmp = np.array(zarr.open_array(store=store, mode='r')[49,...])
store.close()

27.7 ms ± 526 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Don't test reopening the file

In [161]:
fname_out = os.path.join(dir_out, 'tiff_test.btf')
with tifffile.TiffFile(fname_out) as f:
    %timeit tmp = get_single_volume(f, 49, 33)

9.16 ms ± 247 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [173]:
fname_out = os.path.join(dir_out, 'default_test.zarr')
z = zarr.open(fname_out)
%timeit tmp = np.array(z[49,...])

4.11 ms ± 24 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [172]:
fname_out = os.path.join(dir_out, 'default_compressor_test.zarr')
z = zarr.open(fname_out)
%timeit tmp = np.array(z[49,...])

4.83 ms ± 38 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [168]:
fname_out = os.path.join(dir_out, 'dbm_test.db')
store = zarr.DBMStore(fname_out)
z = zarr.open_array(store=store, mode='r')
%timeit tmp = np.array(z[49,...])
store.close()

24.6 ms ± 587 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [169]:
fname_out = os.path.join(dir_out, 'dbm_compressor_test.db')
store = zarr.DBMStore(fname_out)
z = zarr.open_array(store=store, mode='r')
%timeit tmp = np.array(z[49,...])
store.close()

27.7 ms ± 652 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [166]:

fname_out = os.path.join(dir_out, 'sql_compressor_test.sqldb')
store = zarr.SQLiteStore(fname_out)
z = zarr.open_array(store=store, mode='r')
%timeit tmp = np.array(z[49,...])
store.close()

25.6 ms ± 491 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [167]:
fname_out = os.path.join(dir_out, 'sql_compressor_test.sqldb')
store = zarr.SQLiteStore(fname_out)
z = zarr.open_array(store=store, mode='r')
%timeit tmp = np.array(z[49,...])
store.close()

25.7 ms ± 760 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Look at the actually loaded variables

In [142]:
fname_out = os.path.join(dir_out, 'sql_test.sqldb')
store = zarr.SQLiteStore(fname_out)
z = zarr.open_array(store=store, mode='r')
store.close()

array([[[[ 99., 103.,  99., ..., 106., 105.,  96.],
         [103.,  96., 100., ..., 105.,  98., 101.],
         [101.,  96.,  99., ...,  98.,  94.,  99.],
         ...,
         [ 98.,  94., 100., ..., 110., 108., 109.],
         [ 99.,  95., 102., ..., 100., 109., 101.],
         [100., 101., 100., ..., 103., 108., 106.]],

        [[100.,  99.,  93., ...,  98.,  93., 100.],
         [ 99., 100., 101., ..., 104., 104.,  97.],
         [ 95.,  97., 101., ...,  98.,  95.,  94.],
         ...,
         [ 99.,  98.,  99., ..., 109., 113., 114.],
         [ 95., 100.,  98., ..., 100., 107.,  98.],
         [ 64.,  98., 100., ..., 105., 108., 112.]],

        [[ 98., 100., 100., ..., 109.,  96.,  94.],
         [102.,  96., 103., ..., 102., 100., 102.],
         [101.,  97., 100., ..., 100., 100.,  99.],
         ...,
         [ 99.,  94.,  94., ..., 103.,  92.,  98.],
         [103., 100., 100., ...,  98., 114., 103.],
         [ 97., 101., 102., ..., 101., 111., 106.]],

        ...,

  

In [133]:
np.array(z)

array('D:\\zarr_tests\\sql_test.sqldb', dtype='<U28')

In [17]:
# from dbm.gnu import open as dbm_open
with zarr.DBMStore(fname_out, open=dbm.open) as store:
    z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)