## Ideas:

### Attributes

- illumination (one or two) - how does Flamingo converter check these attributes?
- channels
- time points
- positions
- frame interval - diff between last and first time stamp.
- magnification - probably just parse metadata file.

### Methods

- Save max projection
    - Memory-conserving option: open/save individual frames in dir
    - Full stack option: concatenate MPs into stack and save stack.
- Downsample 3D
    - Memory-conserving option: open/save individual 3D frames in dir
    - Full stack option: concatenate MPs into stack and save stack.
    - View in Napari True/False
- Combined Max projection and Downsample 3D
    - Computes both in a single pass as a time saver. Will use slightly more memory but save a lot of time.
    - 

### Thoughts

- It would probably be faster to 1) max project each side and then max project those two projections together, and 2) downsample the 3D volumes before merging. The current workflow merges the full volumes first and only then max projects and downsamples.
- Measure the time of each step (reading, processing, writing) to see whether it is worth reading in a separate thread from processing. This might need both multithreading and multiprocessing.

go through the files and make a list of files for each region

go through the files in each region and 

In [14]:
import os
import sys
import numpy as np
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from tifffile import imread as tiff_read
from tifffile import imwrite as tiff_write
from dask_image.imread import imread as dask_read
#from dask.array.image import imread as dask_read 
import dask.array as da

class Kkpo:
    '''
    Index of the .tif stacks found in a single acquisition directory.

    File names are split on '_' and the first ten fields are read as, in order:
    sample, timepoint, view, region, tileX, tileY, channel, illumination,
    camera and planes. The per-file values are stored in parallel lists
    (self.sample_names, self.timepoint_names, ...), all in the same order as
    self.files, and the number of unique values of each field is stored in
    the matching self.num_* attribute.
    '''

    def __init__(self, file_path=None):
        '''
        Lists and parses the .tif files in file_path and prints the unique values of each name field.
        Accepts:
         - file_path (str or Path) - directory holding the .tif stacks. If None, a message
           is printed and the interpreter exits.
        '''
        if file_path is None:
            print('*****'*9)
            print("I can't make a Kakapo without a file path!")
            print('*****'*9)
            sys.exit()
        self.file_path = Path(file_path)

        # os.listdir returns names in arbitrary order; sorting keeps the per-file
        # lists (and everything derived from them) reproducible between runs.
        candidates = sorted(file for file in os.listdir(self.file_path)
                            if file.endswith('.tif')
                            and not file.startswith('.')
                            and 'MP' not in file
                            and 'max' not in file)

        # A .tif with fewer than ten '_'-separated fields cannot be parsed below,
        # so it is reported and left out instead of raising IndexError.
        skipped = [file for file in candidates if len(file.split('_')) < 10]
        if skipped:
            print(f'Skipping {len(skipped)} .tif file(s) that do not have 10 name fields: {skipped}')
        self.files = [file for file in candidates if len(file.split('_')) >= 10]

        fields = [file.split('_') for file in self.files]
        self.sample_names = [parts[0] for parts in fields]
        self.timepoint_names = [parts[1] for parts in fields]
        self.view_names = [parts[2] for parts in fields]
        self.region_names = [parts[3] for parts in fields]
        self.tileX_names = [parts[4] for parts in fields]
        self.tileY_names = [parts[5] for parts in fields]
        self.channel_names = [parts[6] for parts in fields]
        self.illum_names = [parts[7] for parts in fields]
        self.camera_names = [parts[8] for parts in fields]
        self.planes = [parts[9] for parts in fields]

        summary = [('sample names', self.sample_names),
                   ('timepoint names', self.timepoint_names),
                   ('view names', self.view_names),
                   ('region names', self.region_names),
                   ('tileX names', self.tileX_names),
                   ('tileY names', self.tileY_names),
                   ('channel names', self.channel_names),
                   ('illum names', self.illum_names),
                   ('camera names', self.camera_names),
                   ('planes', self.planes)]
        for label, values in summary:
            print(f'unique {label} are {np.unique(values)}')

        self.num_samples = len(set(self.sample_names))
        self.num_timepoints = len(set(self.timepoint_names))
        self.num_views = len(set(self.view_names))
        self.num_regions = len(set(self.region_names))
        self.num_tilesX = len(set(self.tileX_names))
        self.num_tilesY = len(set(self.tileY_names))
        self.num_channels = len(set(self.channel_names))
        self.num_illum = len(set(self.illum_names))
        self.num_cameras = len(set(self.camera_names))

    def _read_timestamp(self, settings_name):
        '''
        Extracts the date and time from one settings file in the kkpo directory.
        Accepts: settings_name (str) - file name of the settings file.
        Returns: datetime object built from the first line containing 'Date time stamp'.
        Raises: ValueError if the file has no such line or the value cannot be parsed.
        '''
        # Read as plain text: np.loadtxt(..., delimiter='/n') passes a two-character
        # delimiter, which current NumPy releases refuse.
        with open(self.file_path / settings_name, encoding='utf-8', errors='replace') as handle:
            stamp_lines = [line for line in handle if 'Date time stamp' in line]
        if not stamp_lines:
            raise ValueError(f"No 'Date time stamp' entry found in {settings_name}")

        # The value is expected to look like YYYYMMDD_HHMMSS after the '='
        # (NOTE(review): format inferred from the original slicing - confirm).
        # Stripping first makes the parse independent of spaces around the '='.
        stamp = stamp_lines[0].split('=')[-1].strip()
        date_part, time_part = stamp.split('_')[:2]
        return datetime.strptime(date_part[:8] + time_part[:6], '%Y%m%d%H%M%S')

    def get_interval(self):
        '''
        Identifies first and last settings file in the kkpo directory. Pulls timestamps from these files
        to determine the exact starting and end time. Calculates the total number of seconds elapsed
        and the frame interval from the number of time points.
        Also sets self.start_datetime, self.end_datetime, self.total_seconds and self.interval.
        Returns: frame interval in seconds/frame.
        Raises: ValueError if fewer than two timepoints were found (no interval can be computed).
        '''
        if self.num_timepoints < 2:
            raise ValueError(f'Need at least 2 timepoints to compute a frame interval, found {self.num_timepoints}.')

        setting_files = [file for file in os.listdir(self.file_path) if "Settings.txt" in file and not file.startswith('.')]

        # Use the smallest/largest timepoint tag rather than the first/last list entry,
        # so the result does not depend on the order in which files were listed.
        first_tag = min(self.timepoint_names)
        last_tag = max(self.timepoint_names)
        # Compare the same region/channel/illumination at both ends of the series.
        shared_tags = [min(self.region_names), min(self.channel_names), min(self.illum_names)]
        first_timepoint_name = [file for file in setting_files if all(match in file for match in [first_tag] + shared_tags)]
        last_timepoint_name = [file for file in setting_files if all(match in file for match in [last_tag] + shared_tags)]

        # quality control
        if len(first_timepoint_name) != 1 or len(last_timepoint_name) != 1:
            print('*****'*9)
            print(f'EEROR:Problem with the settings file!\n{len(first_timepoint_name)} first and {len(last_timepoint_name)} last timepoint names found.\n',
                  f'Expected 1 first and 1 last timepoint name. Exiting...')
            print('*****'*9)
            sys.exit()

        # read the settings files, get the start and end times
        self.start_datetime = self._read_timestamp(first_timepoint_name[0])
        self.end_datetime = self._read_timestamp(last_timepoint_name[0])
        self.total_seconds = (self.end_datetime - self.start_datetime).total_seconds()
        self.interval = self.total_seconds / (self.num_timepoints - 1)
        return self.interval

    def save_max_project(self, save_vol = False, downsample = False):
        '''
        Loads every indexed stack (one per sample/timepoint/view/region/tileX/tileY/channel/illum/camera
        combination), calculates a max projection along axis 0, and saves the projection to
        <file_path>/max_projections/<first nine name fields>_max_projection.tif.
        Accepts:
         - save_vol (bool) - whether or not to also save the full volume to file. Currently ignored.
         - downsample - whether or not to downsample the volume. Currently ignored.
        Returns:
         None.
        '''
        # create a folder to save the max projections
        self.max_proj_path = self.file_path / 'max_projections'
        self.max_proj_path.mkdir(exist_ok=True)

        # Key each stack by its first nine name fields. The per-file name lists are not
        # unique, so indexing them with range(num_*) does not enumerate the combinations;
        # keying on the parsed fields also avoids accidental substring matches.
        stacks = {}
        for file in self.files:
            stacks.setdefault(tuple(file.split('_')[:9]), file)

        its = len(stacks)
        with tqdm(total = its, miniters=its/100) as pbar:
            pbar.set_description('Calculating max projections')
            for name_fields, file_name in stacks.items():
                img = tiff_read(self.file_path / file_name)
                max_projection = np.max(img, axis=0)
                tiff_write(self.max_proj_path / ('_'.join(name_fields) + '_max_projection.tif'), max_projection)
                pbar.update(1)

    def interact(self):
        '''
        Dask/Napari interactive workflow (not implemented yet).
        '''
        


I want to use dask to identify different regions and open them as dask arrays. 
Then I can use Napari to interactively visualize different regions.

In [20]:
# Index the acquisition directory (the constructor prints the unique name fields).
kkpo = Kkpo('/Volumes/bigData/kkpo_test/song')

# Open every .tif whose name contains R0003 as one dask array.
# TODO: loop over kkpo.region_names to add one entry per region.
region_dict = {'test': dask_read(kkpo.file_path / '*R0003*.tif')}
region_dict['test']

unique sample names are ['S000']
unique timepoint names are ['t000000' 't000001' 't000002' 't000003' 't000004' 't000005' 't000006'
 't000007' 't000008' 't000009' 't000010' 't000011']
unique view names are ['V000']
unique region names are ['R0000' 'R0001' 'R0002' 'R0003' 'R0004']
unique tileX names are ['X000']
unique tileY names are ['Y000']
unique channel names are ['C01' 'C02' 'C03']
unique illum names are ['I0']
unique camera names are ['D0']
unique planes are ['P00157.tif' 'P00190.tif' 'P00192.tif' 'P00269.tif' 'P00294.tif']


Unnamed: 0,Array,Chunk
Bytes,51.95 GiB,1.48 GiB
Shape,"(35, 190, 2048, 2048)","(1, 190, 2048, 2048)"
Count,70 Tasks,35 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 51.95 GiB 1.48 GiB Shape (35, 190, 2048, 2048) (1, 190, 2048, 2048) Count 70 Tasks 35 Chunks Type uint16 numpy.ndarray",35  1  2048  2048  190,

Unnamed: 0,Array,Chunk
Bytes,51.95 GiB,1.48 GiB
Shape,"(35, 190, 2048, 2048)","(1, 190, 2048, 2048)"
Count,70 Tasks,35 Chunks
Type,uint16,numpy.ndarray


In [None]:
from dask import distributed
# Start a dask.distributed client with default settings and print its dashboard URL.
client = distributed.Client()
print(client.dashboard_link)

kkpo = Kkpo('/Volumes/bigData/kkpo_test/song')
# kkpo.channel_names has one entry per file, so iterate over the unique channel
# tags; otherwise the same glob is opened once for every file in the directory.
channels = [dask_read(kkpo.file_path / f'*R0003*{channel}*.tif')
            for channel in sorted(set(kkpo.channel_names))]
#stack = da.stack(channels)

In [24]:
from dask_image.imread import imread as dask_read
import dask.array as da

# Open a single .tif stack as a dask array, report its shape, then write it out as zarr.
t = dask_read('/Volumes/bigData/kkpo_test/song/S000_t000011_V000_R0002_X000_Y000_C03_I0_D0_P00294.tif')
print(t.shape)
t.to_zarr('/Volumes/bigData/kkpo_test/song/ITWORKED.zarr')

(294, 2048, 2048)
