In [None]:
import os
import re
import sys
import glob
import json
import dask
import shutil
import pickle
import hashlib
import skimage
import datetime
import tifffile
import numpy as np
import pandas as pd

import dask.diagnostics
from matplotlib import pyplot as plt

In [None]:
sys.path.append('..')
from pipeline_process.imaging import image, plate_microscopy_api, utils, viz

In [None]:
# reload imported modules before each cell is executed, so edits to the
# local `pipeline_process` package are picked up without a kernel restart
%load_ext autoreload
%autoreload 2

In [None]:
# root directory handed to PlateMicroscopyAPI as its first argument
ess_root = '/gpfsML/ML_group/PlateMicroscopy/'
# displayed as the cell output: True only if that directory is reachable from this machine
os.path.isdir(ess_root)

### Parsing all raw TIFF metadata

In [None]:
# NOTE(review): the second argument looks like a directory for cached results —
# confirm against PlateMicroscopyAPI
api = plate_microscopy_api.PlateMicroscopyAPI(ess_root, '../cache/20191029-ess-on-cap/')

In [None]:
# NOTE(review): the directory walk is deliberately not re-run here; presumably a
# previously cached walk is reused — confirm `api.os_walk` is populated on a fresh kernel
# api.cache_os_walk()
api.construct_metadata()
api.append_file_info()
# overwrite=True: any previously cached metadata is replaced by this run's result
api.cache_metadata(overwrite=True)

In [None]:
# sanity check: length of the directory walk, number of metadata rows,
# number of rows flagged `is_raw`, and number of rows in `md_raw`
len(api.os_walk), api.md.shape[0], api.md.is_raw.sum(), api.md_raw.shape[0]

In [None]:
dst_root = '/gpfsML/ML_group/oc-plate-microscopy/'

# one delayed parsing task per row of the raw-file metadata;
# nothing is executed until the tasks are passed to dask.compute
tasks = [
    dask.delayed(api.parse_raw_tiff_metadata)(row, src_root=api.root_dir, dst_root=dst_root)
    for _, row in api.md_raw.iterrows()
]

In [None]:
# execute every delayed parsing task while showing a progress bar; the return
# value is discarded — presumably parse_raw_tiff_metadata writes its output
# under dst_root (TODO confirm)
with dask.diagnostics.ProgressBar():
    dask.compute(*tasks)

### Loading the parsed metadata

In [None]:
@dask.delayed
def load_json(path):
    """Read and decode a single JSON file.

    Because of the ``@dask.delayed`` decorator, calling ``load_json(path)``
    returns a delayed task; the file is only opened when that task is computed.

    Parameters
    ----------
    path : str
        Path of the JSON file to read.

    Returns
    -------
    The Python object produced by ``json.load`` for the file's content.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # the context manager closes the handle even when json.load raises,
    # which the previous explicit open()/close() pair did not guarantee
    with open(path, 'r') as file:
        return json.load(file)

@dask.delayed
def load_csv(path):
    """Read one CSV file and tag its rows with where the file came from.

    Because of the ``@dask.delayed`` decorator, calling ``load_csv(path)``
    returns a delayed task; the file is only read when that task is computed.

    Parameters
    ----------
    path : str
        Path of the CSV file, split on ``os.sep``; it needs at least two
        components (parent directory and file name).

    Returns
    -------
    pandas.DataFrame
        The parsed table with two extra columns: ``filename`` (last path
        component) and ``plate_dir`` (second-to-last path component).
    """
    table = pd.read_csv(path)

    # the last two path components are the file name and its parent directory
    components = path.split(os.sep)
    table['filename'] = components[-1]
    table['plate_dir'] = components[-2]
    return table

In [None]:
# load all of the global metadata
paths = api.aggregate_filepaths(dst_root, kind='metadata', tag='global-metadata', ext='json')
ds = [load_json(path) for path in paths]
len(paths)

In [None]:
# run the delayed JSON loads; note that `ds` is rebound here from the list of
# delayed tasks to the tuple of their results
with dask.diagnostics.ProgressBar():
    ds = dask.compute(*ds)

In [None]:
# load all of the mm metadata
paths = api.aggregate_filepaths(dst_root, kind='metadata', tag='mm-metadata', ext='csv')
dfs = [load_csv(path) for path in paths]
len(paths)

In [None]:
# run the delayed CSV loads; `dfs` is rebound from the list of delayed tasks
# to the tuple of loaded dataframes
with dask.diagnostics.ProgressBar():
    dfs = dask.compute(*dfs)
# stack the per-file tables into one; ignore_index is not set, so each frame
# keeps its own row labels — NOTE(review): index values presumably repeat across files
d = pd.concat(dfs)