#### About 
First, build a dataset of static 2D images: load every segmented timelapse and sample frames at regular intervals (the code below takes every 20th frame, up to frame 80). 

In [1]:
import yaml
from pathlib import Path
import os
import pandas as pd
from aicsimageio import AICSImage
import sys
sys.path.append("../../")
sys.path.append("../../allen_data")
import segmentation_core as sc
import pipeline_utils as pu
import utils
import importlib
importlib.reload(pu)
import numpy as np
import torch 
import build_allen_dataset as bad
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage import measure
from torchvision.utils import make_grid
import psutil
import logging
# Root locations on the shared filesystem.
PATH_PROJECT = Path('/pasteur/u/jmhb/confocal-2d-video-processing')
PATH_DATA = Path('/pasteur/data/hiPSCs_January2022')

# Filename lookup tables live directly under the data root.
PATH_FNAMES_TIMELAPSE = PATH_DATA / 'fname-lookup-timelapse.csv'
PATH_FNAMES_ZSTACK = PATH_DATA / 'fname-lookup-zstack.csv'

# Folder holding the frame-wise segmentations (kept as a plain string, not a Path).
SEG_FOLDER = os.path.join(os.fspath(PATH_DATA), "seg-framewise", "feb23")

In [2]:
# Output directory for every artefact written by this notebook.
# An earlier run used the "data-sep15" folder; that assignment was immediately
# overwritten by the one below, so it is kept only as a comment for reference.
# DIR_OUT = "/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-sep15"
DIR_OUT = "/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only"

def get_imgs_from_timelapse_segmentation(dir_out=None, chs=None, t_step=20, t_max=80):
    """
    Iterate through the list of segmented files, sample frames, and save those images to file.

    For every file listed in the timelapse lookup table, the segmentation is
    opened from `SEG_FOLDER`, frames `0, t_step, 2*t_step, ...` (up to and
    including `t_max`, if the timelapse is that long) are sampled, and the
    requested channels are stacked. Files whose Z dimension is not 1 are
    skipped with a warning.

    Two files are written into `dir_out` with `torch.save`:
        imgs-whole-seg-samples.sav : tensor with one entry per sampled frame
        meta-whole-seg-samples.sav : DataFrame with one row per sampled frame,
            in the same order as the image tensor.

    Args
        dir_out (str): output directory. Defaults to the module-level `DIR_OUT`.
        chs (list of str): channel names to extract, in output order. Defaults to
            ['lyso', 'mito', 'golgi', 'peroxy', 'er', 'nuclei', 'bodipy'].
        t_step (int): sampling interval in frames.
        t_max (int): last frame index that may be sampled.

    Raises
        ValueError: if no frame was sampled (no files, or every file was skipped).
    """
    ## filenames for the output
    if dir_out is None:
        dir_out = DIR_OUT
    if chs is None:
        chs = ['lyso', 'mito', 'golgi', 'peroxy', 'er','nuclei','bodipy']
    # create the folder up front so a missing directory fails fast rather than
    # after every file has been processed
    os.makedirs(dir_out, exist_ok=True)
    f_imgs_out = os.path.join(dir_out, "imgs-whole-seg-samples.sav")
    f_meta_out = os.path.join(dir_out, "meta-whole-seg-samples.sav")

    ### filenames for the input
    df_fnames_timelapse = utils.get_fname_lookup(PATH_FNAMES_TIMELAPSE, PATH_DATA)
    df_fnames_timelapse['path_file_seg'] = [
        os.path.join(SEG_FOLDER, folder, fname)
        for folder, fname in zip(df_fnames_timelapse['folder'], df_fnames_timelapse['fname'])
    ]

    ### column labels and empty results array
    columns = list(df_fnames_timelapse.columns) + ['shape','pixel_sz_Z','pixel_sz_Y','pixel_sz_X', 'channel_names', "timestep"]
    all_imgs, all_meta = [], []
    print(len(df_fnames_timelapse), " files")

    ## iterate through image filenames
    print("Processed: ", end=" ")
    for i, (idx, row) in enumerate(df_fnames_timelapse.iterrows()):
        print(i, end=", ")

        img_aics = AICSImage(row.path_file_seg)

        ## only 2D (single z-slice) timelapses are supported
        T, _, Z, _, _ = img_aics.shape
        if Z != 1:
            logging.warning(f"Found image with Z!=1: {row.path_file_seg}")
            logging.warning("Skipping")
            continue

        ## save relevant metadata
        meta = [*row, img_aics.shape, *img_aics.physical_pixel_sizes,  img_aics.channel_names]
        img = img_aics.data

        ## get the channel order idxs
        map_ch_idx = pu.get_channel_idx_lookup(img_aics.channel_names)
        ch_idxs = [map_ch_idx[k] for k in chs]

        # sample the timeseries every `t_step` frames, up to frame `t_max`
        ts = np.arange(0, min(T, t_max + 1), t_step)
        for t in ts:
            # [t] and ch_idxs broadcast together -> (len(chs), Y, X); [None] adds a leading sample axis
            all_imgs.append(img[[t], ch_idxs, 0][None])
            this_meta = meta + [t]
            assert len(columns)==len(this_meta), "code error"
            all_meta.append(this_meta)
        assert len(all_imgs)==len(all_meta)

    if not all_imgs:
        raise ValueError("No frames were sampled: no input files, or every file was skipped")

    # NOTE(review): the int8 cast wraps values above 127 (e.g. 255 -> -1);
    # presumably the segmentations are small-valued masks -- confirm.
    all_imgs = torch.from_numpy(np.concatenate(all_imgs).astype(np.int8))
    df_meta_cell = pd.DataFrame(data=all_meta, columns=columns)
    df_meta_cell['cellid'] = df_meta_cell.g1 + "-"+ df_meta_cell.g2 +"-"+ df_meta_cell.g3.apply(str)
    df_meta_cell['cellid_ts'] = df_meta_cell.cellid + "-" + df_meta_cell.timestep.astype(str)

    ## save
    print(f_imgs_out)
    print(f_meta_out)
    torch.save(all_imgs, f_imgs_out)
    torch.save(df_meta_cell, f_meta_out)


# Build the sampled-frame dataset; the image tensor and its metadata table are
# written into DIR_OUT (the function returns nothing).
get_imgs_from_timelapse_segmentation()


32  files
Processed:  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/imgs-whole-seg-samples.sav
/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/meta-whole-seg-samples.sav


In [3]:
# f_imgs_out="/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-sep15/imgs-whole-seg-samples.sav"
# f_meta_out="/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-sep15/meta-whole-seg-samples.sav"
# do_save=0
# if do_save:
#     torch.save(all_imgs, f_imgs_out)
#     all_meta.to_csv(f_meta_out)
# else: 
#     all_imgs=torch.load(f_imgs_out)
#     df_meta_cell = torch.load(f_meta_out)

In [17]:
# Load (or, with do_save=1, re-save) the sampled images and their metadata table.
# Both files are torch-serialised, matching what
# `get_imgs_from_timelapse_segmentation` writes into DIR_OUT.
f_imgs_out = os.path.join(DIR_OUT, "imgs-whole-seg-samples.sav")
f_meta_out = os.path.join(DIR_OUT, "meta-whole-seg-samples.sav")
# (the earlier "data-sep15" run used the same filenames under that folder)
do_save = 0
if do_save:
    # Requires `all_imgs` and `df_meta_cell` to already exist in the kernel.
    torch.save(all_imgs, f_imgs_out)
    # The metadata must be saved with torch.save so that the torch.load in the
    # other branch can read it back (a CSV written here could not be loaded).
    torch.save(df_meta_cell, f_meta_out)
else:
    all_imgs = torch.load(f_imgs_out)
    df_meta_cell = torch.load(f_meta_out)

In [18]:
# One-line summary of the loaded metadata table.
n_images = len(df_meta_cell)
n_cells = len(df_meta_cell.fname.unique())
groups = df_meta_cell.g1.unique()
print(f"{n_images} images from {n_cells} unique cells from collection groups {groups}")

146 images from 32 unique cells from collection groups ['D0']


In [20]:
## get the mitochondria - sample pixel size 87, then resize to 64. 
def extract_mito_from_img_slices(df_subset, all_imgs, channel=1):
    """
    Extract one binary crop and one row of shape properties for every connected
    component found in a single channel of each sampled image.

    `df_subset` and `all_imgs` are the metadata table and image stack saved by
    `get_imgs_from_timelapse_segmentation`. They have the property that a row
    with index label `i` corresponds to the image `all_imgs[i]`. It is therefore
    fine if `df_subset` is only a subset of the original DataFrame, as long as
    its index has not been reset.

    Args
        df_subset (pd.DataFrame): metadata rows to process; needs a `cellid_ts` column.
        all_imgs: indexable collection of images; `all_imgs[i][channel]` must be a 2D array.
        channel (int): channel to index the images from `all_imgs`

    Returns
        all_meta (pd.DataFrame): one row per component, with columns `cellid_ts`,
            the region properties listed in `prop_keys`, and `this_index` (the
            component's label within its image). Empty, but with these columns,
            when nothing is found.
        all_crops (list): one centred crop per component, in the same order as
            the rows of `all_meta`.
    """
    # Loop-invariant; defined before the loop so that an empty `df_subset`
    # still produces a well-formed (empty) table instead of a NameError.
    prop_keys = ['centroid', "area", 'eccentricity', 'convex_area', 'equivalent_diameter','extent','local_centroid' ]
    columns = ['cellid_ts'] + prop_keys + ["this_index"]
    all_crops, all_meta = [], []

    print(len(df_subset), "things")
    for cnt, (idx, row) in enumerate(df_subset.iterrows()):
        # progress plus system memory usage, since this loop can be memory hungry
        mem_pcnt = psutil.virtual_memory().percent
        print(f"({cnt}, {mem_pcnt})   ", end=" ")
        img = all_imgs[idx]
        cellid_ts = row.cellid_ts

        img_labels = measure.label(img[channel], connectivity=1)
        # Drop the background label explicitly rather than assuming it is the
        # first unique value (it is absent when the image has no background).
        uniq_labels = np.unique(img_labels)
        uniq_labels = uniq_labels[uniq_labels != 0]

        for l in uniq_labels:
            # binary mask holding only this component
            mask = np.zeros_like(img_labels)
            mask[img_labels==l]=1

            # extract the basic properties
            r = measure.regionprops(mask)[0]
            meta = [cellid_ts] + [r[p] for p in prop_keys] + [l]

            # crop the mask to the component's bounding box, then centre it
            min_r, min_c, max_r, max_c = r.bbox
            crop = mask[min_r:max_r, min_c:max_c]
            crop = bad.center_img(crop[None], dims=2, by_channel=0)

            all_crops.append(crop)
            all_meta.append(meta)

    all_meta = pd.DataFrame(data=all_meta, columns=columns)

    return all_meta, all_crops

# The extraction was originally split into two batches (rows [:400] and [400:]),
# probably to limit memory use. Only the first batch is active; the second is
# left commented out below.
import torchvision.transforms.functional as TF
data_dir_out = DIR_OUT
f_out_mito0 = os.path.join(data_dir_out, "mito0.sav")

# per-component crops and their property table for the first batch of rows
df_subset = df_meta_cell[:400]
df_meta_mito, all_crops = extract_mito_from_img_slices(df_subset, all_imgs)

# bring every crop to a standard size (sz=87), then resize to 64
data, _ = bad.put_centered_imgs_to_standard_sz(
    all_crops, None, sz=87, dims=2, by_channel=0,keep_too_big_cells=1
)
data = TF.resize(torch.Tensor(data), 64)

# crops, cell-level metadata and component-level metadata go into one file
torch.save([data, df_meta_cell, df_meta_mito,], f_out_mito0)

# del df_meta_mito, all_crops
# df_subset = df_meta_cell[400:]
# df_meta_mito, all_crops = extract_mito_from_img_slices(df_subset, all_imgs)
# data, _ = bad.put_centered_imgs_to_standard_sz(all_crops, None, sz=87, dims=2, by_channel=0,keep_too_big_cells=1)
# data = TF.resize(torch.Tensor(data), 64)
# f_out_mito1 = os.path.join(data_dir_out, "mito1.sav")
# torch.save([data, df_meta_cell, df_meta_mito,], f_out_mito1)


146 things
(0, 15.2)    (1, 15.2)    (2, 15.2)    (3, 15.2)    (4, 15.2)    (5, 15.2)    (6, 15.2)    (7, 15.2)    (8, 15.2)    (9, 15.2)    (10, 15.2)    (11, 15.2)    (12, 15.2)    (13, 15.0)    (14, 15.0)    (15, 15.0)    (16, 15.1)    (17, 15.1)    (18, 15.1)    (19, 15.1)    (20, 15.1)    (21, 15.1)    (22, 15.1)    (23, 15.0)    (24, 15.0)    (25, 15.1)    (26, 15.2)    (27, 15.3)    (28, 15.3)    (29, 15.3)    (30, 15.3)    (31, 15.3)    (32, 15.3)    (33, 15.3)    (34, 15.0)    (35, 15.2)    (36, 15.3)    (37, 15.3)    (38, 15.3)    (39, 15.0)    (40, 15.0)    (41, 15.1)    (42, 15.1)    (43, 15.1)    (44, 15.1)    (45, 15.1)    (46, 15.1)    (47, 15.2)    (48, 15.2)    (49, 15.1)    (50, 15.1)    (51, 15.1)    (52, 15.1)    (53, 15.2)    (54, 15.2)    (55, 15.2)    (56, 15.2)    (57, 15.2)    (58, 15.2)    (59, 15.2)    (60, 15.2)    (61, 15.2)    (62, 15.2)    (63, 15.2)    (64, 15.2)    (65, 15.2)    (66, 15.2)    (67, 15.2)    (68, 15.2)    (69, 15.2)    (70, 15.2)    (71, 

In [24]:
# Consolidate the per-batch mito file(s) into the final dataset files in DIR_OUT.
import os
import torch
import pandas as pd

# get data
data_dir_out = DIR_OUT
f_out_mito0 = os.path.join(data_dir_out, "mito0.sav")
# f_out_mito1 = os.path.join(data_dir_out, "mito1.sav")
[data0, df_meta_cell0, df_meta_mito0] = torch.load(f_out_mito0)
# [data1, df_meta_cell1, df_meta_mito1] = torch.load(f_out_mito1)

# joining (only one batch at the moment; add data1 / df_meta_mito1 here if the
# second batch is re-enabled)
data = torch.cat([data0])
# NOTE: reset_index() without drop=True keeps the old index as an "index" column.
df_meta_mito = pd.concat([df_meta_mito0,]).reset_index()
# Use the cell metadata stored alongside the crops, so the saved file does not
# depend on whatever `df_meta_cell` happens to be in the kernel.
df_meta_cell = df_meta_cell0

# save everything
# NOTE(review): `all_imgs` still comes from kernel state (loaded in an earlier
# cell); this rewrites imgs-whole-seg-samples.sav with it.
outputs = [
    ("mito-data.sav", data),
    ("mito-meta-cell.sav", df_meta_cell),
    ("mito-meta-mito.sav", df_meta_mito),
    ("imgs-whole-seg-samples.sav", all_imgs),
]
for fname_out, obj in outputs:
    f_out = os.path.join(data_dir_out, fname_out)
    torch.save(obj, f_out)
    print(f_out)


/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/mito-data.sav
/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/mito-meta-cell.sav
/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/mito-meta-mito.sav
/pasteur/u/jmhb/confocal-2d-video-processing/analyses/jan22-neural-diff/data-oct26-D0-only/imgs-whole-seg-samples.sav
