In [None]:
# OPTIONAL: Load the "autoreload" extension so that edited modules are picked up without restarting the kernel
%reload_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

import sys
sys.path.append('../src')

from datetime import datetime
import os
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view as sww
import pandas as pd
import re

from metadata import metadata
from utils import list_subdir_filter as lsd, unique

# Shared metadata handle; later cells read folder locations from `md.folders`.
# (A `global` declaration has no effect at notebook/module top level, so a
# plain assignment is all that is needed.)
md = metadata()

# Materialise the markers as a list once so later cells can iterate over them
# repeatedly when building per-marker file names.
markers = list(md.markers)

In [None]:
def get_id(f):
    """Extract the numeric sample id embedded in a file or folder name.

    Two naming schemes are recognised:

    * names containing ``A40.`` or ``A40_``: the four digits that follow the
      single character after ``A40`` are returned;
    * every other name: the run of digits following ``segmentation_`` is
      returned.

    Parameters
    ----------
    f : str
        File or directory name, with or without a leading path.

    Returns
    -------
    str
        The captured digits. When the applicable pattern does not match,
        ``re.sub`` substitutes nothing and ``f`` is returned unchanged.
    """
    # Raw strings keep the regex backslashes literal; a plain '\.' is an
    # invalid string escape that newer Python versions warn about.
    if re.search(r'A40(\.|_)', f):
        return re.sub(r'^.*A40.([0-9]{4}).*$', r'\1', f)
    return re.sub(r'^.*segmentation_([0-9]+).*$', r'\1', f)

def date_of_files(files):
    """Return the latest ``os.path.getctime`` value among the existing files.

    Parameters
    ----------
    files : iterable of str
        Candidate paths; entries that do not exist are ignored.

    Returns
    -------
    float or int
        The largest ctime timestamp (seconds since the epoch) of the paths
        that exist, or ``0`` when none of them exists or ``files`` is empty.
    """
    try:
        return max(
            (os.path.getctime(path) for path in files if os.path.exists(path)),
            default=0,  # no existing file -> sentinel instead of ValueError
        )
    except OSError:
        # A file can disappear between the existence check and the stat call;
        # treat that like "no file" rather than aborting the whole table.
        return 0


# Inventory what is on disk for each pipeline stage.
# NOTE(review): the meaning of lsd's positional boolean is defined in utils -- confirm.
mouse_image_dir = os.path.join(md.folders['images'], 'mouse')
all_samples = lsd(mouse_image_dir)
all_imgs = lsd(mouse_image_dir, True, r'\.tif')  # raw string: '\.' must reach the regex untouched
all_segs = lsd(md.folders['segmented'], True)
pickle_files = lsd(md.folders['regionprops'], False, 'A40_')


# Reduce every listing to the distinct ids that get_id extracts from its entries.
all_samples_ids, all_imgs_ids, all_segs_ids, all_pickles = (
    unique([get_id(entry) for entry in listing])
    for listing in (all_samples, all_imgs, all_segs, pickle_files)
)



In [None]:
# Load the sample sheet: the first entry that lsd returns for the 'xls' filter
# in the images folder.
xls_files = lsd(md.folders['images'], True, 'xls')
if len(xls_files) == 0:
    # Fail with a message that names the folder instead of a bare IndexError.
    raise FileNotFoundError(f"No file matching 'xls' found in {md.folders['images']}")
xfile = pd.read_excel(xls_files[0])

# Strip the 'A40.' prefix so the ids are comparable with those returned by get_id.
xfile['id_code'] = [re.sub(r'A40\.', '', label) for label in xfile.Identification]
#xfile = xfile.loc[xfile['id_code'].isin(slide_ids)].sort_values('id_code')

In [None]:
# For every sample-sheet row, record whether its id shows up in each of the
# id collections gathered from disk.
for _column, _known_ids in (
    ('sam', all_samples_ids),
    ('img', all_imgs_ids),
    ('seg', all_segs_ids),
    ('regions', all_pickles),
):
    xfile[_column] = xfile['id_code'].isin(_known_ids)

In [None]:
# Rows whose id was found among the sample folders but not among the images.
# `~` is the boolean-inversion operator for a mask; unary `-` is arithmetic
# negation and is rejected outright by NumPy for boolean arrays.
xfile.loc[xfile['sam'] & ~xfile['img']]

In [None]:
seg_dir = os.path.join(md.folders['segmented'], 'cellpose')

# NOTE(review): 9 is presumably the number of markers and 10 the number of
# segmentation files expected per sample -- confirm before changing either.
N_CLEAN_FILES_EXPECTED = 9
N_SEG_FILES_EXPECTED = 10


def _clean_paths(img_id):
    """Return the ``clean_<marker>_<img_id>.npy`` path of every marker for one id."""
    return [
        os.path.join(md.folders['bg_removed'], f'clean_{marker}_{img_id}.npy')
        for marker in markers
    ]


# True when exactly the expected number of per-marker cleaned files exists.
xfile['complete_AF_removal'] = [
    sum(os.path.exists(path) for path in _clean_paths(img_id)) == N_CLEAN_FILES_EXPECTED
    for img_id in xfile.id_code
]

# Latest ctime among the cleaned files. date_of_files yields 0 when none
# exists, which is rendered here as the Unix epoch in local time.
xfile['AF_time'] = [
    datetime.fromtimestamp(date_of_files(_clean_paths(img_id))).strftime('%Y.%m.%d %H:%M')
    for img_id in xfile.id_code
]

# Raw f-string: the id is interpolated while '\.' reaches the regex unchanged.
xfile['complete_segmentation'] = [
    len(lsd(seg_dir, True, rf'A40_{img_id}_[0-9]_dapi1\.npy')) == N_SEG_FILES_EXPECTED
    for img_id in xfile.id_code
]
xfile.sort_values(['Condition', 'id_code'])

In [None]:
# Status overview restricted to rows flagged in the `img` column, ordered by
# condition and id, written to the interim data folder and displayed.
status_columns = [
    'id_code',
    'Condition',
    'complete_AF_removal',
    'complete_segmentation',
    'regions',
]
has_image = xfile.img
id_condition_table = xfile.loc[has_image, status_columns].sort_values(['Condition', 'id_code'])
id_condition_table.to_csv('../data/interim/id_condition_table.csv')
id_condition_table

In [None]:
# seg_dir = os.path.join(md.folders['segmented'], 'cellpose')

# markers = list(md.markers)
# for img_id in sorted(xfile.id_code.unique()):
#     ss = sum([os.path.exists(os.path.join(md.folders['bg_removed'], f'clean_{marker}_{img_id}.npy')) for marker in markers])
#     ll = len(lsd(seg_dir, False, f'A40_{img_id}_dapi1_[0-9][0-9]\.npy'))

#     print(f'{img_id} - {ss} - {ll}')

# # len(lsd(seg_dir, False, f'A40_{tile_code}_dapi1_[0-9][0-9]\.npy')) != 10:
# #         print(f'{tile_code} is sus. Skipping...')
# #         return None