In [1]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%reload_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

import sys
sys.path.append('../src')

from datetime import datetime
import os
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view as sww
import pandas as pd
import re

from metadata import metadata
from utils import list_subdir_filter as lsd, unique

# Project metadata object; the cells below read directory locations from
# `md.folders[...]`. (A `global` statement is a no-op at cell/module scope,
# so plain assignment is all that is needed here.)
md = metadata()

# Marker names materialised as a list so they can be iterated repeatedly.
markers = list(md.markers)

In [2]:
def get_id(f):
    """Extract the numeric id embedded in a path or file name.

    Parameters
    ----------
    f : str
        Path or file name to inspect.

    Returns
    -------
    str
        * If ``f`` contains ``A40`` followed by a dot or an underscore: the
          four digits that follow an ``A40`` tag (e.g. ``'A40.2433'`` gives
          ``'2433'``).
        * Otherwise: the run of digits following ``segmentation_``.
        * If the relevant pattern does not match, ``f`` is returned unchanged.
    """
    # Raw string literals: '\.' inside a plain literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    if re.search(r'A40(\.|_)', f):
        return re.sub(r'^.*A40.([0-9]{4}).*$', r'\1', f)
    return re.sub(r'^.*segmentation_([0-9]+).*$', r'\1', f)

def date_of_files(files):
    """Return the most recent ``os.path.getctime`` value among existing files.

    Parameters
    ----------
    files : iterable of str
        Candidate file paths; paths that do not exist are ignored.

    Returns
    -------
    float or int
        The largest ctime timestamp (seconds since the epoch) of the files
        that exist, or ``0`` when none of them exist (or `files` is empty).
    """
    ctimes = []
    for path in files:
        try:
            ctimes.append(os.path.getctime(path))
        except OSError:
            # Missing/unreadable file (FileNotFoundError is an OSError):
            # skip it instead of hiding every possible error behind a bare
            # `except`, which would also swallow KeyboardInterrupt.
            continue
    # `default=0` keeps the historical "nothing found" sentinel.
    return max(ctimes, default=0)


# Inventory of what is present on disk for each processing stage.
# NOTE(review): the positional True/False passed to `lsd` and its third
# (pattern) argument are defined in utils.list_subdir_filter — confirm their
# exact meaning there.
mouse_image_dir = os.path.join(md.folders['images'], 'mouse')

all_samples = lsd(mouse_image_dir)
# Raw string: '\.' in a plain literal is an invalid escape sequence.
all_imgs = lsd(mouse_image_dir, True, r'\.tif')
all_segs = lsd(md.folders['segmented'], True)
pickle_files = lsd(md.folders['regionprops'], False, 'A40_')

# Reduce every listing to the unique ids found in its entries.
all_samples_ids, all_imgs_ids, all_segs_ids, all_pickles = (
    unique([get_id(entry) for entry in listing])
    for listing in (all_samples, all_imgs, all_segs, pickle_files)
)



In [3]:
# The sample sheet is the first entry returned by `lsd` for the pattern 'xls'
# under the images folder.
xls_candidates = lsd(md.folders['images'], True, 'xls')
if len(xls_candidates) == 0:
    # Fail with an explicit message instead of an opaque IndexError on `[0]`.
    raise FileNotFoundError(
        f"No file matching 'xls' found under {md.folders['images']!r}"
    )
xfile = pd.read_excel(xls_candidates[0])

# Drop every literal 'A40.' from the identification so that only the numeric
# code remains (e.g. 'A40.2433' -> '2433'). Raw string avoids the invalid
# '\.' escape sequence; the vectorised form leaves missing values as NaN
# instead of raising.
xfile['id_code'] = xfile['Identification'].str.replace(r'A40\.', '', regex=True)
#xfile = xfile.loc[xfile['id_code'].isin(slide_ids)].sort_values('id_code')

In [4]:
# One boolean column per id collection: True where the row's `id_code`
# appears in that collection.
for _flag, _ids in (
    ('sam', all_samples_ids),
    ('img', all_imgs_ids),
    ('seg', all_segs_ids),
    ('regions', all_pickles),
):
    xfile[_flag] = xfile['id_code'].isin(_ids)

In [5]:
# Rows flagged `sam` but not `img`. `~` is the boolean negation operator for
# a boolean Series; unary minus only works through pandas special-casing.
xfile.loc[xfile.sam & ~xfile.img]

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg,regions
22,JNUDE_27,A40.2433,ko_ctr_6,ko_ctr,non,2433,True,False,False,False


In [6]:
seg_dir = os.path.join(md.folders['segmented'], 'cellpose')

# Number of files that must be present for a stage to count as complete.
# NOTE(review): 9 presumably equals len(markers) — confirm before replacing
# the literal with it; the values are kept as in the original analysis.
N_AF_FILES_EXPECTED = 9
N_SEG_FILES_EXPECTED = 10


def _af_files(img_id):
    """Paths of the `clean_<marker>_<img_id>.npy` files, one per marker."""
    return [
        os.path.join(md.folders['bg_removed'], f'clean_{marker}_{img_id}.npy')
        for marker in markers
    ]


# True when the expected number of per-marker files exists for the id.
xfile['complete_AF_removal'] = [
    sum(os.path.exists(path) for path in _af_files(img_id)) == N_AF_FILES_EXPECTED
    for img_id in xfile.id_code
]

# Timestamp of the newest existing per-marker file. `date_of_files` returns 0
# when no file exists, which renders as the epoch (1970.01.01 in local time).
xfile['AF_time'] = [
    datetime.fromtimestamp(date_of_files(_af_files(img_id))).strftime('%Y.%m.%d %H:%M')
    for img_id in xfile.id_code
]

# Raw f-string: '\.' in a plain f-string is an invalid escape sequence.
xfile['complete_segmentation'] = [
    len(lsd(seg_dir, True, rf'A40_{i}_[0-9]_dapi1\.npy')) == N_SEG_FILES_EXPECTED
    for i in xfile.id_code
]
xfile.sort_values(['Condition', 'id_code'])

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg,regions,complete_AF_removal,AF_time,complete_segmentation
12,JNUDE_16,A40.2419,ko_osi_2,ko_TT,20230914_173737_4_Er644v_Run 53_A40.2419.csv,2419,True,True,True,True,True,2024.06.14 09:39,True
14,JNUDE_18,A40.2428,ko_osi_4,ko_TT,20230919_133019_4_UuguYd_Run 56_A40.2428.csv,2428,False,False,False,False,False,1970.01.01 01:00,False
13,JNUDE_17,A40.2429,ko_osi_3,ko_TT,non,2429,True,True,True,False,True,2024.06.14 11:11,True
16,JNUDE_22,A40.2430,ko_osi_6,ko_TT,20230918_152613_3_9pzeAy_Run 55_A40.2430.csv,2430,True,True,True,True,True,2024.06.14 10:26,True
19,JNUDE_24,A40.2300,ko_ctr_1,ko_ctr,20230831_152523_1_ExRj6r_Run52_A40.2300.csv,2300,True,True,True,True,True,2024.06.14 08:56,True
17,JNUDE_23_re,A40.2313,ko_ctr_3,ko_ctr,A40_2313.csv,2313,False,False,False,False,False,1970.01.01 01:00,False
23,JNUDE_28,A40.2338,ko_ctr_7,ko_ctr,20230829_102724_2_4kJsI2_Run48_A40.2338.csv,2338,True,True,False,False,True,2024.06.14 08:41,False
18,JNUDE_23_li,A40.2393,ko_ctr_3,ko_ctr,20230921_134356_1_EgBQRX_Run 59_A40.2393.csv,2393,True,True,True,True,True,2024.06.14 10:55,True
15,JNUDE_19,A40.2416,ko_ctr_2,ko_ctr,20230914_173736_3_D61Agp_Run 53_A40.2416.csv,2416,True,True,True,True,True,2024.06.14 09:33,True
21,JNUDE_26,A40.2427,ko_ctr_5,ko_ctr,20230830_123250_4_QIfXt0_Run49_A40.2427.csv,2427,True,True,True,True,True,2024.06.14 08:49,True


In [7]:
# Columns kept in the per-sample status summary.
_summary_cols = [
    'id_code',
    'Condition',
    'complete_AF_removal',
    'complete_segmentation',
    'regions',
]

# Keep only rows whose `img` flag is True, ordered by condition and then id.
id_condition_table = (
    xfile
    .loc[xfile.img, _summary_cols]
    .sort_values(['Condition', 'id_code'])
)

# Save the summary as CSV, then display it as the cell output.
id_condition_table.to_csv('../data/interim/id_condition_table.csv')
id_condition_table

Unnamed: 0,id_code,Condition,complete_AF_removal,complete_segmentation,regions
12,2419,ko_TT,True,True,True
13,2429,ko_TT,True,True,False
16,2430,ko_TT,True,True,True
19,2300,ko_ctr,True,True,True
23,2338,ko_ctr,True,False,False
18,2393,ko_ctr,True,True,True
15,2416,ko_ctr,True,True,True
21,2427,ko_ctr,True,True,True
20,2432,ko_ctr,True,False,False
26,2407,res_TT,False,True,False


In [8]:
# seg_dir = os.path.join(md.folders['segmented'], 'cellpose')

# markers = list(md.markers)
# for img_id in sorted(xfile.id_code.unique()):
#     ss = sum([os.path.exists(os.path.join(md.folders['bg_removed'], f'clean_{marker}_{img_id}.npy')) for marker in markers])
#     ll = len(lsd(seg_dir, False, f'A40_{img_id}_dapi1_[0-9][0-9]\.npy'))

#     print(f'{img_id} - {ss} - {ll}')

# # len(lsd(seg_dir, False, f'A40_{tile_code}_dapi1_[0-9][0-9]\.npy')) != 10:
# #         print(f'{tile_code} is sus. Skipping...')
# #         return None