In [69]:
# OPTIONAL: Load the "autoreload" extension so that edited modules are re-imported without restarting the kernel
%reload_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

import sys
sys.path.append('../src')

import os
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view as sww
import pandas as pd
import re

from metadata import metadata
from utils import list_subdir_filter as lsd

# Project metadata object; later cells index md.folders['images'] and
# md.folders['segmented'] to locate the input directories.
md = metadata()

In [70]:
def unique(list_):
    """Return the distinct elements of ``list_`` as a new, ascending-sorted list."""
    distinct = set(list_)
    return sorted(distinct)

def get_id(f):
    """Extract the numeric sample id from a file or folder name.

    Names containing ``A40.`` or ``A40_`` yield the four digits that follow
    the separator; every other name yields the digits that follow
    ``segmentation_``. When the relevant pattern is absent, ``f`` is returned
    unchanged, because ``re.sub`` leaves non-matching input as is.
    """
    # The extraction pattern accepts the same two separators as the guard.
    # Previously only "A40." was extracted, so "A40_2403.csv" came back
    # unchanged instead of "2403".
    if re.search(r'A40[._]', f):
        return re.sub(r'^.*A40[._]([0-9]{4}).*$', r'\1', f)
    return re.sub(r'^.*segmentation_([0-9]+).*$', r'\1', f)

# List the entries under <images>/mouse, the '.tif' matches below it, and the
# 'whole' matches under the segmented folder.
# NOTE(review): the exact meaning of lsd's 2nd/3rd arguments lives in utils — confirm there.
all_samples = lsd(os.path.join(md.folders['images'], 'mouse'))
# Raw string: same runtime value as before, without the invalid "\." escape.
all_imgs = lsd(os.path.join(md.folders['images'], 'mouse'), True, r'\.tif')
all_segs = lsd(md.folders['segmented'], True, 'whole')

# Reduce each listing to its sorted, de-duplicated sample ids.
all_samples_ids, all_imgs_ids, all_segs_ids = (
    unique([get_id(f) for f in paths])
    for paths in (all_samples, all_imgs, all_segs)
)



In [72]:
# Read the first entry lsd returns for the 'xls' filter under the images folder.
xfile = pd.read_excel(lsd(md.folders['images'], True, 'xls')[0])
# Strip the "A40." prefix so id_code can be compared with the ids taken from
# file names; vectorised and written as a raw-string regex.
xfile['id_code'] = xfile['Identification'].str.replace(r'A40\.', '', regex=True)
# NOTE(review): disabled filter; `slide_ids` is not defined in the visible cells.
#xfile = xfile.loc[xfile['id_code'].isin(slide_ids)].sort_values('id_code')

In [73]:
# Flag, per spreadsheet row, whether its id_code appears among the sample,
# image and segmentation ids collected earlier.
for _col, _ids in (
    ('sam', all_samples_ids),
    ('img', all_imgs_ids),
    ('seg', all_segs_ids),
):
    xfile[_col] = xfile['id_code'].isin(_ids)
xfile

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg
0,JNUDE_03,A40.2302,wt_ctr_1,wt_ctr,non,2302,False,False,False
1,JNUDE_04,A40.2311,wt_ctr_2,wt_ctr,non,2311,True,True,True
2,JNUDE_05,A40.2303,wt_ctr_4,wt_ctr,20231006_124507_3_NPRUS9_Run 65_A40.2303.csv,2303,True,True,True
3,JNUDE_06,A40.2403,wt_osi_3,wt_TT,A40_2403.csv,2403,True,True,True
4,JNUDE_07,A40.2337,wt_ctr_6,wt_ctr,20230915_150044_1_h1fE0K_Run 54_A40.2337.csv,2337,True,True,False
5,JNUDE_08,A40.2397,wt_osi_7,wt_TT,non,2397,True,True,True
6,JNUDE_09,A40.2404,wt_osi_1,wt_TT,20230915_150044_3_nCS5op_Run 54_A40.2404.csv,2404,True,True,False
7,JNUDE_10,A40.2405,wt_osi_4,wt_TT,A40_2405.csv,2405,True,True,True
8,JNUDE_11,A40.2312,wt_ctr_5,wt_ctr,20230915_150044_2_GulsMx_Run 54_A40.2312.csv,2312,True,True,False
9,JNUDE_12,A40.2406,wt_osi_5,wt_TT,non,2406,True,True,True


In [74]:
# Rows flagged `sam` but not `img`. `~` is the boolean NOT for pandas masks;
# unary minus on booleans is rejected by NumPy and only special-cased by pandas.
xfile.loc[xfile.sam & ~xfile.img]

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg
22,JNUDE_27,A40.2433,ko_ctr_6,ko_ctr,non,2433,True,False,False


In [75]:
# Rows flagged both `img` and `seg`.
xfile.loc[xfile['img'] & xfile['seg']]

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg
1,JNUDE_04,A40.2311,wt_ctr_2,wt_ctr,non,2311,True,True,True
2,JNUDE_05,A40.2303,wt_ctr_4,wt_ctr,20231006_124507_3_NPRUS9_Run 65_A40.2303.csv,2303,True,True,True
3,JNUDE_06,A40.2403,wt_osi_3,wt_TT,A40_2403.csv,2403,True,True,True
5,JNUDE_08,A40.2397,wt_osi_7,wt_TT,non,2397,True,True,True
7,JNUDE_10,A40.2405,wt_osi_4,wt_TT,A40_2405.csv,2405,True,True,True
9,JNUDE_12,A40.2406,wt_osi_5,wt_TT,non,2406,True,True,True
10,JNUDE_13,A40.2426,wt_osi_6,wt_TT,A40_2426.csv,2426,True,True,True
11,JNUDE_14,A40.2294,wt_ctr_3,wt_ctr,A40_2294.csv,2294,True,True,True
12,JNUDE_16,A40.2419,ko_osi_2,ko_TT,20230914_173737_4_Er644v_Run 53_A40.2419.csv,2419,True,True,True
13,JNUDE_17,A40.2429,ko_osi_3,ko_TT,non,2429,True,True,True


In [76]:
# Sorted integer ids of rows flagged `img` but not `seg`.
# `~` (logical NOT) replaces the unary minus previously applied to the boolean mask.
# Disabled alternative kept from the original cell: .groupby('Condition').count().img
(
    xfile.loc[xfile.img & ~xfile.seg, 'id_code']
    .sort_values()
    .to_numpy()
    .astype(int)
)

array([2256, 2304, 2312, 2337, 2339, 2404, 2408, 2432])

In [77]:
# Rows whose Condition equals 'ko_TT'.
xfile.loc[xfile['Condition'].eq('ko_TT')]

Unnamed: 0,Code,Identification,Group,Condition,File,id_code,sam,img,seg
12,JNUDE_16,A40.2419,ko_osi_2,ko_TT,20230914_173737_4_Er644v_Run 53_A40.2419.csv,2419,True,True,True
13,JNUDE_17,A40.2429,ko_osi_3,ko_TT,non,2429,True,True,True
14,JNUDE_18,A40.2428,ko_osi_4,ko_TT,20230919_133019_4_UuguYd_Run 56_A40.2428.csv,2428,False,False,False
16,JNUDE_22,A40.2430,ko_osi_6,ko_TT,20230918_152613_3_9pzeAy_Run 55_A40.2430.csv,2430,True,True,True
