In [38]:
import os
import re
import shutil

import json

import mgitools.os_helpers as os_helpers

In [39]:
# Root directory holding the `box/` image tree and the output JSON.
# Set the CELL_SEGMENTATION_DATA_DIR environment variable to run this notebook on
# another machine; when it is unset the original local location is used.
data_dir = os.environ.get(
    'CELL_SEGMENTATION_DATA_DIR',
    '/Users/erikstorrs/Documents/ding/cell-segmentation/data',
)

# The trailing '' component keeps the trailing path separator on the directory.
sample_dir = os.path.join(data_dir, 'box', 'F_1199_07', '')
output_file = os.path.join(data_dir, 'box.json')

In [18]:
## collect the paths of interest: they must mention 'Dapi_dapi' and '.tif',
## and anything mentioning 'back_' is dropped
fps = list(filter(
    lambda path: not ('back_' in path) and 'Dapi_dapi' in path and '.tif' in path,
    os_helpers.listfiles(sample_dir),
))

# how many paths were kept, plus a short preview
len(fps), fps[:5]

(140,
 ['/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P002_Z00_cy5.tif',
  '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P004_Z00_cy5.tif',
  '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P003_Z00_dapi.tif',
  '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P004_Z00_dapi.tif',
  '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P005_Z00_dapi.tif'])

In [34]:
def get_channel(fp):
    """Return the channel name for the image file at ``fp``.

    Only the file name (the last '/'-separated component of ``fp``) is inspected.
    It must contain ``Dapi_dapi_`` and end in ``Z00_<dye>.tif``, where ``<dye>``
    is ``cy3``, ``cy5`` or ``dapi``:

    * ``dapi`` -> the literal string ``'dapi'`` is returned.
    * ``cy3``  -> the text between ``Dapi_dapi_`` and ``_cy3_`` is returned.
    * ``cy5``  -> the underscore-free token directly before ``_cy5_`` is returned.

    Args:
        fp: '/'-separated path (or bare file name) of the image.

    Returns:
        The channel name as a string, e.g. ``'CD20'``, ``'Ki67'`` or ``'dapi'``.

    Raises:
        ValueError: if the file name does not follow the expected layout, or the
            dye suffix is not one of ``cy3``, ``cy5``, ``dapi``.
    """
    filename = fp.split('/')[-1]

    # Use re.match rather than re.sub: re.sub hands back the whole input unchanged
    # when the pattern does not match, which would silently pass for a result.
    dye_match = re.match(r'^.*Dapi_dapi_.*Z00_(.*)\.tif$', filename)
    if dye_match is None:
        raise ValueError(
            'cannot determine dye channel from file name: {!r}'.format(filename))
    cy_channel = dye_match.group(1)

    if cy_channel == 'dapi':
        return 'dapi'

    # Pattern that pulls the marker name out of the file name for each dye.
    marker_patterns = {
        'cy3': r'^.*Dapi_dapi_(.*)_cy3_.*$',
        'cy5': r'^.*Dapi_dapi.*_([^_]*)_cy5_.*$',
    }
    if cy_channel not in marker_patterns:
        raise ValueError(
            'unsupported dye channel {!r} in file name: {!r}'.format(cy_channel, filename))

    marker_match = re.match(marker_patterns[cy_channel], filename)
    if marker_match is None:
        raise ValueError(
            'cannot find the {} marker name in file name: {!r}'.format(cy_channel, filename))

    return marker_match.group(1)

# Nested lookup built below: sample -> roi -> list of {'channel', 'filepath'} records.
sample_to_metadata = {}
for fp in fps:
    sample = re.sub(r'^.*box/(.*)/S.*Dapi_dapi.*$', r'\1', fp)
    channel = get_channel(fp)
    roi = re.sub(r'^.*Dapi_dapi.*_S[0-9]{3}_(P[0-9]{3})_.*$', r'\1', fp.split('/')[-1])

    d = dict(channel=channel, filepath=fp)

    # Non-dapi images are always kept; a dapi image is kept only when its path
    # mentions 'Ki67'.
    if channel != 'dapi' or 'Ki67' in fp:
        sample_to_metadata.setdefault(sample, {}).setdefault(roi, []).append(d)

In [41]:
# Write through a context manager so the file is flushed and closed as soon as
# the dump finishes, rather than leaving an open handle behind.
with open(output_file, 'w') as out_handle:
    json.dump(sample_to_metadata, out_handle)

In [35]:
      # Show the full sample -> roi -> records mapping as this cell's output.
      sample_to_metadata

{'F_1199_07': {'P002': [{'channel': 'Ki67',
    'filepath': '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P002_Z00_cy5.tif'},
   {'channel': 'dapi',
    'filepath': '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S015_Dapi_dapi_Ki67_cy5/Dapi_dapi_Ki67_cy5_F_1199_07_S015_P002_Z00_dapi.tif'},
   {'channel': 'CD20',
    'filepath': '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S013_Dapi_dapi_CD20_cy3_CD3_cy5/Dapi_dapi_CD20_cy3_CD3_cy5_F_1199_07_S013_P002_Z00_cy3.tif'},
   {'channel': 'CD3',
    'filepath': '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S013_Dapi_dapi_CD20_cy3_CD3_cy5/Dapi_dapi_CD20_cy3_CD3_cy5_F_1199_07_S013_P002_Z00_cy5.tif'},
   {'channel': 'CD56',
    'filepath': '/Users/erikstorrs/Documents/ding/cell-segmentation/data/box/F_1199_07/S003_Dapi_dapi_CD56_cy3_PDL1_cy5/Dapi_dapi_CD56_cy3_PDL1_cy5_F_1199_07_S003_P002_Z00_

In [37]:
# For each ROI, print its name and then the alphabetically sorted channel names
# recorded for it.
for sample, rois in sample_to_metadata.items():
    for roi, entries in rois.items():
        channel_names = [entry['channel'] for entry in entries]
        channel_names.sort()
        print(roi, channel_names, sep='\n')

P002
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P004
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P003
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P005
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P001
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P000
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
P006
['CD20', 'CD3', 'CD4', 'CD45', 'CD56', 'CD68', 'CD8', 'FOXP3', 'Ki67', 'NaKATPase', 'PD1', 'PDL1', 'SOX10', 'dapi']
