In [18]:
import pandas as pd
import numpy as np
import os
import cv2
import tqdm
import glob

In [19]:
# Load the experiment metadata table. Later cells read its `moa`, `plate`,
# `well` and `compound` columns.
fcsv_path = '/scratch-shared/phil/SPECS_phil/exp_stats/main.csv'
df = pd.read_csv(fcsv_path)

In [20]:
# Mechanisms of action (values of the `moa` column) kept for this dataset.
moas = [
    'Aurora kinase inhibitor',
    'tubulin polymerization inhibitor',
    'JAK inhibitor',
    'protein synthesis inhibitor',
    'HDAC inhibitor',
    'topoisomerase inhibitor',
    'PARP inhibitor',
    'ATPase inhibitor',
    'retinoid receptor agonist',
    'HSP inhibitor',
]

In [21]:
# Keep only the metadata rows whose MoA is one of the selected `moas`.
has_selected_moa = df['moa'].isin(moas)
df2 = df.loc[has_selected_moa]

In [22]:
# Number of metadata rows per selected MoA.
df2.moa.value_counts()

HDAC inhibitor                      1782
topoisomerase inhibitor             1728
HSP inhibitor                       1296
protein synthesis inhibitor         1242
JAK inhibitor                       1188
PARP inhibitor                      1134
Aurora kinase inhibitor             1080
tubulin polymerization inhibitor    1080
retinoid receptor agonist           1026
ATPase inhibitor                    1026
Name: moa, dtype: int64

In [23]:
def plate_id_from_folder(folder):
    """Return the plate barcode encoded in an acquisition folder path.

    Folder names are hyphen-delimited, e.g.
    'specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL', and the barcode is the fourth
    field. Only the last path component is split, so hyphens in parent
    directories (such as 'micro-images-pvc') cannot shift the field index.

    Raises IndexError if the folder name has fewer than four fields.
    """
    return os.path.basename(folder).split('-')[3]


# All acquisition folders of the specs935 experiment, and the distinct
# (sorted) plate barcodes they cover.
fold = glob.glob('/mnt/micro-images-pvc/jonne' + '/specs935*')
imaged_plates = np.unique([plate_id_from_folder(x) for x in fold])

In [24]:
# Show the plate barcodes for which an image folder was found on disk.
imaged_plates

array(['P015076', 'P015077', 'P015078', 'P015079', 'P015080', 'P015081',
       'P015082', 'P015083', 'P015084', 'P015085', 'P015087', 'P015088',
       'P015089', 'P015090', 'P015091', 'P015092', 'P015093', 'P015094',
       'P015095', 'P015096', 'P015097', 'P015098', 'P015099'], dtype='<U7')

In [25]:
# Same display as the previous cell (plate barcodes found on disk).
imaged_plates

array(['P015076', 'P015077', 'P015078', 'P015079', 'P015080', 'P015081',
       'P015082', 'P015083', 'P015084', 'P015085', 'P015087', 'P015088',
       'P015089', 'P015090', 'P015091', 'P015092', 'P015093', 'P015094',
       'P015095', 'P015096', 'P015097', 'P015098', 'P015099'], dtype='<U7')

In [26]:
# Drop selected-MoA rows whose plate has no image folder on disk.
plate_was_imaged = df2['plate'].isin(imaged_plates)
df2 = df2.loc[plate_was_imaged]

In [27]:
# Rows per MoA again, now restricted to plates that were imaged.
df2.moa.value_counts()

HDAC inhibitor                      1782
topoisomerase inhibitor             1728
HSP inhibitor                       1296
protein synthesis inhibitor         1242
JAK inhibitor                       1188
PARP inhibitor                      1134
Aurora kinase inhibitor             1080
tubulin polymerization inhibitor    1080
retinoid receptor agonist           1026
ATPase inhibitor                    1026
Name: moa, dtype: int64

In [28]:
# Plates that are imaged and carry at least one of the selected MoAs.
known_plates = df2['plate'].unique()
known_plates

array(['P015076', 'P015077', 'P015080', 'P015081', 'P015082', 'P015083',
       'P015084', 'P015085', 'P015090', 'P015091', 'P015092', 'P015093',
       'P015094', 'P015095', 'P015096', 'P015097', 'P015098', 'P015099'],
      dtype=object)

In [29]:
# Restrict the full metadata table (every MoA, not just the selected ones)
# to the plates in `known_plates`. Note that this rebinds `df`.
on_known_plate = df['plate'].isin(known_plates)
df = df.loc[on_known_plate]

In [30]:
# One image folder per known plate, stored in the same order as
# `known_plates`: the last folder in the listing whose path contains the
# plate barcode.
# NOTE(review): "last" is relative to the order glob returns, which is not
# sorted here; several folders can match one plate (e.g. '-run2' variants),
# so confirm the chosen folder is the intended one.
fold = glob.glob('/mnt/micro-images-pvc/jonne' + '/specs935*')
folds = []
for plate_id in known_plates:
    candidates = [folder for folder in fold if plate_id in folder]
    folds.append(candidates[-1])

In [31]:
# Count the .tif files two directory levels below the last selected plate
# folder (no thumbnail filtering is applied here).
len(glob.glob(folds[-1] + '/*/*/*.tif'))

21560

In [32]:
# Non-thumbnail .tif files of the third selected plate folder, read from the
# lexicographically last '<folder>/*/*' sub-directory and sorted by path.
latest_dir = sorted(glob.glob(folds[2] + '/*/*'))[-1]
files = sorted(
    tif for tif in glob.glob(latest_dir + '/*.tif')
    if '_thumb' not in tif
)

In [45]:
# Build the per-site image index `bf_df`: one row per (plate, well, site)
# holding the image directory, the file name of each of the seven channel
# images (w1..w7) and the well's compound and MoA from the metadata table `df`.
columns = ['plate', 'well', 'compound', 'path', 'nuclei', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'moa']

# Output column -> channel tag searched for in the image path.
channel_tags = {'nuclei': '_w1', 'C1': '_w2', 'C2': '_w3', 'C3': '_w4',
                'C4': '_w5', 'C5': '_w6', 'C6': '_w7'}


def _list_plate_tifs(plate_dir):
    """Return the non-thumbnail .tif paths of one plate folder.

    Only the lexicographically last ``<plate_dir>/*/*`` sub-directory is read.
    Raises IndexError when the folder has no such sub-directory.
    """
    last_subdir = sorted(glob.glob(plate_dir + '/*/*'))[-1]
    return [x for x in glob.glob(last_subdir + '/*.tif') if '_thumb' not in x]


# Rows are collected in a plain list and converted once at the end.
# DataFrame.append copied the whole frame on every call (quadratic cost) and
# no longer exists in pandas >= 2.0.
rows = []

for file_idx in range(len(known_plates)):
    plate = known_plates[file_idx]
    plate_dir = folds[file_idx]

    files = _list_plate_tifs(plate_dir)

    # When the selected folder is a '-run2' acquisition and the folder without
    # that suffix also exists, pool the images of both acquisitions.
    first_run_dir = plate_dir.replace('-run2', '')
    if '-run2' in plate_dir and os.path.exists(first_run_dir):
        files = files + _list_plate_tifs(first_run_dir)

    files = sorted(files)

    d = df[df.plate == plate]

    for well in tqdm.tqdm(sorted(d.well.unique())):
        image_files = [x for x in files if '_' + well + '_' in x]
        if len(image_files) == 0:
            continue

        # Compound and MoA belong to the well, so look them up once per well
        # instead of once per site. `.item()` raises if a well maps to more
        # than one value.
        well_rows = d[d.well == well]
        compound = well_rows.compound.unique().item()
        moa = well_rows.moa.unique().item()

        for site in ['s1', 's2', 's3', 's4', 's5']:
            row_files = sorted([x for x in image_files if '_' + site + '_' in x])
            if not row_files:
                # Site has no images for this well: skip it, the same way
                # wells without images are skipped (this used to raise
                # IndexError).
                continue

            # If the site has images in more than one directory (pooled
            # runs), use the lexicographically smallest directory and take
            # every channel from it.
            path = min(os.path.dirname(x) for x in row_files)

            row = {'plate': plate, 'well': well, 'compound': compound, 'path': path}
            for column, tag in channel_tags.items():
                matches = [x for x in row_files if tag in x and path in x]
                if not matches:
                    raise IndexError(
                        'no {} image for plate {} well {} site {} in {}'.format(
                            tag, plate, well, site, path))
                row[column] = matches[0].split('/')[-1]
            row['moa'] = moa
            rows.append(row)

# Passing `columns` fixes the column order and still yields the expected
# (empty) frame when no rows were collected.
bf_df = pd.DataFrame(rows, columns=columns)

100%|██████████| 174/174 [00:07<00:00, 24.38it/s]
100%|██████████| 174/174 [00:07<00:00, 24.80it/s]
100%|██████████| 189/189 [00:07<00:00, 24.89it/s]
100%|██████████| 189/189 [00:08<00:00, 23.34it/s]
100%|██████████| 219/219 [00:09<00:00, 22.80it/s]
100%|██████████| 219/219 [00:10<00:00, 21.62it/s]
100%|██████████| 234/234 [00:11<00:00, 21.15it/s]
100%|██████████| 234/234 [00:11<00:00, 20.82it/s]
100%|██████████| 234/234 [00:10<00:00, 21.59it/s]
100%|██████████| 234/234 [00:12<00:00, 19.33it/s]
100%|██████████| 234/234 [00:12<00:00, 18.38it/s]
100%|██████████| 234/234 [00:13<00:00, 17.51it/s]
100%|██████████| 234/234 [00:14<00:00, 16.16it/s]
100%|██████████| 234/234 [00:14<00:00, 15.82it/s]
100%|██████████| 234/234 [00:15<00:00, 14.86it/s]
100%|██████████| 234/234 [00:16<00:00, 14.62it/s]
100%|██████████| 228/228 [00:15<00:00, 14.32it/s]
100%|██████████| 228/228 [00:16<00:00, 13.62it/s]


In [46]:
# Save the image index next to the metadata CSV, without the row index.
bf_df.to_csv('/scratch-shared/phil/SPECS_phil/exp_stats/bf_main.csv', index=False)

In [43]:
# Check that the folder name with the '-run2' suffix removed exists on disk
# (the same test the table-building loop applies to '-run2' folders).
os.path.exists('/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015079-U2OS-48h-P2-L2-TL-run2'.replace('-run2',''))

True

In [35]:
# Show the current value of `file_idx`, the plate-loop index variable.
# NOTE(review): looks like a leftover debugging cell — confirm it can be removed.
file_idx

12

In [40]:
# Show the folder listing exactly as glob returned it.
fold

['/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL-lid',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL-paper',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL-sticker',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL-seal',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL-seal-lid',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015076-U2OS-48h-P1-L1-TL',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015077-U2OS-48h-P1-L2-TL',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015078-U2OS-48h-P2-L1-TL',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015079-U2OS-48h-P2-L2-TL',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015079-U2OS-48h-P2-L2-TL-run2',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015080-U2OS-48h-P3-L1-TL',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015080-U2OS-48h-P3-L1-TL-run2',
 '/mnt/micro-images-pvc/jonne/specs935-v1-FA-P015081-U2OS-48h-P3-L2-TL',
 '/mnt/mi