# Load data 


The goal of this notebook is to move a collection of paired input and target images ready to be used for training and inference via the Pix2PixHD GAN model. The core functions used within this notebook are the train_test_move_commands2 which generates the slurm command to move each image accordingly.</br>

Please see the Pix2PixHD readme for the full requirements on how to set the data loaders but in short the files path must be as follows: </br>

**Train:**

   **/path/to/files/train_A** </br>
   
   **/path/to/files/train_B**
   
**Val:**

   **/path/to/files/val_A**   
   
   **/path/to/files/val_B**

**Test:**

   **/path/to/files/test_A**   
   
   **/path/to/files/test_B** </br>
   
### <span style ='color:red'>IMPORTANT!
   
The files are saved ***BARCODE_rXXcXXfXXp01.tiff*** for example 3CG0021772_r01c01f02p01.tiff. This is to ensure that files from different barcodes do not overwright other files. Following this is very important for downstream implementation. </br>

I have chosen to combine the image paths along with all the meta data into a CSV and extract random samples based on my experiment design. The dataset  but you can follow any approach you wish as long as you save the images in the format outlined above.
    



### Import paramters and helper functions

In [3]:
import os
import shutil
import sys
import numpy as np
import pandas as pd
root_dir = '../../../'
sys.path.append(root_dir)

from slurm.sbatch import submit_array
from slurm.commands import move_16bitimage
from slurm.commands import combine_AB
from utils.df_utils  import df_channel_create as channel_create
from utils.df_utils import df_image_folder_match as image_folder_match
from utils.util_utils import extract_samples, find_file, train_test_move_commands2,create_directory



### Define paths

**You will need to as a MINIMUM UPDATE**:
- conda_path
- repo_path
- model_name
- tissue: either Lung, Breast or Ovarian
- output_path: where to save images


In [47]:
## Mandatory to update
conda_path = os.path.join(
    '/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/conda_envs/',
    'pytorch_cyclegan'
)

repo_path = os.path.join(
    '/hpc/scratch/nvme3/smt29021/phase1_repo_1122/Structure-Specific-Contrast-Enhancement/workflow/phase2_sampling_normalisation/',
#     'phase2_sampling_normalisation',
)

model_name = "bf_dna_16bit_breast_val_30000imgs_toxic"
tissue = "Lung"

output_path = os.path.join(
    "/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data",
    "APPROACH_Pix2PixHD", tissue, "Step1_Preprocessing/ACT1_Normalise",
    model_name
)


## Non-mandatory to update
py_path = os.path.join(repo_path,
                       'move_tesaro.py'
)

csv_path = os.path.join(
    '/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/images_and_metadata.csv'
)

random_val_samples_path = os.path.join(
    "/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/DATA_Processing/"
)

good_lung_val_csv_path = os.path.join(
    '/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/error_analysis/lung_val_good_bf.csv',
#     'lu_val_good_bf.csv'
)


### Retrospective move of bf_dna_16bit_lung_val2_30000imgs which are cropped to 1056 1056 need to be 1080.

In [42]:
from pathlib import Path
root_path = os.path.join('/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/')

folders =os.listdir(root_path)
folders_short = [i[:10] for i in folders]
command_list = []
splits_path = os.path.join('/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD/Lung/Step1_Preprocessing/ACT1_Normalise/bf_fitc_16bit_lung_val_30000imgs/val_')
final_path = os.path.join('/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD/Lung/Step1_Preprocessing/ACT1_Normalise/bf_cy5_16bit_lung_val_30000imgs/val_')

for i in ['A','B']:
    imgs = sorted([p for p in Path(f'{splits_path+i}/').glob(f'**/*.tiff')])
#     imgs = sorted([p for p in Path(f'{root_path+i}/').glob(f'**/*.tiff')])
    
    for img in imgs:
        img_name = str(img).split('/')[-1][:10]
        folder_index = folders_short.index(img_name)
        new_path = os.path.join(final_path+i,str(img).split('/')[-1])
        if i == 'A':
            orig_path = os.path.join(root_path, folders[folder_index], 'Images/'+str(img).split('/')[-1][11:-5]+'-ch2sk1fk1fl1.tiff')
        else:
            orig_path = os.path.join(root_path, folders[folder_index], 'Images/'+str(img).split('/')[-1][11:-5]+'-ch4sk1fk1fl1.tiff')
        command = strings(py_path,orig_path,new_path) #BF
        command_list.append(command)


In [43]:
len(command_list)
# command

12000

### Load Tesaro CSV

In [48]:
#Import Tesaro CSV
full_dataset = pd.read_csv(csv_path)
print("Number of Images: "+str(full_dataset.shape[0]))
full_dataset.head()

Number of Images: 2562351


Unnamed: 0,image_url,URL,Row,Col,FieldID,PlaneID,ChannelID,ChannelName,AcquisitionType,IlluminationType,...,MainEmissionWavelength,ObjectiveMagnification,ExposureTime,image_folder1,final_cell_line,tissue_final,UID,plate_setting,key,key_final
0,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,r01c01f01p01-ch1sk1fk1fl1.tiff,1,1,1,1,1,DAPI,NipkowConfocal,Epifluorescence,...,456.0,20.0,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",ELN15212_Lu_Ov__DNA_Damage_Val_091020,3CG0021772,Lung,48124c99-6702-4989-8729-1fb8ff4c05bf,Val,3CG0021772_r01c01f01p01-ch1sk1fk1fl1.tiff,3CG0021772_r01c01f01p01.tiff
1,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,r01c01f01p01-ch2sk1fk1fl1.tiff,1,1,1,1,2,Brightfield,NipkowConfocal,Transmitted,...,0.0,20.0,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",ELN15212_Lu_Ov__DNA_Damage_Val_091020,3CG0021772,Lung,4f27f68c-beaa-425d-b19c-58131449790f,Val,3CG0021772_r01c01f01p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f01p01.tiff
2,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,r01c01f01p01-ch3sk1fk1fl1.tiff,1,1,1,1,3,Fluorescein (FITC),NipkowConfocal,Epifluorescence,...,522.0,20.0,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",ELN15212_Lu_Ov__DNA_Damage_Val_091020,3CG0021772,Lung,6f589da1-9901-453a-8892-29c7124813ff,Val,3CG0021772_r01c01f01p01-ch3sk1fk1fl1.tiff,3CG0021772_r01c01f01p01.tiff
3,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,r01c01f01p01-ch4sk1fk1fl1.tiff,1,1,1,1,4,Cy5,NipkowConfocal,Epifluorescence,...,706.0,20.0,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",ELN15212_Lu_Ov__DNA_Damage_Val_091020,3CG0021772,Lung,89c8fc27-b44e-488f-845f-a3ed5a2ac35f,Val,3CG0021772_r01c01f01p01-ch4sk1fk1fl1.tiff,3CG0021772_r01c01f01p01.tiff
4,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,r01c01f02p01-ch1sk1fk1fl1.tiff,1,1,2,1,1,DAPI,NipkowConfocal,Epifluorescence,...,456.0,20.0,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",ELN15212_Lu_Ov__DNA_Damage_Val_091020,3CG0021772,Lung,be0bdcff-6744-4279-8cc7-99a04b05bcbd,Val,3CG0021772_r01c01f02p01-ch1sk1fk1fl1.tiff,3CG0021772_r01c01f02p01.tiff


### Toxic/Non-toxic sampling

1. Change filter on df to 1 or 0
2. Load random samples form .txt
3. Generate folders and split samples
4. Generate old and new paths

In [49]:
#Import Good Lung Images
img_data = pd.read_csv(good_lung_val_csv_path)
# img_data = img_data.loc[img_data['toxic']==0]
# print("Number of Images: "+str(img_data.shape[0]))
img_data.head()

Unnamed: 0.1,Unnamed: 0,AbsTime,AcquisitionType,CameraType,ChannelID,ChannelName,Col,ExposureTime,FieldID,IlluminationType,...,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,final_cell_line,image_folder1,image_url,key,key_final,plate_setting,tissue_final,toxic
0,0,2020-09-12 00:09:33.877000-04:00,NipkowConfocal,AndorZylaCam,2,Brightfield,1,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",1,Transmitted,...,0.0,0.0,3CG0021772,ELN15212_Lu_Ov__DNA_Damage_Val_091020,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,3CG0021772_r01c01f01p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f01p01.tiff,Val,Lung,1
1,1,2020-09-12 00:09:34.967000-04:00,NipkowConfocal,AndorZylaCam,2,Brightfield,1,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",2,Transmitted,...,1.0,1.0,3CG0021772,ELN15212_Lu_Ov__DNA_Damage_Val_091020,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,3CG0021772_r01c01f02p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f02p01.tiff,Val,Lung,1
2,2,2020-09-12 00:09:36.073000-04:00,NipkowConfocal,AndorZylaCam,2,Brightfield,1,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",3,Transmitted,...,2.0,2.0,3CG0021772,ELN15212_Lu_Ov__DNA_Damage_Val_091020,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,3CG0021772_r01c01f03p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f03p01.tiff,Val,Lung,1
3,3,2020-09-12 00:09:37.290000-04:00,NipkowConfocal,AndorZylaCam,2,Brightfield,1,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",4,Transmitted,...,3.0,3.0,3CG0021772,ELN15212_Lu_Ov__DNA_Damage_Val_091020,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,3CG0021772_r01c01f04p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f04p01.tiff,Val,Lung,1
4,4,2020-09-12 00:09:38.383000-04:00,NipkowConfocal,AndorZylaCam,2,Brightfield,1,"[[0.987911,0,0,-21.6],[0,-0.987911,0,16.5],[0,...",5,Transmitted,...,4.0,4.0,3CG0021772,ELN15212_Lu_Ov__DNA_Damage_Val_091020,/hpc/projects/upt/bioimaging_analytics/Tesaro-...,3CG0021772_r01c01f05p01-ch2sk1fk1fl1.tiff,3CG0021772_r01c01f05p01.tiff,Val,Lung,1


### Load txt file containing row index of chosen Bright-field samples and then generate the corresponding target paths

In [51]:
'/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/DATA_Processing/'
#Generate bf samples
indexes, _30000_bf = extract_samples("lu_val_nontoxicsamples_30000.txt",img_data,random_val_samples_path)

# _30000_bf = _30000_bf.iloc[:,0]
_30000_bf.columns
_30000_bf.iloc[0,-6]

'/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/3CG0021816__2020-09-12T01_15_37-Measurement1/Images/r12c13f04p01-ch2sk1fk1fl1.tiff'

In [52]:
dapi_new_path= os.path.join(f'/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/{tissue}/Step1_Preprocessing/ACT1_Normalise/bf_dna_30k_nontoxic/')
fitc_new_path= os.path.join(f'/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/{tissue}/Step1_Preprocessing/ACT1_Normalise/bf_fitc_30k_nontoxic/')
cy5_new_path = os.path.join(f'/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/{tissue}/Step1_Preprocessing/ACT1_Normalise/bf_cy5_30k_nontoxic/')

create_directory(dapi_new_path)
create_directory(fitc_new_path)
create_directory(cy5_new_path)
for i in ['train','val']:
    create_directory(os.path.join(dapi_new_path,i+'_A'))
    create_directory(os.path.join(dapi_new_path,i+'_B'))
    create_directory(os.path.join(fitc_new_path,i+'_A'))
    create_directory(os.path.join(fitc_new_path,i+'_B'))
    create_directory(os.path.join(cy5_new_path,i+'_A'))
    create_directory(os.path.join(cy5_new_path,i+'_B'))

In [54]:
train_30000_bf = _30000_bf.iloc[:21000,:]
val_30000_bf = _30000_bf.iloc[21000:27000,:]
# tes?t_30000_bf = _30000_bf.iloc[27000:,:]
train_30000_bf.iloc[1,1]#()

'2020-09-12 05:10:02.353000-04:00'

In [55]:
train_30000_bf.columns[1]

'AbsTime'

In [28]:
print(paths.iloc[1,1])
print(os.path.join(dapi_new_path,'train_A',paths.iloc[1,1].split('/')[-3][:11])+paths.iloc[1,3].split('/')[-1][:12]+'.tiff')
print(paths.iloc[1,1][:-15]+str(1)+paths.iloc[1,1][-14:])
print(os.path.join(dapi_new_path,'train_B',paths.iloc[row,1].split('/')[-3][:11])+paths.iloc[row,3].split('/')[-1][:12]+'.tiff')

/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/3CG0021748__2020-09-12T10_26_23-Measurement1/Images/r12c05f02p01-ch3sk1fk1fl1.tiff
/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_30k_nontoxic/train_A/3CG0021748_r12c05f02p01.tiff
/hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/3CG0021748__2020-09-12T10_26_23-Measurement1/Images/r12c05f02p01-ch1sk1fk1fl1.tiff
/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_30k_nontoxic/train_B/3CG0021748_r04c04f08p01.tiff


In [56]:

# # #Generate target channels and inputs for move
# _val_4ch = channel_create(_30000_bf,0)
# _val_sig = image_folder_match(full_dataset,_30000_bf,"image_url","Brightfield")

# Generate targets 
command_list = []
target_path_old = []
target_path_new = []
source_path_new = []
source_path_old = []
paths = train_30000_bf
print(f'Total Train {paths.shape[0]}')
col = -6
for row in range(paths.shape[0]):
    
    for t in [1,3,4]:
        if t ==1:
            # BF path
            source_path_old.append(paths.iloc[row,col])
            # Save BF in new loc with new name
            source_path_new.append(os.path.join(dapi_new_path,'train_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            # Stain path
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(dapi_new_path,'train_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        if t ==3:
            source_path_old.append(paths.iloc[row,col])
            source_path_new.append(os.path.join(fitc_new_path,'train_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(fitc_new_path,'train_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        if t ==4:
            source_path_old.append(paths.iloc[row,col])
            source_path_new.append(os.path.join(cy5_new_path,'train_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(cy5_new_path,'train_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        else:
            continue
print(len(target_path_old))
print(len(target_path_new))
paths = val_30000_bf      
print(f'Total Val {paths.shape[0]}')
for row in range(paths.shape[0]):
    for t in [1,3,4]:
        if t ==1:
            
            source_path_old.append(paths.iloc[row,col])
            source_path_new.append(os.path.join(dapi_new_path,'val_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(dapi_new_path,'val_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        if t ==3:
            
            source_path_old.append(paths.iloc[row,col])
            source_path_new.append(os.path.join(fitc_new_path,'val_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(fitc_new_path,'val_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        if t ==4:
            
            source_path_old.append(paths.iloc[row,col])
            source_path_new.append(os.path.join(cy5_new_path,'val_A',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
            target_path_old.append(paths.iloc[row,col][:-15]+str(t)+paths.iloc[row,col][-14:])
            target_path_new.append(os.path.join(cy5_new_path,'val_B',paths.iloc[row,col].split('/')[-3][:11])+paths.iloc[row,col].split('/')[-1][:12]+'.tiff')
        else:
            continue
# paths = test_30000_bf
# print(f'Total Test {paths.shape[0]}')

# for row in range(paths.shape[0]):
#     for t in [1,3,4]:
#         if t ==1:
            
#             source_path_old.append(paths.iloc[row,0])
#             source_path_new.append(os.path.join(dapi_new_path,'test_A',paths.iloc[row,-2]))
#             target_path_old.append(paths.iloc[row,0][:-15]+str(t)+paths.iloc[row,0][-14:])
#             target_path_new.append(os.path.join(dapi_new_path,'test_B',paths.iloc[row,-2]))
#         if t ==3:
            
#             source_path_old.append(paths.iloc[row,0])
#             source_path_new.append(os.path.join(fitc_new_path,'test_A',paths.iloc[row,-2]))
#             target_path_old.append(paths.iloc[row,0][:-15]+str(t)+paths.iloc[row,0][-14:])
#             target_path_new.append(os.path.join(fitc_new_path,'test_B',paths.iloc[row,-2]))
#         if t ==4:
            
#             source_path_old.append(paths.iloc[row,0])
#             source_path_new.append(os.path.join(cy5_new_path,'test_A',paths.iloc[row,-2]))
#             target_path_old.append(paths.iloc[row,0][:-15]+str(t)+paths.iloc[row,0][-14:])
#             target_path_new.append(os.path.join(cy5_new_path,'test_B',paths.iloc[row,-2]))
#         else:
#             continue

print(len(target_path_old))
print(len(target_path_new))

Total Train 21000
63000
63000
Total Val 6000
81000
81000


In [57]:
def strings(python,input_file,output_file):
    return f'python {python} --input_file {input_file} --output_file {output_file}'
command_list = []

for i in range(len(target_path_old)):
    command = strings(py_path,target_path_old[i],target_path_new[i]) #BF
    command_list.append(command)
for j in range(len(source_path_old)):
    command = strings(py_path,source_path_old[j],source_path_new[j]) #BF
    command_list.append(command)
print(f'Total Commands {len(command_list)}')
print(command_list[-100:])

Total Commands 162000
['python /hpc/scratch/nvme3/smt29021/phase1_repo_1122/Structure-Specific-Contrast-Enhancement/workflow/phase2_sampling_normalisation/move_tesaro.py --input_file /hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/3CG0021760__2020-09-10T21_30_44-Measurement1/Images/r02c23f06p01-ch2sk1fk1fl1.tiff --output_file /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Lung/Step1_Preprocessing/ACT1_Normalise/bf_cy5_30k_nontoxic/val_A/3CG0021760_r02c23f06p01.tiff', 'python /hpc/scratch/nvme3/smt29021/phase1_repo_1122/Structure-Specific-Contrast-Enhancement/workflow/phase2_sampling_normalisation/move_tesaro.py --input_file /hpc/projects/upt/bioimaging_analytics/Tesaro-DNA-Damage/2020-09/ELN15212_Lu_Ov__DNA_Damage_Val_091020/ELN15212_Lu_Ov_DNA_Damage_Val_Data/Lung/3CG0021760__2020-09-10T21_30_44-Measurement1/Images/r09c03f06p01-ch2sk1fk1fl1.tiff --output_fil

In [63]:
from tifffile import imread, imsave
from tqdm import tqdm
# for i in tqdm(range(len(target_path_old))):
#     target = imread(target_path_old[i])
#     imsave(target_path_new[i],target.astype(np.float32),imagej=True) #BF
#     command_list.append(command)
for j in tqdm(range(len(source_path_old))):
    source = imread(source_path_old[j])
    imsave(source_path_new[j],source.astype(np.float32),imagej=True)
# print(f'Total Commands {len(command_list)}')
# print(command_list[-100:])

100%|██████████| 81000/81000 [1:12:37<00:00, 18.59it/s]


In [59]:
### Job Settings ###
job_name = 'BrNT_move'
node_setting = ''
node_setting = node_setting+' --job-name={}'.format(job_name)
node_setting = node_setting+' --time=10-00:00'
node_setting = node_setting+' --nodes=1'
node_setting = node_setting+' --partition=cpu'
#node_setting = node_setting+' --gres=gpu:1'
# node_setting = node_setting+' --mem=99500'
node_setting = node_setting+' --ntasks-per-node=50'
node_setting = node_setting+' --output=./slurm_outs/nontoxic/"slurm-%A_%a.out"'
node_setting = node_setting[1:]

os.makedirs('./slurm_outs/nontoxic/', exist_ok=True)

submit_array(root_dir, command_list, node_setting, job_name,repo_path,conda_path)

Submitted batch job 62408408



'Submitted batch job 62408408\n'

### Hack to get the original paths for lung 30k samples


In [None]:
output_path = '/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD/Lung/Step1_Preprocessing/ACT1_Normalise/bf_cy5_16bit_lung_val_30000imgs/test_B/'
output_path_new = '/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD/Lung/Step1_Preprocessing/ACT1_Normalise/bf_cy5_16bit_lung_val_30000imgs_1080/test_B/'
create_directory(output_path_new)
bfs = find_file(output_path,'.tiff')
print(len(bfs))
channel = ""
samples.iloc[0,0][:-15]+'2'+samples.iloc[0,0][-14:]

In [None]:
lung_img_data = img_data.loc[img_data['tissue_final'] =='Lung']
lung_img_data.shape

source_path = []
target_path = []

samples= lung_img_data[lung_img_data['key_final'].isin(bfs)]


def strings(python,input_file,output_file):
    return f'python {python} --input_file {input_file} --output_file {output_file}'
command_list = []

for i in range(samples.shape[0]):
#     command = strings(py_path,samples.iloc[i,0],os.path.join(output_path_new,samples.iloc[i,-1])) #BF

    command = strings(py_path,
samples.iloc[i,0][:-15]+'4'+samples.iloc[0,0][-14:],os.path.join(output_path_new,samples.iloc[i,-1]))

    command_list.append(command)

### Generate folders, slurm command and run job

In [None]:
# command_list, a_paths, b_paths = train_test_move_commands2(
#     py_path,lung_val_7000_sig,lung_val_7000_dapi,indexes_7000,output_path,model="pix2pixHD")
print('Number of Jobs:{}'.format(len(command_list)))
print(command_list[:2])



In [None]:
### Job Settings ###
job_name = 'Lung_cy5_testB'

node_setting = ''
node_setting = node_setting+' --job-name={}'.format(job_name)
node_setting = node_setting+' --time=0:60:00'
node_setting = node_setting+' --nodes=4'
node_setting = node_setting+' --partition=cpu'
#node_setting = node_setting+' --gres=gpu:1'
node_setting = node_setting+' --ntasks-per-node=1'
node_setting = node_setting+' --output=./slurm_outs/"slurm-%A_%a.out"'
node_setting = node_setting[1:]

os.makedirs('./slurm_outs', exist_ok=True)

submit_array(root_dir, command_list[:2], node_setting, job_name,repo_path,conda_path,False)

### End