# Training

Having successfully moved the dataset into the required location and format, you are now ready to begin training the model. This notebook is used to define the hyper-parameters necessary for training the Pix2PixHD GAN model, as well as to schedule the training within an HPC environment.

### Load libraries and helper functions

In [2]:
import os 
import sys


# Directory three levels above the notebook's working directory. It is added
# to the import search path here and also passed to `submit_array` later on.
# NOTE(review): presumably this is where the `slurm` package imported below
# lives — confirm.
root_dir = '../../../'
sys.path.append(root_dir)

from slurm.commands import train_pix2pixHD
from slurm.sbatch import submit_array

In [17]:
# Experiment grid: each stain is trained on each cell type with two seeds.
stains = ['dna', 'Cy5', 'FITC']
celltypes = ['Breast', 'Lung', 'Ovarian']
command_list = []

# --- Settings shared by every run (independent of stain / cell type / seed) ---
# Conda environment path (handed to `submit_array` when the jobs are submitted).
conda_path = '/hpc/user_apps/bioimaging_analytics/conda_environments/pix2pixHD_CUDA11'
# Training script. `repo_path` points directly at the script, so `py_file`
# is the very same path.
repo_path = '/hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py'
py_file = repo_path
# Parent folder for training outputs; each run gets its own sub-folder.
output_dir = '/hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/'

for stain in stains:
    stain_tag = stain.lower()
    for celltype in celltypes:
        # Model name shared by both seeds of this stain / cell type pair.
        model_name = f'bf_{stain_tag}_30k_toxic_{celltype.lower()}'
        # Dataset folder; the layout is the same for every stain.
        dataroot = f'/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/{celltype}/Step1_Preprocessing/ACT1_Normalise/bf_{stain_tag}_30k_toxic/'

        for seed in ('42', '123'):
            run_name = f'{model_name}_seed{seed}'
            # Key order is kept because it determines the flag order in the
            # printed command. An empty string marks a value-less flag.
            # Optional flags that are currently not set:
            #   --no_vgg_loss, --ndf, --dropout_variation_inf,
            #   --fp16 (APEX training only),
            #   --continue_train / --which_epoch (resuming from an epoch).
            arg_dict = {
                '--dataroot': dataroot,
                '--data_type': '16',
                '--batchSize': '4',
                '--checkpoints_dir': os.path.join(output_dir, run_name),
                '--label_nc': '0',
                '--name': run_name,
                '--no_instance': '',
                '--resize_or_crop': 'none',
                '--input_nc': '1',
                '--output_nc': '1',
                '--seed': seed,
                '--nThreads': '1',
                '--gpu_ids': '0',
                '--loadSize': '1080',
                '--norm': 'instance',
                '--use_dropout': '',
            }
            command = train_pix2pixHD(py_file, arg_dict)
            command_list.append(command)

            print(command)
### Job Settings ###
# NOTE(review): `model_name` and `seed` are whatever the last iteration of the
# loops above left behind, so the whole job array is named after the final
# stain / cell type / seed combination — confirm this is intended.
job_name = f'{model_name}_{seed}'

# sbatch options, joined with single spaces into one option string.
# An optional '--dependency=afterok:<job id>' entry can be added here to
# chain this submission after another job.
sbatch_options = [
    f'--job-name={job_name}',
    '--time=10-00:00',      # days-hours:minutes
    '--nodes=1',
    '--partition=gpu',
    '--gres=gpu:a6000:1',
    '--ntasks-per-node=1',
    '--output=./slurm_outs/"slurm-%A_%a.out"',
]
node_setting = ' '.join(sbatch_options)

# The --output pattern above writes into this folder, so create it up front.
os.makedirs('./slurm_outs', exist_ok=True)

# NOTE(review): the `[1:]` slice leaves the first command in `command_list`
# out of the submission — confirm this is deliberate.
submit_array(root_dir, command_list[1:], node_setting, job_name, repo_path, conda_path)

python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_30k_toxic/ --data_type 16 --batchSize 4 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/bf_dna_30k_toxic_breast_seed42 --label_nc 0 --name bf_dna_30k_toxic_breast_seed42 --no_instance  --resize_or_crop none --input_nc 1 --output_nc 1 --seed 42 --nThreads 1 --gpu_ids 0 --loadSize 1080 --norm instance --use_dropout  
python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_30k_toxic/ --data_type 16 --batchSize 4 --checkpoints_

'Submitted batch job 65670702\n'

In [15]:
# Sanity check: 3 stains x 3 cell types x 2 seeds should give 18 commands.
len(command_list)

18

### Define general parameters

In [3]:
# Experiment grid for the 16-bit / 30000-image runs: each stain is trained on
# each cell type with two seeds.
stains = ['dna', 'Cy5', 'FITC']
celltypes = ['Breast', 'Lung', 'Ovarian']
phentypes = ['nontoxic', 'toxic']

# Start from an empty list so this cell works on a fresh kernel and does not
# stack its commands on top of a list left behind by another cell.
command_list = []

# --- Settings shared by every run (independent of stain / cell type / seed) ---
# Conda environment path (handed to `submit_array` when the jobs are submitted).
conda_path = '/hpc/user_apps/bioimaging_analytics/conda_environments/pix2pixHD_CUDA11'
# Training script. `repo_path` points directly at the script, so `py_file`
# is the very same path.
repo_path = '/hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py'
py_file = repo_path
# Parent folder for training outputs; each run gets its own sub-folder.
output_dir = '/hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/'

for stain in stains:
    for celltype in celltypes:
        # Model name shared by both seeds of this stain / cell type pair.
        model_name = f'bf_{stain.lower()}_16bit_{celltype.lower()}_val_30000imgs'
        # Dataset folder for this stain / cell type pair.
        dataroot = f'/hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD//{celltype}/Step1_Preprocessing/ACT1_Normalise/bf_{stain.lower()}_16bit_{celltype.lower()}_val_30000imgs/'

        for seed in ['42', '123']:
            run_name = f'{model_name}_seed{seed}'
            # Key order is kept because it determines the flag order in the
            # printed command. An empty string marks a value-less flag.
            # Optional flags that are currently not set:
            #   --no_vgg_loss, --ndf, --dropout_variation_inf,
            #   --fp16 (APEX training only),
            #   --continue_train / --which_epoch (resuming from an epoch).
            arg_dict = {
                '--dataroot': dataroot,
                '--data_type': '16',
                '--batchSize': '16',
                '--checkpoints_dir': os.path.join(output_dir, run_name),
                '--label_nc': '0',
                '--name': run_name,
                '--no_instance': '',
                '--resize_or_crop': 'none',
                '--input_nc': '1',
                '--output_nc': '1',
                '--seed': seed,
                '--nThreads': '1',
                '--gpu_ids': '0',
                '--loadSize': '1080',
                '--norm': 'instance',
                '--use_dropout': '',
            }
            command = train_pix2pixHD(py_file, arg_dict)
            command_list.append(command)
            print(command)

python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD//Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_16bit_breast_val_30000imgs/ --data_type 16 --batchSize 16 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/bf_dna_16bit_breast_val_30000imgs_seed42 --label_nc 0 --name bf_dna_16bit_breast_val_30000imgs_seed42 --no_instance  --resize_or_crop none --input_nc 1 --output_nc 1 --seed 42 --nThreads 1 --gpu_ids 0 --loadSize 1080 --norm instance --use_dropout  
python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/scratch/rdip1/smt29021/Tesaro-DNA-Damage/Data/APPROACH_Pix2PixHD//Breast/Step1_Preprocessing/ACT1_Normalise/bf_dna_16bit_breast_val_30000imgs/ --data_typ

In [6]:
# Number of training commands currently held in `command_list`
# (one pass of the loops above yields 3 stains x 3 cell types x 2 seeds = 18).
len(command_list)

18

In [17]:
import json

# Persist the generated training commands next to the training outputs so the
# exact runs can be looked up later.
runs_json_path = os.path.join(output_dir, 'notoxic_phenotype_train_runs.json')
with open(runs_json_path, mode='w', encoding='utf-8') as f:
    f.write(json.dumps(command_list, ensure_ascii=False, indent=4))

In [18]:
# Read the saved command list back to check what was written.
# The context manager closes the file handle, and the encoding matches the
# one used when the file was written.
runs_json_path = os.path.join(output_dir, 'notoxic_phenotype_train_runs.json')
with open(runs_json_path, encoding='utf-8') as runs_file:
    data = json.load(runs_file)
data

['python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/CellProfiler_Wells/nontoxic/Ovarian/bf_fitc_16bit_nontoxic/ --data_type 16 --batchSize 16 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/bf_fitc_30k_nontoxic_seed123 --label_nc 0 --name bf_fitc_30k_nontoxic_seed123 --no_instance  --resize_or_crop none --input_nc 1 --output_nc 1 --seed 123 --nThreads 1 --gpu_ids 0 --loadSize 1080 --norm instance --use_dropout  ',
 'python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/CellProfiler_Wells/nontoxic/Breast/bf_dna_30k_nontoxic/ --data_type 16 --batchSize 16 --checkpoints_dir /hpc/

### Update model hyper-parameters

See source_code/pix2pixhd/ for full explanation of each of the below hyper-parameters. 

#### Must update
- **'--dataroot'**: path/to/folder/containing/train_A <br>
- **'--data_type'**: Bit depth of the input images. Either 8 or 16.

In [63]:
# Build one training command per seed for the model currently selected in the
# kernel. This cell relies on `dataroot`, `output_dir`, `model_name` and
# `py_file` having been defined by an earlier cell.

# Create the list once, before the loop, so every seed's command is kept
# rather than only the last one.
command_list = []
for seed in ['42', '123']:
    # The seed suffix gives each seed its own checkpoint folder and run name,
    # matching the naming used by the grid cells earlier in the notebook, so
    # the runs cannot overwrite each other.
    run_name = f'{model_name}_seed{seed}'

    # Key order is kept because it determines the flag order in the printed
    # command. An empty string marks a value-less flag.
    # Optional flags that are currently not set:
    #   --no_vgg_loss, --ndf, --dropout_variation_inf,
    #   --fp16 (APEX training only),
    #   --continue_train / --which_epoch (resuming from an epoch).
    arg_dict = {
        '--dataroot': dataroot,
        '--data_type': '16',
        '--batchSize': '4',
        '--checkpoints_dir': os.path.join(output_dir, run_name),
        '--label_nc': '0',
        '--name': run_name,
        '--no_instance': '',
        '--resize_or_crop': 'none',
        '--input_nc': '1',
        '--output_nc': '1',
        '--seed': seed,
        '--nThreads': '1',
        '--gpu_ids': '0',
        '--loadSize': '1080',
        '--norm': 'instance',
        '--use_dropout': '',
    }
    command = train_pix2pixHD(py_file, arg_dict)
    command_list.append(command)
    print(command)

### Generate and run Slurm command

In [25]:
# Turn the `arg_dict` currently held in the kernel into a single training
# command and make it the only entry of `command_list`.
command = train_pix2pixHD(py_file, arg_dict)
command_list = [command]
print(command)

python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Ovarian/Step1_Preprocessing/ACT1_Normalise/bf_fitc_30k_nontoxic/ --data_type 16 --batchSize 16 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/bf_fitc_30k_nontoxic_seed123 --label_nc 0 --name bf_fitc_30k_nontoxic_seed123 --no_instance  --resize_or_crop none --input_nc 1 --output_nc 1 --seed 123 --nThreads 1 --gpu_ids 0 --loadSize 1080 --norm instance --use_dropout  


In [32]:
### Job Settings ###
# The job is named after the model currently held in `model_name`.
job_name = f'{model_name}'

# sbatch options, joined with single spaces into one option string.
# An optional '--dependency=afterok:<job id>' entry can be added here to
# chain this submission after another job.
sbatch_options = [
    f'--job-name={job_name}',
    '--time=10:00:00',      # hours:minutes:seconds
    '--nodes=1',
    '--partition=gpu',
    '--gres=gpu:a6000:1',
    '--ntasks-per-node=1',
    '--output=./slurm_outs/"slurm-%A_%a.out"',
]
node_setting = ' '.join(sbatch_options)

# The --output pattern above writes into this folder, so create it up front.
os.makedirs('./slurm_outs', exist_ok=True)

submit_array(root_dir, command_list, node_setting, job_name, repo_path, conda_path)

Submitted batch job 61836610



'Submitted batch job 61836610\n'

In [28]:
# Display the commands currently held in `command_list` for inspection.
command_list

['python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_tesaro.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tesaro-DNA-Damage/Data/Ovarian/Step1_Preprocessing/ACT1_Normalise/bf_fitc_30k_nontoxic/ --data_type 16 --batchSize 16 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Tesaro-DNA-Damage/cuong_group_experiments/ACT2_Train/bf_fitc_30k_nontoxic_seed123 --label_nc 0 --name bf_fitc_30k_nontoxic_seed123 --no_instance  --resize_or_crop none --input_nc 1 --output_nc 1 --seed 123 --nThreads 1 --gpu_ids 0 --loadSize 1080 --norm instance --use_dropout  ']