# Training

Having successfully moved the dataset into the required location and format you are now ready to begin training of the model. This notebook is used to define the hyper-paramters neceassary for training the Pix2PixHD GAN model as well as scheduling the training within HPC environment. 

### Load libraries and helper functions

In [2]:
import os 
import sys
root_dir = '../../../'
sys.path.append(root_dir)
from utils.util_utils import find_file
from slurm.commands import train_pix2pixHD
from slurm.sbatch import submit_array

In [3]:
from tifffile import imread
import numpy as np
f = find_file( '/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float0to1/DAPI/test_A//'
, '.tiff')
x = imread( '/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float0to1/DAPI/test_A//'+f[0])
y = x[:,:,1]
y = y[:,:,np.newaxis]
z = x[:,:,0]
z = z[:,:,np.newaxis]
print(y.shape)
h = np.concatenate((y,z), axis=-1)
h.shape

(256, 256, 1)


(256, 256, 2)

### Define general paramters

In [4]:
# Define a model name
model_name = 'af_he_060923' #'dapi_HE_run_32float'
cell_type = 'Lung'

# Path for conda environment 
conda_path = os.path.join(
    '/hpc/user_apps/bioimaging_analytics/conda_environments/pix2pixHD_CUDA11',
#     'pytorch_1.8.1_py3.7',
)
 
# Path for location of source code
repo_path = os.path.join(
    '/hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/'
)

# Path for saving training outputs including weights and validation scores
output_dir = os.path.join(
    '/hpc/projects/upt/samuel_tonks_experimental_space/experiments/Jan_AF_HnE/AFs_HE/',
    'APPROACH_Pix2PixHD',
    cell_type,
    'Step1_Preprocessing',
    'ACT2_Train',
    model_name,
)

# Path for chosen training file see source code for explanation of files to choose from.
py_file = os.path.join(
    repo_path,
    'train_afs_he.py'
)

### Update model hyper-paramters

See source_code/pix2pixhd/ for full explanation of each of the below hyper-parameters. 

#### Must update
- **'--dataroot'**: path/to/folder/containing/train_A </br>
- **'--data_type'**: Bit of input images. Either 8 or 16 

In [5]:
arg_dict = {}
arg_dict['--dataroot'] = os.path.join(
    '/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float0to1//'
)
arg_dict['--model'] = 'pix2pixHD_af_he'
arg_dict['--batchSize'] = '8'
arg_dict['--checkpoints_dir'] = output_dir 
arg_dict['--label_nc'] = '2'
arg_dict['--name'] = model_name
arg_dict['--no_instance'] = ''
arg_dict['--resize_or_crop'] = 'none'
arg_dict['--input_nc'] = '2'
arg_dict['--output_nc'] = '3'
arg_dict['--data_type'] = '8'
# arg_dict['--no_html'] = True
# arg_dict['--isTrain'] = 'True'

# arg_dict['--no_vgg_loss'] = ''
arg_dict['--nThreads'] = '1'
arg_dict['--gpu_ids'] = '0'
arg_dict['--loadSize'] = '256'
# arg_dict['--ndf'] = '32'
arg_dict['--norm'] = 'instance'
arg_dict['--use_dropout'] = ''
# arg_dict['--dropout_variation_inf'] = 'False'
## APEX Training only
# arg_dict['--fp16'] = '' 

## Used only is retraining from epoch
# arg_dict['--continue_train'] = ''
# arg_dict['--which_epoch'] = 'latest'


### Generate  and run slurm command

In [6]:
command = train_pix2pixHD(
    py_file,
    arg_dict
)
command_list = []
command_list.append(command)
print(command)

python /hpc/projects/upt/samuel_tonks_experimental_space/repos/gskgithub/virtual_staining/source_code/pix2pixHD_n/train_afs_he.py --dataroot /hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float0to1// --model pix2pixHD_af_he --batchSize 8 --checkpoints_dir /hpc/projects/upt/samuel_tonks_experimental_space/experiments/Jan_AF_HnE/AFs_HE/APPROACH_Pix2PixHD/Lung/Step1_Preprocessing/ACT2_Train/af_he_060923 --label_nc 2 --name af_he_060923 --no_instance  --resize_or_crop none --input_nc 2 --output_nc 3 --data_type 8 --nThreads 1 --gpu_ids 0 --loadSize 256 --norm instance --use_dropout  


In [9]:
### Job Settings ###
job_name = '{}'.format(model_name)

node_setting = ''
node_setting = node_setting+' --job-name={}'.format(job_name)
node_setting = node_setting+' --time=4-00:00'
node_setting = node_setting+' --nodes=1'
# node_setting = node_setting+' --dependency=afterok:{}'.format('43523352')

node_setting = node_setting+' --partition=gpu'
node_setting = node_setting+' --gres=gpu:v100:1'
node_setting = node_setting+' --ntasks-per-node=1'
node_setting = node_setting+' --output=./slurm_outs/"slurm-%A_%a.out"'
node_setting = node_setting[1:]

os.makedirs('./slurm_outs', exist_ok=True)

submit_array(root_dir, command_list, node_setting, job_name,repo_path, conda_path)

Submitted batch job 62493370



'Submitted batch job 62493370\n'

In [15]:
from utils.util_utils import find_file
from tifffile import imread
import numpy as np

In [18]:
cy3_tr = find_file(
'/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float/Cy3/train_A/'
, ".tiff")

FileNotFoundError: [Errno 2] No such file or directory: '/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float/Cy3/train_A/'

In [17]:
for i in cy3_tr:
    print(i)
    
    x = imread(
    '/hpc/projects/upt/samuel_tonks_experimental_space/datasets/Tissue/af_he/jan_roger_training_eval_sets/mixed_samples/32float0to1/Cy3/train_A/'+i
)
    print(x[:,:,0].shape)
    print(np.max(x))

NameError: name 'cy3_tr' is not defined