# Run ResNet-50 Intel TensorFlow benchmarks on a single node with synthetic data

In [None]:
# Work from the home directory; every file name used below is relative to it.
%cd ~

### Write the PBS benchmark commands into the file `benchmark_1node.sh`

In [None]:
%%writefile benchmark_1node.sh
#!/bin/bash

### Job name
#PBS -N one_node_job

### Output files
#PBS -o one_node_job.stdout
#PBS -e one_node_job.stderr

### Request for nodes and number of processes per node
#PBS -l nodes=1:ppn=2

### Activate the horovod environment. Abort if it cannot be activated so the
### benchmark never silently runs with a different Python installation.
source ~/inteltfhorovod/bin/activate || {
    echo "ERROR: could not activate ~/inteltfhorovod" >&2
    exit 1
}

### Put all the nodes in hstfile without duplicates
### (the PBS node file may list the same node more than once).
cp "$PBS_NODEFILE" hostf
sort -u hostf > hstfile

### Run training benchmarks: 2 MPI ranks in total, 2 ranks per node.
### NOTE(review): "-genv KMP_BLOCKTIME 1" and "--kmp_blocktime 0" disagree, and
### OMP_NUM_THREADS (5) differs from --num_intra_threads (6). The values are
### kept exactly as they were; confirm which ones the benchmark actually uses.
mpiexec -np 2 -ppn 2 -hostfile hstfile \
    -genv I_MPI_DEBUG 4 \
    -genv KMP_BLOCKTIME 1 \
    -genv KMP_AFFINITY granularity=fine,verbose,compact,1,0 \
    -genv OMP_NUM_THREADS 5 \
    -genv I_MPI_PIN_DOMAIN 6:compact \
    -genv HOROVOD_FUSION_THRESHOLD 134217728 \
    python ~/benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py \
        --model resnet50 \
        --batch_size 128 \
        --num_batches 20 \
        --mkl \
        --num_intra_threads 6 \
        --num_inter_threads 2 \
        --kmp_blocktime 0 \
        --data_format NHWC \
        --variable_update horovod \
        --horovod_device cpu

### Remember the benchmark's exit status before anything else overwrites $?.
benchmark_status=$?

### Deactivate the environment
deactivate

### Exit with the benchmark's status so a failed run is not reported as success.
exit "$benchmark_status"

### Check the contents of `benchmark_1node.sh`

In [None]:
# Print the generated job script so it can be reviewed before submission.
!cat ./benchmark_1node.sh

### Optional step: remove all old output files

In [None]:
# Delete results left over from earlier runs so that the cells below cannot
# show stale output: the files named by the job script's "#PBS -o" / "#PBS -e"
# directives, plus any output files named after the script itself.
!rm -f one_node_job.stdout one_node_job.stderr benchmark_1node.sh.*

### Submit the job to the queue

In [None]:
# Hand the job script to the PBS scheduler.
!qsub benchmark_1node.sh

### Check the job status

In [None]:
# Query the queue; the job was submitted under the name "one_node_job"
# (set with "#PBS -N" in the job script). Re-run this cell to refresh.
!qstat

### Once the job has finished, check the output file `one_node_job.stdout`

In [None]:
# Display everything the job wrote to standard output.
!cat ./one_node_job.stdout

### Note down the number of images/sec for training

In [None]:
# Keep only the line(s) of the job output that contain "total images".
!grep 'total images' one_node_job.stdout

### Close the notebook