From 00831647bba7aaadd8532aeab7d1df3654e5c905 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 11:01:08 +0800 Subject: [PATCH 01/11] yan-test --- .../run-example-test-ray-integration.sh | 403 +++++++++++++----- .../learn/bigdl/attention/transformer.py | 17 +- .../bigdl/imageInference/imageInference.py | 10 +- .../learn/horovod/pytorch_estimator.py | 11 +- .../learn/horovod/simple_horovod_pytorch.py | 6 + .../orca/example/learn/mxnet/lenet_mnist.py | 1 + python/orca/example/learn/openvino/predict.py | 3 +- .../async_parameter_server.py | 9 +- .../parameter_server/sync_parameter_server.py | 6 +- .../example/ray_on_spark/rl_pong/rl_pong.py | 3 +- .../rllib/multiagent_two_trainers.py | 32 +- .../tfpark/estimator/estimator_dataset.py | 25 +- .../tfpark/estimator/estimator_inception.py | 21 +- .../tfpark/gan/gan_train_and_evaluate.py | 27 +- .../example/tfpark/keras/keras_dataset.py | 40 +- .../example/tfpark/keras/keras_ndarray.py | 33 +- .../example/tfpark/tf_optimizer/evaluate.py | 53 ++- .../orca/example/tfpark/tf_optimizer/train.py | 39 +- 18 files changed, 564 insertions(+), 175 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 94c12d1233b..41106e3c11e 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -42,110 +42,313 @@ python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostRegressor now=$(date "+%s") time3=$((now-start)) -ray stop -f - -#echo "#4 Start rl_pong example" -#start=$(date "+%s") -#python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rl_pong/rl_pong.py --iterations 10 -#now=$(date "+%s") -#time4=$((now-start)) -# -#echo "#5 Start multiagent example" -#start=$(date "+%s") -#python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py --iterations 5 -#now=$(date "+%s") -#time5=$((now-start)) -# -#echo "#6 Start async_parameter example" -#start=$(date "+%s") -#python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py --iterations 10 -#now=$(date "+%s") -#time6=$((now-start)) -# -#echo "#7 Start sync_parameter example" -#start=$(date "+%s") -#python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py --iterations 10 -#now=$(date "+%s") -#time7=$((now-start)) -# -#echo "#8 Start mxnet lenet example" -#start=$(date "+%s") -# -## get_mnist_iterator in MXNet requires the data to be placed in the `data` folder of the running directory. -## The running directory of integration test is ${ANALYTICS_ZOO_ROOT}. 
-#if [ -f data/mnist.zip ] -#then -# echo "mnist.zip already exists" -#else -# wget -nv $FTP_URI/analytics-zoo-data/mnist.zip -P data -#fi -#unzip -q data/mnist.zip -d data -# -#python ${BIGDL_ROOT}/python/orca/example/learn/mxnet/lenet_mnist.py -e 1 -b 256 -#now=$(date "+%s") -#time8=$((now-start)) -# -#echo "#9 Start fashion_mnist example with Tensorboard visualization" -#start=$(date "+%s") -# -#if [ -d ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/data ] -#then -# echo "fashion-mnist already exists" -#else -# wget -nv $FTP_URI/analytics-zoo-data/data/fashion-mnist.zip -P ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/ -# unzip ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/fashion-mnist.zip -#fi -# -#sed "s/epochs=5/epochs=1/g;s/batch_size=4/batch_size=256/g" \ -# ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/fashion_mnist.py \ -# > ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/fashion_mnist_tmp.py -# -#python ${BIGDL_ROOT}/python/orca/example/learn/pytorch/fashion_mnist/fashion_mnist_tmp.py --backend torch_distributed -#now=$(date "+%s") -#time9=$((now-start)) -# -# -#echo "#10 start example for orca super-resolution" -#start=$(date "+%s") -# -#if [ ! -f BSDS300-images.tgz ]; then -# wget -nv $FTP_URI/analytics-zoo-data/BSDS300-images.tgz -#fi -#if [ ! -d dataset/BSDS300/images ]; then -# mkdir dataset -# tar -xzf BSDS300-images.tgz -C dataset -#fi -# -#python ${BIGDL_ROOT}/python/orca/example/learn/pytorch/super_resolution/super_resolution.py --backend torch_distributed -# -#now=$(date "+%s") -#time10=$((now-start)) -# -# -#echo "#11 start example for orca cifar10" -#start=$(date "+%s") -# -#if [ -d ${BIGDL_ROOT}/python/orca/example/learn/pytorch/cifar10/data ]; then -# echo "Cifar10 already exists" -#else -# wget -nv $FTP_URI/analytics-zoo-data/cifar10.zip -P ${BIGDL_ROOT}/python/orca/example/learn/pytorch/cifar10 -# unzip ${BIGDL_ROOT}/python/orca/example/learn/pytorch/cifar10/cifar10.zip -#fi -# -#python ${BIGDL_ROOT}/python/orca/example/learn/pytorch/cifar10/cifar10.py --backend torch_distributed -# -#now=$(date "+%s") -#time11=$((now-start)) +set -e + +echo "#4 start test for orca bigdl transformer" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ + --cluster_mode yarn-client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca transformer failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#4 Total time cost ${time} seconds" + + +echo "#5 start test for orca bigdl imageInference" +#timer +start=$(date "+%s") +if [ -f models/bigdl_inception-v1_imagenet_0.4.0.model ]; then + echo "analytics-zoo-models/bigdl_inception-v1_imagenet_0.4.0.model already exists." +else + wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ + -P models +fi +run the example +python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ + -m models/bigdl_inception-v1_imagenet_0.4.0.model \ + -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client +exit_status=$? 
+if [ $exit_status -ne 0 ]; then + clear_up + echo "orca imageInference failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#5 Total time cost ${time} seconds" + +echo "#6 start test for orca pytorch_estimator" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/horovod/pytorch_estimator.py --cluster_mode yarn-client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca pytorch_estimator failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#6 Total time cost ${time} seconds" + +# echo "#7 start test for orca simple_pytorch" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/horovod/simple_horovod_pytorch.py --cluster_mode yarn-client +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca simple_pytorch failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#7 Total time cost ${time} seconds" + +# echo "#8 start test for orca mxnet" +# #timer +# start=$(date "+%s") + +# # if [ -f ${BIGDL_ROOT}/data/mnist.zip ] +# # then +# # echo "mnist.zip already exists" +# # else +# # wget -nv $FTP_URI/analytics-zoo-data/mnist.zip -P ${BIGDL_ROOT}/data +# # fi +# # unzip -q ${BIGDL_ROOT}/data/mnist.zip -d ${BIGDL_ROOT}/data + +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/mxnet/lenet_mnist.py #--cluster_mode yarn-client +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca mxnet failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#8 Total time cost ${time} seconds" + +echo "#prepare dataset for ray_on_spark" +wget -nv $FTP_URI/analytics-zoo-data/mnist/train-labels-idx1-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/train-images-idx3-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-labels-idx1-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-images-idx3-ubyte.gz +zip ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/MNIST_data.zip train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz + +echo "#9 start test for orca ros async" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py \ + --iterations 20 --num_workers 2 --cluster_mode yarn +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca ros async failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#9 Total time cost ${time} seconds" + +echo "#10 start test for orca ros sync" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py \ + --iterations 20 --num_workers 2 --cluster_mode yarn +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca ros sync failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#10 Total time cost ${time} seconds" + +echo "#11 start test for orca rllib" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py \ + --iterations 5 \ + --cluster_mode yarn-client +exit_status=$? 
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca ros rllib failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#11 Total time cost ${time} seconds"
+
+echo "#12 start test for orca rl_pong"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rl_pong/rl_pong.py \
+    --iterations 5 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca ros rl_pong failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#12 Total time cost ${time} seconds"
+
+echo "#13 start test for orca tfpark keras_dataset"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/keras/keras_dataset.py \
+    --data_path ${HDFS_URI}/mnist \
+    --max_epoch 5 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark keras_dataset failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#13 Total time cost ${time} seconds"
+
+echo "#14 start test for orca tfpark keras_ndarray"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/keras/keras_ndarray.py \
+    --max_epoch 5 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark keras_ndarray failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#14 Total time cost ${time} seconds"
+
+# echo "#15 start test for orca tfpark gan"
+# #timer
+# start=$(date "+%s")
+# #run the example
+# python ${BIGDL_ROOT}/python/orca/example/tfpark/gan/gan_train_and_evaluate.py \
+#     --cluster_mode yarn-client
+# exit_status=$?
+# if [ $exit_status -ne 0 ]; then
+#     clear_up
+#     echo "orca tfpark gan failed"
+#     exit $exit_status
+# fi
+# now=$(date "+%s")
+# time=$((now - start))
+# echo "#15 Total time cost ${time} seconds"
+
+echo "#16 start test for orca tfpark estimator_dataset"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/estimator/estimator_dataset.py \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark estimator_dataset failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#16 Total time cost ${time} seconds"
+
+echo "#17 start test for orca tfpark estimator_inception"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/estimator/estimator_inception.py \
+    --image-path ${HDFS_URI}/dogs_cats \
+    --num-classes 2 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark estimator_inception failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#17 Total time cost ${time} seconds"
+
+echo "#18 start test for orca tfpark optimizer train"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/tf_optimizer/train.py \
+    --max_epoch 1 \
+    --data_num 1000 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark optimizer train failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#18 Total time cost ${time} seconds"
+
+echo "#19 start test for orca tfpark optimizer evaluate"
+#timer
+start=$(date "+%s")
+#run the example
+python ${BIGDL_ROOT}/python/orca/example/tfpark/tf_optimizer/evaluate.py \
+    --data_num 1000 \
+    --cluster_mode yarn-client
+exit_status=$?
+if [ $exit_status -ne 0 ]; then
+    clear_up
+    echo "orca tfpark optimizer evaluate failed"
+    exit $exit_status
+fi
+now=$(date "+%s")
+time=$((now - start))
+echo "#19 Total time cost ${time} seconds"
+
 echo "Ray example tests finished"
 echo "#1 auto-estimator-pytorch time used:$time1 seconds"
 echo "#2 auto-xgboost-classifier time used:$time2 seconds"
 echo "#3 auto-xgboost-regressor time used:$time3 seconds"
-#echo "#4 orca rl_pong time used:$time4 seconds"
-#echo "#5 orca async_parameter_server time used:$time5 seconds"
-#echo "#6 orca sync_parameter_server time used:$time6 seconds"
+echo "#4 bigdl transformer time used:$time4 seconds"
+echo "#5 bigdl imageInference time used:$time5 seconds"
+echo "#6 horovod pytorch_estimator time used:$time6 seconds"
 #echo "#7 orca multiagent_two_trainers time used:$time7 seconds"
 #echo "#8 mxnet_lenet time used:$time8 seconds"
-#echo "#9 fashion-mnist time used:$time9 seconds"
-#echo "#10 orca super-resolution example time used:$time10 seconds"
-#echo "#11 orca cifar10 example time used:$time11 seconds"
+echo "#9 parameter_server async time used:$time9 seconds"
+echo "#10 parameter_server sync example time used:$time10 seconds"
+echo "#11 rllib example time used:$time11 seconds"
+echo "#12 rl_pong example time used:$time12 seconds"
+echo "#13 tfpark keras_dataset example time used:$time13 seconds"
+echo "#14 tfpark keras_ndarray example time used:$time14 seconds"
+#echo "#15 tfpark gan_train_and_evaluate example time used:$time15 seconds"
+echo "#16 tfpark estimator_dataset example time used:$time16 seconds"
+echo "#17 tfpark estimator_inception example time used:$time17 seconds"
+echo "#18 tfpark opt_train example time used:$time18 seconds"
+echo "#19 tfpark opt_evaluate example time used:$time19 seconds"
diff --git a/python/orca/example/learn/bigdl/attention/transformer.py b/python/orca/example/learn/bigdl/attention/transformer.py
index dc5a3cf4f56..737860acd8e 100644
--- a/python/orca/example/learn/bigdl/attention/transformer.py
+++ b/python/orca/example/learn/bigdl/attention/transformer.py
@@ -14,7 +14,6 @@
 # limitations under the License.
# - import argparse import numpy as np from tensorflow.python.keras.datasets import imdb @@ -35,7 +34,7 @@ cluster_mode = args.cluster_mode conf = {"spark.executor.extraJavaOptions": "-Xss512m", "spark.driver.extraJavaOptions": "-Xss512m"} -max_features = 20000 +max_features = 2000 max_len = 200 if cluster_mode == "local": @@ -44,12 +43,19 @@ driver_memory="20g", conf=conf ) -elif cluster_mode == "yarn": - sc = init_orca_context(cluster_mode="yarn-client", num_nodes=8, cores=8, +elif cluster_mode.startswith("yarn"): + if cluster_mode == "yarn_client": + sc = init_orca_context(cluster_mode="yarn-client", num_nodes=8, cores=8, memory="100g", driver_memory="20g", conf=conf - ) + ) + else: + sc = init_orca_context(cluster_mode="yarn-cluster", num_nodes=8, cores=8, + memory="100g", + driver_memory="20g", + conf=conf + ) elif cluster_mode == "spark-submit": sc = init_orca_context(cluster_mode="spark-submit") else: @@ -106,4 +112,3 @@ print("finished...") stop_orca_context() - diff --git a/python/orca/example/learn/bigdl/imageInference/imageInference.py b/python/orca/example/learn/bigdl/imageInference/imageInference.py index 6db83b5c5d2..4f4d747ee60 100644 --- a/python/orca/example/learn/bigdl/imageInference/imageInference.py +++ b/python/orca/example/learn/bigdl/imageInference/imageInference.py @@ -20,6 +20,7 @@ from pyspark.sql.functions import col, udf from pyspark.sql.types import StringType, DoubleType +from bigdl.dllib.nncontext import * from bigdl.dllib.feature.image import * from bigdl.dllib.nnframes import * from bigdl.orca.learn.bigdl.estimator import Estimator @@ -53,7 +54,7 @@ def inference(image_path, model_path, batch_size, sc): help="training data path.") parser.add_option("--b", "--batch_size", type=int, dest="batch_size", default="56", help="The number of samples per gradient update. Default is 56.") - parser.add_option('--cluster_mode', type=str, default="local", + parser.add_option('--cluster_mode', type=str, dest="clusterMode", default="local", help='The mode for the Spark cluster. 
local, yarn or spark-submit.') (options, args) = parser.parse_args(sys.argv) @@ -69,8 +70,11 @@ def inference(image_path, model_path, batch_size, sc): cluster_mode = options.cluster_mode if cluster_mode == "local": sc = init_orca_context(memory="3g") - elif cluster_mode == "yarn": - sc = init_orca_context(cluster_mode="yarn-client", num_nodes=2, memory="3g") + elif cluster_mode.startswith("yarn"): + if cluster_mode == "yarn-client": + sc = init_orca_context(cluster_mode="yarn-client", num_nodes=2, memory="3g") + else: + sc = init_orca_context(cluster_mode="yarn-cluster", num_nodes=2, memory="3g") elif cluster_mode == "spark-submit": sc = init_orca_context(cluster_mode="spark-submit") else: diff --git a/python/orca/example/learn/horovod/pytorch_estimator.py b/python/orca/example/learn/horovod/pytorch_estimator.py index a0d39e1389c..b826524c160 100644 --- a/python/orca/example/learn/horovod/pytorch_estimator.py +++ b/python/orca/example/learn/horovod/pytorch_estimator.py @@ -132,9 +132,13 @@ def train_example(workers_per_node): if args.cluster_mode == "local": init_orca_context(cluster_mode="local", cores=args.cores, num_nodes=args.num_nodes, memory=args.memory) - elif args.cluster_mode == "yarn": - init_orca_context(cluster_mode="yarn-client", cores=args.cores, - num_nodes=args.num_nodes, memory=args.memory) + elif args.cluster_mode.startswith("yarn"): + if args.cluster_mode == "yarn-client": + init_orca_context(cluster_mode="yarn-client", cores=args.cores, + num_nodes=args.num_nodes, memory=args.memory) + else: + init_orca_context(cluster_mode="yarn-cluster", cores=args.cores, + num_nodes=args.num_nodes, memory=args.memory) elif args.cluster_mode == "k8s": if not args.k8s_master or not args.container_image \ or not args.k8s_driver_host or not args.k8s_driver_port: @@ -150,4 +154,3 @@ def train_example(workers_per_node): init_orca_context(cluster_mode="spark-submit") train_example(workers_per_node=args.workers_per_node) stop_orca_context() - diff --git a/python/orca/example/learn/horovod/simple_horovod_pytorch.py b/python/orca/example/learn/horovod/simple_horovod_pytorch.py index f2dcf12961c..9c043586c44 100644 --- a/python/orca/example/learn/horovod/simple_horovod_pytorch.py +++ b/python/orca/example/learn/horovod/simple_horovod_pytorch.py @@ -68,6 +68,12 @@ class AppURLopener(urllib.FancyURLopener): # Horovod: limit # of CPU threads to be used per worker. 
torch.set_num_threads(4) + # new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist' + # datasets.MNIST.resources = [ + # ('/'.join([new_mirror, url.split('/')[-1]]), md5) + # for url, md5 in datasets.MNIST.resources + # ] + kwargs = {} train_dataset = \ datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True, diff --git a/python/orca/example/learn/mxnet/lenet_mnist.py b/python/orca/example/learn/mxnet/lenet_mnist.py index 41cd5968683..9ffca119f3e 100644 --- a/python/orca/example/learn/mxnet/lenet_mnist.py +++ b/python/orca/example/learn/mxnet/lenet_mnist.py @@ -121,3 +121,4 @@ def get_metrics(config): epochs=opt.epochs, batch_size=opt.batch_size) estimator.shutdown() stop_orca_context() + \ No newline at end of file diff --git a/python/orca/example/learn/openvino/predict.py b/python/orca/example/learn/openvino/predict.py index c385f6de8d9..58e06178af0 100644 --- a/python/orca/example/learn/openvino/predict.py +++ b/python/orca/example/learn/openvino/predict.py @@ -55,7 +55,7 @@ def crop(img, w, h): if args.cluster_mode == "local": init_orca_context(cores=args.core_num, memory=args.memory) - elif args.cluster_mode == "yarn": + elif args.cluster_mode.startswith("yarn"): init_orca_context(cluster_mode=args.cluster_mode, cores=args.core_num, num_nodes=args.executor_num, memory=args.memory) elif args.cluster_mode == "spark-submit": @@ -78,4 +78,3 @@ def crop(img, w, h): assert result[1].shape == (args.data_num, 255, 26, 26) assert result[2].shape == (args.data_num, 255, 52, 52) stop_orca_context() - diff --git a/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py b/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py index 92e135d41c3..71dc245f891 100644 --- a/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py +++ b/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py @@ -24,8 +24,9 @@ import os import time +from python.orca.example.ray_on_spark.parameter_server import model import ray -import model +#import model from bigdl.orca import init_orca_context, stop_orca_context from bigdl.orca import OrcaContext @@ -97,7 +98,7 @@ def worker_task(ps, worker_index, batch_size=50): if __name__ == "__main__": args = parser.parse_args() cluster_mode = args.cluster_mode - if cluster_mode == "yarn": + if cluster_mode.startswith("yarn"): sc = init_orca_context(cluster_mode=cluster_mode, cores=args.executor_cores, memory=args.executor_memory, @@ -106,8 +107,7 @@ def worker_task(ps, worker_index, batch_size=50): driver_memory=args.driver_memory, driver_cores=args.driver_cores, extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, - object_store_memory=args.object_store_memory, - additional_archive="MNIST_data.zip#MNIST_data") + object_store_memory=args.object_store_memory) ray_ctx = OrcaContext.get_ray_context() elif cluster_mode == "local": sc = init_orca_context(cores=args.driver_cores) @@ -143,4 +143,3 @@ def worker_task(ps, worker_index, batch_size=50): time.sleep(1) ray_ctx.stop() stop_orca_context() - diff --git a/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py b/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py index dab0fda1a89..6a22d3aba7f 100644 --- a/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py +++ b/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py @@ -22,9 +22,10 @@ import argparse import os +from python.orca.example.ray_on_spark.parameter_server import model import numpy 
as np import ray -import model +#import model from bigdl.orca import init_orca_context, stop_orca_context from bigdl.orca import OrcaContext @@ -88,7 +89,7 @@ def compute_gradients(self, weights): if __name__ == "__main__": args = parser.parse_args() cluster_mode = args.cluster_mode - if cluster_mode == "yarn": + if cluster_mode.startswith("yarn"): sc = init_orca_context(cluster_mode=cluster_mode, cores=args.executor_cores, memory=args.executor_memory, @@ -139,4 +140,3 @@ def compute_gradients(self, weights): i += 1 ray_ctx.stop() stop_orca_context() - diff --git a/python/orca/example/ray_on_spark/rl_pong/rl_pong.py b/python/orca/example/ray_on_spark/rl_pong/rl_pong.py index e1cfc0116ba..4acab920e45 100644 --- a/python/orca/example/ray_on_spark/rl_pong/rl_pong.py +++ b/python/orca/example/ray_on_spark/rl_pong/rl_pong.py @@ -210,7 +210,7 @@ def compute_gradient(self, model): args = parser.parse_args() cluster_mode = args.cluster_mode - if cluster_mode == "yarn": + if cluster_mode.startswith("yarn"): sc = init_orca_context(cluster_mode=cluster_mode, cores=args.executor_cores, memory=args.executor_memory, @@ -282,4 +282,3 @@ def compute_gradient(self, model): ray_ctx.stop() stop_orca_context() - diff --git a/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py b/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py index 5058e9e134f..3c73f8a356c 100644 --- a/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py +++ b/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py @@ -72,16 +72,27 @@ if __name__ == "__main__": args = parser.parse_args() cluster_mode = args.cluster_mode - if cluster_mode == "yarn": - sc = init_orca_context(cluster_mode="yarn", - cores=args.executor_cores, - memory=args.executor_memory, - init_ray_on_spark=True, - driver_memory=args.driver_memory, - driver_cores=args.driver_cores, - num_executors=args.slave_num, - extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, - object_store_memory=args.object_store_memory) + if cluster_mode.startswith("yarn"): + if cluster_mode == "yarn-client": + sc = init_orca_context(cluster_mode="yarn-client", + cores=args.executor_cores, + memory=args.executor_memory, + init_ray_on_spark=True, + driver_memory=args.driver_memory, + driver_cores=args.driver_cores, + num_executors=args.slave_num, + extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, + object_store_memory=args.object_store_memory) + else: + sc = init_orca_context(cluster_mode="yarn-cluster", + cores=args.executor_cores, + memory=args.executor_memory, + init_ray_on_spark=True, + driver_memory=args.driver_memory, + driver_cores=args.driver_cores, + num_executors=args.slave_num, + extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, + object_store_memory=args.object_store_memory) ray_ctx = OrcaContext.get_ray_context() elif cluster_mode == "local": sc = init_orca_context(cores=args.driver_cores) @@ -159,4 +170,3 @@ def policy_mapping_fn(agent_id): ray_ctx.stop() stop_orca_context() - diff --git a/python/orca/example/tfpark/estimator/estimator_dataset.py b/python/orca/example/tfpark/estimator/estimator_dataset.py index 294dafff5e4..68f277d7160 100644 --- a/python/orca/example/tfpark/estimator/estimator_dataset.py +++ b/python/orca/example/tfpark/estimator/estimator_dataset.py @@ -19,7 +19,15 @@ from bigdl.dllib.nncontext import init_nncontext from bigdl.orca.tfpark import TFDataset, TFEstimator from bigdl.orca.tfpark import ZooOptimizer +from bigdl.dllib.utils.common import * +import os +import 
argparse + +parser = argparse.ArgumentParser(description="Run the tfpark keras " + "dataset example.") +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. local, yarn or spark-submit.') def get_data(dataset): from bigdl.dllib.feature.dataset import mnist @@ -29,7 +37,22 @@ def get_data(dataset): def main(): - sc = init_nncontext() + args = parser.parse_args() + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() def model_fn(features, labels, mode): from nets import lenet diff --git a/python/orca/example/tfpark/estimator/estimator_inception.py b/python/orca/example/tfpark/estimator/estimator_inception.py index cbbbbc9b727..dec3dc61cf3 100644 --- a/python/orca/example/tfpark/estimator/estimator_inception.py +++ b/python/orca/example/tfpark/estimator/estimator_inception.py @@ -14,6 +14,8 @@ # limitations under the License. # from optparse import OptionParser +import sys +import os import tensorflow as tf @@ -23,11 +25,26 @@ from bigdl.dllib.feature.image.imageset import * from bigdl.orca.tfpark import TFDataset, TFEstimator from bigdl.orca.tfpark import ZooOptimizer +from bigdl.dllib.utils.common import * def main(option): batch_size = 16 if not option.batch_size else int(option.batch_size) - sc = init_nncontext() + cluster_mode = options.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() def input_fn(mode, params): @@ -88,6 +105,8 @@ def model_fn(features, labels, mode, params): parser.add_option("--image-path", dest="image_path") parser.add_option("--num-classes", dest="num_classes") parser.add_option("--batch_size", dest="batch_size") + parser.add_option('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. 
local, yarn or spark-submit.') (options, args) = parser.parse_args(sys.argv) main(options) diff --git a/python/orca/example/tfpark/gan/gan_train_and_evaluate.py b/python/orca/example/tfpark/gan/gan_train_and_evaluate.py index 6e6a4a32cd8..9e8b842041b 100644 --- a/python/orca/example/tfpark/gan/gan_train_and_evaluate.py +++ b/python/orca/example/tfpark/gan/gan_train_and_evaluate.py @@ -19,6 +19,7 @@ from bigdl.dllib.nncontext import init_nncontext from bigdl.orca.tfpark import TFDataset from bigdl.orca.tfpark import ZooOptimizer +from bigdl.dllib.utils.common import * import numpy as np import matplotlib.pyplot as plt @@ -26,9 +27,15 @@ from tensorflow_gan.python.losses.losses_impl import * import tensorflow_datasets as tfds +import os +import argparse + MODEL_DIR = "/tmp/gan_model" NOISE_DIM = 64 +parser = argparse.ArgumentParser() +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. local, yarn or spark-submit.') def eval(): @@ -53,7 +60,23 @@ def eval(): if __name__ == "__main__": - sc = init_nncontext() + conf = {} + args = parser.parse_args() + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() def input_fn(): def map_func(data): @@ -67,7 +90,7 @@ def map_func(data): ds = tfds.load("mnist", split="train") ds = ds.map(map_func) - dataset = TFDataset.from_tf_data_dataset(ds, batch_size=36) + dataset = TFDataset.from_tf_data_dataset(ds, batch_size=56) return dataset opt = GANEstimator( diff --git a/python/orca/example/tfpark/keras/keras_dataset.py b/python/orca/example/tfpark/keras/keras_dataset.py index ae9f477621c..a268971b8ec 100644 --- a/python/orca/example/tfpark/keras/keras_dataset.py +++ b/python/orca/example/tfpark/keras/keras_dataset.py @@ -13,17 +13,28 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import sys +import argparse +import os import tensorflow as tf import numpy as np from bigdl.dllib.nncontext import init_nncontext from bigdl.orca.tfpark import KerasModel, TFDataset +from bigdl.dllib.utils.common import * +parser = argparse.ArgumentParser(description="Run the tfpark keras " + "dataset example.") +parser.add_argument('--data_path', type=str, default='/tmp/mnist', + help='training data path.') +parser.add_argument('--max_epoch', type=int, default=5, + help='Set max_epoch for training, it should be integer.') +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. 
local, yarn or spark-submit.') def get_data_rdd(dataset, sc): + data_path = args.data_path from bigdl.dllib.feature.dataset import mnist - (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset) + (images_data, labels_data) = mnist.read_data_sets(data_path, dataset) image_rdd = sc.parallelize(images_data) labels_rdd = sc.parallelize(labels_data) rdd = image_rdd.zip(labels_rdd) \ @@ -31,9 +42,25 @@ def get_data_rdd(dataset, sc): np.array(rec_tuple[1]))) return rdd - def main(max_epoch): - sc = init_nncontext() + args = parser.parse_args() + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \ + .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() training_rdd = get_data_rdd("train", sc) testing_rdd = get_data_rdd("test", sc) @@ -79,8 +106,7 @@ def main(max_epoch): if __name__ == '__main__': - max_epoch = 5 + args = parser.parse_args() + max_epoch = args.max_epoch - if len(sys.argv) > 1: - max_epoch = int(sys.argv[1]) main(max_epoch) diff --git a/python/orca/example/tfpark/keras/keras_ndarray.py b/python/orca/example/tfpark/keras/keras_ndarray.py index 9694f696095..8c5219ff1a7 100644 --- a/python/orca/example/tfpark/keras/keras_ndarray.py +++ b/python/orca/example/tfpark/keras/keras_ndarray.py @@ -13,16 +13,40 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import sys +import argparse +import os import tensorflow as tf from bigdl.dllib.nncontext import init_nncontext from bigdl.dllib.feature.dataset import mnist from bigdl.orca.tfpark import KerasModel +from bigdl.dllib.utils.common import * +parser = argparse.ArgumentParser(description="Run the tfpark keras " + "dataset example.") +parser.add_argument('--max_epoch', type=int, default=5, + help='Set max_epoch for training, it should be integer.') +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. local, yarn or spark-submit.') def main(max_epoch): - _ = init_nncontext() + args = parser.parse_args() + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. 
Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + _ = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + _ = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + _ = init_nncontext() + (training_images_data, training_labels_data) = mnist.read_data_sets("/tmp/mnist", "train") (testing_images_data, testing_labels_data) = mnist.read_data_sets("/tmp/mnist", "test") @@ -65,8 +89,7 @@ def main(max_epoch): if __name__ == '__main__': - max_epoch = 5 + args = parser.parse_args() + max_epoch = args.max_epoch - if len(sys.argv) > 1: - max_epoch = int(sys.argv[1]) main(max_epoch) diff --git a/python/orca/example/tfpark/tf_optimizer/evaluate.py b/python/orca/example/tfpark/tf_optimizer/evaluate.py index 570f3ea874f..365b00f877e 100644 --- a/python/orca/example/tfpark/tf_optimizer/evaluate.py +++ b/python/orca/example/tfpark/tf_optimizer/evaluate.py @@ -13,12 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from optparse import OptionParser +import argparse import tensorflow as tf from bigdl.dllib.nncontext import init_nncontext from bigdl.orca.tfpark import TFDataset, TFPredictor +from bigdl.dllib.utils.common import * + import numpy as np import sys +import os from bigdl.dllib.feature.dataset import mnist @@ -27,11 +30,35 @@ slim = tf.contrib.slim - -def main(options, data_num): - - data_path = '/tmp/mnist' if not options.data_path else options.data_path - sc = init_nncontext() +parser = argparse.ArgumentParser(description="Run the tfpark keras " + "dataset example.") +parser.add_argument('--data_num', type=int, default=10000, + help='Set data_num for evaluation, it should be integer.') +parser.add_argument("--data_path", type=str, default='/tmp/mnist', + help='Assert the data_path for evaluation' ) +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. local, yarn or spark-submit.') + +def main(data_num): + + data_path = '/tmp/mnist' if not args.data_path else args.data_path + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. 
Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \ + .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() # get data, pre-process and create TFDataset (images_data, labels_data) = mnist.read_data_sets(data_path, "test") @@ -65,13 +92,7 @@ def main(options, data_num): if __name__ == '__main__': - data_num = 10000 - - if len(sys.argv) > 1: - data_num = int(sys.argv[1]) - - parser = OptionParser() - parser.add_option("--data_path", dest="data_path") - (options, args) = parser.parse_args(sys.argv) - - main(options, data_num) + args = parser.parse_args() + data_num = args.data_num + + main(data_num) diff --git a/python/orca/example/tfpark/tf_optimizer/train.py b/python/orca/example/tfpark/tf_optimizer/train.py index 1c8ed563e95..b067dfee43b 100644 --- a/python/orca/example/tfpark/tf_optimizer/train.py +++ b/python/orca/example/tfpark/tf_optimizer/train.py @@ -17,17 +17,29 @@ from bigdl.dllib.nncontext import init_nncontext from bigdl.orca.tfpark import TFOptimizer, TFDataset from bigdl.dllib.optim.optimizer import * +from bigdl.dllib.utils.common import * import numpy as np -import sys from bigdl.dllib.feature.dataset import mnist from bigdl.dllib.feature.dataset.transformer import * +import os +import sys +import argparse + sys.path.append("/tmp/models/slim") # add the slim library from nets import lenet slim = tf.contrib.slim +parser = argparse.ArgumentParser(description="Run the tfpark keras " + "dataset example.") +parser.add_argument('--max_epoch', type=int, default=5, + help='Set max_epoch for training, it should be integer.') +parser.add_argument('--data_num', type=int, default=60000, + help='Set data_num for training, it should be integer.') +parser.add_argument('--cluster_mode', type=str, default="local", + help='The mode for the Spark cluster. local, yarn or spark-submit.') def accuracy(logits, labels): predictions = tf.argmax(logits, axis=1, output_type=labels.dtype) @@ -36,7 +48,22 @@ def accuracy(logits, labels): def main(max_epoch, data_num): - sc = init_nncontext() + args = parser.parse_args() + cluster_mode = args.cluster_mode + if cluster_mode.startswith("yarn"): + hadoop_conf = os.environ.get("HADOOP_CONF_DIR") + assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. 
Please " \ + "set the environment variable HADOOP_CONF_DIR" + spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ + .set("spark.executor.cores", 2) \ + .set("spark.executor.instances", 2) \ + .set("spark.driver.memory", "2g") + if cluster_mode == "yarn-client": + sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) + else: + sc = init_nncontext() # get data, pre-process and create TFDataset (train_images_data, train_labels_data) = mnist.read_data_sets("/tmp/mnist", "train") @@ -73,10 +100,8 @@ def main(max_epoch, data_num): if __name__ == '__main__': - max_epoch = 5 - data_num = 60000 + args = parser.parse_args() + max_epoch = args.max_epoch + data_num = args.data_num - if len(sys.argv) > 1: - max_epoch = int(sys.argv[1]) - data_num = int(sys.argv[2]) main(max_epoch, data_num) From 8f1ce0564fbd932a241f67b0b86fca7e98e9d37d Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 12:29:47 +0800 Subject: [PATCH 02/11] no horovod --- .../run-example-test-ray-integration.sh | 104 +++++++++--------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 41106e3c11e..b80b6111120 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -29,7 +29,7 @@ python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostClassifie now=$(date "+%s") time2=$((now-start)) -echo "#3 Start autoxgboost example" +echo "#3 Start autoxgboost example"s if [ -f ${BIGDL_ROOT}/data/incd.csv ] then echo "incd.csv already exists" @@ -49,7 +49,7 @@ echo "#4 start test for orca bigdl transformer" start=$(date "+%s") #run the example python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ - --cluster_mode yarn-client + --cluster_mode yarn_client exit_status=$? if [ $exit_status -ne 0 ]; then clear_up @@ -84,20 +84,20 @@ now=$(date "+%s") time=$((now - start)) echo "#5 Total time cost ${time} seconds" -echo "#6 start test for orca pytorch_estimator" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/learn/horovod/pytorch_estimator.py --cluster_mode yarn-client -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca pytorch_estimator failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#6 Total time cost ${time} seconds" +# echo "#6 start test for orca pytorch_estimator" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/horovod/pytorch_estimator.py --cluster_mode yarn-client +# exit_status=$? 
+# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca pytorch_estimator failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#6 Total time cost ${time} seconds" # echo "#7 start test for orca simple_pytorch" # #timer @@ -138,44 +138,44 @@ echo "#6 Total time cost ${time} seconds" # time=$((now - start)) # echo "#8 Total time cost ${time} seconds" -echo "#prepare dataset for ray_on_spark" -wget -nv $FTP_URI/analytics-zoo-data/mnist/train-labels-idx1-ubyte.gz -wget -nv $FTP_URI/analytics-zoo-data/mnist/train-images-idx3-ubyte.gz -wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-labels-idx1-ubyte.gz -wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-images-idx3-ubyte.gz -zip ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/MNIST_data.zip train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz +# echo "#prepare dataset for ray_on_spark" +# wget -nv $FTP_URI/analytics-zoo-data/mnist/train-labels-idx1-ubyte.gz +# wget -nv $FTP_URI/analytics-zoo-data/mnist/train-images-idx3-ubyte.gz +# wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-labels-idx1-ubyte.gz +# wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-images-idx3-ubyte.gz +# zip ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/MNIST_data.zip train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz -echo "#9 start test for orca ros async" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py \ - --iterations 20 --num_workers 2 --cluster_mode yarn -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca ros async failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#9 Total time cost ${time} seconds" +# echo "#9 start test for orca ros async" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py \ +# --iterations 20 --num_workers 2 --cluster_mode yarn +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca ros async failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#9 Total time cost ${time} seconds" -echo "#10 start test for orca ros sync" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py \ - --iterations 20 --num_workers 2 --cluster_mode yarn -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca ros sync failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#10 Total time cost ${time} seconds" +# echo "#10 start test for orca ros sync" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py \ +# --iterations 20 --num_workers 2 --cluster_mode yarn +# exit_status=$? 
+# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca ros sync failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#10 Total time cost ${time} seconds" echo "#11 start test for orca rllib" #timer From 120c2103a1f9e748f42a87638b9a64cfed565aac Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 13:12:32 +0800 Subject: [PATCH 03/11] skip automl --- .../run-example-test-ray-integration.sh | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index b80b6111120..0f809d76bbf 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -9,40 +9,40 @@ clear_up () { set -e -echo "#start orca ray example tests" -echo "#1 Start autoestimator example" -start=$(date "+%s") -python ${BIGDL_ROOT}/python/orca/example/automl/autoestimator/autoestimator_pytorch.py --trials 5 --epochs 2 --cluster_mode yarn -now=$(date "+%s") -time1=$((now-start)) +# echo "#start orca ray example tests" +# echo "#1 Start autoestimator example" +# start=$(date "+%s") +# python ${BIGDL_ROOT}/python/orca/example/automl/autoestimator/autoestimator_pytorch.py --trials 5 --epochs 2 --cluster_mode yarn +# now=$(date "+%s") +# time1=$((now-start)) -echo "#2 Start autoxgboost example" -if [ -f ${BIGDL_ROOT}/data/airline_14col.data ] -then - echo "airline_14col.data already exists" -else - wget -nv $FTP_URI/analytics-zoo-data/airline_14col.data -P ${BIGDL_ROOT}/data/ -fi +# echo "#2 Start autoxgboost example" +# if [ -f ${BIGDL_ROOT}/data/airline_14col.data ] +# then +# echo "airline_14col.data already exists" +# else +# wget -nv $FTP_URI/analytics-zoo-data/airline_14col.data -P ${BIGDL_ROOT}/data/ +# fi -start=$(date "+%s") -python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostClassifier.py -p ${BIGDL_ROOT}/data/airline_14col.data --cluster_mode yarn -now=$(date "+%s") -time2=$((now-start)) +# start=$(date "+%s") +# python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostClassifier.py -p ${BIGDL_ROOT}/data/airline_14col.data --cluster_mode yarn +# now=$(date "+%s") +# time2=$((now-start)) -echo "#3 Start autoxgboost example"s -if [ -f ${BIGDL_ROOT}/data/incd.csv ] -then - echo "incd.csv already exists" -else - wget -nv $FTP_URI/analytics-zoo-data/incd.csv -P ${BIGDL_ROOT}/data/ -fi +# echo "#3 Start autoxgboost example" +# if [ -f ${BIGDL_ROOT}/data/incd.csv ] +# then +# echo "incd.csv already exists" +# else +# wget -nv $FTP_URI/analytics-zoo-data/incd.csv -P ${BIGDL_ROOT}/data/ +# fi -start=$(date "+%s") -python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostRegressor.py -p ${BIGDL_ROOT}/data/incd.csv --cluster_mode yarn -now=$(date "+%s") -time3=$((now-start)) +# start=$(date "+%s") +# python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostRegressor.py -p ${BIGDL_ROOT}/data/incd.csv --cluster_mode yarn +# now=$(date "+%s") +# time3=$((now-start)) -set -e +# set -e echo "#4 start test for orca bigdl transformer" #timer From a0709cc5c99dd195919bc71e1c1abc680dfcd22c Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 13:43:21 +0800 Subject: [PATCH 04/11] update --- .../run-example-test-ray-integration.sh | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh 
b/python/orca/dev/example/run-example-test-ray-integration.sh index 0f809d76bbf..0c0927c9179 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -44,21 +44,21 @@ set -e # set -e -echo "#4 start test for orca bigdl transformer" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ - --cluster_mode yarn_client -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca transformer failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#4 Total time cost ${time} seconds" +# echo "#4 start test for orca bigdl transformer" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ +# --cluster_mode yarn_client +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca transformer failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#4 Total time cost ${time} seconds" echo "#5 start test for orca bigdl imageInference" From 8d246b7d355f86100b5f538d7c70cfe0266df252 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 13:46:37 +0800 Subject: [PATCH 05/11] update --- python/orca/dev/example/run-example-test-ray-integration.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 0c0927c9179..8db128c1967 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -70,7 +70,7 @@ else wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ -P models fi -run the example +#run the example python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ -m models/bigdl_inception-v1_imagenet_0.4.0.model \ -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client From 5e65e24d63b2389444db93c85b3a476ff179ddb0 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 13:52:08 +0800 Subject: [PATCH 06/11] hot fix imageInference --- .../orca/example/learn/bigdl/imageInference/imageInference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/orca/example/learn/bigdl/imageInference/imageInference.py b/python/orca/example/learn/bigdl/imageInference/imageInference.py index 4f4d747ee60..7714814afba 100644 --- a/python/orca/example/learn/bigdl/imageInference/imageInference.py +++ b/python/orca/example/learn/bigdl/imageInference/imageInference.py @@ -54,7 +54,7 @@ def inference(image_path, model_path, batch_size, sc): help="training data path.") parser.add_option("--b", "--batch_size", type=int, dest="batch_size", default="56", help="The number of samples per gradient update. Default is 56.") - parser.add_option('--cluster_mode', type=str, dest="clusterMode", default="local", + parser.add_option('--cluster_mode', type=str, dest="cluster_mode", default="local", help='The mode for the Spark cluster. 
local, yarn or spark-submit.') (options, args) = parser.parse_args(sys.argv) From 76b7cbae71950e0ff9c21d80d9728d18f1aa46ea Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 15:02:45 +0800 Subject: [PATCH 07/11] new test --- .../run-example-test-ray-integration.sh | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 8db128c1967..715b625e43e 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -42,23 +42,21 @@ set -e # now=$(date "+%s") # time3=$((now-start)) -# set -e - -# echo "#4 start test for orca bigdl transformer" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ -# --cluster_mode yarn_client -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca transformer failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#4 Total time cost ${time} seconds" +echo "#4 start test for orca bigdl transformer" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ + --cluster_mode yarn_client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca transformer failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#4 Total time cost ${time} seconds" echo "#5 start test for orca bigdl imageInference" From 0e4f808a1e3b3f015b94adef5bc8fb17df59bab6 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 17:27:32 +0800 Subject: [PATCH 08/11] ros test --- .../run-example-test-ray-integration.sh | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 715b625e43e..6bdfa49c1ae 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -42,45 +42,45 @@ set -e # now=$(date "+%s") # time3=$((now-start)) -echo "#4 start test for orca bigdl transformer" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ - --cluster_mode yarn_client -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca transformer failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#4 Total time cost ${time} seconds" +# echo "#4 start test for orca bigdl transformer" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ +# --cluster_mode yarn_client +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca transformer failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#4 Total time cost ${time} seconds" -echo "#5 start test for orca bigdl imageInference" -#timer -start=$(date "+%s") -if [ -f models/bigdl_inception-v1_imagenet_0.4.0.model ]; then - echo "analytics-zoo-models/bigdl_inception-v1_imagenet_0.4.0.model already exists." 
-else - wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ - -P models -fi -#run the example -python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ - -m models/bigdl_inception-v1_imagenet_0.4.0.model \ - -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca imageInference failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#5 Total time cost ${time} seconds" +# echo "#5 start test for orca bigdl imageInference" +# #timer +# start=$(date "+%s") +# if [ -f models/bigdl_inception-v1_imagenet_0.4.0.model ]; then +# echo "analytics-zoo-models/bigdl_inception-v1_imagenet_0.4.0.model already exists." +# else +# wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ +# -P models +# fi +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ +# -m models/bigdl_inception-v1_imagenet_0.4.0.model \ +# -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client +# exit_status=$? +# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca imageInference failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#5 Total time cost ${time} seconds" # echo "#6 start test for orca pytorch_estimator" # #timer From 6889b1b4bc1df1636fc35e9b7d99088e772ccf5b Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 17:41:34 +0800 Subject: [PATCH 09/11] update --- .../run-example-test-ray-integration.sh | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 6bdfa49c1ae..3a5724ec0d5 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -175,22 +175,22 @@ set -e # time=$((now - start)) # echo "#10 Total time cost ${time} seconds" -echo "#11 start test for orca rllib" -#timer -start=$(date "+%s") -#run the example -python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py \ - --iterations 5 \ - --cluster_mode yarn-client -exit_status=$? -if [ $exit_status -ne 0 ]; then - clear_up - echo "orca ros rllib failed" - exit $exit_status -fi -now=$(date "+%s") -time=$((now - start)) -echo "#11 Total time cost ${time} seconds" +# echo "#11 start test for orca rllib" +# #timer +# start=$(date "+%s") +# #run the example +# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py \ +# --iterations 5 \ +# --cluster_mode yarn-client +# exit_status=$? 
+# if [ $exit_status -ne 0 ]; then +# clear_up +# echo "orca ros rllib failed" +# exit $exit_status +# fi +# now=$(date "+%s") +# time=$((now - start)) +# echo "#11 Total time cost ${time} seconds" echo "#12 start test for orca rl_pong" #timer From 1005238fa583328fec60274bc08a5fbdc6f72c2d Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 18:27:33 +0800 Subject: [PATCH 10/11] update --- .../run-example-test-ray-integration.sh | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 3a5724ec0d5..6bdfa49c1ae 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -175,22 +175,22 @@ set -e # time=$((now - start)) # echo "#10 Total time cost ${time} seconds" -# echo "#11 start test for orca rllib" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py \ -# --iterations 5 \ -# --cluster_mode yarn-client -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca ros rllib failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#11 Total time cost ${time} seconds" +echo "#11 start test for orca rllib" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/rllib/multiagent_two_trainers.py \ + --iterations 5 \ + --cluster_mode yarn-client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca ros rllib failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#11 Total time cost ${time} seconds" echo "#12 start test for orca rl_pong" #timer From 6b73caa9fd03e0b93791480dda31d2880915aa42 Mon Sep 17 00:00:00 2001 From: sgwhat Date: Tue, 23 Nov 2021 18:41:44 +0800 Subject: [PATCH 11/11] update --- .../run-example-test-ray-integration.sh | 231 +++++++++--------- 1 file changed, 115 insertions(+), 116 deletions(-) diff --git a/python/orca/dev/example/run-example-test-ray-integration.sh b/python/orca/dev/example/run-example-test-ray-integration.sh index 6bdfa49c1ae..9645e77591e 100644 --- a/python/orca/dev/example/run-example-test-ray-integration.sh +++ b/python/orca/dev/example/run-example-test-ray-integration.sh @@ -9,93 +9,92 @@ clear_up () { set -e -# echo "#start orca ray example tests" -# echo "#1 Start autoestimator example" -# start=$(date "+%s") -# python ${BIGDL_ROOT}/python/orca/example/automl/autoestimator/autoestimator_pytorch.py --trials 5 --epochs 2 --cluster_mode yarn -# now=$(date "+%s") -# time1=$((now-start)) - -# echo "#2 Start autoxgboost example" -# if [ -f ${BIGDL_ROOT}/data/airline_14col.data ] -# then -# echo "airline_14col.data already exists" -# else -# wget -nv $FTP_URI/analytics-zoo-data/airline_14col.data -P ${BIGDL_ROOT}/data/ -# fi +echo "#start orca ray example tests" +echo "#1 Start autoestimator example" +start=$(date "+%s") +python ${BIGDL_ROOT}/python/orca/example/automl/autoestimator/autoestimator_pytorch.py --trials 5 --epochs 2 --cluster_mode yarn +now=$(date "+%s") +time1=$((now-start)) -# start=$(date "+%s") -# python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostClassifier.py -p ${BIGDL_ROOT}/data/airline_14col.data --cluster_mode yarn -# now=$(date "+%s") -# time2=$((now-start)) +echo "#2 Start autoxgboost example" +if [ -f ${BIGDL_ROOT}/data/airline_14col.data ] +then 
+ echo "airline_14col.data already exists" +else + wget -nv $FTP_URI/analytics-zoo-data/airline_14col.data -P ${BIGDL_ROOT}/data/ +fi -# echo "#3 Start autoxgboost example" -# if [ -f ${BIGDL_ROOT}/data/incd.csv ] -# then -# echo "incd.csv already exists" -# else -# wget -nv $FTP_URI/analytics-zoo-data/incd.csv -P ${BIGDL_ROOT}/data/ -# fi +start=$(date "+%s") +python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostClassifier.py -p ${BIGDL_ROOT}/data/airline_14col.data --cluster_mode yarn +now=$(date "+%s") +time2=$((now-start)) -# start=$(date "+%s") -# python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostRegressor.py -p ${BIGDL_ROOT}/data/incd.csv --cluster_mode yarn -# now=$(date "+%s") -# time3=$((now-start)) +echo "#3 Start autoxgboost example" +if [ -f ${BIGDL_ROOT}/data/incd.csv ] +then + echo "incd.csv already exists" +else + wget -nv $FTP_URI/analytics-zoo-data/incd.csv -P ${BIGDL_ROOT}/data/ +fi -# echo "#4 start test for orca bigdl transformer" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ -# --cluster_mode yarn_client -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca transformer failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#4 Total time cost ${time} seconds" +start=$(date "+%s") +python ${BIGDL_ROOT}/python/orca/example/automl/autoxgboost/AutoXGBoostRegressor.py -p ${BIGDL_ROOT}/data/incd.csv --cluster_mode yarn +now=$(date "+%s") +time3=$((now-start)) +echo "#4 start test for orca bigdl transformer" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py \ + --cluster_mode yarn_client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca transformer failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#4 Total time cost ${time} seconds" -# echo "#5 start test for orca bigdl imageInference" -# #timer -# start=$(date "+%s") -# if [ -f models/bigdl_inception-v1_imagenet_0.4.0.model ]; then -# echo "analytics-zoo-models/bigdl_inception-v1_imagenet_0.4.0.model already exists." -# else -# wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ -# -P models -# fi -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ -# -m models/bigdl_inception-v1_imagenet_0.4.0.model \ -# -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca imageInference failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#5 Total time cost ${time} seconds" +echo "#5 start test for orca bigdl imageInference" +#timer +start=$(date "+%s") +if [ -f models/bigdl_inception-v1_imagenet_0.4.0.model ]; then + echo "analytics-zoo-models/bigdl_inception-v1_imagenet_0.4.0.model already exists." +else + wget -nv $FTP_URI/analytics-zoo-models/image-classification/bigdl_inception-v1_imagenet_0.4.0.model \ + -P models +fi +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/imageInference/imageInference.py \ + -m models/bigdl_inception-v1_imagenet_0.4.0.model \ + -f ${HDFS_URI}/kaggle/train_100 --cluster_mode yarn-client +exit_status=$? 
+if [ $exit_status -ne 0 ]; then + clear_up + echo "orca imageInference failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#5 Total time cost ${time} seconds" -# echo "#6 start test for orca pytorch_estimator" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/learn/horovod/pytorch_estimator.py --cluster_mode yarn-client -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca pytorch_estimator failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#6 Total time cost ${time} seconds" +echo "#6 start test for orca pytorch_estimator" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/learn/horovod/pytorch_estimator.py --cluster_mode yarn-client +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca pytorch_estimator failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#6 Total time cost ${time} seconds" # echo "#7 start test for orca simple_pytorch" # #timer @@ -136,44 +135,44 @@ set -e # time=$((now - start)) # echo "#8 Total time cost ${time} seconds" -# echo "#prepare dataset for ray_on_spark" -# wget -nv $FTP_URI/analytics-zoo-data/mnist/train-labels-idx1-ubyte.gz -# wget -nv $FTP_URI/analytics-zoo-data/mnist/train-images-idx3-ubyte.gz -# wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-labels-idx1-ubyte.gz -# wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-images-idx3-ubyte.gz -# zip ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/MNIST_data.zip train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz +echo "#prepare dataset for ray_on_spark" +wget -nv $FTP_URI/analytics-zoo-data/mnist/train-labels-idx1-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/train-images-idx3-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-labels-idx1-ubyte.gz +wget -nv $FTP_URI/analytics-zoo-data/mnist/t10k-images-idx3-ubyte.gz +zip ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/MNIST_data.zip train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz -# echo "#9 start test for orca ros async" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py \ -# --iterations 20 --num_workers 2 --cluster_mode yarn -# exit_status=$? -# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca ros async failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#9 Total time cost ${time} seconds" +echo "#9 start test for orca ros async" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/async_parameter_server.py \ + --iterations 20 --num_workers 2 --cluster_mode yarn +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca ros async failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#9 Total time cost ${time} seconds" -# echo "#10 start test for orca ros sync" -# #timer -# start=$(date "+%s") -# #run the example -# python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py \ -# --iterations 20 --num_workers 2 --cluster_mode yarn -# exit_status=$? 
-# if [ $exit_status -ne 0 ]; then -# clear_up -# echo "orca ros sync failed" -# exit $exit_status -# fi -# now=$(date "+%s") -# time=$((now - start)) -# echo "#10 Total time cost ${time} seconds" +echo "#10 start test for orca ros sync" +#timer +start=$(date "+%s") +#run the example +python ${BIGDL_ROOT}/python/orca/example/ray_on_spark/parameter_server/sync_parameter_server.py \ + --iterations 20 --num_workers 2 --cluster_mode yarn +exit_status=$? +if [ $exit_status -ne 0 ]; then + clear_up + echo "orca ros sync failed" + exit $exit_status +fi +now=$(date "+%s") +time=$((now - start)) +echo "#10 Total time cost ${time} seconds" echo "#11 start test for orca rllib" #timer
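
Every test block that these patches toggle follows the same shape: record a start time, run one example, check the exit status (calling clear_up and aborting on failure), then report the elapsed time. The sketch below folds that recurring pattern into a single helper; it is illustrative only: the helper name run_example_test is hypothetical and not part of the script, while clear_up is the cleanup function the script already defines near its top.

run_example_test() {
    # $1: label used in log messages; remaining arguments: the command to run
    local label=$1; shift
    local start now status
    start=$(date "+%s")
    # run the command and capture its status even when "set -e" is active
    "$@" && status=0 || status=$?
    if [ $status -ne 0 ]; then
        clear_up            # cleanup function defined at the top of the script
        echo "${label} failed"
        exit $status
    fi
    now=$(date "+%s")
    echo "${label} total time cost $((now - start)) seconds"
}

Used that way, a block such as test #4 above would reduce to a single call, for example:
run_example_test "orca transformer" \
    python ${BIGDL_ROOT}/python/orca/example/learn/bigdl/attention/transformer.py --cluster_mode yarn-client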