diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README.md b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README.md index eeb0a1d47a7e2..364d711d072a0 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README.md +++ b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README.md @@ -59,6 +59,20 @@ Examples: > bash infer_with_ipu.sh +Example: + +``` +(py37_paddle-ipu) [docker-λ>] leiw@gbnwx-pod006-3-in_docker_dev:~/Paddle/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist$ bash infer_with_ipu.sh +[09/18 07:14:40] mnist:infer INFO: Reading data ... +[09/18 07:14:40] mnist:infer INFO: Complete reading image infer_3.png +[09/18 07:14:40] mnist:infer INFO: Constructing the computation graph ... +[09/18 07:15:12] mnist:infer INFO: Computation graph built. +[09/18 07:15:12] mnist:infer INFO: Change batch size of var %s from %d to %d +[09/18 07:15:12] mnist:infer INFO: Drawing IR graph ... +[09/18 07:15:12] mnist:infer INFO: Complete drawing. +digit hand write number picture is recognized as : 3 +``` + ## Inference on IPU with Analysis API (c++) backend We will add this example in the future \ No newline at end of file diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README_cn.md b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README_cn.md index f0ba73ca85c4a..9e065402a8f95 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README_cn.md +++ b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/README_cn.md @@ -62,6 +62,21 @@ MNSIT 数据集足够小,可以在1个IPU上运行。对于精度对齐市足 > bash infer_with_ipu.sh +示例: + +``` +(py37_paddle-ipu) [docker-λ>] leiw@gbnwx-pod006-3-in_docker_dev:~/Paddle/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist$ bash infer_with_ipu.sh +[09/18 07:14:40] mnist:infer INFO: Reading data ... 
+[09/18 07:14:40] mnist:infer INFO: Complete reading image infer_3.png +[09/18 07:14:40] mnist:infer INFO: Constructing the computation graph ... +[09/18 07:15:12] mnist:infer INFO: Computation graph built. +[09/18 07:15:12] mnist:infer INFO: Change batch size of var %s from %d to %d +[09/18 07:15:12] mnist:infer INFO: Drawing IR graph ... +[09/18 07:15:12] mnist:infer INFO: Complete drawing. +digit hand write number picture is recognized as : 3 +``` +``` + ## 通过 Analysis API (c++) 在 `IPU` 上做推理 我们将加入相关示例 diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/infer.py b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/infer.py index 4be04625e8c66..5a88e00c37d00 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/infer.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/infer.py @@ -59,6 +59,12 @@ def parse_args(): type=bool, default=False, help="Whether to use IPU or not.") + parser.add_argument( + "--ues_ipu_model", + type=bool, + default=True, + help="use model trained on IPU devices" + ) parser.add_argument( "--num_ipus", type=int, @@ -75,8 +81,6 @@ def parse_args(): action="store_false", help="draw IR graph for debug" ) - parser.add_argument( - "--num_epochs", type=int, default=5, help="number of epochs.") parser.add_argument( "--save_dir", type=str, @@ -101,15 +105,47 @@ def apply_pseudo_batch_size_pass(prog, batch_size, var_name): # transform feed var batch_size to 1 global_block = prog.global_block() if var_name in global_block.vars: - feed_var = global_block.vars[var_name] # Call API + feed_var = global_block.vars[var_name] # Call Python Block API # modify attrs # TODO(yiakwy) : hard coded - feed_var.desc.set_shape([1,1,28,28]) - logger.info("Change batch size of var %s from %d to %d") + old_shape = feed_var.desc.shape() + feed_var.desc.set_shape([batch_size,1,28,28]) + logger.info("Change batch size of var %s from %d to %d" % (var_name, old_shape[0], batch_size)) return raise ValueError("Cannot 
find variable %s in the program description" % var_name) +def read_batch_size(prog, var_name): + global_block = prog.global_block() + if var_name in global_block.vars: + feed_var = global_block.vars[var_name] # Call Python Block API + old_shape = feed_var.desc.shape() + return old_shape[0] + + raise ValueError("Cannot find variable %s in the program description" % var_name) + +def apply_pseudo_rm_op_by_type_pass(prog, op_type): + global_block = prog.global_block() + # TODO(yiakwy) : with block python frontend API, we could + for i, op in enumerate(global_block.ops): + op.desc.set_is_target(False) + if op.type == op_type: + global_block._remove_op(i) + logger.info("Remove operator %d of type %s" % (i, op_type)) + # return + + # raise ValueError("Cannot find operator with type %s in the program description" % op_type) + +def apply_pseudo_rm_vars_pass(prog, var_name): + global_block = prog.global_block() + if var_name in global_block.vars: + global_block._remove_var(var_name) + prog.desc.flush() + logger.info("Remove var %s" % var_name) + return + + raise ValueError("Cannot find var %s in the program description" % var_name) + def load_image(file): im = Image.open(file).convert('L') im = im.resize((28, 28), Image.ANTIALIAS) @@ -144,12 +180,12 @@ def main(): # Reading images logger.info("Reading data ...") pwd = os.path.dirname(os.path.realpath(__file__)) - img = load_image(os.path.join(pwd,FLAGS.img)) + img = load_image(os.path.join(pwd,FLAGS.img)) logger.info("Complete reading image %s" % FLAGS.img) save_dir = FLAGS.save_dir num_ipus = FLAGS.num_ipus - enable_pipelining = not FLAGS.no_pipelining + enable_pipelining = FLAGS.no_pipelining will_draw_ir_graph = FLAGS.draw_ir_graph # add model @@ -162,7 +198,7 @@ def main(): logger.info("Constructing the computation graph ...") [infer_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(save_dir, infer_exc, - model_filename="recognize_digits_%s_test.pdmodel" % DEVICE_SUFFIX, 
params_filename="recognize_digits_%s.pdiparams" % DEVICE_SUFFIX) + model_filename="recognize_digits_%s.pdmodel" % DEVICE_SUFFIX, params_filename="recognize_digits_%s.pdiparams" % DEVICE_SUFFIX) logger.info("Computation graph built.") @@ -170,14 +206,24 @@ def main(): # TODO(yiakwy) : for the moment, we store our model trained on IPU as static graph # which means that the batch size is fixed. # - # We will apply passes to trains from batch size to `None` or `-1` upon the generated graph description later - apply_pseudo_batch_size_pass(infer_program, 1, feed_target_names[0]) - else: - pass + # We will apply passes to transform batch size from a static number to `None` or `-1` or another number upon the generated graph description # apply_pseudo_batch_size_pass(infer_program, 1, feed_target_names[0]) + + # TODO(yiakwy) : workaround + batch_size = read_batch_size(infer_program, feed_target_names[0]) + img_with_64 = np.tile(img, (batch_size, 1, 1, 1)) + img = img_with_64 + apply_pseudo_rm_op_by_type_pass(infer_program, "feed") + apply_pseudo_rm_op_by_type_pass(infer_program, "fetch") + apply_pseudo_rm_vars_pass(infer_program, "feed") + apply_pseudo_rm_vars_pass(infer_program, "fetch") + else: + if FLAGS.ues_ipu_model: + apply_pseudo_batch_size_pass(infer_program, 1, feed_target_names[0]) - if FLAGS.use_ipu: + + if FLAGS.use_ipu:#False: # Pipeline with tensorflow frontend: https://docs.graphcore.ai/projects/tensorflow1-user-guide/en/latest/perf_training.html#pipelined-training ipu_strategy = compiler.get_ipu_strategy() ipu_strategy.is_training = False @@ -192,7 +238,7 @@ def main(): logger.info("Compiling graph on IPU devices ...") feed_list = feed_target_names fetch_list = [ out.name for out in fetch_targets] - infer_program = ipu_compiler.compile(feed_list, fetch_list, infer=True) + infer_program = ipu_compiler.compile(feed_list, fetch_list) logger.info("Complete compiling.") else: if will_draw_ir_graph: diff --git 
a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/model.py b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/model.py index a4d65c86361a1..6b81751e526c8 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/model.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/model.py @@ -102,4 +102,4 @@ def outputs(self): if not self.is_built: raise ValueError("The model is not built!") return self._outputs - \ No newline at end of file + diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/train.py b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/train.py index a29843e61ca5a..de8b1934ce9d2 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/train.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dataset/mnist/train.py @@ -225,6 +225,7 @@ def train(epochs, exec, model, feeder, save_dir, metrics = exec.run(train_program, feed={inp.name : data[i] for i, inp in enumerate(model.inputs)}, fetch_list=model.outputs[1]) + if batch_id % 100 == 0: if validation_loss > 0: print("Epoch %d, batch %d, Cost %f, Validation Cost %f" % ( @@ -238,15 +239,19 @@ def train(epochs, exec, model, feeder, save_dir, step += 1 if save_dir is not None: - if True:#not model.cfg.get("use_ipu", False): - # TODO(yiak) : does not work in IPU + if not model.cfg.get("use_ipu", False): paddle.static.save_inference_model( save_dir+"recognize_digits_%s" % model.cfg.get("device_suffix", "cpu"), model.inputs[0], model.outputs[0], exec, program=train_program ) else: - paddle.static.save(train_program, save_dir+"recognize_digits_%s_test" % model.cfg.get("device_suffix", "ipu")) + paddle.static.save_inference_model( + save_dir+"recognize_digits_%s" % model.cfg.get("device_suffix", "cpu"), + model.inputs[0], model.outputs[0], + exec, program=train_program.org_program + ) + # paddle.static.save(train_program.org_program, save_dir+"recognize_digits_%s_test" % model.cfg.get("device_suffix", "ipu")) # find the best 
pass best = sorted(report,key=lambda record: float(record[1]))[0] @@ -376,4 +381,4 @@ return 0 if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main())