In [8]:
import numpy as np
import mindspore as ms
import mindspore_rl as ms_rl
import matplotlib
import gym

In [9]:
# Report the version of every library this notebook relies on, one per line,
# so the recorded output documents the environment the run was produced in.
print("\n".join(
    f"{label} version: {module.__version__}"
    for label, module in (
        ("numpy", np),
        ("mindspore", ms),
        ("mindspore_rl", ms_rl),
        ("matplotlib", matplotlib),
        ("gym", gym),
    )
))

numpy version: 1.22.3
mindspore version: 2.0.0a0
mindspore_rl version: 0.6.0
matplotlib version: 3.5.0
gym version: 0.21.0


In [10]:
import argparse
from mindspore_rl.algorithm.dqn import config
from mindspore_rl.algorithm.dqn.dqn_session import DQNSession
from mindspore_rl.algorithm.dqn.dqn_trainer import DQNTrainer
from mindspore import context
from mindspore import dtype as mstype

In [15]:
# Run options for the DQN example, expressed as an argparse parser so the same
# flags work from a script. Every flag has a default, so no argv is required.
parser = argparse.ArgumentParser(description='MindSpore Reinforcement DQN')
parser.add_argument('--episode', type=int, default=650, help='total episode numbers.')
# The help text states the real default ('CPU'); 'Auto' remains a valid choice.
parser.add_argument('--device_target', type=str, default='CPU', choices=['Ascend', 'CPU', 'GPU', 'Auto'],
                    help='Choose a device to run the dqn example(Default: CPU).')
parser.add_argument('--precision_mode', type=str, default='fp32', choices=['fp32', 'fp16'],
                    help='Precision mode')
parser.add_argument('--env_yaml', type=str, default='./env_yaml/CartPole-v0.yaml',
                    help='Choose an environment yaml to update the dqn example(Default: CartPole-v0.yaml).')
parser.add_argument('--algo_yaml', type=str, default=None,
                    help='Choose an algo yaml to update the dqn example(Default: None).')
# parse_known_args returns unrecognized argv entries instead of exiting, which
# keeps this cell usable when the hosting process was launched with extra
# arguments (e.g. by a notebook kernel launcher); the leftovers are discarded.
options, _ = parser.parse_known_args()

In [16]:
def train(episode=options.episode):
    """Train the DQN agent using the settings in the module-level ``options``.

    Applies the requested device target to the MindSpore context, validates the
    precision mode against the effective device, switches the context to graph
    mode (turning on ``enable_graph_kernel`` when the device is CPU), stores the
    compute type in the shared DQN algorithm config, and finally runs a
    ``DQNSession`` with ``DQNTrainer``.

    Args:
        episode (int): Number of episodes passed to ``DQNSession.run``.
            Defaults to the value ``options.episode`` had when this function
            was defined.

    Raises:
        ValueError: If ``options.precision_mode`` selects fp16 while the
            effective device target is not ``'Ascend'``.
    """
    if options.device_target != 'Auto':
        context.set_context(device_target=options.device_target)
    # With 'Auto' nothing is set explicitly, so read the target back from the
    # context; this is the device the remaining checks must be made against.
    device_target = context.get_context('device_target')

    compute_type = mstype.float32 if options.precision_mode == 'fp32' else mstype.float16
    # Validate before mutating the shared algorithm config, so a rejected call
    # does not leave fp16 behind in the kernel. Comparing against the effective
    # device (not the raw option) lets 'Auto' on an Ascend machine use fp16.
    if compute_type == mstype.float16 and device_target != 'Ascend':
        raise ValueError("Fp16 mode is supported by Ascend backend.")

    if device_target in ['CPU']:
        context.set_context(enable_graph_kernel=True)
    context.set_context(mode=context.GRAPH_MODE)
    config.algorithm_config['policy_and_network']['params']['compute_type'] = compute_type

    dqn_session = DQNSession(options.env_yaml, options.algo_yaml)
    dqn_session.run(class_type=DQNTrainer, episode=episode)

In [17]:
# Run DQN training; with no argument the episode count falls back to the
# default bound from `options.episode` when `train` was defined.
train()

Episode 0 has 71.0 steps, cost time: 556.804 ms, per step time: 7.842 ms
Episode 0: loss is 0.181, rewards is 71.0
Episode 1 has 22.0 steps, cost time: 14.600 ms, per step time: 0.664 ms
Episode 1: loss is 0.131, rewards is 22.0
Episode 2 has 10.0 steps, cost time: 7.787 ms, per step time: 0.779 ms
Episode 2: loss is 0.11, rewards is 10.0
Episode 3 has 15.0 steps, cost time: 10.707 ms, per step time: 0.714 ms
Episode 3: loss is 0.047, rewards is 15.0
Episode 4 has 10.0 steps, cost time: 7.397 ms, per step time: 0.740 ms
Episode 4: loss is 0.079, rewards is 10.0
Episode 5 has 11.0 steps, cost time: 8.760 ms, per step time: 0.796 ms
Episode 5: loss is 0.074, rewards is 11.0
Episode 6 has 11.0 steps, cost time: 7.787 ms, per step time: 0.708 ms
Episode 6: loss is 0.043, rewards is 11.0
Episode 7 has 8.0 steps, cost time: 6.813 ms, per step time: 0.852 ms
Episode 7: loss is 0.049, rewards is 8.0
Episode 8 has 12.0 steps, cost time: 7.787 ms, per step time: 0.649 ms
Episode 8: loss is 0.102

Episode 65 has 107.0 steps, cost time: 62.293 ms, per step time: 0.582 ms
Episode 65: loss is 0.605, rewards is 107.0
Episode 66 has 89.0 steps, cost time: 52.560 ms, per step time: 0.591 ms
Episode 66: loss is 0.581, rewards is 89.0
Episode 67 has 95.0 steps, cost time: 55.569 ms, per step time: 0.585 ms
Episode 67: loss is 0.133, rewards is 95.0
Episode 68 has 200.0 steps, cost time: 116.023 ms, per step time: 0.580 ms
Episode 68: loss is 0.94, rewards is 200.0
Episode 69 has 200.0 steps, cost time: 117.346 ms, per step time: 0.587 ms
Episode 69: loss is 3.467, rewards is 200.0
Episode 70 has 132.0 steps, cost time: 77.989 ms, per step time: 0.591 ms
Episode 70: loss is 1.88, rewards is 132.0
-----------------------------------------
Evaluate for episode 70 total rewards is 197.300
-----------------------------------------
Episode 71 has 200.0 steps, cost time: 121.665 ms, per step time: 0.608 ms
Episode 71: loss is 0.415, rewards is 200.0
Episode 72 has 177.0 steps, cost time: 104.1

Episode 127 has 200.0 steps, cost time: 127.196 ms, per step time: 0.636 ms
Episode 127: loss is 0.854, rewards is 200.0
Episode 128 has 200.0 steps, cost time: 121.667 ms, per step time: 0.608 ms
Episode 128: loss is 23.349, rewards is 200.0
Episode 129 has 188.0 steps, cost time: 121.289 ms, per step time: 0.645 ms
Episode 129: loss is 3.035, rewards is 188.0
Episode 130 has 192.0 steps, cost time: 115.903 ms, per step time: 0.604 ms
Episode 130: loss is 28.642, rewards is 192.0
-----------------------------------------
Evaluate for episode 130 total rewards is 200.000
-----------------------------------------
Episode 131 has 200.0 steps, cost time: 126.751 ms, per step time: 0.634 ms
Episode 131: loss is 1.097, rewards is 200.0
Episode 132 has 200.0 steps, cost time: 117.975 ms, per step time: 0.590 ms
Episode 132: loss is 2.47, rewards is 200.0
Episode 133 has 200.0 steps, cost time: 116.362 ms, per step time: 0.582 ms
Episode 133: loss is 0.878, rewards is 200.0
Episode 134 has 19

Episode 188 has 200.0 steps, cost time: 117.324 ms, per step time: 0.587 ms
Episode 188: loss is 0.646, rewards is 200.0
Episode 189 has 200.0 steps, cost time: 117.005 ms, per step time: 0.585 ms
Episode 189: loss is 3.069, rewards is 200.0
Episode 190 has 200.0 steps, cost time: 116.342 ms, per step time: 0.582 ms
Episode 190: loss is 1.261, rewards is 200.0
-----------------------------------------
Evaluate for episode 190 total rewards is 197.500
-----------------------------------------
Episode 191 has 200.0 steps, cost time: 113.702 ms, per step time: 0.569 ms
Episode 191: loss is 116.986, rewards is 200.0
Episode 192 has 200.0 steps, cost time: 113.429 ms, per step time: 0.567 ms
Episode 192: loss is 9.694, rewards is 200.0
Episode 193 has 200.0 steps, cost time: 113.514 ms, per step time: 0.568 ms
Episode 193: loss is 3.362, rewards is 200.0
Episode 194 has 200.0 steps, cost time: 113.880 ms, per step time: 0.569 ms
Episode 194: loss is 6.399, rewards is 200.0
Episode 195 has 2

Episode 249 has 200.0 steps, cost time: 117.970 ms, per step time: 0.590 ms
Episode 249: loss is 9.436, rewards is 200.0
Episode 250 has 200.0 steps, cost time: 118.830 ms, per step time: 0.594 ms
Episode 250: loss is 0.973, rewards is 200.0
-----------------------------------------
Evaluate for episode 250 total rewards is 199.600
-----------------------------------------
Episode 251 has 200.0 steps, cost time: 115.563 ms, per step time: 0.578 ms
Episode 251: loss is 55.947, rewards is 200.0
Episode 252 has 200.0 steps, cost time: 115.442 ms, per step time: 0.577 ms
Episode 252: loss is 1.881, rewards is 200.0
Episode 253 has 200.0 steps, cost time: 118.747 ms, per step time: 0.594 ms
Episode 253: loss is 1.275, rewards is 200.0
Episode 254 has 200.0 steps, cost time: 116.800 ms, per step time: 0.584 ms
Episode 254: loss is 0.376, rewards is 200.0
Episode 255 has 200.0 steps, cost time: 117.526 ms, per step time: 0.588 ms
Episode 255: loss is 5.702, rewards is 200.0
Episode 256 has 20

Episode 310 has 200.0 steps, cost time: 116.800 ms, per step time: 0.584 ms
Episode 310: loss is 0.547, rewards is 200.0
-----------------------------------------
Evaluate for episode 310 total rewards is 200.000
-----------------------------------------
Episode 311 has 200.0 steps, cost time: 120.875 ms, per step time: 0.604 ms
Episode 311: loss is 0.794, rewards is 200.0
Episode 312 has 200.0 steps, cost time: 120.751 ms, per step time: 0.604 ms
Episode 312: loss is 0.887, rewards is 200.0
Episode 313 has 190.0 steps, cost time: 112.529 ms, per step time: 0.592 ms
Episode 313: loss is 0.356, rewards is 190.0
Episode 314 has 200.0 steps, cost time: 118.747 ms, per step time: 0.594 ms
Episode 314: loss is 0.767, rewards is 200.0
Episode 315 has 200.0 steps, cost time: 118.359 ms, per step time: 0.592 ms
Episode 315: loss is 8.278, rewards is 200.0
Episode 316 has 200.0 steps, cost time: 117.454 ms, per step time: 0.587 ms
Episode 316: loss is 1.253, rewards is 200.0
Episode 317 has 200

-----------------------------------------
Evaluate for episode 370 total rewards is 199.800
-----------------------------------------
Episode 371 has 200.0 steps, cost time: 119.230 ms, per step time: 0.596 ms
Episode 371: loss is 0.267, rewards is 200.0
Episode 372 has 200.0 steps, cost time: 116.428 ms, per step time: 0.582 ms
Episode 372: loss is 73.022, rewards is 200.0
Episode 373 has 200.0 steps, cost time: 117.029 ms, per step time: 0.585 ms
Episode 373: loss is 1.585, rewards is 200.0
Episode 374 has 192.0 steps, cost time: 111.933 ms, per step time: 0.583 ms
Episode 374: loss is 0.277, rewards is 192.0
Episode 375 has 200.0 steps, cost time: 116.868 ms, per step time: 0.584 ms
Episode 375: loss is 0.461, rewards is 200.0
Episode 376 has 200.0 steps, cost time: 118.412 ms, per step time: 0.592 ms
Episode 376: loss is 0.411, rewards is 200.0
Episode 377 has 200.0 steps, cost time: 116.591 ms, per step time: 0.583 ms
Episode 377: loss is 0.206, rewards is 200.0
Episode 378 has 20

Episode 431: loss is 10.406, rewards is 200.0
Episode 432 has 200.0 steps, cost time: 116.868 ms, per step time: 0.584 ms
Episode 432: loss is 0.415, rewards is 200.0
Episode 433 has 200.0 steps, cost time: 117.379 ms, per step time: 0.587 ms
Episode 433: loss is 0.709, rewards is 200.0
Episode 434 has 200.0 steps, cost time: 116.948 ms, per step time: 0.585 ms
Episode 434: loss is 0.147, rewards is 200.0
Episode 435 has 200.0 steps, cost time: 116.855 ms, per step time: 0.584 ms
Episode 435: loss is 0.903, rewards is 200.0
Episode 436 has 200.0 steps, cost time: 116.399 ms, per step time: 0.582 ms
Episode 436: loss is 2.694, rewards is 200.0
Episode 437 has 200.0 steps, cost time: 116.441 ms, per step time: 0.582 ms
Episode 437: loss is 31.649, rewards is 200.0
Episode 438 has 182.0 steps, cost time: 106.093 ms, per step time: 0.583 ms
Episode 438: loss is 38.089, rewards is 182.0
Episode 439 has 200.0 steps, cost time: 115.453 ms, per step time: 0.577 ms
Episode 439: loss is 113.894,

Episode 492: loss is 36.109, rewards is 200.0
Episode 493 has 200.0 steps, cost time: 115.826 ms, per step time: 0.579 ms
Episode 493: loss is 0.281, rewards is 200.0
Episode 494 has 200.0 steps, cost time: 116.494 ms, per step time: 0.582 ms
Episode 494: loss is 0.315, rewards is 200.0
Episode 495 has 200.0 steps, cost time: 116.242 ms, per step time: 0.581 ms
Episode 495: loss is 12.804, rewards is 200.0
Episode 496 has 200.0 steps, cost time: 117.324 ms, per step time: 0.587 ms
Episode 496: loss is 0.154, rewards is 200.0
Episode 497 has 200.0 steps, cost time: 115.639 ms, per step time: 0.578 ms
Episode 497: loss is 0.206, rewards is 200.0
Episode 498 has 200.0 steps, cost time: 114.935 ms, per step time: 0.575 ms
Episode 498: loss is 0.807, rewards is 200.0
Episode 499 has 200.0 steps, cost time: 116.399 ms, per step time: 0.582 ms
Episode 499: loss is 0.298, rewards is 200.0
Episode 500 has 200.0 steps, cost time: 117.382 ms, per step time: 0.587 ms
Episode 500: loss is 39.687, r

Episode 553: loss is 0.107, rewards is 200.0
Episode 554 has 200.0 steps, cost time: 115.991 ms, per step time: 0.580 ms
Episode 554: loss is 0.388, rewards is 200.0
Episode 555 has 200.0 steps, cost time: 115.827 ms, per step time: 0.579 ms
Episode 555: loss is 0.226, rewards is 200.0
Episode 556 has 200.0 steps, cost time: 117.978 ms, per step time: 0.590 ms
Episode 556: loss is 3.125, rewards is 200.0
Episode 557 has 200.0 steps, cost time: 117.325 ms, per step time: 0.587 ms
Episode 557: loss is 0.23, rewards is 200.0
Episode 558 has 200.0 steps, cost time: 116.798 ms, per step time: 0.584 ms
Episode 558: loss is 0.311, rewards is 200.0
Episode 559 has 200.0 steps, cost time: 117.334 ms, per step time: 0.587 ms
Episode 559: loss is 0.2, rewards is 200.0
Episode 560 has 200.0 steps, cost time: 116.554 ms, per step time: 0.583 ms
Episode 560: loss is 0.1, rewards is 200.0
-----------------------------------------
Evaluate for episode 560 total rewards is 200.000
---------------------

Episode 615 has 200.0 steps, cost time: 120.247 ms, per step time: 0.601 ms
Episode 615: loss is 0.307, rewards is 200.0
Episode 616 has 200.0 steps, cost time: 117.453 ms, per step time: 0.587 ms
Episode 616: loss is 60.798, rewards is 200.0
Episode 617 has 200.0 steps, cost time: 118.747 ms, per step time: 0.594 ms
Episode 617: loss is 24.32, rewards is 200.0
Episode 618 has 200.0 steps, cost time: 116.357 ms, per step time: 0.582 ms
Episode 618: loss is 0.819, rewards is 200.0
Episode 619 has 200.0 steps, cost time: 117.572 ms, per step time: 0.588 ms
Episode 619: loss is 0.145, rewards is 200.0
Episode 620 has 200.0 steps, cost time: 116.946 ms, per step time: 0.585 ms
Episode 620: loss is 0.272, rewards is 200.0
-----------------------------------------
Evaluate for episode 620 total rewards is 184.700
-----------------------------------------
Episode 621 has 200.0 steps, cost time: 120.306 ms, per step time: 0.602 ms
Episode 621: loss is 0.1, rewards is 200.0
Episode 622 has 200.

In [None]:
# NOTE(review): no cell visible here reads `episode`; presumably meant for a short
# run such as train(episode=episode) — confirm or remove this unexecuted cell.
episode=10