In [1]:
# pip install gymnasium
# pip install pygame
# pip install swig
# pip install "gymnasium[box2d]"

# Import Gymnasium (the maintained successor to OpenAI Gym), used to build and interact with reinforcement-learning environments.
import gymnasium as gym

# Run a random-action agent in LunarLander-v3 for a fixed number of steps,
# printing every transition and starting a new episode whenever one ends.

# Upper bound on simulated time steps so the demo always terminates.
MAX_STEPS = 300

# Create the Box2D LunarLander-v3 environment with an on-screen window
# (render_mode="human").
env = gym.make("LunarLander-v3", render_mode="human")

try:
    # reset() returns an (observation, info) pair; unpack both so that
    # `observation` holds the state array rather than the whole tuple.
    observation, info = env.reset()

    for _ in range(MAX_STEPS):
        # No explicit env.render() call: with render_mode="human" the
        # environment draws the frame itself during reset() and step().

        # Sample a random action from the action space (the agent's move).
        action = env.action_space.sample()

        # Apply the action; receive the new observation, the reward, the
        # terminated/truncated flags and the auxiliary info dict.
        observation, reward, terminated, truncated, info = env.step(action)

        # Print the current transition.
        print(observation, reward, terminated, truncated, info)

        # When the episode ends (landed, crashed, or time limit reached),
        # reset the environment to begin a new one.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Always close the environment and release the window, even if the
    # loop is interrupted or raises.
    env.close()

[ 0.01485701  1.4185503   0.7560133   0.15658754 -0.0185098  -0.19891521
  0.          0.        ] -1.4742682180957718 False False {}
[ 0.02225056  1.4214733   0.7477327   0.12978137 -0.02678342 -0.16548802
  0.          0.        ] 0.1554544647073908 False False {}
[ 0.02954683  1.4238061   0.7355018   0.10356735 -0.03259556 -0.1162536
  0.          0.        ] 0.757676336477233 False False {}
[ 0.0368433   1.4255395   0.73551816  0.07689642 -0.03840752 -0.11624968
  0.          0.        ] -0.448392145057511 False False {}
[ 0.0442008   1.4275148   0.7414334   0.08763466 -0.04402966 -0.11245303
  0.          0.        ] -1.7873207241876685 False False {}
[ 0.05149059  1.4288851   0.7329268   0.0607801  -0.04794693 -0.07835244
  0.          0.        ] 0.532093701983315 False False {}
[ 0.05872097  1.429666    0.7254847   0.03463085 -0.05036243 -0.04831434
  0.          0.        ] 0.5357487354891088 False False {}
[ 0.0660841   1.4306011   0.7382349   0.04148968 -0.05226781 -0.038110