testGymCartpole.py

You can view and download this file on GitHub: testGymCartpole.py

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a cart-pole example
#           Needs input file testGymCartpoleEnv.py which defines the model in the gym environment
#           Works well with Python 3.8!
#
# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
# Date:     2022-05-17
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++
#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
#pip install gym[spaces]
#pip install stable-baselines3==1.7.0
#pip install exudyn
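
#optional sanity check (a minimal sketch, not part of the original example; it assumes
#exudyn.GetVersionString(), gym.__version__ and stable_baselines3.__version__ are available):
if False: #set True to print the versions of the packages installed above
    import exudyn, gym, stable_baselines3
    print('exudyn version:           ', exudyn.GetVersionString())
    print('gym version:              ', gym.__version__)
    print('stable-baselines3 version:', stable_baselines3.__version__)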

import time
from math import sin, cos
from testGymCartpoleEnv import CartPoleEnv

if True: #test the model by just integrating in Exudyn and applying a force

    env = CartPoleEnv()
    env.useRenderer = False #set this true to show visualization
    observation, info = env.reset(seed=42, return_info=True)
    ts = -time.time()

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        # action = env.action_space.sample()
        # observation, reward, done, info = env.step(action)
        # if done:
            # observation, info = env.reset(return_info=True)
        # env.render()
        # time.sleep(0.01)
    ts = ts+time.time()
    print('measured max. step FPS:', int(10000/ts))
    env.close()


#+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm

if True: #do some reinforcement learning with the Exudyn model
    import gym

    env = CartPoleEnv(thresholdFactor=5, forceFactor=2)

    env.useRenderer = False

    from stable_baselines3 import A2C
    model = A2C('MlpPolicy', env,
                device='cpu',  #usually CPU is faster for networks of this size
                verbose=1)
    ts = -time.time()
    model.learn(total_timesteps=10000)
    print('time spent=', ts+time.time())

    model.save('solution/cartpoleLearn')
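
    #note (a hedged sketch, not part of the original example): the saved policy can be
    #restored later with the standard stable-baselines3 API, e.g.:
    #model = A2C.load('solution/cartpoleLearn')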

    #%%+++++++++++++++++++++++++++++++++++++++
    env = CartPoleEnv(10) #test with larger threshold
    env.useRenderer = True
    obs = env.reset()
    for i in range(100):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            obs = env.reset()
        time.sleep(0.05) #to see results ...

    env.close()
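
    #a hedged addition (not part of the original example): stable-baselines3 also provides a
    #standard helper to quantify the trained policy; evaluate_policy runs n_eval_episodes
    #episodes and returns the mean and standard deviation of the episode reward
    if False: #set True to evaluate the trained policy numerically
        from stable_baselines3.common.evaluation import evaluate_policy
        evalEnv = CartPoleEnv(10) #same larger threshold as in the rendering test above
        evalEnv.useRenderer = False
        meanReward, stdReward = evaluate_policy(model, evalEnv, n_eval_episodes=10)
        print('mean episode reward:', meanReward, '+/-', stdReward)
        evalEnv.close()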