You can view and download this file on Github: testGymDoublePendulumEnv.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details: This file serves as an input to testGymDoublePendulum.py
#
# Author: Johannes Gerstmayr
# Date: 2022-05-18
# Update: 2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import exudyn as exu
from exudyn.utilities import *
import math
import math
from typing import Optional, Union
import numpy as np
import gym
from gym import logger, spaces, Env
from gym.error import DependencyNotInstalled
class DoublePendulumEnv(Env):
#metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
metadata = {"render_modes": ["human"], "render_fps": 50}
def __init__(self, thresholdFactor = 1.):
self.SC = exu.SystemContainer()
self.mbs = self.SC.AddSystem()
#%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+++++++++++++++++++++++++++++++++++++++++++++++++++++
self.gravity = 9.81
self.length = 1.
self.masscart = 1.
self.massarm = 0.1
self.total_mass = self.massarm + self.masscart
self.armInertia = self.length**2*0.5*self.massarm
self.force_mag = 10.0
self.stepUpdateTime = 0.02 # seconds between state updates
# Angle at which to fail the episode
self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
self.x_threshold = thresholdFactor*2.4
high = np.array(
[
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max,
],
dtype=np.float32,
)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++
#see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
#for Env:
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++
self.state = None
self.rendererRunning=None
self.useRenderer = False #turn this on if needed
background = GraphicsDataCheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
visualization=VObjectGround(graphicsData= [background])))
nGround=self.mbs.AddNode(NodePointGround())
gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
color=color4dodgerblue)
self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
physicsInertia=0.1*self.masscart, #not needed
nodeNumber=self.nCart,
visualization=VObjectRigidBody2D(graphicsData= [gCart])))
mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
gArm1 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
physicsInertia=self.armInertia, #not included in original paper
nodeNumber=self.nArm1,
visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
gArm2 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
physicsInertia=self.armInertia, #not included in original paper
nodeNumber=self.nArm2,
visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
#gravity
self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
#control force
self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
#joints and constraints:
self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
#%%++++++++++++++++++++++++
self.mbs.Assemble() #computes initial vector
self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
self.simulationSettings.timeIntegration.numberOfSteps = 1
self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
self.simulationSettings.timeIntegration.verboseMode = 0
self.simulationSettings.solutionSettings.writeSolutionToFile = False
#self.simulationSettings.timeIntegration.simulateInRealtime = True
self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
self.SC.visualizationSettings.general.drawWorldBasis=True
self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
def integrateStep(self, force):
#exudyn simulation part
#index 2 solver
self.mbs.SetLoadParameter(self.lControl, 'load', force)
#progress integration time
currentTime = self.simulationSettings.timeIntegration.endTime
self.simulationSettings.timeIntegration.startTime = currentTime
self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
# exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
# updateInitialValues=True) #use final value as new initial values
self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
currentState = self.mbs.systemData.GetSystemState() #get current values
self.mbs.systemData.SetSystemState(systemStateList=currentState,
configuration = exu.ConfigurationType.Initial)
self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
configuration = exu.ConfigurationType.Initial)
def step(self, action):
err_msg = f"{action!r} ({type(action)}) invalid"
assert self.action_space.contains(action), err_msg
assert self.state is not None, "Call reset before using step method."
self.setState2InitialValues()
force = self.force_mag if action == 1 else -self.force_mag
#++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++++++++++
self.integrateStep(force)
#+++++++++++++++++++++++++
#compute some output:
cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
#finally write updated state:
self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
#++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++++++++++
done = bool(
cartPosX < -self.x_threshold
or cartPosX > self.x_threshold
or arm1Angle < -self.theta_threshold_radians
or arm1Angle > self.theta_threshold_radians
or arm2Angle < -self.theta_threshold_radians
or arm2Angle > self.theta_threshold_radians
)
if not done:
reward = 1.0
elif self.steps_beyond_done is None:
# Arm1 just fell!
self.steps_beyond_done = 0
reward = 1.0
else:
if self.steps_beyond_done == 0:
logger.warn(
"You are calling 'step()' even though this "
"environment has already returned done = True. You "
"should always call 'reset()' once you receive 'done = "
"True' -- any further steps are undefined behavior."
)
self.steps_beyond_done += 1
reward = 0.0
return np.array(self.state, dtype=np.float32), reward, done, {}
def setState2InitialValues(self):
#+++++++++++++++++++++++++++++++++++++++++++++
#set specific initial state:
(xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
initialValues = np.zeros(9) #model has 3*3 = 9 redundant states
initialValues_t = np.zeros(9)
#build redundant cordinates from self.state
initialValues[0] = xCart
initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
initialValues[3+2] = phiArm1
initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
initialValues[6+2] = phiArm2
initialValues_t[0] = xCart_t
initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
initialValues_t[3+2] = phiArm1_t
initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
initialValues_t[6+2] = phiArm2_t
self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
def reset(
self,
*,
seed: Optional[int] = None,
return_info: bool = False,
options: Optional[dict] = None,
):
#super().reset(seed=seed)
self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
self.steps_beyond_done = None
#+++++++++++++++++++++++++++++++++++++++++++++
#set state into initial values:
self.setState2InitialValues()
self.simulationSettings.timeIntegration.endTime = 0
self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
#+++++++++++++++++++++++++++++++++++++++++++++
if not return_info:
return np.array(self.state, dtype=np.float32)
else:
return np.array(self.state, dtype=np.float32), {}
def render(self, mode="human"):
if self.rendererRunning==None and self.useRenderer:
exu.StartRenderer()
self.rendererRunning = True
def close(self):
self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
if self.rendererRunning==True:
# SC.WaitForRenderEngineStopFlag()
exu.StopRenderer() #safely close rendering window!
# #+++++++++++++++++++++++++++++++++++++++++++++
# #reset:
# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)