In [6]:
import gymnasium as gym
import rlenvs
import numpy as np
from rlenvs import DiscreteCartPole


# Environment under test; the commented lines are alternatives kept for quick switching.
# env = gym.make("CartPole-v1")
# env = gym.make("custom/CartPole-v1")
env = gym.make("custom/DiscreteCartPole-v1")

# Reset with a fixed seed. The `options` dict is consumed by the custom env; presumably
# 'masspole'/'length' override the pole parameters and "sizes" sets the per-factor
# discretisation — TODO confirm against rlenvs (not visible in this file).
# observation, info = env.reset(seed=82)
# observation, info = env.reset(seed=82, options={"sizes":(1, 20, 20, 20)})
observation, info = env.reset(seed=82, options={'masspole':.45, 'length':1.0, "sizes":(1, 20, 20, 20)})

In [7]:
# Compare the env's own transition with the reference integrator `apply_step`
# (defined in a later cell of this file, execution count In [3]).
print(env.unwrapped.state)
print(env.step(1))
# NOTE(review): env.step(1) above has already advanced the env, so the two lines below
# feed the post-step state into apply_step rather than the state printed first — confirm
# this is the intended comparison.
print([round(float(v), 5) for v in  apply_step(env.unwrapped.state)])
# Locate the predicted component 3 (theta_dot in apply_step's unpacking) in the
# discretizer's space; the helper's return format is defined in rlenvs — not visible here.
print(env.unwrapped.discretizer.__locate_in_space__(3, apply_step(env.unwrapped.state)[3]))
# print(np.array(obj.discretize(my_step(env.unwrapped.state))[1], dtype=np.float32))

[ 0.       0.17978 -0.02702  0.13483]
(array([ 0.     ,  0.53948, -0.01619,  0.     ], dtype=float32), 1.0, False, False, {})
[0.01079, 0.73483, -0.01619, -0.29775]
(7, -0.26966)


In [8]:
# Display entry 2 of the env discretizer's factor spaces (presumably the theta factor,
# matching position 2 of the state tuple — TODO confirm against rlenvs).
env.unwrapped.discretizer.factor_spaces[2]

[-0.09834,
 -0.07621,
 -0.05698,
 -0.0406,
 -0.02702,
 -0.01619,
 -0.00809,
 -0.0027,
 0,
 0.0027,
 0.00809,
 0.01619,
 0.02702,
 0.0406,
 0.05698,
 0.07621,
 0.09834]

In [3]:
def apply_step(state, action=1, *, gravity=9.8, masscart=1.0, masspole=0.1,
               length=0.5, force_mag=10.0, tau=0.02):
    """Advance a cart-pole state by one explicit-Euler step.

    The physical constants used to be hard-coded in the body; they are now
    keyword-only parameters whose defaults are those same values, so existing
    calls behave exactly as before. This lets the reference step be evaluated
    with non-default pole parameters (the env in this notebook is reset with
    ``'masspole': .45, 'length': 1.0``).

    Parameters
    ----------
    state : sequence of 4 numbers
        ``(x, x_dot, theta, theta_dot)``.
    action : int, default 1
        ``1`` pushes with ``+force_mag``; any other value pushes with
        ``-force_mag``.
    gravity, masscart, masspole, force_mag : float, keyword-only
        Gravity, cart mass, pole mass and magnitude of the applied force.
    length : float, keyword-only
        Half of the pole's length.
    tau : float, keyword-only
        Seconds between state updates.

    Returns
    -------
    numpy.ndarray
        The next ``(x, x_dot, theta, theta_dot)`` as a float32 array of shape (4,).
    """
    total_mass = masspole + masscart
    polemass_length = masspole * length

    x, x_dot, theta, theta_dot = state
    force = force_mag if action == 1 else -force_mag
    costheta = np.cos(theta)
    sintheta = np.sin(theta)

    # Shared term of both accelerations: applied force plus the centrifugal
    # contribution of the pole, normalised by the total mass.
    temp = (
        force + polemass_length * np.square(theta_dot) * sintheta
    ) / total_mass
    thetaacc = (gravity * sintheta - costheta * temp) / (
        length
        * (4.0 / 3.0 - masspole * np.square(costheta) / total_mass)
    )
    xacc = temp - polemass_length * thetaacc * costheta / total_mass

    # Explicit Euler: positions are advanced with the *old* velocities, so each
    # position update must happen before the matching velocity update.
    x = x + tau * x_dot
    x_dot = x_dot + tau * xacc
    theta = theta + tau * theta_dot
    theta_dot = theta_dot + tau * thetaacc

    return np.array((x, x_dot, theta, theta_dot), dtype=np.float32)

In [26]:
# Per-factor size: entry `f` is passed as `n` to get_factor_limits for factor index `f`
# (see the comprehension at the end of this cell).
factors_size = (1, 10, 3, 10)

def get_factor_limits(n, f, step_fn=None):
    """Build a symmetric, ascending list of limits for state factor ``f``.

    Starting from the all-zero state, the step function is applied repeatedly
    and the magnitude of component ``f`` (rounded to 5 decimals) is recorded
    after each step. The recorded magnitudes, together with any terminal bound
    seeded for that factor, are mirrored around zero.

    The result is always sorted ascending. Previously a seeded terminal bound
    (factor 2 -> 0.21) stayed at the head of the list while smaller simulated
    values were appended after it, producing a non-monotonic space such as
    ``[-0.00585, 0.21, -0.21, 0.00585]``.

    Parameters
    ----------
    n : int
        Size requested for the factor. Even ``n`` takes ``(n - 1) // 2`` steps
        and inserts a central ``0``; odd ``n`` takes ``(n + 1) // 2`` steps and
        inserts no ``0``. For ``n <= 1`` simulated values are not recorded.
    f : int
        Index of the state component to read (0..3).
    step_fn : callable, optional
        ``state -> next_state`` transition. Defaults to ``apply_step`` from
        this notebook, looked up when the function is called.

    Returns
    -------
    list
        Ascending limits ``[-m_k, ..., -m_1, (0,) m_1, ..., m_k]``. Values that
        round to 0 and repeated magnitudes are skipped, so the list can be
        shorter than the number of steps suggests.
    """
    if step_fn is None:
        step_fn = apply_step

    # Terminal bounds that must be part of a factor's space (index 2 -> 0.21).
    end_conditions = {2: [0.21]}
    magnitudes = list(end_conditions.get(f) or [])

    steps = ((n - 1) if n % 2 == 0 else (n + 1)) // 2
    state = [0, 0, 0, 0]
    for _ in range(steps):
        state = step_fn(state)
        magnitude = abs(round(float(state[f]), 5))
        if n > 1 and magnitude != 0 and magnitude not in magnitudes:
            magnitudes.append(magnitude)

    # Sort before mirroring so the space is monotonic regardless of the sign
    # or ordering of the simulated values and of the seeded bound.
    magnitudes.sort()
    centre = [0] if n % 2 == 0 else []
    return [-m for m in reversed(magnitudes)] + centre + magnitudes

# One limits list per factor: `f` is the factor index and `n` its entry in factors_size.
[get_factor_limits(n,f) for f,n in enumerate(factors_size)]  

[[],
 [-0.7808, -0.58545, -0.39024, -0.19512, 0, 0.19512, 0.39024, 0.58545, 0.7808],
 [-0.00585, 0.00585],
 [-1.17804,
  -0.87989,
  -0.58537,
  -0.29268,
  0,
  0.29268,
  0.58537,
  0.87989,
  1.17804]]

In [19]:
import numpy as np
from rlenvs import DiscreteCartPole

# Stand-alone DiscreteCartPole built with default arguments, used only for its
# `discretize` helper (separate from the `env` created via gym.make).
obj = DiscreteCartPole()

# `discretize` returns a pair; only the second element is used here
# (presumably the discretised values — TODO confirm against rlenvs).
_, values = obj.discretize(observation)
print(observation)
print(np.array(values, dtype=np.float32))
# Element-wise sqrt of the squared difference between the observation and `values`.
print(((observation-values)**2)**.5)
# env.state = np.array(values, dtype=np.float64)
# env.unwrapped.state = np.array(values, dtype=np.float64)


[-0.          0.07407407 -0.          0.05555556]
[-0.          0.16666667  0.105       0.15713485]
[0.         0.09259259 0.105      0.10157928]


In [35]:
# Take one env step with action 1, then repeat the discretise-and-compare check on the
# new observation. Re-running this cell advances the env again (not idempotent).
observation,reward, terminated, truncated, info = env.step(1)
_, values = obj.discretize(observation)
print(observation, terminated)
print(np.array(values, dtype=np.float32))
# Element-wise sqrt of the squared difference between the observation and `values`.
print(((observation-values)**2)**.5)
# env.state = np.array(values, dtype=np.float64)
# env.state = env.unwrapped.state = np.array(values, dtype=np.float64)

[-0.     1.5    0.105 -1.5  ] False
[-0.     1.5    0.105 -1.5  ]
[0.00000000e+00 2.22044605e-16 3.27825546e-09 2.22044605e-16]


In [36]:
# Compare the env transition with the reference integrator `my_step`
# (defined in a later cell of this file, execution count In [5]).
print(env.unwrapped.state)
print(env.step(1)[0])
# NOTE(review): my_step below reads env.unwrapped.state after env.step(1) has run,
# i.e. it is applied to the post-step state — confirm this is intended.
print(my_step(env.unwrapped.state))
# Pass the my_step prediction through obj.discretize and show the second returned element.
print(np.array(obj.discretize(my_step(env.unwrapped.state))[1], dtype=np.float32))

[-0.     1.5    0.105 -1.5  ]
[-0.     1.5    0.105 -1.5  ]
[ 0.03       1.6937015  0.075     -1.7581388]
[-0.     1.5    0.105 -1.5  ]


In [37]:
# Display all factor spaces of the stand-alone DiscreteCartPole instance `obj`.
obj.factor_spaces

[array([-0.]),
 array([-1.5       , -1.18518519, -0.90740741, -0.66666667, -0.46296296,
        -0.2962963 , -0.16666667, -0.07407407, -0.01851852, -0.        ,
         0.01851852,  0.07407407,  0.16666667,  0.2962963 ,  0.46296296,
         0.66666667,  0.90740741,  1.18518519,  1.5       ]),
 array([-0.21 , -0.105, -0.   ,  0.105,  0.21 ]),
 array([-1.5       , -1.25707872, -1.02890329, -0.81649658, -0.62112999,
        -0.44444444, -0.28867513, -0.15713484, -0.05555556, -0.        ,
         0.05555556,  0.15713484,  0.28867513,  0.44444444,  0.62112999,
         0.81649658,  1.02890329,  1.25707872,  1.5       ])]

In [55]:
# Slice check: `[:0:-1]` walks the list backwards and stops before index 0,
# i.e. the reversed list without its first element (used by `teste` below).
[1,2,3,4,5][:0:-1]

[5, 4, 3, 2]

In [56]:
def teste(n):
    values = [[0, 0, 0, 0]]
    for i in range((n+1)//2):
        values.append(values[-1])
    return list(zip(*(values+values[:0:-1])))

teste(10)

[(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)]

In [44]:
# Reference step from the all-zero state with action 1 (my_step is defined in a later
# cell of this file, execution count In [5]).
my_step([0, 0, 0, 0], 1)

array([ 0.        ,  0.19512194,  0.        , -0.29268292], dtype=float32)

In [5]:
def my_step(state, action=1):
    """Advance a cart-pole state by one explicit-Euler step.

    Same computation as ``apply_step`` elsewhere in this notebook.

    Parameters
    ----------
    state : sequence of 4 numbers
        ``(x, x_dot, theta, theta_dot)``.
    action : int, default 1
        ``1`` pushes with ``+force_mag``; any other value pushes with
        ``-force_mag``.

    Returns
    -------
    numpy.ndarray
        The next ``(x, x_dot, theta, theta_dot)`` as a float32 array of shape (4,).
    """
    # Fixed physical constants.
    gravity, masscart, masspole = 9.8, 1.0, 0.1
    length = 0.5  # half of the pole's length
    force_mag = 10.0
    tau = 0.02  # seconds between state updates
    total_mass = masspole + masscart
    polemass_length = masspole * length

    x, x_dot, theta, theta_dot = state

    force = -force_mag
    if action == 1:
        force = force_mag

    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    # Term shared by both accelerations.
    temp = (force + polemass_length * np.square(theta_dot) * sin_t) / total_mass
    inertia = length * (4.0 / 3.0 - masspole * np.square(cos_t) / total_mass)
    thetaacc = (gravity * sin_t - cos_t * temp) / inertia
    xacc = temp - polemass_length * thetaacc * cos_t / total_mass

    # All four updates use the pre-step values (explicit Euler).
    next_state = (
        x + tau * x_dot,
        x_dot + tau * xacc,
        theta + tau * theta_dot,
        theta_dot + tau * thetaacc,
    )
    return np.array(next_state, dtype=np.float32)