In [6]:
import numpy as np
import gymnasium as gym

from gymnasium import spaces
# vista.py

In [8]:
class Environment(gym.Env):
    """A minimal Gymnasium environment skeleton with purely random dynamics.

    * Actions: two discrete actions (the example dynamics ignore the action).
    * Observations: 4 float32 components, each inside ``[0, 1]``.
    * Reward: a uniform random float in ``[0, 1)`` on every step.
    * Episode end: every step finishes the episode with probability 0.05.

    Return shapes are intentionally the legacy ones: ``reset`` returns only
    the observation and ``step`` returns ``(obs, reward, done, info)``.
    NOTE(review): current Gymnasium expects ``reset -> (obs, info)`` and
    ``step -> (obs, reward, terminated, truncated, info)``; migrate the return
    values together with every caller if wrappers or ``gym.make`` are needed.
    """

    def __init__(self):
        super().__init__()

        self.action_space = spaces.Discrete(2)  # Example: two discrete actions
        # Example: 4D continuous observation, every component in [0, 1]
        self.observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)

        # Created up front so the attribute exists before the first reset()/step().
        self.state = None

    def _sample_state(self):
        """Draw a random state matching ``observation_space`` in shape and dtype."""
        # Sampling float32 directly keeps the observation inside the declared
        # Box space (a float64 array fails ``observation_space.contains``).
        # ``self.np_random`` is the environment's own generator, so runs are
        # reproducible through ``reset(seed=...)``.
        return self.np_random.random(size=self.observation_space.shape, dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        """Reset the environment to a fresh random initial state.

        Args:
            seed: Optional seed for the environment's random generator. Pass it
                once (typically on the first reset) to make episodes reproducible.
            options: Accepted for Gymnasium signature compatibility; unused.

        Returns:
            The initial observation, a float32 array of shape ``(4,)``.
        """
        super().reset(seed=seed)  # (re)seeds self.np_random when seed is given
        self.state = self._sample_state()
        return self.state

    def step(self, action):
        """Execute one time step within the environment.

        Args:
            action: The chosen action. The example dynamics do not use it.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is the new
            float32 state, ``reward`` is a Python float in ``[0, 1)``,
            ``done`` is a Python bool and ``info`` is an empty dict.
        """
        # Example dynamics: random next state
        self.state = self._sample_state()

        # Example reward: random reward (plain float rather than a NumPy scalar)
        reward = float(self.np_random.random())

        # Example done condition: ends the episode with probability 0.05
        done = bool(self.np_random.random() > 0.95)

        info = {}

        return self.state, reward, done, info

    def render(self, mode='human'):
        """Render the environment to the screen (not implemented; does nothing)."""
        pass  # Implement rendering logic if needed

In [16]:
# Roll out one episode of at most `n` steps with uniformly random actions.
n = 100
env = Environment()

# reset() may return either a bare observation or an (obs, info) tuple,
# depending on which Gym/Gymnasium convention the environment follows.
reset_res = env.reset()
obs = reset_res[0] if isinstance(reset_res, tuple) else reset_res

for step in range(n):
    action = env.action_space.sample()
    res = env.step(action)

    if len(res) == 4: # (obs, reward, done, info)
        obs, reward, done, info = res
    elif len(res) == 5: # (obs, reward, terminated, truncated, info)
        obs, reward, terminated, truncated, info = res
        done = terminated or truncated
    else:
        # Fail loudly instead of hitting a NameError on the first step or
        # silently reusing the previous step's reward/done afterwards.
        raise ValueError(
            f"env.step() returned {len(res)} values; expected 4 or 5"
        )

    print(f"Step {step+1}/{n} - action: {action}, reward: {reward}, done: {done}")

    if done:
        break

print("Episode finished.")


Step 1/100 - action: 1, reward: 0.1157266596477573, done: False
Step 2/100 - action: 1, reward: 0.8337871450437919, done: False
Step 3/100 - action: 0, reward: 0.6717579956785039, done: False
Step 4/100 - action: 1, reward: 0.1080079008562762, done: False
Step 5/100 - action: 0, reward: 0.6116591365692671, done: False
Step 6/100 - action: 0, reward: 0.46690088190427814, done: False
Step 7/100 - action: 1, reward: 0.03127470941589083, done: False
Step 8/100 - action: 0, reward: 0.3772849503742134, done: False
Step 9/100 - action: 1, reward: 0.010284517665628767, done: False
Step 10/100 - action: 1, reward: 0.9661770293049483, done: False
Step 11/100 - action: 0, reward: 0.162241783959853, done: False
Step 12/100 - action: 0, reward: 0.8548147190421468, done: False
Step 13/100 - action: 1, reward: 0.21516680670454302, done: False
Step 14/100 - action: 0, reward: 0.5074188954038346, done: False
Step 15/100 - action: 0, reward: 0.22461587068700317, done: False
Step 16/100 - action: 1, rewa