In [None]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

In [7]:
# Build the MountainCar-v0 environment.
env = gym.make('MountainCar-v0')

# Begin a fresh episode. reset() may hand back either the bare observation
# or a tuple whose first element is the observation (this differs between
# gym versions); keep only the observation in both cases.
obs = env.reset()
obs = obs[0] if isinstance(obs, tuple) else obs

print("Game created!")
print(f"Starting position and velocity: {obs}")

Game created!
Starting position and velocity: [-0.49955538  0.        ]


In [None]:
def _bin_index(value, lower, upper, n_bins):
    """Map ``value`` from the range [lower, upper] to a bin index in [0, n_bins - 1]."""
    # Shift so the minimum is 0, divide by the range to get 0..1, then
    # stretch to 0..n_bins.
    scaled = (value - lower) / (upper - lower) * n_bins
    # int() truncates toward zero. Clamp on BOTH sides: a value at the upper
    # bound would otherwise yield n_bins, and a value well below the lower
    # bound would yield a negative index (which Python/NumPy indexing would
    # interpret as counting from the end rather than raising).
    return max(0, min(int(scaled), n_bins - 1))


def observation_to_state(env, obs, states=(40, 40)):
    """
    Convert a continuous (position, velocity) observation to discrete bins.

    Parameters
    ----------
    env : object exposing ``observation_space.low`` and
        ``observation_space.high``, each indexable per dimension
        (for MountainCar these are about [-1.2, -0.07] and [0.6, 0.07]).
    obs : indexable with obs[0] = position (like -0.445) and
        obs[1] = velocity (like 0.02).
    states : pair (n_position_bins, n_velocity_bins); defaults to (40, 40).

    Returns
    -------
    (s0, s1) : tuple of int
        s0 = position bin in 0 .. states[0] - 1
        s1 = velocity bin in 0 .. states[1] - 1
        Observations at or outside the bounds are clamped into the first or
        last bin.
    """
    # Minimum and maximum possible value for each observation dimension.
    low = env.observation_space.low
    high = env.observation_space.high

    s0 = _bin_index(obs[0], low[0], high[0], states[0])
    s1 = _bin_index(obs[1], low[1], high[1], states[1])

    return s0, s1