# Designing a Robotic Arm Reach Task (Custom Gym Env)

In this tutorial, you will:
- Design a minimal custom environment API for a 2D arm reaching a target
- Define the observation/action spaces and a shaped reward
- Plug the environment into Stable-Baselines3 (SB3) PPO for baseline training


In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class ArmReachEnv(gym.Env):
    """Two-link planar arm that must bring its end effector to a target point.

    Observation (float32, shape ``(6,)``):
        ``[theta1, theta2, dtheta1, dtheta2, target_x, target_y]``.
    Action (float32, shape ``(2,)``):
        One command per joint in ``[-max_torque, max_torque]``; it is
        integrated directly as an angular acceleration (no inertia model).
    Reward:
        ``-distance - 0.01 * sum(action ** 2)`` (distance shaping plus a
        small effort penalty).
    Termination:
        The end effector is closer than 0.05 to the target.
    Truncation:
        Only when ``max_episode_steps`` is given and that many steps elapsed.
    """

    metadata = {"render_modes": []}

    # Lengths of the two links; their sum is the maximum reach of the arm.
    link_lengths = (0.5, 0.5)

    def __init__(self, max_episode_steps=None):
        """Create the environment.

        Args:
            max_episode_steps: Optional step budget after which ``step``
                reports ``truncated=True``. ``None`` (default) disables
                truncation, leaving time limits to an external wrapper.
        """
        super().__init__()
        self.max_torque = 1.0
        self.max_speed = 5.0
        self.dt = 0.05
        self.max_episode_steps = max_episode_steps
        # state: [theta1, theta2, dtheta1, dtheta2, target_x, target_y]
        high = np.array(
            [np.pi, np.pi, self.max_speed, self.max_speed, 1.0, 1.0],
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque,
            high=self.max_torque,
            shape=(2,),
            dtype=np.float32,
        )
        # Initialise the state attributes so the instance is usable right away.
        self.reset()

    def _get_obs(self):
        """Pack the current state into a float32 observation vector."""
        return np.array(
            [*self.theta, *self.dtheta, *self.target], dtype=np.float32
        )

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed for the environment's random generator.
            options: Unused; accepted for API compatibility.
        """
        super().reset(seed=seed)
        # Draw from self.np_random (seeded by super().reset) rather than the
        # global np.random, otherwise `seed` has no effect on the episode.
        rng = self.np_random
        self.theta = rng.uniform(-0.1, 0.1, size=2)
        self.dtheta = np.zeros(2)
        # Corners of the sampling square lie beyond the arm's reach; redraw
        # those so every episode has a target that can actually be touched.
        reach = sum(self.link_lengths)
        target = rng.uniform(-0.8, 0.8, size=2)
        while np.linalg.norm(target) > reach:
            target = rng.uniform(-0.8, 0.8, size=2)
        self.target = target
        self._elapsed_steps = 0
        return self._get_obs(), {}

    def step(self, action):
        """Advance the simulation by one time step of length ``dt``.

        Args:
            action: Array-like of two joint commands; values outside
                ``[-max_torque, max_torque]`` are clipped.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        action = np.clip(
            np.asarray(action, dtype=np.float64),
            -self.max_torque,
            self.max_torque,
        )
        # Saturate the joint speed so it stays inside observation_space.
        self.dtheta = np.clip(
            self.dtheta + action * self.dt, -self.max_speed, self.max_speed
        )
        # Wrap the angles to [-pi, pi); forward kinematics is unaffected by
        # the wrap and the observation stays inside its declared bounds.
        theta = self.theta + self.dtheta * self.dt
        self.theta = (theta + np.pi) % (2.0 * np.pi) - np.pi
        self._elapsed_steps += 1

        end_eff = self.forward_kinematics(self.theta)
        dist = float(np.linalg.norm(end_eff - self.target))
        reward = -dist - 0.01 * float(np.sum(action ** 2))
        terminated = dist < 0.05
        truncated = (
            self.max_episode_steps is not None
            and self._elapsed_steps >= self.max_episode_steps
        )
        return self._get_obs(), reward, terminated, truncated, {}

    @staticmethod
    def forward_kinematics(theta):
        """Return the end-effector ``[x, y]`` position for joint angles ``theta``.

        ``theta[0]`` is the first joint's angle and ``theta[1]`` the second
        joint's angle relative to the first link.
        """
        l1, l2 = ArmReachEnv.link_lengths
        x = l1 * np.cos(theta[0]) + l2 * np.cos(theta[0] + theta[1])
        y = l1 * np.sin(theta[0]) + l2 * np.sin(theta[0] + theta[1])
        return np.array([x, y])

# Smoke test: take a few random steps and check that every observation
# lies inside the declared observation space.
env = ArmReachEnv()
obs, _ = env.reset()
print("Obs space:", env.observation_space, "Act space:", env.action_space)
for _ in range(5):
    a = env.action_space.sample()
    obs, r, term, trunc, _ = env.step(a)
    if not env.observation_space.contains(obs):
        raise RuntimeError(f"Observation outside observation_space: {obs}")
    # Stepping a finished episode is undefined in the Gym API; start a new one.
    if term or trunc:
        obs, _ = env.reset()
print("Sanity checked.")
