In [None]:
import gym
from gym import spaces
import numpy as np

class LineFollowingEnv(gym.Env):
    def __init__(self, C1=1.0, C2=1.0):
        super(LineFollowingEnv, self).__init__()

        # Define action and observation space
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)  # angular velocity
        self.observation_space = spaces.Box(low=np.array([-1.0, -180.0]), high=np.array([1.0, 180.0]), dtype=np.float32)  # [d, theta]
        
        self.C1 = C1
        self.C2 = C2
        self.dt = 0.05
        self.forward_speed = 0.3
        self.max_steps = 1000
        self.current_step = 0

    def reset(self):
        # Randomly initialize distance and angle
        self.d = np.random.uniform(-1, 1)      # Normalized distance
        self.theta = np.random.uniform(-180, 180)  # Angle in degrees
        
        self.current_step = 0
        return np.array([self.d, self.theta], dtype=np.float32)

    def step(self, action):
        self.current_step += 1
        
        # Apply the action (update theta based on the steering angle)
        angular_velocity = action[0]
        delta_theta = angular_velocity * self.dt
        self.theta += np.degrees(delta_theta)
        
        # Simulate the effect on the line position
        self.d += np.sin(np.radians(self.theta)) * 0.05  # Change 0.05 to tune motion sensitivity

        # Calculate reward
        reward = -1 * (self.C1 * (self.d ** 2) + self.C2 * (self.theta ** 2))

        # Check if the episode is done
        done = bool(
            abs(self.d) < 0.05 or self.current_step >= self.max_steps  # Out of bounds or max steps reached
        )

        # Return step information
        obs = np.array([self.d, self.theta], dtype=np.float32)
        return obs, reward, done, {}

    def render(self, mode='console'):
        print(f"Step: {self.current_step}, d: {self.d}, theta: {self.theta}")



In [18]:
from stable_baselines3 import PPO

# Create environment
env = LineFollowingEnv(C1=1.0, C2=0.01)

# Initialize the model
model = PPO("MlpPolicy", env, verbose=1)

# Train the model
model.learn(total_timesteps=100000)

# Save the model
model.save("ppo_line_follower")


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 673      |
|    ep_rew_mean     | -3.6e+05 |
| time/              |          |
|    fps             | 1511     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 357           |
|    ep_rew_mean          | -1.48e+05     |
| time/                   |               |
|    fps                  | 866           |
|    iterations           | 2             |
|    time_elapsed         | 4             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 5.8278645e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2       

In [20]:
# Load the trained model
model = PPO.load("ppo_line_follower")

# Test the model
env = LineFollowingEnv(C1=1.0, C2=0.01)
obs = env.reset()
done = False

while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    print (action)

Step: 1, d: -0.6098095178604126, theta: 58.52007293701172
[0.08950257]
Step: 2, d: -0.5659797191619873, theta: 61.23443603515625
[0.54287237]
Step: 3, d: -0.5231448411941528, theta: 58.947845458984375
[-0.4573177]
Step: 4, d: -0.48086291551589966, theta: 57.74041748046875
[-0.24148586]
Step: 5, d: -0.4390176832675934, theta: 56.814666748046875
[-0.18515024]
Step: 6, d: -0.39971691370010376, theta: 51.814666748046875
[-1.]
Step: 7, d: -0.3590337336063385, theta: 54.455413818359375
[0.5281492]
Step: 8, d: -0.32103872299194336, theta: 49.455413818359375
[-1.]
Step: 9, d: -0.28602102398872375, theta: 44.455413818359375
[-1.]
Step: 10, d: -0.25424712896347046, theta: 39.455413818359375
[-1.]
Step: 11, d: -0.22595888376235962, theta: 34.455413818359375
[-1.]
Step: 12, d: -0.19741246104240417, theta: 34.814998626708984
[0.0719167]
Step: 13, d: -0.1674596220254898, theta: 36.8023796081543
[0.39747655]
Step: 14, d: -0.13653889298439026, theta: 38.200443267822266
[0.27961272]
Step: 15, d: -0.109

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [19]:
model.action_space

Box(-1.0, 1.0, (1,), float32)