In [None]:
import torch

from stable_baselines3 import PPO
import gymnasium as gym
from continuous_env.robot_obstacles import RobotObstacles
from continuous_env.discrete_obstacles import DiscreteRobotObstacles

# Report whether PyTorch can see a CUDA device.
cuda_available = torch.cuda.is_available()
print(cuda_available)

# Register the discrete obstacle environment under a gymnasium id, with
# max_episode_steps=1000 attached to the registration.
gym.register(
    "DiscreteRobotObstacleEnv-v1",
    entry_point=DiscreteRobotObstacles,
    max_episode_steps=1000,
)

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, VecFrameStack
from stable_baselines3.common.callbacks import CheckpointCallback

# Total number of timesteps passed to model.learn() as `total_timesteps`.
TIMESTEPS = 2_000_000

def make_env():
    """Build a zero-argument factory for the vectorized-env constructors.

    Returns:
        A callable that creates and returns a new ``DiscreteRobotObstacles``
        instance each time it is called.
    """
    def _factory():
        return DiscreteRobotObstacles()

    return _factory

# Where TensorBoard event files are written.
log_path = "./logs"

# Number of environment copies, each run in its own subprocess.
num_envs = 4
env_fns = []
for _ in range(num_envs):
    env_fns.append(make_env())

# Vectorize the environments, then stack the 4 most recent observations.
env = SubprocVecEnv(env_fns)
env = VecFrameStack(env, n_stack=4)

# Train on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = PPO(
    "CnnPolicy",
    env,
    device=device,
    tensorboard_log=log_path,
    verbose=1,
)

# NOTE(review): per the SB3 callback docs, save_freq is counted in vectorized
# env.step() calls, so with num_envs envs a checkpoint is presumably written
# every save_freq * num_envs timesteps — confirm this is the intended cadence.
checkpoint_callback = CheckpointCallback(
    save_freq=10_000,
    save_path="./checkpoints/",
    name_prefix="ppo_model",
)

In [None]:
# Train for TIMESTEPS timesteps, with the checkpoint callback attached.
model.learn(
    callback=checkpoint_callback,
    total_timesteps=TIMESTEPS,
)

In [None]:
# Persist the current model under weights/ppo_model.
model.save("weights/ppo_model")

In [None]:
# Load the checkpoint to evaluate. `load` is a classmethod that builds and
# returns a NEW model rather than updating the instance it is called on, so
# the result must be bound to `model` for the later cells to use it.
model = PPO.load("checkpoints/ppo_model_360000_steps")

In [None]:
# Deterministic evaluation of `model` on a single frame-stacked environment.
max_eval_steps = 10000     # hard cap on environment steps
max_eval_episodes = 1000   # stop early once this many episodes have finished

eval_env = VecFrameStack(SubprocVecEnv([make_env()]), n_stack=4)

episode_rewards = []
episode_reward = 0
episodes_completed = 0

try:
    obs = eval_env.reset()

    for _ in range(max_eval_steps):
        action, _states = model.predict(obs, deterministic=True)
        # A vectorized env's step() returns (obs, rewards, dones, infos).
        # The fourth item is a list of info dicts, NOT a truncation flag, so
        # only `dones` may be used to detect the end of an episode.
        obs, rewards, dones, infos = eval_env.step(action)

        episode_reward += rewards[0]

        if dones[0]:
            episodes_completed += 1
            episode_rewards.append(episode_reward)
            episode_reward = 0

        if episodes_completed >= max_eval_episodes:
            break
finally:
    # Always shut down the worker subprocess, even if evaluation fails.
    eval_env.close()

print(episode_rewards)
if episode_rewards:
    avg_reward = sum(episode_rewards) / len(episode_rewards)
    print(f"Average reward over {len(episode_rewards)} episodes: {avg_reward}")
else:
    # Avoid a ZeroDivisionError when no episode finished within the step cap.
    print(f"No episode finished within {max_eval_steps} steps; average reward is undefined.")

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, VecFrameStack, DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
import time


# Labels for the discrete action indices; used only for the log line below.
# Built once here instead of on every step.
action_map = {
    0: "steer left",
    1: "steer right",
    2: "accelerate",
    3: "brake",
    4: "go back",
    5: "do nothing"
}

# Keep the raw env under its own name so the factory lambda does not capture
# a variable that is rebound to the wrapped env on the next line.
render_env = DiscreteRobotObstacles(render_mode="human")
eval_env = VecFrameStack(DummyVecEnv([lambda: render_env]), n_stack=4)

try:
    obs = eval_env.reset()
    done = False

    # Replays episodes indefinitely; interrupt the kernel to stop.
    while True:
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            print(f"Action taken: {action_map[int(action[0])]}")
            obs, reward, done, info = eval_env.step(action)
            eval_env.render()

            time.sleep(0.00001)

        obs = eval_env.reset()
        done = False
finally:
    # Runs on KeyboardInterrupt or any error so the environment is closed
    # instead of being left open after the cell stops.
    eval_env.close()
