In [48]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class DroneChaseEnv(gym.Env):
    metadata = {'render_modes': ['human']}
    
    def __init__(self, render_mode=None):
        super(DroneChaseEnv, self).__init__()
        
        # Canvas size
        self.width = 600
        self.height = 400
        
        # Score
        self.score = 0  # Tracks how many times the drone hits the balloon

        # Drone parameters
        self.drone_pos = np.array([self.width / 2, self.height / 2], dtype=np.float32)
        self.drone_vel = np.array([0.0, 0.0], dtype=np.float32)
        self.max_speed = 5.0

        # Balloon parameters
        self.balloon_pos = np.array([np.random.uniform(0, self.width), 
                                     np.random.uniform(0, self.height)], dtype=np.float32)
        self.balloon_vel = np.array([np.random.uniform(-1, 1), 
                                     np.random.uniform(-1, 1)], dtype=np.float32)

        # Action space: Continuous thrust adjustments for left and right propellers
        # Thrust values between -1 and 1
        self.action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        # Observation space: Positions and velocities
        # Drone position (x, y), Drone velocity (vx, vy), Balloon position (x, y)
        low_obs = np.array([0, 0, -self.max_speed, -self.max_speed, 0, 0], dtype=np.float32)
        high_obs = np.array([self.width, self.height, self.max_speed, self.max_speed, self.width, self.height], dtype=np.float32)
        self.observation_space = spaces.Box(low=low_obs, high=high_obs, dtype=np.float32)

        # Rendering
        self.render_mode = render_mode
        if self.render_mode == 'human':
            import pygame
            pygame.init()
            self.screen = pygame.display.set_mode((self.width, self.height))
            self.clock = pygame.time.Clock()

        # Episode parameters
        self.max_steps = 200
        self.current_step = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        # Reset drone position and velocity
        self.drone_pos = np.array([self.width / 2, self.height / 2], dtype=np.float32)
        self.drone_vel = np.array([0.0, 0.0], dtype=np.float32)

        # Reset balloon position and velocity
        self.balloon_pos = np.array([np.random.uniform(0, self.width), 
                                     np.random.uniform(0, self.height)], dtype=np.float32)
        self.balloon_vel = np.array([np.random.uniform(-1, 1), 
                                     np.random.uniform(-1, 1)], dtype=np.float32)

        self.current_step = 0

        observation = self._get_obs()
        info = {}

        return observation, info

    def step(self, action):
        """
        Executes one time step in the environment.

        Args:
            action (array): Array of two thrust values [left_thrust, right_thrust], 
                            where each value is between -1 and 1.

        Returns:
            observation (array): Updated state containing [drone_position, drone_velocity, balloon_position].
            reward (float): The reward for the current step.
            done (bool): Whether the episode has ended (always False for continuous simulation).
            truncated (bool): Whether the episode was truncated (always False here).
            info (dict): Additional debugging information (empty here).
        """

        # 1. Calculate the drone's thrust based on the action
        # The left and right thrust values determine the horizontal and vertical motion of the drone.
        left_thrust, right_thrust = action
        thrust = np.array([right_thrust - left_thrust,  # Horizontal movement (right thrust - left thrust)
                        (left_thrust + right_thrust) / 2],  # Vertical movement (average thrust)
                        dtype=np.float32)

        # 2. Update the drone's velocity based on the thrust
        self.drone_vel += thrust

        # 3. Enforce the maximum speed limit
        # If the drone's speed exceeds the allowed max_speed, scale it down to max_speed.
        speed = np.linalg.norm(self.drone_vel)  # Calculate the magnitude of the velocity vector
        if speed > self.max_speed:
            self.drone_vel = (self.drone_vel / speed) * self.max_speed  # Scale the velocity vector

        # 4. Update the drone's position based on the velocity
        self.drone_pos += self.drone_vel

        # 5. Ensure the drone stays within the canvas bounds
        # If the position goes outside the defined environment, clip it to the boundaries.
        self.drone_pos = np.clip(self.drone_pos, [0, 0], [self.width, self.height])

        # 6. Calculate the distance between the drone and the balloon
        # Use the Euclidean distance formula to determine how far the drone is from the balloon.
        distance = np.linalg.norm(self.drone_pos - self.balloon_pos)

        # 7. Check if the drone catches the balloon (within the catch radius)
        catch_radius = 10.0  # Defines the radius within which the drone "catches" the balloon
        if distance < catch_radius:
            # If the drone catches the balloon:
            # - Increment the score
            self.score += 1
            # - Respawn the balloon at a random location
            self.balloon_pos = np.random.uniform([0, 0], [self.width, self.height])
            # - Recalculate the distance to the new balloon position
            distance = np.linalg.norm(self.drone_pos - self.balloon_pos)
            # - Give a large reward for catching the balloon
            reward = 10.0
        else:
            # If the balloon is not caught, penalize the drone based on the distance
            # Closer to the balloon = less penalty; farther away = more penalty
            reward = -distance * 0.01

        # 8. Randomly move the balloon
        # The balloon moves in small, random steps within the environment bounds.
        self.balloon_pos += np.random.uniform(-2, 2, size=(2,))
        self.balloon_pos = np.clip(self.balloon_pos, [0, 0], [self.width, self.height])  # Keep balloon in bounds

        # 9. Penalize the drone for each time step to encourage efficiency
        reward -= 0.1

        # 10. Increment the step count
        # Track how many steps have been taken in the episode.
        self.current_step += 1

        # 11. Create the observation array
        # The observation includes the drone's position, velocity, and the balloon's position.
        observation = self._get_obs()

        # 12. Return the updated state, reward, and additional info
        # 'done' and 'truncated' are False here because this is a continuous simulation.
        info = {}
        if self.render_mode == 'human':
            self.render()
        return observation, reward, False, False, info

    def render(self):
        if self.render_mode != 'human':
            return

        import pygame
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
        self.screen.fill((255, 255, 255))  # White background

        # Draw balloon
        pygame.draw.circle(self.screen, (255, 0, 0), self.balloon_pos.astype(int), 10)  # Red circle

        # Draw drone
        pygame.draw.rect(self.screen, (0, 0, 255), (*self.drone_pos - 10, 20, 20))  # Blue square

        # Add labels
        font = pygame.font.Font(None, 24)  # Default font, size 24
        balloon_label = font.render("Balloon", True, (0, 0, 0))  # Black text for balloon
        drone_label = font.render("Drone", True, (0, 0, 0))  # Black text for drone

        # Position labels near objects
        self.screen.blit(balloon_label, (self.balloon_pos[0] - 20, self.balloon_pos[1] - 20))
        self.screen.blit(drone_label, (self.drone_pos[0] - 20, self.drone_pos[1] - 20))
        
        # Display timer
        time_remaining = max(0, 30 - self.current_step / 30)  # 30 FPS assumed
        timer_label = font.render(f"Time Remaining: {time_remaining:.1f}s", True, (0, 0, 0))
        self.screen.blit(timer_label, (10, 10))
        
        # Display the score
        score_label = font.render(f"Score: {self.score}", True, (0, 0, 0))
        self.screen.blit(score_label, (10, 40))  # Display below the timer
        
        pygame.display.flip()
        self.clock.tick(30)

    def close(self):
        if self.render_mode == 'human':
            import pygame
            pygame.quit()

    def _get_obs(self):
        return np.concatenate((self.drone_pos, self.drone_vel, self.balloon_pos)).astype(np.float32)

### Train with Vectorize with periodic model saving

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
import matplotlib.pyplot as plt
import os

# Paths for saving models and logs
models_dir = "models/PPO-ball-drone"
logdir = "/home/jlukas/Desktop/My_Project/AI_Stable_GYM/logs"

# Create directories if they don't exist
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# Create and wrap the environment
# Wrap with VecMonitor for logging rewards and episode lengths
env = make_vec_env(lambda: DroneChaseEnv(), n_envs=4)
env = VecMonitor(env, filename=None)  # Enables logging for TensorBoard

# Define the PPO model with TensorBoard logging
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logdir)

# Train the agent with periodic model saving
TIMESTEPS = 10000
num_iterations = 10

for i in range(num_iterations):
    print(f"Starting iteration {i + 1}/{num_iterations}")
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO-ball-drone")
    model.save(f"{models_dir}/ppo_drone_chase_{TIMESTEPS * (i + 1)}")

# Close the environment
env.close()

### Train without Vectorize and with periodic model saving

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
import matplotlib.pyplot as plt
import os

# Paths for saving models and logs
models_dir = "models/PPO-ball-drone"
logdir = "/home/jlukas/Desktop/My_Project/AI_Stable_GYM/logs"

# Create directories if they don't exist
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

env = DroneChaseEnv()
env.reset()

model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=logdir)

TIMESTEPS = 10000
iters = 0
while True:
	iters += 1
	model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=f"PPO")
	model.save(f"{models_dir}/{TIMESTEPS*iters}")


### Train model directly without periodic model saving

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
import matplotlib.pyplot as plt
import os

# Paths for saving models and logs
models_dir = "models/PPO-ball-drone"
logdir = "/home/jlukas/Desktop/My_Project/AI_Stable_GYM/logs"

# Create directories if they don't exist
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

env = DroneChaseEnv()
env.reset()

model = PPO('MlpPolicy', env, verbose=1)

# Train the model
model.learn(total_timesteps=800000)

# Save the model
model.save("ppo_drone_chase")

### Train model with vectorize without periodic model saving

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
import matplotlib.pyplot as plt
import os

# Paths for saving models and logs
models_dir = "models/PPO-ball-drone_v1"
logdir = "/home/jlukas/Desktop/My_Project/AI_Stable_GYM/logs"

# Create directories if they don't exist
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# Create and wrap the environment
# Wrap with VecMonitor for logging rewards and episode lengths
env = make_vec_env(lambda: DroneChaseEnv(), n_envs=4)
env = VecMonitor(env, filename=None)  # Enables logging for TensorBoard

# Define the PPO model with TensorBoard logging
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logdir)

# Train the model
model.learn(total_timesteps=1000000)

# Save the model
model.save("ppo_drone_chase")

Using cuda device
Logging to /home/jlukas/Desktop/My_Project/AI_Stable_GYM/logs/PPO_1




-----------------------------
| time/              |      |
|    fps             | 2068 |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 8192 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1004         |
|    iterations           | 2            |
|    time_elapsed         | 16           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0051609054 |
|    clip_fraction        | 0.0461       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | 0.00729      |
|    learning_rate        | 0.0003       |
|    loss                 | 342          |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00349     |
|    std                  | 1            |
|    value_loss           | 1.46e+03     |
----------------

### To test and evaluate the model


In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import os

models_dir = "models/PPO-ball-drone_v1"
model_path = f"{models_dir}/ppo_drone_chase"

# Create the environment for evaluation
env = DroneChaseEnv(render_mode='human')

# Load the trained model
model = PPO.load(model_path)

# Run the trained model in the environment
observation, info = env.reset()
episode_over = False

while not episode_over:
    # Use the trained model to predict actions
    action, _ = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = env.step(action)
    episode_over = terminated or truncated

env.close()