In [2]:
import gym
import numpy as np

class DroneEnv(gym.Env):
    """Grid-world drone navigation task using the legacy 4-tuple Gym API.

    The drone starts at (5, 5) and moves one cell per step. An episode ends
    when the drone lands on an obstacle (reward -10) or on the target
    (reward +10); every other step costs -1.

    The position is kept internally as an integer array (``self.state``).
    Observations handed to the caller are fresh float32 copies, so they match
    ``observation_space`` and are never mutated by later steps.
    """

    # Inclusive grid limits; they mirror the bounds of ``observation_space``.
    _GRID_MIN = 0
    _GRID_MAX = 10

    # Displacement (dx, dy) applied by each discrete action.
    _MOVES = {
        0: (0, 1),   # Up
        1: (0, -1),  # Down
        2: (-1, 0),  # Left
        3: (1, 0),   # Right
    }

    def __init__(self):
        super(DroneEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(4)  # 0: Up, 1: Down, 2: Left, 3: Right
        self.observation_space = gym.spaces.Box(
            low=self._GRID_MIN, high=self._GRID_MAX, shape=(2,), dtype=np.float32
        )
        self.state = np.array([5, 5])  # Starting position of the drone
        self.target = np.array([8, 8])  # Target position
        self.obstacles = [np.array([6, 6]), np.array([7, 7])]  # Sample obstacles

    def _observation(self):
        """Return the current position as a new float32 array."""
        # astype() copies, so callers never hold a reference to self.state.
        return self.state.astype(np.float32)

    def reset(self):
        """Put the drone back at the start cell and return the first observation."""
        self.state = np.array([5, 5])
        return self._observation()

    def step(self, action):
        """Apply one move and return ``(observation, reward, done, info)``.

        ``action`` is an integer in 0..3 (a NumPy integer or 0-d array is
        accepted). Unknown action ids leave the position unchanged, as in the
        original if/elif chain. Moves that would leave the grid are clipped
        to the boundary so the observation stays inside ``observation_space``.
        """
        dx, dy = self._MOVES.get(int(action), (0, 0))
        # Build a new array instead of mutating in place, then clamp to the grid.
        self.state = np.clip(
            self.state + np.array([dx, dy]), self._GRID_MIN, self._GRID_MAX
        )

        # Check for collisions
        if any(np.array_equal(self.state, obs) for obs in self.obstacles):
            reward = -10  # Penalty for hitting an obstacle
            done = True
        elif np.array_equal(self.state, self.target):
            reward = 10  # Reward for reaching the target
            done = True
        else:
            reward = -1  # Per-step cost
            done = False

        return self._observation(), reward, done, {}

    def render(self):
        """Print the drone position, the target and the obstacle list."""
        print(f"Drone Position: {self.state}, Target: {self.target}, Obstacles: {self.obstacles}")

# Usage: create one instance of the DroneEnv class defined in this cell
env = DroneEnv()


In [3]:
pip install 'shimmy>=0.2.1'


  and should_run_async(code)




In [1]:
pip install gym shimmy




In [6]:
pip install stable-baselines3




In [5]:
pip install numpy




In [4]:
!pip install stable-baselines3
from stable_baselines3 import PPO


env = DroneEnv()

model = PPO("MlpPolicy", env, verbose=1)


Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Using cached gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Using cached gymnasium-0.29.1-py3-none-any.whl (953 kB)
Installing collected packages: gymnasium
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 1.0.0a2
    Uninstalling gymnasium-1.0.0a2:
      Successfully uninstalled gymnasium-1.0.0a2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
shimmy 2.0.0 requires gymnasium>=1.0.0a1, but you have gymnasium 0.29.1 which is incompatible.[0m[31m
[0mSuccessfully installed gymnasium-0.29.1


  from jax import xla_computation as _xla_computation


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [7]:

# Train the agent; raise total_timesteps for a longer run.
model.learn(total_timesteps=10_000)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 76       |
|    ep_rew_mean     | -85      |
| time/              |          |
|    fps             | 978      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 76          |
|    ep_rew_mean          | -85         |
| time/                   |             |
|    fps                  | 612         |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012746413 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.0194      |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x7fc0c0b9f1f0>

In [9]:
class DroneEnv(gym.Env):
    """Grid-world drone navigation task with 8-directional movement.

    Uses the legacy 4-tuple Gym API. The drone starts at (5, 5) and moves one
    cell per step (diagonals included). An episode ends when the drone lands
    on an obstacle (reward -10) or on the target (reward +10); every other
    step costs -1.

    The position is kept internally as an integer array (``self.state``).
    Observations handed to the caller are fresh float32 copies, so they match
    ``observation_space`` and are never mutated by later steps.
    """

    # Inclusive grid limits; they mirror the bounds of ``observation_space``.
    _GRID_MIN = 0
    _GRID_MAX = 10

    # Displacement (dx, dy) applied by each discrete action.
    _MOVES = {
        0: (0, 1),    # Up
        1: (0, -1),   # Down
        2: (-1, 0),   # Left
        3: (1, 0),    # Right
        4: (1, 1),    # Up-Right
        5: (-1, 1),   # Up-Left
        6: (1, -1),   # Down-Right
        7: (-1, -1),  # Down-Left
    }

    def __init__(self):
        super(DroneEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(8)  # 0-7: Up, Down, Left, Right, Up-Right, Up-Left, Down-Right, Down-Left
        self.observation_space = gym.spaces.Box(
            low=self._GRID_MIN, high=self._GRID_MAX, shape=(2,), dtype=np.float32
        )
        self.state = np.array([5, 5])  # Starting position
        self.target = np.array([8, 8])  # Target position
        self.obstacles = [np.array([6, 6]), np.array([7, 7])]  # Obstacles

    def _observation(self):
        """Return the current position as a new float32 array."""
        # astype() copies, so callers never hold a reference to self.state.
        return self.state.astype(np.float32)

    def reset(self):
        """Put the drone back at the start cell and return the first observation."""
        self.state = np.array([5, 5])
        return self._observation()

    def step(self, action):
        """Apply one move and return ``(observation, reward, done, info)``.

        ``action`` is an integer in 0..7 (a NumPy integer or 0-d array is
        accepted). Unknown action ids leave the position unchanged. Moves
        that would leave the grid are clipped to the boundary so the
        observation stays inside ``observation_space``.
        """
        dx, dy = self._MOVES.get(int(action), (0, 0))
        # Build a new array instead of mutating in place, then clamp to the grid.
        self.state = np.clip(
            self.state + np.array([dx, dy]), self._GRID_MIN, self._GRID_MAX
        )

        if any(np.array_equal(self.state, obs) for obs in self.obstacles):
            reward = -10  # Penalty for hitting an obstacle
            done = True
        elif np.array_equal(self.state, self.target):
            reward = 10  # Reward for reaching the target
            done = True
        else:
            reward = -1  # Per-step cost
            done = False

        return self._observation(), reward, done, {}

    def render(self):
        """Print the drone position, the target and the obstacle list."""
        print(f"Drone Position: {self.state}, Target: {self.target}, Obstacles: {self.obstacles}")




In [11]:
import numpy as np
import random

class DroneEnv(gym.Env):
    """8-direction grid-world drone task with epsilon-random action noise.

    Uses the legacy 4-tuple Gym API. With probability ``epsilon`` the action
    passed to ``step`` is replaced by one sampled from ``action_space``
    before it is applied. Rewards: -10 for landing on an obstacle, +10 for
    reaching the target (both end the episode), -1 for any other step.

    NOTE(review): RL libraries such as PPO already explore on the agent
    side; randomising actions inside the environment makes the dynamics
    stochastic. Set ``epsilon=0`` to disable it.
    """

    # Inclusive grid limits; they mirror the bounds of ``observation_space``.
    _GRID_MIN = 0
    _GRID_MAX = 10

    # Displacement (dx, dy) applied by each discrete action.
    _MOVES = {
        0: (0, 1),    # Up
        1: (0, -1),   # Down
        2: (-1, 0),   # Left
        3: (1, 0),    # Right
        4: (1, 1),    # Up-Right
        5: (-1, 1),   # Up-Left
        6: (1, -1),   # Down-Right
        7: (-1, -1),  # Down-Left
    }

    def __init__(self, epsilon=0.1):
        super(DroneEnv, self).__init__()

        self.epsilon = epsilon  # Exploration rate

        self.action_space = gym.spaces.Discrete(8)
        self.observation_space = gym.spaces.Box(
            low=self._GRID_MIN, high=self._GRID_MAX, shape=(2,), dtype=np.float32
        )
        self.state = np.array([5, 5])  # Starting position
        self.target = np.array([8, 8])  # Target position
        self.obstacles = [np.array([6, 6]), np.array([7, 7])]  # Obstacles

    def _observation(self):
        """Return the current position as a new float32 array."""
        # astype() copies, so callers never hold a reference to self.state.
        return self.state.astype(np.float32)

    def reset(self):
        """Put the drone back at the start cell and return the first observation."""
        self.state = np.array([5, 5])
        return self._observation()

    def step(self, action):
        """Apply one (possibly randomised) move.

        Returns ``(observation, reward, done, info)``. Moves that would leave
        the grid are clipped to the boundary; unknown action ids leave the
        position unchanged.
        """
        # Determining whether to explore or exploit
        if random.random() < self.epsilon:
            # Exploration: pick a random action
            action = self.action_space.sample()

        # Implement action and update state
        dx, dy = self._MOVES.get(int(action), (0, 0))
        self.state = np.clip(
            self.state + np.array([dx, dy]), self._GRID_MIN, self._GRID_MAX
        )

        # Calculate reward
        if any(np.array_equal(self.state, obs) for obs in self.obstacles):
            reward = -10  # Penalty for hitting an obstacle
            done = True
        elif np.array_equal(self.state, self.target):
            reward = 10  # Reward for reaching the target
            done = True
        else:
            reward = -1  # Per-step cost
            done = False

        return self._observation(), reward, done, {}




In [14]:
import numpy as np
import heapq

def heuristic(a, b):
    """Return the Euclidean (straight-line) distance between points a and b."""
    delta = np.asarray(a) - np.asarray(b)
    return np.linalg.norm(delta)

def a_star(start, goal, obstacles, grid_width, grid_height):
    """Find a shortest 4-connected path on a grid with A* search.

    Args:
        start: (x, y) start cell; any 2-item sequence is accepted.
        goal: (x, y) goal cell; any 2-item sequence is accepted.
        obstacles: iterable of blocked (x, y) cells. Each entry may be a
            tuple, list or array; entries are normalised to tuples.
        grid_width: number of columns; valid x values are 0..grid_width-1.
        grid_height: number of rows; valid y values are 0..grid_height-1.

    Returns:
        The list of cells from the first step after ``start`` up to and
        including ``goal``. ``start`` itself is not included. Returns ``[]``
        when no path exists, and also when ``start == goal``.
    """
    start, goal = tuple(start), tuple(goal)
    # A set gives O(1) membership tests and makes list/array obstacles
    # comparable with the tuple neighbours generated below.
    blocked = {tuple(cell) for cell in obstacles}

    came_from = {}
    g_score = {start: 0}
    f_score = {start: heuristic(start, goal)}
    open_set = [(f_score[start], start)]

    while open_set:
        priority, current = heapq.heappop(open_set)

        # Lazy deletion: a cell is re-pushed whenever its score improves, so
        # older heap entries carrying a worse priority are simply skipped.
        if priority > f_score[current]:
            continue

        if current == goal:
            # Reconstruct path by walking the parent links back to start.
            path = []
            while current in came_from:
                path.append(current)
                current = came_from[current]
            return path[::-1]

        for dx, dy in ((1, 0), (0, 1), (-1, 0), (0, -1)):
            neighbor = (current[0] + dx, current[1] + dy)
            if not (0 <= neighbor[0] < grid_width and 0 <= neighbor[1] < grid_height):
                continue
            if neighbor in blocked:
                continue

            tentative_g_score = g_score[current] + 1

            if neighbor not in g_score or tentative_g_score < g_score[neighbor]:
                came_from[neighbor] = current
                g_score[neighbor] = tentative_g_score
                f_score[neighbor] = tentative_g_score + heuristic(neighbor, goal)
                heapq.heappush(open_set, (f_score[neighbor], neighbor))

    return []  # No path found


# Demo scenario: route around two blocked cells on a 10x10 grid.
grid_width, grid_height = 10, 10  # Set your grid width and height
start, goal = (5, 4), (8, 8)
obstacles = [(6, 6), (7, 7)]

path = a_star(start, goal, obstacles, grid_width, grid_height)
print("Path from start to goal:", path)


Path from start to goal: [(5, 5), (5, 6), (5, 7), (6, 7), (6, 8), (7, 8), (8, 8)]


  and should_run_async(code)


In [15]:
# Test: roll the policy forward for 20 steps, printing the state after each one.
obs = env.reset()
step_count = 0
while step_count < 20:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    # Start a new episode as soon as the current one terminates.
    obs = env.reset() if done else obs
    step_count += 1


Drone Position: [5 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [6 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [7 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [7 5], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [6 5], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [7 5], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [6 5], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [6 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [6 3], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [5 3], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [5 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [4 4], Target: [8 8], Obstacles: [array([6, 6]), array([7, 7])]
Drone Position: [4 5], Target: [8 8], Ob