Autonomous Drone Navigation using Deep Q-Learning

In [1]:
import numpy as np
import cv2
import gym
from collections import deque
import random
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Fix for numpy.bool8 deprecation warning
if not hasattr(np, 'bool8'):
    np.bool8 = np.bool_

In [3]:
# Step 1: Setup Environment (Using Gym's CartPole as a stand-in for a drone sim)
env = gym.make("CartPole-v1")  # Replace with a drone-specific environment or simulator like AirSim


  deprecation(
  deprecation(


In [4]:
# Step 2: Define the Q-Network
class QNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(state_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.out = nn.Linear(64, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.out(x)


In [5]:
# Step 3: Create the DQN Agent
class DQNAgent:
    def __init__(self, state_dim, action_dim):
        self.model = QNetwork(state_dim, action_dim)
        self.target = QNetwork(state_dim, action_dim)
        self.memory = deque(maxlen=10000)
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
        self.batch_size = 64
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

    def act(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.randint(0, action_dim)
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def remember(self, s, a, r, s2, done):
        self.memory.append((s, a, r, s2, done))

    def learn(self):
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states)
        actions = torch.LongTensor(actions).unsqueeze(1)
        rewards = torch.FloatTensor(rewards).unsqueeze(1)
        next_states = torch.FloatTensor(next_states)
        dones = torch.FloatTensor(dones).unsqueeze(1)

        q_values = self.model(states).gather(1, actions)
        max_next_q = self.target(next_states).max(1)[0].unsqueeze(1)
        target_q = rewards + self.gamma * max_next_q * (1 - dones)

        loss = nn.MSELoss()(q_values, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target(self):
        self.target.load_state_dict(self.model.state_dict())


In [6]:
# Step 4: Training Loop
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
agent = DQNAgent(state_dim, action_dim)

episodes = 200
for ep in range(episodes):
    state = env.reset()
    total_reward = 0
    for t in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        agent.learn()
        state = next_state
        total_reward += reward
        if done:
            break
    agent.update_target()
    print(f"Episode {ep+1}: Total Reward = {total_reward}")

print("Training complete. Drone agent is ready to navigate!")

Episode 1: Total Reward = 29.0
Episode 2: Total Reward = 14.0
Episode 3: Total Reward = 19.0
Episode 4: Total Reward = 25.0


  states = torch.FloatTensor(states)


Episode 5: Total Reward = 21.0
Episode 6: Total Reward = 8.0
Episode 7: Total Reward = 13.0
Episode 8: Total Reward = 19.0
Episode 9: Total Reward = 13.0
Episode 10: Total Reward = 12.0
Episode 11: Total Reward = 11.0
Episode 12: Total Reward = 12.0
Episode 13: Total Reward = 13.0
Episode 14: Total Reward = 11.0
Episode 15: Total Reward = 9.0
Episode 16: Total Reward = 14.0
Episode 17: Total Reward = 9.0
Episode 18: Total Reward = 13.0
Episode 19: Total Reward = 10.0
Episode 20: Total Reward = 11.0
Episode 21: Total Reward = 13.0
Episode 22: Total Reward = 10.0
Episode 23: Total Reward = 13.0
Episode 24: Total Reward = 8.0
Episode 25: Total Reward = 32.0
Episode 26: Total Reward = 43.0
Episode 27: Total Reward = 79.0
Episode 28: Total Reward = 85.0
Episode 29: Total Reward = 68.0
Episode 30: Total Reward = 35.0
Episode 31: Total Reward = 51.0
Episode 32: Total Reward = 42.0
Episode 33: Total Reward = 56.0
Episode 34: Total Reward = 108.0
Episode 35: Total Reward = 138.0
Episode 36: Tot