In [1]:
pip install networkx matplotlib



In [2]:
pip install torch numpy matplotlib networkx

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque

# Define the DQN model for each agent
class DQN(nn.Module):
    """Fully connected Q-network: two 64-unit ReLU hidden layers and a linear head.

    Args:
        state_size: Number of input features.
        action_size: Number of outputs (one Q-value per action).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the output of the final linear layer for input ``x``."""
        # ReLU after each hidden layer; the output layer stays linear.
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)

# Replay buffer for experience replay
class ReplayBuffer:
    """Bounded FIFO store of (state, action, reward, next_state, done) tuples.

    Backed by a ``deque`` with ``maxlen=capacity``, so once full, appending a
    new transition drops the oldest one.
    """

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions without replacement.

        Returns:
            A 5-tuple of NumPy arrays (states, actions, rewards, next_states,
            dones), each stacked along the first axis.
        """
        transitions = random.sample(self.buffer, batch_size)
        # Transpose the list of transitions into one column per field.
        states, actions, rewards, next_states, dones = zip(*transitions)
        columns = (states, actions, rewards, next_states, dones)
        return tuple(np.array(column) for column in columns)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

In [4]:
class MultiAgentDQN:
    """Container holding one independent ``DQNAgent`` per agent slot.

    Args:
        num_agents: Number of agents to create.
        state_size: Input size passed to every agent.
        action_size: Number of actions passed to every agent.
    """

    def __init__(self, num_agents, state_size, action_size):
        self.num_agents = num_agents
        self.state_size = state_size
        self.action_size = action_size
        # Every agent gets its own network, optimizer and replay buffer.
        self.agents = []
        for _ in range(num_agents):
            self.agents.append(DQNAgent(state_size, action_size))

class DQNAgent:
    """Epsilon-greedy DQN learner with its own network, optimizer and replay buffer.

    The same network produces both the predicted Q-values and the bootstrap
    target (there is no separate target network).

    Args:
        state_size: Number of input features of the Q-network.
        action_size: Number of discrete actions (Q-network outputs).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = ReplayBuffer(10000)
        self.gamma = 0.99  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 64
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        Args:
            state: A single observation (sequence or array of numbers).

        Returns:
            int: A uniformly random action with probability ``epsilon``,
            otherwise the index of the largest predicted Q-value.
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.action_size - 1)
        state = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
        # Action selection never needs gradients; skip building the graph.
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def train(self):
        """Run one gradient step on a sampled mini-batch, then decay epsilon.

        Does nothing (and leaves epsilon untouched) while the replay buffer
        holds fewer than ``batch_size`` transitions.
        """
        if len(self.memory) < self.batch_size:
            return
        state, action, reward, next_state, done = self.memory.sample(self.batch_size)
        # Explicit dtypes so bool `done` flags and integer actions convert reliably.
        state = torch.as_tensor(state, dtype=torch.float32)
        next_state = torch.as_tensor(next_state, dtype=torch.float32)
        action = torch.as_tensor(action, dtype=torch.int64)
        reward = torch.as_tensor(reward, dtype=torch.float32)
        done = torch.as_tensor(done, dtype=torch.float32)

        # Q(s, a) for the actions that were actually taken.
        q_values = self.model(state)
        q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

        # The TD target must be treated as a constant: computing it with
        # autograd enabled would also push gradients through max Q(s', .).
        with torch.no_grad():
            next_q_value = self.model(next_state).max(1)[0]
            expected_q_value = reward + self.gamma * next_q_value * (1 - done)

        loss = nn.MSELoss()(q_value, expected_q_value)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [11]:
import networkx as nx
import math
# FANET environment parameters
NUM_DRONES = 10    # number of nodes in the graph
AREA_SIZE = 1000   # side length of the square area; positions lie in [0, AREA_SIZE]
COMM_RANGE = 200   # maximum distance at which two nodes are linked by an edge

# One graph node per drone, each placed uniformly at random inside the area.
G = nx.Graph()
for i in range(NUM_DRONES):
    x, y = random.uniform(0, AREA_SIZE), random.uniform(0, AREA_SIZE)
    G.add_node(i, pos=(x, y))

def update_positions(G):
    """Move every node by a random step of at most 10 units per axis.

    Each node's ``'pos'`` attribute is replaced in place; both coordinates
    are clamped to the range [0, AREA_SIZE].
    """
    for node in G.nodes:
        attrs = G.nodes[node]
        old_x, old_y = attrs['pos']
        # Draw the x step before the y step.
        moved_x = old_x + random.uniform(-10, 10)
        moved_y = old_y + random.uniform(-10, 10)
        attrs['pos'] = (
            max(0, min(moved_x, AREA_SIZE)),
            max(0, min(moved_y, AREA_SIZE)),
        )

def update_edges(G):
    """Rebuild the edge set from the current node positions.

    All existing edges are removed, then an edge is added for every unordered
    pair of nodes whose Euclidean distance is at most COMM_RANGE.
    """
    G.clear_edges()
    node_ids = list(G.nodes)
    for index, first in enumerate(node_ids):
        # Only look at later nodes so each unordered pair is visited once.
        for second in node_ids[index + 1:]:
            pos_first = G.nodes[first]['pos']
            pos_second = G.nodes[second]['pos']
            delta_x = pos_first[0] - pos_second[0]
            delta_y = pos_first[1] - pos_second[1]
            separation = (delta_x**2 + delta_y**2)**0.5
            if separation <= COMM_RANGE:
                G.add_edge(first, second)

# Define state and action spaces
state_size = NUM_DRONES * 3  # Position (x, y) and energy level for each drone
action_size = NUM_DRONES  # Index of the drone picked as next hop (not restricted to neighbors)

# Initialize Multi-Agent DQN system
multi_agent_dqn = MultiAgentDQN(NUM_DRONES, state_size, action_size)


def build_state(G):
    """Flatten the graph into ``[x, y, energy, ...]`` in node-iteration order.

    Nodes without an ``'energy'`` attribute are reported with energy 100.
    """
    flat = []
    for node in G.nodes:
        x, y = G.nodes[node]['pos']
        energy = G.nodes[node].get('energy', 100)
        flat.extend([x, y, energy])
    return flat


# Observation the agents act on in the first step.
update_edges(G)
state = build_state(G)

# Simulation loop
for t in range(1000):
    # Each agent chooses an action from the same global state
    actions = [agent.act(state) for agent in multi_agent_dqn.agents]

    # Reward: negative Euclidean distance between drone i and the drone it
    # selected, measured at the positions the agents observed in `state`.
    # Selecting itself yields a reward of -0.0.
    rewards = [
        -math.dist(G.nodes[i]['pos'], G.nodes[action]['pos'])
        for i, action in enumerate(actions)
    ]

    # Advance the environment only AFTER acting, so that next_state is the
    # observation that follows the action instead of a copy of `state`.
    update_positions(G)
    update_edges(G)
    next_state = build_state(G)

    # Store experience in replay buffer for each agent.
    # No destination is modelled here, so the episode never terminates.
    done = False
    for i, agent in enumerate(multi_agent_dqn.agents):
        agent.memory.push(state, actions[i], rewards[i], next_state, done)

    # Train each agent
    for agent in multi_agent_dqn.agents:
        agent.train()

    print(f"Time {t}: Actions {actions}, Rewards {rewards}")

    # The observation just produced is what the agents act on next step.
    state = next_state

Time 0: Actions [7, 2, 8, 0, 7, 8, 1, 3, 2, 4], Rewards [-617.6935886852989, -579.9777745607267, -666.0035899905155, -953.2297392184424, -562.5972985852542, -820.4137755035189, -495.73054323584506, -344.36726064945697, -666.0035899905155, -1016.3476543382586]
Time 1: Actions [5, 7, 6, 2, 3, 8, 1, 0, 9, 1], Rewards [-1039.3625289060742, -106.3898998367047, -141.45025155307448, -844.3441958778362, -849.0823006516431, -827.2657373632779, -486.2777510576311, -621.2101962610415, -1196.3606974390327, -504.8058116973115]
Time 2: Actions [2, 0, 4, 0, 5, 6, 4, 2, 6, 7], Rewards [-258.83227940946716, -652.2005715054447, -477.1888291910966, -953.3838198769197, -780.9403667213896, -1003.8780620275152, -533.7991860361316, -507.49191491148645, -741.165080484296, -459.46089973376786]
Time 3: Actions [9, 1, 9, 3, 8, 9, 1, 8, 3, 4], Rewards [-1031.69158814812, -0.0, -856.2330750876268, -0.0, -218.59813531567244, -873.6458116024653, -491.7029803275293, -753.2523623282462, -995.0981231715359, -1002.75001