In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [2]:
from keras.models import Sequential

In [3]:
!pip install tensorflow



In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import deque
# Importing the necessary classes from Keras
from tensorflow import keras  # Import keras as a module
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
# Simulation Parameters
import numpy as np # Import numpy to use np alias
# Simulation Parameters
from collections import deque # Import deque from collections module
NUM_UAVS = 10
NUM_EPISODES = 100
MAX_STEPS = 200
LEARNING_RATE = 0.001
GAMMA = 0.95
EPSILON = 1.0
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.995
BATCH_SIZE = 32

# DQN Model
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.epsilon = EPSILON
        self.gamma = GAMMA
        self.model = self._build_model()

    def _build_model(self): # Corrected indentation here
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=LEARNING_RATE)) # Changed lr to learning_rate
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > EPSILON_MIN:
            self.epsilon *= EPSILON_DECAY

# Simplified FANET Environment
class FANETEnvironment:
    def __init__(self, num_uavs):
        self.num_uavs = num_uavs
        self.state_size = num_uavs * 2  # Example: [position_x, position_y] for each UAV
        self.action_size = num_uavs  # Actions: Select next hop UAV
        self.reset()

    def reset(self):
        self.uav_positions = np.random.rand(self.num_uavs, 2) * 100  # Random positions in 100x100 grid
        self.current_step = 0
        return self._get_state()

    def _get_state(self):
        return self.uav_positions.flatten().reshape(1, -1)

    def step(self, action):
        reward = 0
        done = False
        self.current_step += 1

        # Simplified reward: Higher reward for shorter distances to destination
        destination = np.array([100, 100])
        distance = np.linalg.norm(self.uav_positions[action] - destination)
        reward = 1 / (distance + 1e-5)

        # Simulate movement (random walk)
        self.uav_positions = np.random.rand(self.num_uavs, 2) * 100

        # Check if episode is done
        if self.current_step >= MAX_STEPS:
            done = True

        next_state = self._get_state()
        return next_state, reward, done

# Training the DQN Agent
def train_dqn_agent(env, agent, episodes):
    rewards = []
    pdr = []  # Packet Delivery Ratio
    delays = []  # End-to-End Delay

    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        total_delivered = 0
        total_delay = 0

        for step in range(MAX_STEPS):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            # Simulate packet delivery and delay
            if random.random() < 0.8:  # 80% chance of successful delivery
                total_delivered += 1
                total_delay += step

            if done:
                break

        # Calculate metrics
        pdr.append(total_delivered / MAX_STEPS)
        delays.append(total_delay / (total_delivered + 1e-5))
        rewards.append(total_reward)

        # Train the agent
        if len(agent.memory) > BATCH_SIZE:
            agent.replay(BATCH_SIZE)

        print(f"Episode: {episode + 1}, Reward: {total_reward}, PDR: {pdr[-1]}, Delay: {delays[-1]}")

    return rewards, pdr, delays

# Visualization
def plot_performance(rewards, pdr, delays):
    plt.figure(figsize=(12, 8))

    plt.subplot(3, 1, 1)
    plt.plot(rewards)
    plt.title("Reward Trend")
    plt.xlabel("Episode")
    plt.ylabel("Cumulative Reward")

    plt.subplot(3, 1, 2)
    plt.plot(pdr)
    plt.title("Packet Delivery Ratio (PDR)")
    plt.xlabel("Episode")
    plt.ylabel("PDR")

    plt.subplot(3, 1, 3)
    plt.plot(delays)
    plt.title("End-to-End Delay")
    plt.xlabel("Episode")
    plt.ylabel("Delay")

    plt.tight_layout()
    plt.show()

# Main Program
if __name__ == "__main__":
    env = FANETEnvironment(NUM_UAVS)
    agent = DQNAgent(env.state_size, env.action_size)
    rewards, pdr, delays = train_dqn_agent(env, agent, NUM_EPISODES)
    plot_performance(rewards, pdr, delays)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 407ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5