In [101]:
#multi-agent testrun using pettingzoo
#still need to update the actionset
#still need to update reward function
#trial to see if it works

#update the actions so that it selects the frequency range of a particular radar first, and then picks a sub-band from inside it
#fix the frequency hopping of the radar so that it only transmits one frequency per radar per step.

#after finalizing actions and rewards, test on vanilla dqn and sstart implementing ddqn with per

In [102]:
import numpy as np
import math
import gymnasium as gym
from gym import Env
from gym import spaces
import random
import numpy as np
from IPython.display import clear_output
import os
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from pettingzoo import ParallelEnv

is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

#device setup
device = torch.device(
    "cuda" if torch.cuda.is_available() else
    "mps" if torch.backends.mps.is_available() else
    "cpu"
)

In [103]:
hop_states = [9e9, 9e9 + 20e6, 9e9 + 40e6, 9e9 + 60e6, 9e9 + 80e6, 9e9 + 100e6, 9e9 + 120e6, 9e9 + 140e6]
jamming_bandwidths = [10e6, 20e6, 30e6]

In [104]:
from pettingzoo import ParallelEnv
from gym import spaces
import numpy as np

class ParallelRadarJammingEnv(ParallelEnv):
    def __init__(self, hop_states, jamming_bandwidths, max_hop_length=8):
        super().__init__()

        self.hop_states = hop_states
        self.low = self.hop_states[0]
        self.interval = self.hop_states[1] - self.hop_states[0]
        self.n_frequencies = len(hop_states)
        self.max_hop_length = max_hop_length

        # agent params, action spaces and observation spaces
        self.agents = ["radar_0", "radar_1", "radar_2", "radar_3"]
        self.possible_agents = self.agents[:]
        self.agent_name_mapping = {agent: i for i, agent in enumerate(self.agents)}

        self.jamming_bandwidths = jamming_bandwidths
        self.n_bandwidths = len(self.jamming_bandwidths)
        self.action_spaces = {agent: spaces.Discrete(self.max_hop_length * self.n_bandwidths) for agent in self.agents}

        self.observation_spaces = {
            agent: spaces.Box(low=0, high=1, shape=(self.n_frequencies,), dtype=np.float32) for agent in self.agents
        }

        # step and reward vars
        self.hopping_patterns = None
        self.current_frequencies = None
        self.current_steps = None
        self.cumulative_rewards = None
        self.net_agent_rewards = None
        self.terminations = None
        self.rewards = None
        self.infos = None

    def index_to_action(self, index):
        frequency_index = index // self.n_bandwidths
        bandwidth_index = index % self.n_bandwidths
        frequency = int(self.low + frequency_index * self.interval)
        bandwidth = self.jamming_bandwidths[bandwidth_index]
        return frequency, bandwidth


    def generate_hopping_pattern(self):
        return np.random.choice(self.hop_states, self.max_hop_length, replace=False)

    def next_frequency(self, agent):
        return self.hopping_patterns[agent][self.current_steps[agent] % self.max_hop_length]

    def reset(self):

        self.hopping_patterns = {agent: self.generate_hopping_pattern() for agent in self.agents}
        self.current_steps = {agent: 0 for agent in self.agents}
        self.current_frequencies = {agent: self.next_frequency(agent) for agent in self.agents}
        self.cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.rewards = {agent: 0 for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}

        observations = {agent: self.observe(agent) for agent in self.agents}
        return observations

    def observe(self, agent):

        #current radar freq being used is one-hot encoded
        observation = np.zeros(self.n_frequencies)
        freq_index = np.where(self.hop_states == self.current_frequencies[agent])[0][0]
        observation[freq_index] = 1
        return observation

    def step(self, actions):
        total_jammed_frequencies = 0
        self.net_agent_rewards = 0

        for agent, action in actions.items():
            print (self.next_frequency(agent))
            frequency, bandwidth = self.index_to_action(action)

            lower_bound = frequency - bandwidth/2
            upper_bound = frequency + bandwidth/2

            freq_index = np.where(self.hop_states == self.current_frequencies[agent])[0][0]
            current_frequency = self.hop_states[freq_index]

            if lower_bound <= current_frequency <= upper_bound:
                reward = 100
                total_jammed_frequencies += 1
            else:
                reward = -50

            if 2 * (bandwidth / self.interval) > 5:
                reward -= 1
            else:
                reward += 3

            jam_threshold = len(self.hop_states) / len(self.agents)

            if total_jammed_frequencies > jam_threshold:
                reward += 5
            else:
                reward -= 5

            self.current_steps[agent] += 1
            self.cumulative_rewards[agent] += reward
            self.net_agent_rewards += reward

            # num of steps
            if self.current_steps[agent] >= 100:
                self.terminations[agent] = True

            self.rewards[agent] = reward
            self.current_frequencies[agent] = self.next_frequency(agent)

        observations = {agent: self.observe(agent) for agent in self.agents}
        rewards = {agent: self.rewards[agent] for agent in self.agents}
        dones = {agent: self.terminations[agent] for agent in self.agents}
        infos = {agent: self.infos[agent] for agent in self.agents}

        return observations, rewards, dones, infos

    def render(self):
        for agent in self.agents:
            print(f"Agent {agent}, Step: {self.current_steps[agent]}, Frequency: {self.current_frequencies[agent]}")

In [105]:
env = ParallelRadarJammingEnv(hop_states, jamming_bandwidths)

observations = env.reset()
done = {agent: False for agent in env.agents}

cumulative_reward = {agent: 0 for agent in env.agents}

while not all(done.values()):

    actions = {agent: env.action_spaces[agent].sample() for agent in env.agents}
    observations, rewards, done, infos = env.step(actions)
    
    for agent in env.agents:
        cumulative_reward[agent] += rewards[agent]
    
    env.render()
    print(f"Actions: {actions}, Rewards: {rewards}, Cumulative Rewards: {cumulative_reward}")
    print('\n')


9120000000.0
9000000000.0
9100000000.0
9120000000.0
Agent radar_0, Step: 1, Frequency: 9020000000.0
Agent radar_1, Step: 1, Frequency: 9040000000.0
Agent radar_2, Step: 1, Frequency: 9080000000.0
Agent radar_3, Step: 1, Frequency: 9060000000.0
Actions: {'radar_0': 23, 'radar_1': 23, 'radar_2': 1, 'radar_3': 13}, Rewards: {'radar_0': -52, 'radar_1': -52, 'radar_2': -52, 'radar_3': -52}, Cumulative Rewards: {'radar_0': -52, 'radar_1': -52, 'radar_2': -52, 'radar_3': -52}


9020000000.0
9040000000.0
9080000000.0
9060000000.0
Agent radar_0, Step: 2, Frequency: 9100000000.0
Agent radar_1, Step: 2, Frequency: 9120000000.0
Agent radar_2, Step: 2, Frequency: 9120000000.0
Agent radar_3, Step: 2, Frequency: 9020000000.0
Actions: {'radar_0': 4, 'radar_1': 19, 'radar_2': 10, 'radar_3': 15}, Rewards: {'radar_0': 98, 'radar_1': -52, 'radar_2': -52, 'radar_3': -52}, Cumulative Rewards: {'radar_0': 46, 'radar_1': -104, 'radar_2': -104, 'radar_3': -104}


9100000000.0
9120000000.0
9120000000.0
90200000