In [None]:
import gymnasium
from gymnasium import spaces
import numpy as np
import pandas as pd

class RadarTaskSchedulerEnv(gymnasium.Env):
    def __init__(self, task_file="task_scheduling_dataset.csv"):
        super(RadarTaskSchedulerEnv, self).__init__()

        self.tasks = pd.read_csv(task_file)
        self.current_task_idx = 1

        self.observation_space = spaces.Box(low=0, high=100, shape=(3,), dtype=np.int64)
        self.action_space = spaces.MultiDiscrete([3, 3, 3])  # [delay, compress, radar_choice]

    def reset(self,*, seed=None, options=None):
        super().reset(seed=seed)
        self.current_task_idx = 1
        obs= self._get_observation()
        return obs, {}

    def step(self, action):
        delay, compress, radar_choice = action
        task = self.tasks.iloc[self.current_task_idx]

        tns = min(task['Deadline'], task['Request_Time'] + delay)
        Pn = min(task['Max_Power'], task['Init_Power'] + compress)

        task_dropped = (tns > task['Deadline']) or (Pn > task['Max_Power'])
        reward = 0 if not task_dropped else -1

        self.current_task_idx += 1
        done = self.current_task_idx >= len(self.tasks)

        if not done:
            next_obs = self._get_observation()
        else:
            next_obs = (0, 0, 0)  # or any dummy terminal observation

        return next_obs, reward, done, False, {}

    def _get_observation(self):
        task = self.tasks.iloc[self.current_task_idx]
        return (int(task['Duration']), int(task['Deadline']), int(task['Init_Power']))


In [None]:
print("Radar Task Scheduler Environment initialized.")


In [27]:
import numpy as np
from collections import defaultdict

class QLearningAgent:
    def __init__(self,state_size, action_size, alpha=0.15, gamma=0.95, epsilon=0.1):
        self.q_table = defaultdict(lambda: np.zeros(action_size))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.action_size = action_size
        self.state_size = state_size

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)
        return np.argmax(self.q_table[state])

    def update_q_value(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.gamma * self.q_table[next_state][best_next_action]
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.alpha * td_error


In [28]:
# import env_setup
# import agent as ag
import matplotlib.pyplot as plt

# Setup
env = RadarTaskSchedulerEnv("task_scheduling_dataset.csv")
print("Reset output:", env.reset())

agent = QLearningAgent(state_size=3,action_size=27)

# Helper to decode flat action index to [delay, compress, radar]
def decode_action(index):
    delay = index // 9
    compress = (index % 9) // 3
    radar = index % 3
    return [delay, compress, radar]

# Training loop
episodes = 10 
reward_log = []

for episode in range(episodes):
    state, info = env.reset()
    done = False
    total_reward = 0

    while not done:
        action_index = agent.choose_action(state)
        action = decode_action(action_index)

        next_state, reward, done, _, _ = env.step(action)
        agent.update_q_value(state, action_index, reward, next_state)

        state = next_state
        total_reward += reward

    reward_log.append(total_reward)
    print(f"Episode {episode + 1} — Total Reward: {total_reward}")

print("Training complete!")


Reset output: ((3, 86, 9), {})
Episode 1 — Total Reward: 999
Episode 2 — Total Reward: 999
Episode 3 — Total Reward: 999
Episode 4 — Total Reward: 999
Episode 5 — Total Reward: 999
Episode 6 — Total Reward: 999
Episode 7 — Total Reward: 999
Episode 8 — Total Reward: 999
Episode 9 — Total Reward: 999
Episode 10 — Total Reward: 999
Training complete!
