### DQN

In [1]:
import gymnasium
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.evaluation import evaluate_policy
from utils.env import CogSatEnv


In [2]:
# Print the environment name exported by utils.env so the run records which
# environment variant the notebook is working with.
from utils.env import env_name
print(f"Using environment: {env_name}")

Using environment: NermineCogSatEnv-v1


In [3]:

import os

# Workaround for the "duplicate OpenMP runtime" abort that can occur when more
# than one library bundles its own OpenMP runtime.
# NOTE(review): this variable is typically read when the runtime is loaded, so
# it may need to be set before the heavy imports to take effect - confirm.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Seed shared by the environment reset and the agent for reproducible runs.
seed = 42

# Register the environment under a single ID so gymnasium.make(env_id) can
# build it. The entry point must name the module the class is actually
# imported from in this notebook (utils.env), not a top-level `env` module.
env_id = "CogSatEnv-v1"
gymnasium.register(
    id=env_id,
    entry_point="utils.env:CogSatEnv",
)

# Initialize the environment directly from the class (no gymnasium.make
# wrappers), so custom attributes stay reachable on `env`.
env = CogSatEnv()

In [4]:
# Seed and reset the environment. As the last expression of the cell, the
# returned (observation, info) tuple is displayed below.
env.reset(seed=seed)  # Reset the environment with the seed

++++===== ENV RESET+++===


({'utc_time': array([1744250400], dtype=int64),
  'freq_lgs_leo': array([13.,  1., 14.,  1., 14., 13.,  6., 14.,  7., 14.]),
  'freq_ggs_geo': array([ 2.,  5.,  8.,  9.,  1.,  7.,  6.,  3.,  4., 10.])},
 {})

In [5]:
# Display the environment's `intial_obs` attribute (the spelling is the
# attribute's actual name on the env object, so it must stay as-is here).
# NOTE(review): presumably the initial observation stored by the env - confirm
# against utils.env.
env.intial_obs

{'utc_time': array([0], dtype=int64),
 'freq_lgs_leo': array([20.72350111, 20.93542428, 20.2704186 , 21.92923927, 20.52469689,
        21.63208769, 21.77370558, 21.36014224, 20.99820695, 21.48747991]),
 'freq_ggs_geo': array([21.9374196 , 21.37202516, 20.74557307, 20.63565233, 21.23009371,
        21.02947991, 21.24959189, 21.25677718, 21.0467088 , 21.903055  ])}

In [6]:
# Wrap the single environment instance in a vectorized-env interface.
# NOTE(review): the later cells visible in this notebook pass `env` (not
# `dummy_env`) to DQN and evaluate_policy, so this wrapper appears unused.
dummy_env = DummyVecEnv([lambda: env])  # Wrap the environment with DummyVecEnv

In [None]:

# Training budget: timesteps per epoch times number of epochs.
epoch_length = 180  # timesteps per epoch; value obtained through experiment
epoch_numbers = 100

total_steps = epoch_length * epoch_numbers

# Optional: check that the environment follows the Gymnasium API
check_env(env, warn=True)

# Instantiate the model
model = DQN(
    policy="MultiInputPolicy",  # the env returns dict observations (see reset output)
    env=env,
    learning_rate=1e-4,
    buffer_size=50000,
    learning_starts=10,
    batch_size=16,
    tau=1.0,  # per the SB3 docs, 1.0 means a hard target-network update
    gamma=0.99,
    train_freq=4,
    target_update_interval=10,
    verbose=1,
    # Seed the agent too; otherwise only env.reset() is seeded and the
    # network initialisation, exploration and replay sampling stay random.
    seed=seed,
)

# Train the agent
model.learn(total_timesteps=total_steps)
# TODO: measure the performance of training
# Save the model
model.save("dqn_cogsat")
# NOTE: the environment is closed here, so later cells must not reuse `env`.
env.close()




++++===== ENV RESET+++===
++++===== ENV RESET+++===
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-
Step Started
Action taken:  2
Current LEO User ID:  0
self.tIndex:  0
Current LEO Satellite ID:  583
Updated ChannelListLeo:  2.0
Next Observation:  {'utc_time': array([1744250400], dtype=int64), 'freq_lgs_leo': array([ 2.,  6.,  2.,  1.,  5.,  1.,  5., 13.,  8.,  3.]), 'freq_ggs_geo': array([ 6.,  7.,  9., 10.,  4.,  2.,  3.,  8.,  5.,  1.])}
SINR[:,self.tIndex]:  [-6.32873319 -2.59134506 -7.14761251 -5.38927408 -7.33616857 -7.55214991
 -6.25448873 -6.0028467  -4.69434058 -8.31473457 -4.45024485 -2.01230767
 -6.31963908 -6.21705849 -1.4946837  -6.89623708 -3.92703562 -6.02203655
 -5.47694978 -7.10600672]
Reward:  -111.53389343460168
*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-


In [None]:
# Evaluate the agent (this cell can also be run from a separate file).
# The training cell calls env.close() after saving the model, so reusing `env`
# here is not guaranteed to work; evaluate on a fresh environment instance and
# always release it afterwards.
eval_env = CogSatEnv()
try:
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
finally:
    eval_env.close()
print(f"Mean reward: {mean_reward} +/- {std_reward}")