In [3]:
import os

import numpy as np
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.logger import configure
from stable_baselines3.common.env_checker import check_env


from src.drone_env import DroneEnv
from src.utils import read_config

# Directory that receives the training logs; created up front if missing.
log_dir = "logs/"
os.makedirs(log_dir, exist_ok=True)

# Build an SB3 logger that writes to both stdout and TensorBoard event files.
log_formats = ["stdout", "tensorboard"]
logger = configure(log_dir, log_formats)

# Project configuration handed to DroneEnv in the cells below.
config = read_config("config.yaml")

def print_state(state):
    """Print the entries of ``state`` as sign-aligned, 4-decimal strings.

    Each value is formatted with the ``' '`` sign flag, so non-negative numbers
    get a leading space where negative numbers carry their ``-``. This keeps
    every entry the same width with the decimal points lined up, and it also
    handles ``-0.0`` correctly (it already renders with a minus sign).

    Args:
        state: Iterable of real numbers (anything supporting the ``f`` format
            spec, e.g. Python floats or NumPy scalars).
    """
    print("State: ", [f"{x: .4f}" for x in state])

Logging to logs/


In [None]:
# Smoke-test the environment by rolling out a few episodes with random actions.
env = DroneEnv(config, render_mode="human")
episodes = 5
try:
    for episode in range(1, episodes + 1):
        # reset() yields (observation, info) -- same unpacking as the
        # trained-agent rollout cell further down in this notebook.
        state, _ = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()  # Take a random action from the action space
            # step() yields five values; an episode is over when it either
            # terminates or is truncated (e.g. by a time limit).
            n_state, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            score += reward
        print('Episode:{} Score:{}'.format(episode, round(score, 2)))
finally:
    # Always release the render window, even if the cell is interrupted.
    env.close()

In [None]:
# Close the current environment.
# NOTE(review): the rollout cell above already ends with env.close(); presumably this
# cell is kept for manual cleanup after an interrupted run -- confirm or remove.
env.close()

In [4]:
# Build the training environment and the DQN agent.
base_env = DroneEnv(config)
check_env(base_env, warn=True)  # validate the env against the SB3 env checker

# Keep the raw env under its own name: the factory lambda must not close over
# `env`, which is rebound to the vectorized wrapper on this very line.
env = DummyVecEnv([lambda: base_env])
model = DQN('MlpPolicy', env, tensorboard_log=log_dir)

In [5]:
# Train the agent for one million timesteps with a progress bar.
# The call is the cell's last expression, so the returned model object is displayed.
model.learn(
    total_timesteps=1_000_000,
    progress_bar=True,
)

Output()

<stable_baselines3.dqn.dqn.DQN at 0x7f52386edd20>

In [6]:
# Persist the trained agent under Training/Saved Models/.
model_path = os.path.join('Training', 'Saved Models', 'DQN_model_1M')
model.save(model_path)

In [7]:
# Evaluate the trained policy over 10 episodes; the result is shown as the cell output.
# NOTE(review): `env` here is the vectorized env built from DroneEnv(config) without a
# render_mode, while render=True is requested -- confirm rendering actually works.
evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    render=True,
)




KeyboardInterrupt: 

In [8]:
# Watch the trained agent: roll out a few rendered episodes using model.predict().
env = DroneEnv(config, render_mode="human")
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs, _ = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action, _ = model.predict(obs)
            # Get new set of observations. Both end-of-episode flags must be honoured:
            # ignoring `truncated` leaves the loop spinning forever on episodes that
            # end by truncation rather than termination.
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            score += reward
        print('Episode:{} Score:{}'.format(episode, round(score, 2)))
finally:
    # Always release the render window, even if the cell is interrupted.
    env.close()

KeyboardInterrupt: 