# Import all the necessary packages

In [1]:
import csv
from enum import IntEnum
import time
import jsonrpclib
import subprocess
from subprocess import PIPE, Popen
from threading  import Thread
import sys
import re
from collections import OrderedDict

import PySimpleGUI as sg

from gym import Env, error, spaces, utils
from stable_baselines3 import DQN, PPO, A2C, TD3, SAC
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import BaseCallback, CallbackList, CheckpointCallback, EvalCallback
from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy, plot_results

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import multivariate_normal

import os
import requests
import shutil
import tempfile
import xml.etree.ElementTree as ET
from io import StringIO, BytesIO

import cv2
import numpy as np
import torch
from PIL import Image
from IPython.display import clear_output
import gym
from cv2 import QRCodeDetector
from pyzbar import pyzbar

import olympe
from olympe.messages.ardrone3.Piloting import TakeOff, Landing, moveBy, PCMD, moveTo
from olympe.messages.ardrone3.PilotingState import FlyingStateChanged, PositionChanged, GpsLocationChanged, moveToChanged
from olympe.enums.ardrone3.PilotingState import FlyingStateChanged_State as FlyingState
from olympe.messages.ardrone3.GPSSettingsState import GPSFixStateChanged, HomeChanged
from olympe.messages.gimbal import set_target, attitude
from olympe.messages.camera import (
    set_camera_mode,
    set_photo_mode,
    take_photo,
    photo_progress,
)
from olympe.media import (
    media_created,
    resource_created,
    media_removed,
    resource_removed,
    resource_downloaded,
    indexing_state,
    delete_media,
    download_media,
    download_media_thumbnail,
    MediaEvent,
)

from pynput.keyboard import Listener, Key, KeyCode
from collections import defaultdict

olympe.log.update_config({
    "loggers": {
        "olympe": {
                "handlers": []
            }
        },
        "ulog": {
            "level": "OFF",
            "handlers": [],
        }
})

# Define the constants

In [2]:
DRONE_IP = os.environ.get("DRONE_IP", "10.202.0.1")
DRONE_MEDIA_PORT = os.environ.get("DRONE_MEDIA_PORT", "80")

ANAFI_URL = "http://{}/".format(DRONE_IP)
ANAFI_MEDIA_API_URL = ANAFI_URL + "api/v1/media/medias/"

# Define the classes

## Action

In [3]:
class Action:
    def __init__(self, drone):
        self.drone = drone
        self.home = self.drone.get_state(GpsLocationChanged)
        
        self.current_cell = self._get_cell(13)
        self.invalid_left_cells = [1, 6, 11, 16, 21]
        self.invalid_forward_cells = [1, 2, 3, 4, 5]
        self.invalid_right_cells = [5, 10, 15, 20, 25]
        self.invalid_backward_cells = [21, 22, 23, 24, 25]
        
        self.Move = IntEnum(
            'MOVE',
            'FORWARD BACKWARD LEFT RIGHT FORWARD_LEFT FORWARD_RIGHT BACKWARD_LEFT BACKWARD_RIGHT HOVER',
            start=0
        )
        
    def take_action(self, action):
        next_cell_id = self._get_next_cell_id(action)
        next_cell = self._get_cell(next_cell_id)
        
        old_cell_id, new_cell_id = self.current_cell["id"], next_cell["id"]
        if old_cell_id == new_cell_id: 
            return old_cell_id, new_cell_id, self._get_action_name(action)
        
        self._move_to_cell(next_cell)
        
        self.current_cell = next_cell
        
        return old_cell_id, new_cell_id, self._get_action_name(action)
        
    def reset(self):
        next_cell = self._get_cell(13)
        self._move_to_cell(next_cell)
        
        old_cell_id, new_cell_id = self.current_cell["id"], next_cell["id"]
        self.current_cell = next_cell
        
        return old_cell_id, new_cell_id
    
    def _get_cell(self, cell_id):
        return self._cell_coords[cell_id - 1]
    
    def _get_action_name(self, action):
        direction = str(self.Move(action)).split(".")[1]
        code = ""
        for i in direction.split("_"):
            if i != "":
                code += i[0].upper()
        
        return code
#         direction = str(self.Move(action)).split(".")[1].capitalize()
#         return "Moving " + direction if "hover" not in direction.lower() else "Hovering"
    
    def _move_to_cell(self, next_cell):        
        self.drone(
            moveTo(next_cell["latitude"],  next_cell["longitude"], next_cell["altitude"], "HEADING_DURING", 90.0)
            >> moveToChanged(status="DONE", _timeout=15)
        ).wait()
    
    def _get_next_cell_id(self, action):
        if action == self.Move.HOVER:
            return self.current_cell["id"]
        elif action == self.Move.LEFT:
            if self.current_cell["id"] in self.invalid_left_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] - 1
        elif action == self.Move.RIGHT:
            if self.current_cell["id"] in self.invalid_right_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] + 1
        elif action == self.Move.FORWARD:
            if self.current_cell["id"] in self.invalid_forward_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] - 5
        elif action == self.Move.BACKWARD:
            if self.current_cell["id"] in self.invalid_backward_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] + 5
        elif action == self.Move.FORWARD_RIGHT:
            if self.current_cell["id"] in self.invalid_forward_cells + self.invalid_right_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] - 4
        elif action == self.Move.FORWARD_LEFT:
            if self.current_cell["id"] in self.invalid_forward_cells + self.invalid_left_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] - 6
        elif action == self.Move.BACKWARD_RIGHT:
            if self.current_cell["id"] in self.invalid_backward_cells + self.invalid_right_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] + 6
        elif action == self.Move.BACKWARD_LEFT:
            if self.current_cell["id"] in self.invalid_backward_cells + self.invalid_left_cells:
                return self.current_cell["id"]
            next_cell_id = self.current_cell["id"] + 4
            
        return next_cell_id
    
    @property
    def _cell_coords(self):
        altitude = 6.0
        dlong = 6.8e-5 # in degrees == 5 meters along x-axis (forward[+]-backward[-])
        dlat = 7.2e-5 # in degrees == 8 meters along y-axis (left[+]-right[-])
        
        home_lat = self.home["latitude"]
        home_long = self.home["longitude"]
        
        return [
            # cell no. 1
            OrderedDict([('id', 1),
                         ('latitude', home_lat + 2 * dlat),
                         ('longitude', home_long + 2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 2
            OrderedDict([('id', 2),
                         ('latitude', home_lat + 1 * dlat),
                         ('longitude', home_long + 2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 3
            OrderedDict([('id', 3),
                         ('latitude', home_lat + 0 * dlat),
                         ('longitude', home_long + 2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 4
            OrderedDict([('id', 4),
                         ('latitude', home_lat + -1 * dlat),
                         ('longitude', home_long + 2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 5
            OrderedDict([('id', 5),
                         ('latitude', home_lat + -2 * dlat),
                         ('longitude', home_long + 2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 6
            OrderedDict([('id', 6),
                         ('latitude', home_lat + 2 * dlat),
                         ('longitude', home_long + 1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 7
            OrderedDict([('id', 7),
                         ('latitude', home_lat + 1 * dlat),
                         ('longitude', home_long + 1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 8
            OrderedDict([('id', 8),
                         ('latitude', home_lat + 0 * dlat),
                         ('longitude', home_long + 1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 9
            OrderedDict([('id', 9),
                         ('latitude', home_lat + -1 * dlat),
                         ('longitude', home_long + 1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 10
            OrderedDict([('id', 10),
                         ('latitude', home_lat + -2 * dlat),
                         ('longitude', home_long + 1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 11
            OrderedDict([('id', 11),
                         ('latitude', home_lat + 2 * dlat),
                         ('longitude', home_long + 0 * dlong),
                         ('altitude', altitude)]),
            # cell no. 12
            OrderedDict([('id', 12),
                         ('latitude', home_lat + 1 * dlat),
                         ('longitude', home_long + 0 * dlong),
                         ('altitude', altitude)]),
            # cell no. 13
            OrderedDict([('id', 13),
                         ('latitude', home_lat + 0 * dlat),
                         ('longitude', home_long + 0 * dlong),
                         ('altitude', altitude)]),
            # cell no. 14
            OrderedDict([('id', 14),
                         ('latitude', home_lat + -1 * dlat),
                         ('longitude', home_long + 0 * dlong),
                         ('altitude', altitude)]),
            # cell no. 15
            OrderedDict([('id', 15),
                         ('latitude', home_lat + -2 * dlat),
                         ('longitude', home_long + 0 * dlong),
                         ('altitude', altitude)]),
            # cell no. 16
            OrderedDict([('id', 16),
                         ('latitude', home_lat + 2 * dlat),
                         ('longitude', home_long + -1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 17
            OrderedDict([('id', 17),
                         ('latitude', home_lat + 1 * dlat),
                         ('longitude', home_long + -1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 18
            OrderedDict([('id', 18),
                         ('latitude', home_lat + 0 * dlat),
                         ('longitude', home_long + -1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 19
            OrderedDict([('id', 19),
                         ('latitude', home_lat + -1 * dlat),
                         ('longitude', home_long + -1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 20
            OrderedDict([('id', 20),
                         ('latitude', home_lat + -2 * dlat),
                         ('longitude', home_long + -1 * dlong),
                         ('altitude', altitude)]),
            # cell no. 21
            OrderedDict([('id', 21),
                         ('latitude', home_lat + 2 * dlat),
                         ('longitude', home_long + -2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 22
            OrderedDict([('id', 22),
                         ('latitude', home_lat + 1 * dlat),
                         ('longitude', home_long + -2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 23
            OrderedDict([('id', 23),
                         ('latitude', home_lat + 0 * dlat),
                         ('longitude', home_long + -2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 24
            OrderedDict([('id', 24),
                         ('latitude', home_lat + -1 * dlat),
                         ('longitude', home_long + -2 * dlong),
                         ('altitude', altitude)]),
            # cell no. 25
            OrderedDict([('id', 25),
                         ('latitude', home_lat + -2 * dlat),
                         ('longitude', home_long + -2 * dlong),
                         ('altitude', altitude)]),
            ]
        
    def __len__(self):
        return len(self.Move)

## Drone

In [4]:
class Drone:
    def __init__(self, drone_ip, num_targets, max_timestep, is_training=False):
        self.drone = olympe.Drone(drone_ip)
        self.drone.connect()
        
        self.drone(GPSFixStateChanged(_policy = 'wait'))
        self._takeoff()
        if not is_training: self._setup_camera()

        self.action = Action(self.drone)
        
        self.is_training = is_training
        self.num_targets = num_targets
        self.max_timestep = max_timestep
        self.timestep = 0
        self.visited_targets = np.zeros(self.num_targets, dtype=bool)
        self.target_positions = np.zeros(self.num_targets, dtype=np.uint8)
    
    def take_action(self, action):
        old_cell, new_cell, action_name = self.action.take_action(action)
        self.timestep += 1
        detected_targets = self._detect_targets(new_cell)
        
        reward = self._get_reward(detected_targets) # !!! _get_reward must come before self.visited_targets is changed in _get_state
        state = self._get_state(new_cell, detected_targets) 
        if self.timestep >= self.max_timestep or np.all(self.visited_targets):
            done = True
            self.visited_targets[:] = False
        else:
            done = False
        info = {
            "action": str(action_name), 
            "direction": "Cell " + str(old_cell) + " --> " + "Cell " + str(new_cell)
        }
        
        return state, reward, done, info

    def reset(self):
        old_cell, new_cell = self.action.reset()
        detected_targets = self._detect_targets(new_cell)
        self.timestep = 0
        return self._get_state(new_cell, detected_targets)
    
    def _get_state(self, new_cell, detected_targets):
        # {t, cell_id, [I1, I2, I3, ..., In]}
        
        self.visited_targets[detected_targets] = True
        
        return np.concatenate(([self.timestep, new_cell], self.visited_targets)).astype(np.uint8)
    
    def _get_reward(self, detected_targets):
        reward_scale = 1.5
        num_new_targets = np.count_nonzero(
            detected_targets & (detected_targets != self.visited_targets)
        )
        return reward_scale * num_new_targets if num_new_targets > 0 else -1
        
    def _detect_targets(self, cell_id):
        return self.target_positions == cell_id
        
        if self.is_training:
            positions = np.genfromtxt('target_positions.csv', delimiter=',', skip_header=1, dtype=np.uint8)
            return positions[:,1] == cell_id
        
        detected_targets = np.zeros(self.num_targets, dtype=bool)

        img = self._take_photo()
        if img is None:
            return detected_targets
        
        for result in pyzbar.decode(img):
            idx = int(result.data) - 1
            try:
                detected_targets[idx] = True
            except (ValueError, IndexError):
                pass
                
        return detected_targets

    def _setup_camera(self):
        assert self.drone.media_autoconnect
        self.drone.media.integrity_check = True
        is_indexed = False
        while not is_indexed:
            is_indexed = self.drone.media(
                indexing_state(state="indexed")
            ).wait(_timeout=5).success()
        
        self.drone(set_camera_mode(cam_id=0, value="photo")).wait()

        assert self.drone(
            set_photo_mode(
                cam_id=0,
                mode="single",
                format= "rectilinear",
                file_format="jpeg",
                # the following are ignored in photo single mode
                burst="burst_14_over_1s",
                bracketing="preset_1ev",
                capture_interval=5.0,
            )
        ).wait().success()

        assert self.drone(
            set_target(
                gimbal_id=0,
                control_mode="position",
                yaw_frame_of_reference="none",
                yaw=0.0,
                pitch_frame_of_reference="absolute",
                pitch=-90.0,
                roll_frame_of_reference="none",
                roll=0.0,
                )
            >> attitude(
                pitch_absolute=-90.0, _policy="wait", _float_tol=(1e-3, 1e-1)
                )
            ).wait(_timeout=20).success()
    
    def _take_photo(self):
        photo_saved = self.drone(photo_progress(result="photo_saved", _policy="wait"))
        self.drone(take_photo(cam_id=0)).wait()
        
        photo_taken = False
        tries = 0
        while not photo_taken:
            tries += 1
            if tries > 3:
#                 assert False, "take_photo timedout"
                print("take_photo timedout")
                return None
            photo_taken = photo_saved.wait(_timeout=5).success()
            
        # get the bytes of the image
        media_id = photo_saved.received_events().last().args["media_id"]
        for _ in range(5):
            media_info_response = requests.get(ANAFI_MEDIA_API_URL + media_id, timeout=10)
            if media_info_response.status_code == 200:
                break
        try:
            media_info_response.raise_for_status()
        except requests.exceptions.HTTPError as err:
            print(err)
            return None
        
        resource = media_info_response.json()["resources"][0]
        image_response = requests.get(ANAFI_URL + resource["url"], stream=True)
        image_response.raise_for_status()
        
        img = Image.open(BytesIO(image_response.content))
        
        # delete the image stored on the drone
        photo_deleted = False
        delete_tries = 0
        while not photo_deleted:
            delete_tries += 1
            if delete_tries > 3:
#                 assert False, "Failed to delete media {} {}".format(media_id, delete.explain())
                print("Failed to delete media {} {}".format(media_id, delete.explain()))
                break
            delete = delete_media(media_id, _timeout=10)
            photo_deleted = self.drone.media(delete).wait().success()
        
        return img
    
    def _takeoff(self):
        takeoff_success = self._success_if_takeoff()
        if not takeoff_success:
            print("Retrying taking off...")
            takeoff_success = self._success_if_takeoff()
    
    def _success_if_takeoff(self):
        return self.drone(
                FlyingStateChanged(state="hovering")
                | (TakeOff() & FlyingStateChanged(state="hovering"))
            ).wait(10).success()
        
    def _land(self):
        self.drone(PCMD(1, 0, 0, 0, 0, 0) >> FlyingStateChanged(state="hovering", _timeout=5)).wait()
        assert self.drone(Landing() >> FlyingStateChanged(state="landed")).wait().success()
        
    def __del__(self):
        self._land()
        self.drone.disconnect()
        del state
        del reward
        del action

## Simulation

In [5]:
def find_distance(x, y):
    x = x - y[:,np.newaxis]
    x = x**2
    x = x.sum(axis=2)
    x = np.sqrt(x)
    return x

# check if the target overlaps with 
# another target or border
def is_overlapping(x, y=None, limit=0.5):
    # check with another target
    if y is None: y = x
    x = find_distance(x, y) < limit
    x = np.triu(x, k=1)
    
    # check with the borders
    borders_parallel_y = np.array([12.5, 7.5, 2.5, -2.5, -7.5, -12.5])
    x1 = abs(x[:,0] - borders_parallel_y[:,np.newaxis]) < 0.4

    borders_parallel_x = np.array([20, 12, 4, -4, -12, -20])
    x2 = abs(x[:,1] - borders_parallel_x[:,np.newaxis]) < 0.4
    
    x = np.vstack((x, x1, x2))
    x = np.any(x, axis=0)
    return x

In [6]:
num_targets=10
mu_x=-7.5 
variance_x=3
mu_y=-11
variance_y=5

distribution = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
locs = distribution.rvs(size=num_targets)

overlapping_targets = is_overlapping(locs)
while np.any(overlapping_targets):
    num_overlaps = overlapping_targets[overlapping_targets].size
    locs[overlapping_targets] = distribution.rvs(size=num_overlaps)
    overlapping_targets = is_overlapping(locs)

In [122]:
num_overlaps = overlapping_targets[overlapping_targets].size
num_overlaps

0

In [113]:
locs

array([[ -8.89833982,  -9.2297941 ],
       [ -6.14338843,  -7.05054842],
       [ -9.39296298, -10.29139529],
       [ -2.49084933, -12.58398566],
       [ -7.80841139, -11.54652836],
       [ -6.34247422, -13.69616478],
       [ -9.47697081, -13.13073038],
       [ -6.87047567,  -6.11557153],
       [ -8.49369027, -16.85767925],
       [-10.40676275, -16.74391154]])

In [114]:
locs[overlapping_targets] = distribution.rvs(size=num_overlaps)

In [115]:
locs

array([[ -8.89833982,  -9.2297941 ],
       [ -6.14338843,  -7.05054842],
       [ -9.42833748, -16.50998974],
       [ -2.49084933, -12.58398566],
       [ -5.75448539, -11.38370156],
       [ -6.34247422, -13.69616478],
       [ -6.54538078,  -9.91008558],
       [ -6.26700056,  -7.63354395],
       [ -8.49369027, -16.85767925],
       [-10.40676275, -16.74391154]])

In [110]:
overlapping_targets = is_overlapping(locs)

In [111]:
overlapping_targets

array([False, False,  True, False,  True, False,  True,  True, False,
       False])

In [7]:
class Simulation:
    sphinx = jsonrpclib.Server('http://127.0.0.1:8383')
    
    def __init__(self):
        pass
    
    @staticmethod
    def disable_battery():
        Simulation.sphinx.SetParam(machine='anafi4k',
                                   object='lipobattery/lipobattery',
                                   parameter='discharge_speed_factor',
                                   value='0')
    
    @staticmethod
    def reset_world():
        Simulation.sphinx.TriggerAction(machine='world',
                                        object='fwman/fwman',
                                        action='world_reset_all')
    
    def find_distance(x, y):
        x = x - y[:,np.newaxis]
        x = x**2
        x = x.sum(axis=2)
        x = np.sqrt(x)
        return x

    # check if the target overlaps with another target or border
    @classmethod
    def is_overlapping(cls, x, y=None, limit=0.8):
        # check with the borders
        borders_parallel_y = np.array([12.5, 7.5, 2.5, -2.5, -7.5, -12.5])
        x1 = abs(x[:,0] - borders_parallel_y[:,np.newaxis]) < 0.4

        borders_parallel_x = np.array([20, 12, 4, -4, -12, -20])
        x2 = abs(x[:,1] - borders_parallel_x[:,np.newaxis]) < 0.4
        
        # check with another target
        if y is None: y = x
        x = cls.find_distance(x, y) < limit
        x = np.triu(x, k=1)

        x = np.vstack((x, x1, x2))
        x = np.any(x, axis=0)
        return x
    
    @classmethod
    def gen_targets_pos(cls, num_targets, mu_x=-7.5, variance_x=3, mu_y=-11, variance_y=5):
#         locs = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]]).rvs(size=num_targets)
        
        distribution = multivariate_normal([mu_x, mu_y], [[variance_x, 0], [0, variance_y]])
        locs = distribution.rvs(size=num_targets)

        overlapping_targets = cls.is_overlapping(locs)
        while np.any(overlapping_targets):
            num_overlaps = overlapping_targets[overlapping_targets].size
            locs[overlapping_targets] = distribution.rvs(size=num_overlaps)
            overlapping_targets = cls.is_overlapping(locs)
        
        np.savetxt('../comm/positions.csv', 
                   np.hstack((np.arange(len(locs))[:,np.newaxis] + 1, locs)),
                   fmt='%.4f',
                   delimiter=',',
                  )
        
        cell_boundaries = [
           (locs[:,0] > 7.5) & (locs[:,1] > 12),
           (locs[:,0] > 7.5) & (locs[:,1] > 4),
           (locs[:,0] > 7.5) & (locs[:,1] > -4),
           (locs[:,0] > 7.5) & (locs[:,1] > -12),
           (locs[:,0] > 7.5) & (locs[:,1] > -20),
           (locs[:,0] > 2.5) & (locs[:,1] > 12),
           (locs[:,0] > 2.5) & (locs[:,1] > 4),
           (locs[:,0] > 2.5) & (locs[:,1] > -4),
           (locs[:,0] > 2.5) & (locs[:,1] > -12),
           (locs[:,0] > 2.5) & (locs[:,1] > -20),
           (locs[:,0] > -2.5) & (locs[:,1] > 12),
           (locs[:,0] > -2.5) & (locs[:,1] > 4),
           (locs[:,0] > -2.5) & (locs[:,1] > -4),
           (locs[:,0] > -2.5) & (locs[:,1] > -12),
           (locs[:,0] > -2.5) & (locs[:,1] > -20),
           (locs[:,0] > -7.5) & (locs[:,1] > 12),
           (locs[:,0] > -7.5) & (locs[:,1] > 4),
           (locs[:,0] > -7.5) & (locs[:,1] > -4),
           (locs[:,0] > -7.5) & (locs[:,1] > -12),
           (locs[:,0] > -7.5) & (locs[:,1] > -20),
           (locs[:,0] > -12.5) & (locs[:,1] > 12),
           (locs[:,0] > -12.5) & (locs[:,1] > 4),
           (locs[:,0] > -12.5) & (locs[:,1] > -4),
           (locs[:,0] > -12.5) & (locs[:,1] > -12),
           (locs[:,0] > -12.5) & (locs[:,1] > -20),
        ]
        cell_ids = np.arange(25) + 1
        
        return np.select(cell_boundaries, cell_ids), locs
#         return locs
    
    @staticmethod
    def cease_targets():
        f = open("../comm/toggle_movement.txt", "w")
        f.write("0")
        f.close()
    
    @staticmethod
    def move_targets():
        f = open("../comm/toggle_movement.txt", "w")
        f.write("1")
        f.close()
        
    @staticmethod
    def reset_targets(release=False):
        fmobile = open("../comm/reset_position.txt", "w")
#         fstatic = open("../plugins/static_target/reset_position.txt", "w")
        if not release:
            fmobile.write("1\n")
#             fstatic.write("1\n")
        else:
            fmobile.write("0\n")
#             fstatic.write("0\n")
        fmobile.close()
#         fstatic.close()
        

In [9]:
Simulation.gen_targets_pos(10)

array([[-10.46458845, -10.01633378],
       [ -6.24402812, -10.54155127],
       [ -6.08411325,  -6.51080688],
       [ -8.804801  , -11.15770231],
       [ -8.34083635,  -9.8827977 ],
       [ -5.9115347 ,  -9.73495238],
       [ -6.67194236, -14.80068425],
       [-11.51665774, -12.71224588],
       [ -6.25879009,  -8.87102503],
       [ -9.48866335, -12.54896677]])

In [167]:
Simulation.reset_targets(release=True)

In [168]:
Simulation.reset_targets()

In [187]:
Simulation.move_targets()

In [185]:
Simulation.cease_targets()

In [186]:
# for _ in range(5):
cells, locs = Simulation.gen_targets_pos(10)
Simulation.reset_targets()
time.sleep(0.1)
Simulation.reset_targets(release=True)
#     time.sleep(1)
# for _ in range(10):
#     print(sorted(Simulation.gen_targets_pos(10)))

In [163]:
7.5-7.17883494

0.3211650600000002

In [87]:
test

array([[ -6.53392003,  -9.38427206],
       [ -8.08757585, -12.59822305],
       [ -9.04958566, -12.05622549],
       [ -6.71018167,  -9.60039297],
       [ -7.8884485 ,  -9.59102998],
       [ -9.29949368, -13.57839219],
       [ -7.84492359, -13.40426512],
       [ -7.17883494, -12.22309897],
       [ -8.51418777, -10.8921918 ],
       [ -7.07184698, -11.1778088 ]])

In [3]:
test_A = np.array([
    [ -6.53392003,  -9.38427206],
    [ -8.08757585, -12.59822305],
    [ -9.04958566, -12.05622549],
    [ -6.71018167,  -9.60039297],
    [ -7.8884485 ,  -9.59102998],
    [ -9.29949368, -13.57839219],
    [ -7.84492359, -13.40426512],
    [ -7.17883494, -12.22309897],
    [ -8.51418777, -10.8921918 ],
    [ -7.07184698, -11.1778088 ]
])

In [4]:
test = test_A

In [7]:
norm = np.linalg.norm

p1 = np.array([-7.5, 4])
p2 = np.array([-7.5, -20])

p3 = np.array([-7.8884485, -9.59102998])
d = np.abs(norm(np.cross(p2-p1, p1-p3)))/norm(p2-p1)

In [15]:
y_parallel_borders = np.array(test)

In [24]:
y_parallel_borders = np.array([
    [12.5, 7.5, 2.5, -2.5, -7.5, -12.5]
])

x_parallel_borders = np.array([
    [20, 12, 4, -4, -12, -20]
])

In [33]:
find_distance(y_parallel_borders, test[:,0])

AxisError: axis 2 is out of bounds for array of dimension 2

In [38]:
y_parallel_borders.ravel()

array([ 12.5,   7.5,   2.5,  -2.5,  -7.5, -12.5])

In [49]:
abs(test[:,0] - y_parallel_borders.ravel()[:,np.newaxis])

array([[19.03392003, 20.58757585, 21.54958566, 19.21018167, 20.3884485 ,
        21.79949368, 20.34492359, 19.67883494, 21.01418777, 19.57184698],
       [14.03392003, 15.58757585, 16.54958566, 14.21018167, 15.3884485 ,
        16.79949368, 15.34492359, 14.67883494, 16.01418777, 14.57184698],
       [ 9.03392003, 10.58757585, 11.54958566,  9.21018167, 10.3884485 ,
        11.79949368, 10.34492359,  9.67883494, 11.01418777,  9.57184698],
       [ 4.03392003,  5.58757585,  6.54958566,  4.21018167,  5.3884485 ,
         6.79949368,  5.34492359,  4.67883494,  6.01418777,  4.57184698],
       [ 0.96607997,  0.58757585,  1.54958566,  0.78981833,  0.3884485 ,
         1.79949368,  0.34492359,  0.32116506,  1.01418777,  0.42815302],
       [ 5.96607997,  4.41242415,  3.45041434,  5.78981833,  4.6115515 ,
         3.20050632,  4.65507641,  5.32116506,  3.98581223,  5.42815302]])

In [53]:
np.any(abs(test[:,0] - y_parallel_borders.ravel()[:,np.newaxis]) < 0.4, axis=0)

array([False, False, False, False,  True, False,  True,  True, False,
       False])

In [36]:
abs(test[:,0][:,np.newaxis] - y_parallel_borders) < 0.5

array([[False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False,  True, False],
       [False, False, False, False, False, False],
       [False, False, False, False,  True, False],
       [False, False, False, False,  True, False],
       [False, False, False, False, False, False],
       [False, False, False, False,  True, False]])

In [51]:
abs(test[:,0][:,np.newaxis] - y_parallel_borders)

array([[19.03392003, 14.03392003,  9.03392003,  4.03392003,  0.96607997,
         5.96607997],
       [20.58757585, 15.58757585, 10.58757585,  5.58757585,  0.58757585,
         4.41242415],
       [21.54958566, 16.54958566, 11.54958566,  6.54958566,  1.54958566,
         3.45041434],
       [19.21018167, 14.21018167,  9.21018167,  4.21018167,  0.78981833,
         5.78981833],
       [20.3884485 , 15.3884485 , 10.3884485 ,  5.3884485 ,  0.3884485 ,
         4.6115515 ],
       [21.79949368, 16.79949368, 11.79949368,  6.79949368,  1.79949368,
         3.20050632],
       [20.34492359, 15.34492359, 10.34492359,  5.34492359,  0.34492359,
         4.65507641],
       [19.67883494, 14.67883494,  9.67883494,  4.67883494,  0.32116506,
         5.32116506],
       [21.01418777, 16.01418777, 11.01418777,  6.01418777,  1.01418777,
         3.98581223],
       [19.57184698, 14.57184698,  9.57184698,  4.57184698,  0.42815302,
         5.42815302]])

In [23]:
y_parallel_borders

array([[-6.53392003,  1.        ],
       [-8.08757585,  2.        ],
       [-9.04958566,  3.        ],
       [-6.71018167,  4.        ],
       [-7.8884485 ,  5.        ],
       [-9.29949368,  6.        ],
       [-7.84492359,  7.        ],
       [-7.17883494,  8.        ],
       [-8.51418777,  9.        ],
       [-7.07184698, 10.        ]])

In [None]:
           (locs[:,0] > 7.5) & (locs[:,1] > 12),
           (locs[:,0] > 7.5) & (locs[:,1] > 4),
           (locs[:,0] > 7.5) & (locs[:,1] > -4),
           (locs[:,0] > 7.5) & (locs[:,1] > -12),
           (locs[:,0] > 7.5) & (locs[:,1] > -20),
           (locs[:,0] > 2.5) & (locs[:,1] > 12),
           (locs[:,0] > 2.5) & (locs[:,1] > 4),
           (locs[:,0] > 2.5) & (locs[:,1] > -4),
           (locs[:,0] > 2.5) & (locs[:,1] > -12),
           (locs[:,0] > 2.5) & (locs[:,1] > -20),
           (locs[:,0] > -2.5) & (locs[:,1] > 12),
           (locs[:,0] > -2.5) & (locs[:,1] > 4),
           (locs[:,0] > -2.5) & (locs[:,1] > -4),
           (locs[:,0] > -2.5) & (locs[:,1] > -12),
           (locs[:,0] > -2.5) & (locs[:,1] > -20),
           (locs[:,0] > -7.5) & (locs[:,1] > 12),
           (locs[:,0] > -7.5) & (locs[:,1] > 4),
           (locs[:,0] > -7.5) & (locs[:,1] > -4),
           (locs[:,0] > -7.5) & (locs[:,1] > -12),
           (locs[:,0] > -7.5) & (locs[:,1] > -20),
           (locs[:,0] > -12.5) & (locs[:,1] > 12),
           (locs[:,0] > -12.5) & (locs[:,1] > 4),
           (locs[:,0] > -12.5) & (locs[:,1] > -4),
           (locs[:,0] > -12.5) & (locs[:,1] > -12),
           (locs[:,0] > -12.5) & (locs[:,1] > -20),
        ]

In [58]:
a = np.array([[1,2], [3,4]])
b = np.array([[5,6], [7,8]])

In [60]:
np.vstack((a, b))

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [10]:
len(test)

10

In [8]:
d

0.3884485

In [63]:
borders_parallel_x = np.array([20, 12, 4, -4, -12, -20])
    
x2 = abs(test[:,1] - borders_parallel_x[:,np.newaxis]) < 0.4

In [64]:
x2

array([[False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False],
       [False, False,  True, False, False, False, False,  True, False,
        False],
       [False, False, False, False, False, False, False, False, False,
        False]])

In [56]:
is_overlapping(test)

[[0.         3.56977974 3.66986494 0.27888423 1.37021758 5.02384726
  4.22836547 2.91116016 2.4890324  1.87246881]
 [3.56977974 0.         1.10418487 3.29912108 3.01377867 1.55867776
  0.84177428 0.98312162 1.75856201 1.74621928]
 [3.66986494 1.10418487 0.         3.39174356 2.72496393 1.54254513
  1.80787764 1.87817864 1.28125928 2.16403932]
 [0.27888423 3.29912108 3.39174356 0.         1.17830403 4.7464739
  3.96951918 2.66424898 2.21882451 1.6183456 ]
 [1.37021758 3.01377867 2.72496393 1.17830403 0.         4.22966971
  3.81348353 2.72604816 1.4438046  1.7845742 ]
 [5.02384726 1.55867776 1.54254513 4.7464739  4.22966971 0.
  1.46495542 2.51674655 2.79863858 3.27493671]
 [4.22836547 0.84177428 1.80787764 3.96951918 3.81348353 1.46495542
  0.         1.35603376 2.59969746 2.35685281]
 [2.91116016 0.98312162 1.87817864 2.66424898 2.72604816 2.51674655
  1.35603376 0.         1.88533315 1.05075114]
 [2.4890324  1.75856201 1.28125928 2.21882451 1.4438046  2.79863858
  2.59969746 1.885333

array([False, False, False,  True, False, False, False, False, False,
       False])

In [29]:
A = np.array([[2, 3], [5, 10]])
B = np.array([3, 10, 11, 12])

In [94]:
find_distance(test, np.array([[0, -7]]))

[[ 6.95534788  9.83610616 10.36631163  7.1964284   8.3030751  11.3910415
  10.12706462  8.87786202  9.36165319  8.21371451]]


array([[ 6.95534788,  9.83610616, 10.36631163,  7.1964284 ,  8.3030751 ,
        11.3910415 , 10.12706462,  8.87786202,  9.36165319,  8.21371451]])

In [65]:
def find_distance(x, y):
    x = x - y[:,np.newaxis]
    x = x**2
    x = x.sum(axis=2)
    x = np.sqrt(x)
#     print(x)
    return x

# check if the target overlaps with 
# another target or border
def is_overlapping(x, y=None, limit=0.5):
    # check with another target
    if y is None: y = x
    x = find_distance(x, y) < limit
    x = np.triu(x, k=1)
    
    # check with the borders
    borders_parallel_y = np.array([12.5, 7.5, 2.5, -2.5, -7.5, -12.5])
    x1 = abs(test[:,0] - borders_parallel_y[:,np.newaxis]) < 0.4

    borders_parallel_x = np.array([20, 12, 4, -4, -12, -20])
    x2 = abs(test[:,1] - borders_parallel_x[:,np.newaxis]) < 0.4
    
#     print(x)
    
    x = np.vstack((x, x1, x2))
    x = np.any(x, axis=0)
    return x

In [67]:
np.any(is_overlapping(test))

True

In [53]:
E = D**2
E

array([[[ 0,  0],
        [ 9, 49]],

       [[ 9, 49],
        [ 0,  0]]])

In [57]:
F = E.sum(axis=2)
F

array([[ 0, 58],
       [58,  0]])

In [58]:
G = np.sqrt(F)
G

array([[0.        , 7.61577311],
       [7.61577311, 0.        ]])

In [68]:
np.sqrt((0.35**2) + (0.35**2))

0.49497474683058323

In [72]:
np.any(np.triu(find_distance(test) < 0.5, k=1), axis=0)

array([False, False, False,  True, False, False, False, False, False,
       False])

In [37]:
A**2

array([[  4,   9],
       [ 25, 100]])

In [41]:
(A**2).sum(axis=1)

array([ 13, 125])

In [42]:
np.sqrt((A**2).sum(axis=1))

array([ 3.60555128, 11.18033989])

In [25]:
C = A == B[:,np.newaxis]

In [26]:
C

array([[False,  True, False, False],
       [False, False, False,  True],
       [False, False, False, False],
       [False, False, False, False]])

In [28]:
np.any(C, axis=0)

array([False,  True, False,  True])

In [16]:
A[:,np.newaxis]

array([[2],
       [3],
       [5]])

# Define a Gym Environment

In [8]:
class AnafiEnv(Env):
    def __init__(self, num_targets, max_timestep, drone_ip="10.202.0.1", is_training=False):
        super(AnafiEnv, self).__init__()
        
        Simulation.disable_battery()
#         Simulation.cease_targets()
        
        self.num_targets = num_targets
        self.max_timestep = max_timestep
        self.begin(num_targets, max_timestep, is_training, drone_ip)
        
        self.action_space = spaces.Discrete(len(self.agent.action))
        self.observation_space = spaces.Box( # {t, cell_id, [I1, I2, I3, ..., In]}
            low=np.array([0, 1] + num_targets*[0]), 
            high=np.array([max_timestep, 25] + num_targets*[1]), 
            dtype=np.uint8,
        )
        
#         Simulation.move_targets()
    
    def begin(self, num_targets, max_timestep, is_training, drone_ip="10.202.0.1"):
        self.agent = Drone(drone_ip, num_targets, max_timestep, is_training)
    
    def step(self, action):
        obs, reward, done, info = self.agent.take_action(action)
        
        return obs, reward, done, info
    
    def reset(self):
#         Simulation.reset_targets()
        self.agent.target_positions = Simulation.gen_targets_pos(self.num_targets)
        return self.agent.reset()
    
    def render(self, mode='human'):
        pass
    
    def close(self):
#         Simulation.cease_targets()
        del self.agent

# Run the simulation

In [9]:
def disp_info(action, observation, reward, done, info):
#     clear_output(wait=True)
    print("Action:", info["action"] + ",", info["direction"])
    print("State:", observation)
    print("Reward:", reward)
#     down_scale = 3
#     display(img.resize((img.size[0]//down_scale, img.size[1]//down_scale)))

In [12]:
try:
    del env
except Exception as e:
    pass

env = AnafiEnv(num_targets=10, max_timestep=5, is_training=True)
observation = env.reset()
# actions = [7, 4, 7, 3, 1, 2, 8, 4, 1, 5, 6]
for i in range(1000):
    action = env.action_space.sample()
#     action = actions[i % len(actions)]
    observation, reward, done, info = env.step(action)
    disp_info(action, observation, reward, done, info)

    if done:
        print("The episode has ended. Resetting environment...")
        observation = env.reset()
        disp_info(action, observation, 0, done, info)

env.close()

Action: BR, Cell 13 --> Cell 19
State: [ 1 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 19 --> Cell 14
State: [ 2 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BR, Cell 14 --> Cell 20
State: [ 3 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BR, Cell 20 --> Cell 20
State: [ 4 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 20 --> Cell 20
State: [ 5 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: R, Cell 20 --> Cell 20
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: BL, Cell 13 --> Cell 17
State: [ 1 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 17 --> Cell 21
State: [ 2 21  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 21 --> Cell 21
State: [ 3 21  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 21 --> Cell 16
State: [ 4 16  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 16 --> Cell 21
State: [ 5 21  0  0  0  0  0  0  0  0  0  0]
Re

2022-03-24 05:43:26,323 [31m[ERROR] [0m	ulog - pomp - epoll_ctl(fd=105) err=9(Bad file descriptor)[0m
2022-03-24 05:43:26,324 [31m[ERROR] [0m	ulog - pomp - epoll_ctl op=2 cb=0x7ff731c633c8 userdata=0x7ff5cc414f10[0m


Action: L, Cell 7 --> Cell 6
State: [3 6 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: F, Cell 6 --> Cell 1
State: [4 1 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: FL, Cell 1 --> Cell 1
State: [5 1 0 0 0 0 0 0 0 0 0 0]
Reward: -1
The episode has ended. Resetting environment...
Action: FL, Cell 1 --> Cell 1
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: L, Cell 13 --> Cell 12
State: [ 1 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 12 --> Cell 16
State: [ 2 16  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 16 --> Cell 17
State: [ 3 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 17 --> Cell 17
State: [ 4 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 17 --> Cell 17
State: [ 5 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: H, Cell 17 --> Cell 17
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: F, Cell 13 --> Cell 8
State: [1 8 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Act

Action: BR, Cell 15 --> Cell 15
State: [ 5 15  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: BR, Cell 15 --> Cell 15
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: L, Cell 13 --> Cell 12
State: [ 1 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BR, Cell 12 --> Cell 18
State: [ 2 18  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 18 --> Cell 23
State: [ 3 23  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FL, Cell 23 --> Cell 17
State: [ 4 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 17 --> Cell 21
State: [ 5 21  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: BL, Cell 17 --> Cell 21
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: FL, Cell 13 --> Cell 7
State: [1 7 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: R, Cell 7 --> Cell 8
State: [2 8 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: BL, Cell 8 --> Cell 12
State: [ 3 12  0  0  0  0

Action: FR, Cell 18 --> Cell 14
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: R, Cell 13 --> Cell 14
State: [ 1 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 14 --> Cell 18
State: [ 2 18  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 18 --> Cell 22
State: [ 3 22  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 22 --> Cell 23
State: [ 4 23  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 23 --> Cell 23
State: [ 5 23  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: H, Cell 23 --> Cell 23
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: BR, Cell 13 --> Cell 19
State: [ 1 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 19 --> Cell 14
State: [ 2 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FR, Cell 14 --> Cell 10
State: [ 3 10  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FR, Cell 10 --> Cell 10
State: [ 4 10  0  0  0  0  0  0  0  0  0  0]
Re

Action: FL, Cell 13 --> Cell 7
State: [1 7 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: H, Cell 7 --> Cell 7
State: [2 7 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: B, Cell 7 --> Cell 12
State: [ 3 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BR, Cell 12 --> Cell 18
State: [ 4 18  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 18 --> Cell 18
State: [ 5 18  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: H, Cell 18 --> Cell 18
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: R, Cell 13 --> Cell 14
State: [ 1 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 14 --> Cell 15
State: [ 2 15  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 15 --> Cell 19
State: [ 3 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 19 --> Cell 20
State: [ 4 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 20 --> Cell 20
State: [ 5 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended

Action: L, Cell 18 --> Cell 17
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: F, Cell 13 --> Cell 8
State: [1 8 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: F, Cell 8 --> Cell 3
State: [2 3 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: BR, Cell 3 --> Cell 9
State: [3 9 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: R, Cell 9 --> Cell 10
State: [ 4 10  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 10 --> Cell 15
State: [ 5 15  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: B, Cell 10 --> Cell 15
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: L, Cell 13 --> Cell 12
State: [ 1 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 12 --> Cell 17
State: [ 2 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 17 --> Cell 22
State: [ 3 22  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: L, Cell 22 --> Cell 21
State: [ 4 21  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 21 --> Cell 21
State: 

Reward: -1
Action: H, Cell 8 --> Cell 8
State: [5 8 0 0 0 0 0 0 0 0 0 0]
Reward: -1
The episode has ended. Resetting environment...
Action: H, Cell 8 --> Cell 8
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: FR, Cell 13 --> Cell 9
State: [1 9 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: F, Cell 9 --> Cell 4
State: [2 4 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: H, Cell 4 --> Cell 4
State: [3 4 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: BR, Cell 4 --> Cell 10
State: [ 4 10  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 10 --> Cell 5
State: [5 5 0 0 0 0 0 0 0 0 0 0]
Reward: -1
The episode has ended. Resetting environment...
Action: F, Cell 10 --> Cell 5
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: BR, Cell 13 --> Cell 19
State: [ 1 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: R, Cell 19 --> Cell 20
State: [ 2 20  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: L, Cell 20 --> Cell 19
State: [ 3 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action:

Reward: -1
The episode has ended. Resetting environment...
Action: BR, Cell 17 --> Cell 23
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: BR, Cell 13 --> Cell 19
State: [ 1 19  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FL, Cell 19 --> Cell 13
State: [ 2 13  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: B, Cell 13 --> Cell 18
State: [ 3 18  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FR, Cell 18 --> Cell 14
State: [ 4 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FR, Cell 14 --> Cell 10
State: [ 5 10  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: FR, Cell 14 --> Cell 10
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: L, Cell 13 --> Cell 12
State: [ 1 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 12 --> Cell 12
State: [ 2 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: FR, Cell 12 --> Cell 8
State: [3 8 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: L, Cell 8 --> Cell 7
St

Exception ignored in: <function Drone.__del__ at 0x7ff5cd77f160>
Traceback (most recent call last):
  File "/tmp/ipykernel_9486/3071416055.py", line 183, in __del__
  File "/tmp/ipykernel_9486/3071416055.py", line 180, in _land
AssertionError: 


Action: FR, Cell 17 --> Cell 13
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: R, Cell 13 --> Cell 14
State: [ 1 14  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: L, Cell 14 --> Cell 13
State: [ 2 13  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: BL, Cell 13 --> Cell 17
State: [ 3 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: H, Cell 17 --> Cell 17
State: [ 4 17  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 17 --> Cell 12
State: [ 5 12  0  0  0  0  0  0  0  0  0  0]
Reward: -1
The episode has ended. Resetting environment...
Action: F, Cell 17 --> Cell 12
State: [ 0 13  0  0  0  0  0  0  0  0  0  0]
Reward: 0
Action: FR, Cell 13 --> Cell 9
State: [1 9 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: R, Cell 9 --> Cell 10
State: [ 2 10  0  0  0  0  0  0  0  0  0  0]
Reward: -1
Action: F, Cell 10 --> Cell 5
State: [3 5 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: H, Cell 5 --> Cell 5
State: [4 5 0 0 0 0 0 0 0 0 0 0]
Reward: -1
Action: L, Cell 5 --> Cell 4
State:

# Train the agent

In [7]:
class SaveOnBestTrainingRewardCallback(BaseCallback):
    """
    Callback for saving a model (the check is done every ``check_freq`` steps)
    based on the training reward (in practice, we recommend using ``EvalCallback``).

    :param check_freq:
    :param log_dir: Path to the folder where the model will be saved.
      It must contains the file created by the ``Monitor`` wrapper.
    :param verbose: Verbosity level.
    """
    def __init__(self, check_freq: int, log_dir: str, verbose: int = 1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        pass
#         # Create folder if needed
#         if self.save_path is not None:
#             os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:

            # Retrieve training reward
            x, y = ts2xy(load_results(self.log_dir), 'timesteps')
            if len(x) > 0:
                # Mean training reward over the last 100 episodes
                mean_reward = np.mean(y[-100:])
                if self.verbose > 0:
                    print(f"Num timesteps: {self.num_timesteps}")
                    print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")

                # New best model, you could save the agent here
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    # Example for saving best model
                    if self.verbose > 0:
                        print(f"Saving new best model to {self.save_path}")
                    self.model.save(self.save_path)
        
        return True

In [9]:
try:
    del env
    del model
except Exception as e:
    pass

# Create log dir
run = 2
log_dir = "logs/"
monitor_file = os.path.join(log_dir, str(run))
os.makedirs(log_dir, exist_ok=True)

env = AnafiEnv(num_targets=10, max_timestep=15, is_training=True)
env = Monitor(env, monitor_file)
env = DummyVecEnv([lambda: env])

callback = SaveOnBestTrainingRewardCallback(check_freq=512, log_dir=log_dir)

# model = PPO("MlpPolicy", env, n_steps=1024, verbose=1, tensorboard_log=log_dir)
model = PPO.load(os.path.join(log_dir, str(run-1) + "_run"), env)
model.learn(total_timesteps=15_000, callback=callback, tb_log_name="PPO_" + str(run), reset_num_timesteps=False)
model.save(os.path.join(log_dir, str(run) + "_run"))

Logging to logs/PPO_2_0
Num timesteps: 15872
Best mean reward: -inf - Last mean reward per episode: 5.42
Saving new best model to logs/best_model
Num timesteps: 16384
Best mean reward: 5.42 - Last mean reward per episode: 5.22
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 11.4     |
|    ep_rew_mean     | 5.22     |
| time/              |          |
|    fps             | 0        |
|    iterations      | 1        |
|    time_elapsed    | 2073     |
|    total_timesteps | 16384    |
---------------------------------
Num timesteps: 16896
Best mean reward: 5.42 - Last mean reward per episode: 5.92
Saving new best model to logs/best_model
Num timesteps: 17408
Best mean reward: 5.92 - Last mean reward per episode: 6.30
Saving new best model to logs/best_model
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.5        |
|    ep_rew_mean          | 6.3         |
| time/              

Num timesteps: 25088
Best mean reward: 8.03 - Last mean reward per episode: 8.27
Saving new best model to logs/best_model
Num timesteps: 25600
Best mean reward: 8.27 - Last mean reward per episode: 8.78
Saving new best model to logs/best_model
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.92         |
|    ep_rew_mean          | 8.78         |
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 10           |
|    time_elapsed         | 23149        |
|    total_timesteps      | 25600        |
| train/                  |              |
|    approx_kl            | 0.0088339625 |
|    clip_fraction        | 0.0869       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0.37         |
|    learning_rate        | 0.0003       |
|    loss                 | 7.67         |
|    n_updates           

2022-03-08 06:34:35,631 [31m[ERROR] [0m	ulog - pomp - epoll_ctl(fd=89) err=9(Bad file descriptor)[0m
2022-03-08 06:34:35,637 [31m[ERROR] [0m	ulog - pomp - epoll_ctl op=2 cb=0x7f8775037c18 userdata=0x7f85c5422b90[0m


# Test the agent

In [None]:
def reset_target_pos():
    Simulation.gen_targets_pos(10)
    Simulation.reset_targets()
    time.sleep(0.1)
    Simulation.reset_targets(release=True)

In [102]:
try:
    del env
    del model
except Exception as e:
    pass

env = AnafiEnv(num_targets=10, max_timestep=15)
model = PPO.load("logs/4_run", env, verbose=1)

# header = ["timestep", "action"]

# episode = 9
# timestep = 0
# file = open('run_' + str(episode+1) + '.csv', 'w', encoding='UTF8', newline='')
# writer = csv.writer(file)
# writer.writerow(header)

obs = env.reset()
reset_target_pos()

for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
#     timestep += 1
    
#     writer.writerow([str(timestep), info["action"]])

    if done:
        obs = env.reset()
        reset_target_pos()
        
#         if episode >= 10:
#             file.close()
#             break
#         episode += 1
#         timestep = 0

#         file = open('run_' + str(episode+1) + '.csv', 'w', encoding='UTF8', newline='')
#         writer = csv.writer(file)
#         writer.writerow(header)

541 Server Error: Media Not Yet Indexed for url: http://10.202.0.1:80/api/v1/media/medias
Media are not yet indexed


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


2022-03-09 15:16:31,648 [31m[ERROR] [0m	ulog - pomp - epoll_ctl(fd=121) err=9(Bad file descriptor)[0m
2022-03-09 15:16:31,650 [31m[ERROR] [0m	ulog - pomp - epoll_ctl op=2 cb=0x7ff39afeebe0 userdata=0x7ff2a6a33f90[0m
