<a href="https://colab.research.google.com/github/aashmauprety/Final_UAV/blob/master/TF_Agent_UAV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install tf-agents

Collecting tf-agents
[?25l  Downloading https://files.pythonhosted.org/packages/ef/b0/88c9aab39050cfb544ec73ee48b8d0e67b4b16ed5470c82235255b119952/tf_agents-0.5.0-py3-none-any.whl (933kB)
[K     |████████████████████████████████| 942kB 2.8MB/s 
Collecting protobuf>=3.11.3
[?25l  Downloading https://files.pythonhosted.org/packages/28/05/9867ef8eafd12265267bee138fa2c46ebf34a276ea4cbe184cba4c606e8b/protobuf-3.12.2-cp36-cp36m-manylinux1_x86_64.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 16.3MB/s 
Collecting gin-config==0.1.3
[?25l  Downloading https://files.pythonhosted.org/packages/8c/be/c984b1c8a7ba1c385b32bf39c7a225cd9f713d49705898309d01b60fd0e7/gin_config-0.1.3-py3-none-any.whl (43kB)
[K     |████████████████████████████████| 51kB 6.4MB/s 
Installing collected packages: protobuf, gin-config, tf-agents
  Found existing installation: protobuf 3.10.0
    Uninstalling protobuf-3.10.0:
      Successfully uninstalled protobuf-3.10.0
  Found existing installation: gin-co

In [0]:
from enum import Enum
import numpy as np

import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as timeStep

In [0]:
class ActionResult(Enum):
  COLLISION = 1
  NO_MOVE = 2
  SUCCESS = 3
  IS_INVALID = 4
  VALID_MOVE = 5

In [0]:
#MY CUSTOM ENVIRONMENT 
class UAV():
  #initialize
  def __init__(self):
    self._state = np.zeros(36, dtype= np.int32)
    self._state[5] =1 #initial position of UAV1
    self._state[9] = 2 #initial popsition of UAV2
    self._state[15] = 3 #position of station which is stationary
    self._game_ended = False


  #Reset the environement to its initial state.
  def reset(self):
    self._state = np.zeros(36, dtype =  np.int32)
    self._state[5] = 1
    self._state[9] = 2
    self._state[15] = 3
    self._game_ended = False

  def _is_success(self, position1, position2):
    return bool(position1 == 15) ^  bool(position2 == 15)

  def move_uav(self, current_state1, current_state2, next_state1, next_state2):

    #check if success occured 
    if self._is_success(next_state1, next_state2):
      self._state[current_state1] = 0
      self._state[next_state1] = 1
      self._state[current_state2] = 0
      self._state[next_state2] = 2
      self._game_ended = True
      return ActionResult.SUCCESS


    # If move results in out of boundary
    if next_state1 < 0 or next_state2 < 0 or next_state1 > (len(self._state) - 1) or next_state2 > (len(self._state) - 1):
      self._game_ended = True
      return ActionResult.IS_INVALID

    #collision
    if next_state1 == 15 and next_state2 == 15:
      self._game_ended = True
      return ActionResult.COLLISION

    if next_state1 == current_state1 and next_state2 == current_state2: 
      self._state[current_state1] = 1
      self._state[current_state2] = 2
      self._game_ended = False
      return ActionResult.NO_MOVE

    self._state[current_state1] = 0
    self._state[current_state2] = 0
    self._state[next_state1] = 1
    self._state[next_state2] = 2   
    return ActionResult.VALID_MOVE 

  def game_ended(self):
    return self._game_ended

  def game_state(self):
    return self._state     


In [0]:
class UAVEnvironment(py_environment.PyEnvironment):
  def __init__(self, game):
    self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
    self._observation_spec = array_spec.BoundedArraySpec(
        shape=(36,), dtype=np.int32, minimum=0, maximum=3, name='observation')

    # 0=>Left, 1=>Right, 2=>Down, 3=>Up
    self._action_values = {0:-1,1:1,2:-6,3:6}
    self._game = game

  def _reset(self):
        self._game.reset()
        return timeStep.restart(self._game.game_state())

  def action_spec(self):
   return self._action_spec

  def observation_spec(self):
    return self._observation_spec

  def _step(self, action):    

      if self._game.game_ended():
          return self.reset()

      action = action.item()

      next_UAV1_position_direction = self._action_values.get(action)
      next_UAV2_position_direction = self._action_values.get(action)
      current_UAV1_position = np.where(self._game.game_state() == 1)[0].item()
      current_UAV2_position = np.where(self._game.game_state() == 1) [0].item()
      new_UAV1_position = current_UAV1_position + next_UAV1_position_direction
      new_UAV2_position = current_UAV2_position + next_UAV2_position_direction

      response = self._game.move_uav(current_UAV1_position, current_UAV2_position, new_UAV1_position, new_UAV2_position)  

      if response == ActionResult.SUCCESS:
            return timeStep.termination(self._game.game_state(), reward = 10, discount = 0.7)

      elif response == ActionResult.IS_INVALID:
          return timeStep.termination(self._game.game_state(), -0.3)

      elif response == ActionResult.COLLISION:
          return timeStep.termination(self._game.game_state(), -1)

      elif response == ActionResult.NO_MOVE:
          return timeStep.transition(self._game.game_state(), reward=1, discount=1.0)

      return timeStep.transition(self._game.game_state(), reward=-0.3, discount=1.0)

In [46]:
UAVEnvironment = UAVEnvironment(UAV())
utils.validate_py_environment(UAVEnvironment, episodes=5)

ValueError: ignored