# In [17]:
import gym
import numpy as np
import requests
from gym import error, spaces, utils
from gym import logger
from gym.utils import seeding

class FooEnv(gym.Env):
    """Gym environment built from resource metrics polled over HTTP.

    Four metric vectors (CPU, memory, disk, network) are fetched from the
    metrics service once, when the environment is constructed.  Each vector
    holds five fields in this order: the raw metric, ``prediction``,
    ``anomalyScore``, ``anomalyLikelihood`` and ``utility_<resource>``.

    Observation:
        Type: Box with bounds ``-|v| .. |v|``, where ``v`` is the array made
        by stacking the vectors fetched at construction time in the order
        CPU, memory, disk, network.  The shape is (4, 5) if every field is a
        scalar -- TODO confirm against the service's payload.

    Actions:
        Type: Discrete(5)

    NOTE(review): this class body defines no ``seed``/``step``/``reset``/
    ``render``/``close``.  Functions with those names that take ``self``
    exist at module level below the class; presumably they were meant to be
    methods -- confirm the indentation.

    NOTE(review): the bounds come from a single sample of the metrics, so a
    later observation can fall outside the declared space.

    Design notes carried over from the original docstring (nothing in this
    class body implements them):
        Intended actions:
            0   Stay in State S0
            1   Scale A Service Down Move to State S0
            2   Scale Service Up
            3   Remove Node
            4   Add Node
            5   Initialise new swarm with state
            (six entries are listed, but the action space has five values)
        Reward:
            SoftMAX(Ucpu, Umem, Unet, Udisk)
        Starting State:
            All observations are assigned to current observation of S0
        Episode Termination:
            Adaptation Time is >= 300MS
            Number of Nodes <= 1
            Number of Nodes >= Maximum Number of Node
            Number of Replicas <= min(Service)
            Number of Replicas >= Max(service)
            Considered solved when services are fully converged
    """

    metadata = {'render.modes': ['human']}

    # Root of the metrics service; each resource is served at <root>/<name>.
    METRICS_URL = 'http://192.168.99.100:8888'
    # Passed to requests as the per-request timeout (seconds).
    REQUEST_TIMEOUT = 5

    @classmethod
    def _fetch_observation(cls, resource):
        """Fetch one resource's metrics and return them as a 5-element array.

        Args:
            resource: endpoint name; also the JSON key of the raw metric and
                the suffix of the ``utility_<resource>`` key.

        Returns:
            ``np.array([metric, prediction, anomalyScore, anomalyLikelihood,
            utility])``.

        Raises:
            requests.RequestException: on connection failure, timeout or a
                non-2xx status.
            ValueError: if the service answers with an empty payload.
            KeyError: if an expected field is missing from the payload.
        """
        response = requests.get(
            '%s/%s' % (cls.METRICS_URL, resource),
            timeout=cls.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        results = response.json()
        if not results:
            # Previously this fell through to an unbound local variable.
            raise ValueError(
                'metrics service returned an empty payload for %r' % resource
            )
        return np.array([
            results[resource],
            results['prediction'],
            results['anomalyScore'],
            results['anomalyLikelihood'],
            results['utility_' + resource],
        ])

    # The getters are classmethods so they work both as ``self.get_*()``
    # (how __init__ calls them) and as ``FooEnv.get_*()``.

    @classmethod
    def get_cpu_observation(cls):
        """Return the CPU metric vector from the ``/cpu`` endpoint."""
        return cls._fetch_observation('cpu')

    @classmethod
    def get_mem_observation(cls):
        """Return the memory metric vector from the ``/mem`` endpoint."""
        return cls._fetch_observation('mem')

    @classmethod
    def get_net_observation(cls):
        """Return the network metric vector from the ``/net`` endpoint."""
        return cls._fetch_observation('net')

    @classmethod
    def get_disk_observation(cls):
        """Return the disk metric vector from the ``/disk`` endpoint."""
        return cls._fetch_observation('disk')

    def __init__(self):
        """Poll the metrics service once and derive the spaces from the result."""
        self.cpu_axis = self.get_cpu_observation()
        self.mem_axis = self.get_mem_observation()
        self.disk_axis = self.get_disk_observation()
        self.net_axis = self.get_net_observation()

        self.action_space = spaces.Discrete(5)
        # Use magnitudes so that low <= high holds even if a metric is
        # negative; for non-negative metrics this equals the raw values.
        high = np.abs(np.array([
            self.cpu_axis,
            self.mem_axis,
            self.disk_axis,
            self.net_axis,
        ]))
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        # seed() is not defined in this class body; it resolves through the
        # base class.
        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
    
def seed(self, seed=None):
    """Create a random generator from ``seed`` and attach it to ``self``.

    NOTE(review): defined at module level although it takes ``self``;
    presumably meant to be a method of the environment class -- confirm.

    Args:
        seed: value forwarded unchanged to ``seeding.np_random``.

    Returns:
        A one-element list holding the seed reported back by
        ``seeding.np_random``.
    """
    generator, used_seed = seeding.np_random(seed)
    self.np_random = generator
    return [used_seed]

def step(self, action):
    """Apply ``action``, compute a reward and return the gym step tuple.

    NOTE(review): defined at module level although it takes ``self``;
    presumably meant to be a method of the environment class -- confirm.

    Relies on names that are not defined in this function and must exist at
    module level: ``get_current_stat``, ``past_stat``, ``get_observation``,
    ``logger`` and ``np``.

    Args:
        action: a member of ``self.action_space``.

    Returns:
        ``(observation, reward, done, info)`` where ``observation`` is
        ``np.array(self.state)`` after the state has been refreshed from
        ``get_observation()``, ``done`` is True only for action 0, and
        ``info`` is an empty dict.

    Raises:
        AssertionError: if ``action`` is not in ``self.action_space``.
    """
    assert self.action_space.contains(action), \
        "%r (%s) invalid" % (action, type(action))

    # (action, log line, ends the episode).  -1 lies outside the action
    # space, so that row is reached only when the assert above is stripped
    # (python -O).
    known_actions = (
        (-1, "Scale Down Move to State S1", False),
        (0, "Stay in State S0", True),
        (1, "Scale Service UP S2", False),
        (2, "Remove Node S3", False),
        (3, "Mantain Cluster State S0", False),
        (4, "Add Node S4", False),
    )

    # Fallback for an unrecognised action.  `done` now gets a value here;
    # before, this path raised UnboundLocalError at the `if done` below.
    message, done, recognised = "action not defined", False, False
    # Compare with == rather than a dict lookup so numpy integer and 0-d
    # array actions match the same way they did in the if/elif chain.
    for candidate, text, ends_episode in known_actions:
        if action == candidate:
            message, done, recognised = text, ends_episode, True
            break

    print(message)
    # Called for every action, including unrecognised ones, as before.
    current_state = get_current_stat()
    reward = current_state - past_stat if recognised else -1

    # NOTE(review): this section is kept exactly as it was.  It overwrites
    # the reward computed above on most paths, and the warning text talks
    # about stepping after `done` although that branch runs when `done` is
    # False.  It may be an inverted adaptation of a "steps beyond done"
    # guard -- confirm the intended reward scheme before changing it.
    if done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Adaptation Failed
        reward = 1.0
        self.steps_beyond_done = 0
    elif self.steps_beyond_done == 0:
        logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    # Store the fresh observation on the instance.  It used to go into a
    # local variable, so the returned observation never changed.
    self.state = get_observation()
    # NOTE(review): a per-action `info` string used to be assigned but never
    # returned; the info dict stays empty to keep the return value unchanged.
    return np.array(self.state), reward, done, {}



def reset(self):
    """Start a new episode from a small random state.

    NOTE(review): defined at module level although it takes ``self``;
    presumably meant to be a method of the environment class -- confirm.

    Draws four values uniformly from [-0.05, 0.05) using ``self.np_random``,
    stores them as ``self.state`` and clears ``self.steps_beyond_done``.

    Returns:
        A new array containing a copy of the freshly drawn state.
    """
    initial_state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    self.state, self.steps_beyond_done = initial_state, None
    return np.array(initial_state)

def render(self, mode='human', close=False):
    """Log that rendering is unsupported and return 0.

    NOTE(review): defined at module level although it takes ``self``;
    presumably meant to be a method of the environment class -- confirm.

    Args:
        mode: accepted but not used.
        close: accepted but not used.

    Returns:
        Always 0.
    """
    notice = "View is not allowed in this environment"
    logger.warn(notice)
    return 0

def close(self):
    """Close the attached viewer, if there is one, and forget it.

    NOTE(review): defined at module level although it takes ``self``;
    presumably meant to be a method of the environment class -- confirm.

    Does nothing when ``self.viewer`` is falsy.
    """
    viewer = self.viewer
    if not viewer:
        return
    viewer.close()
    self.viewer = None

  