In [1]:
from __future__ import division, print_function, unicode_literals
import csv
import shutil
import os
import time
import requests
import subprocess
from subprocess import call
import numpy as np
import tensorflow as tf


In [2]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory 

Using TensorFlow backend.


In [3]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [4]:
def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [11]:
def get_cpu_observation():
    response = requests.get('http://192.168.99.100:8888/cpu', timeout=5)
    results = response.json()
    if len(results) > 0:
        cpu = results['cpu']
        prediction = results['prediction']
        anomalyScore = results['anomalyScore']
        anomalyLikelihood = results['anomalyLikelihood']
        utility_cpu = results['utility_cpu']
        #print 'time: ', timestamp, ' cpu Usage: ',cpu , 'utility_cpu: ', utility_cpu
        cpu_axis=[cpu, prediction, anomalyScore, anomalyLikelihood, utility_cpu]
    cpu_axis=[cpu, prediction, anomalyScore, anomalyLikelihood, utility_cpu]
    return np.array(cpu_axis) 

In [12]:
get_cpu_observation()

array([54.20305313, 54.20305313,  1.        ,  0.5       , 27.10152656])

In [13]:
def get_mem_observation():
    response = requests.get('http://192.168.99.100:8888/mem', timeout=5)
    results = response.json()
    if len(results) > 0:
        mem = results['mem']
        prediction = results['prediction']
        anomalyScore = results['anomalyScore']
        anomalyLikelihood = results['anomalyLikelihood']
        utility_mem = results['utility_mem']
        #mem_axis=[mem, prediction, anomalyScore, anomalyLikelihood, utility_mem]    
    mem_axis=[mem, prediction, anomalyScore, anomalyLikelihood, utility_mem]
    return np.array(mem_axis) 

In [14]:
get_mem_observation()

array([52.37857156, 52.37857156,  1.        ,  0.5       , 26.18928578])

In [15]:
def get_disk_observation():
    response = requests.get('http://192.168.99.100:8888/disk', timeout=5)
    if response is not None:
        results = response.json()
        if len(results) > 0:
            disk = results['disk']
            prediction = results['prediction']
            anomalyScore = results['anomalyScore']
            anomalyLikelihood = results['anomalyLikelihood']
            utility_mem = results['utility_disk']
            disk_axis=[disk, prediction, anomalyScore, anomalyLikelihood, utility_mem]
            return np.array(disk_axis) 

In [16]:
get_disk_observation()

array([0., 0., 0., 0., 0.])

In [17]:
def get_net_observation():
    response = requests.get('http://192.168.99.100:8888/net', timeout=5)
    results = response.json()
    if len(results) > 0:
        net = results['net']
        prediction = results['prediction']
        anomalyScore = results['anomalyScore']
        anomalyLikelihood = results['anomalyLikelihood']
        utility_net = results['utility_net']
         
    net_axis=[net, prediction, anomalyScore, anomalyLikelihood, utility_net]
    return np.array(net_axis) 

In [18]:
get_net_observation()

array([0. , 0. , 1. , 0.5, 0. ])

In [19]:
def get_observation():
    disk_axis =  get_disk_observation()
    mem_axis = get_mem_observation()
    cpu_axis = get_cpu_observation()
    net_axis = get_net_observation()
    obs =np.vstack((cpu_axis,mem_axis, disk_axis, net_axis) )
    return obs 

In [20]:
l=[]
for i in range(10):
    a = get_observation()
    l.append(a)
    
f1= np.array(l)
    

In [21]:
obs = get_observation()
obs

array([[5.42030531e+01, 5.42030531e+01, 1.00000000e+00, 5.00000000e-01,
        2.71015266e+01],
       [5.23785716e+01, 5.23785716e+01, 1.00000000e+00, 5.00000000e-01,
        2.61892858e+01],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [6.18374558e+02, 6.18374558e+02, 1.00000000e+00, 5.00000000e-01,
        3.09187279e+02]])

In [22]:
def get_current_stat():
    current_state = get_observation()
    cpu_fitnes  = np.dot(current_state[0, 0], current_state[0,3])/np.sum(current_state[0,3])
    print (cpu_fitnes, 'cpu_fitnes')
    mem_fitnes  = np.dot(current_state[1,0], current_state[1,3])/np.sum(current_state[1,3])
    print ('mem_fitnes', mem_fitnes)
    disk_fitnes  = np.dot(current_state[2,0],current_state[2,3])/np.sum(current_state[2,3])
    print ('disk_fitnes', disk_fitnes)
    net_fitnes  = np.dot(current_state[3, 0],current_state[3,3])/np.sum(current_state[3,3])
    print ('net_fitnes', net_fitnes)
    sum_utility =  cpu_fitnes+ mem_fitnes + disk_fitnes + net_fitnes 
    return sum_utility

In [23]:
get_current_stat()

54.203053129792075 cpu_fitnes
mem_fitnes 52.37857156075411
disk_fitnes nan
net_fitnes 61.62054018006003


  import sys


nan

# # # MDP 


In [44]:
from simple_rl.run_experiments import run_agents_on_mdp
from simple_rl.tasks import GridWorldMDP
from simple_rl.agents import QLearningAgent


In [45]:
env =  GridWorldMDP()

In [24]:
possible_actions = [-1, 0, 1, 2, 3, 4]    

In [25]:
def adaptation_step(action):
    
    past_stat = get_current_stat()
     
    if action == -1:
            print("Scale Down Move to State S1")
            #Reward = max of utility fitness 
            current_state = get_current_stat()
            reward= current_state - past_stat
            done= False 
            info = "Scale Down Move to State S1"
    elif action == 0: 
        print("Stay in State S0")
        current_state = get_current_stat()
        reward= current_state - past_stat

        done= True 
        info = "Stay in State S0"
    elif action == 1: 
        print("Scale Service UP S2")
        current_state = get_current_stat()
        reward= current_state - past_stat
        done= False 
        info = "Stay in State S0"
    elif action == 2: 
        print("Remove Node S3")
        current_state = get_current_stat()
        reward= current_state - past_stat
        done= False
        info = "Remove Node S3"
    elif action == 3: 
        print("Mantain Cluster State S0")
        current_state = get_current_stat()
        reward= current_state - past_stat
        done= False
        info = "Mantain Cluster State S0"
    elif action == 4: 
        print("Add Node S4")
        current_state = get_current_stat()
        reward= current_state - past_stat
        done= False
        info = "Add Node S4"
    else: 
        print ("action not defined")
        current_state = get_current_stat()
        reward= -1
        info = "action not defined"
    
    state = get_observation()
     
    return np.array(state), reward, done, info
        

In [6]:
ENV_NAME = 'CartPole-v0'


# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)


In [30]:
 nb_actions = len(possible_actions)



In [31]:
observation_space= np.array((1, 6))

## Deep Q-Learning


In [58]:
import gym
import gym_foo

In [60]:
env = gym.make('foo-v2')

UnregisteredEnv: No registered env with id: foo-v0

In [32]:
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

In [33]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 2)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 16)                48        
_________________________________________________________________
activation_4 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_5 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_6 (Activation)    (None, 16)                0         
__________

In [42]:
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [43]:
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)


Training for 50000 steps ...


AssertionError: 

In [None]:
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)