In [14]:
!pip install -e ../gym_k8s_real

Obtaining file:///Users/zxp/Desktop/learn/0_Project/EPI/rl-autoscaler-k8s/gym_k8s_real
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: gym-k8s-real
  Attempting uninstall: gym-k8s-real
    Found existing installation: gym-k8s-real 0.0.1
    Uninstalling gym-k8s-real-0.0.1:
      Successfully uninstalled gym-k8s-real-0.0.1
  Running setup.py develop for gym-k8s-real
Successfully installed gym-k8s-real-0.0.1


In [1]:
import gym_k8s_real

ModuleNotFoundError: No module named 'gym_k8s_real'

In [4]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime

## Info about the Kubernetes environment we deploy

In [7]:
# --- Application under control -------------------------------------------
app_name = 'proxy'
num_of_services = 1
# memory_req / cpu_req are not referenced by the cells shown in this notebook.
memory_req = '128Mi'
cpu_req = '80m'

# --- Environment timing and SLA ------------------------------------------
# Length of one timestep, in minutes: how long we wait for an action
# to be enforced before observing the result.
timestep_duration = 1
# Throughput below this value is recorded as an SLA violation during training.
sla_throughput = 100

# --- Prometheus ----------------------------------------------------------
prometheus_host = 'http://145.100.135.52:30013'
# Throughput query: http_requests_total summed per app.
prometheus_throughput_metric_name = '(sum(http_requests_total) by (app))'

# --- Gym environment and Q table -----------------------------------------
gym_env = 'gym_k8s_real:k8s-env-discrete-state-discrete-action-v0'
q_table_file = 'Q-env-discrete-state-discrete-action-data.npy'
# Value every Q-table entry starts with when a new table is created.
q_table_init_value = 50

# --- Training schedule ---------------------------------------------------
total_epochs = 60
steps_per_epoch = 16

## Load or create the Q table
If our Q table file is present, we load it into memory. Otherwise we create a gym environment and, using the dimensions of the environment's observation and action spaces, we create a new one.

In [8]:
# Load a previously saved Q table, or build and persist a fresh one.
try:
    # Cast to float so a table that was saved with an integer dtype does not
    # truncate the fractional Q-value updates written during training.
    q_table = np.load(q_table_file).astype(float)
    print('File present. Loading done!')

# Only a missing file means "create a new table"; any other I/O error is
# allowed to propagate so that an existing table is never overwritten.
except FileNotFoundError:
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name
    )

    # One row per discrete state, one column per discrete action.
    # dtype=float is explicit because np.full otherwise infers the dtype from
    # the fill value, and an integer fill value yields an integer table.
    q_table = np.full(
        (env.observation_space.n, env.action_space.n),
        q_table_init_value,
        dtype=float
    )
    np.save(q_table_file, q_table)

    print('File not present. Created successfully!')

# Held while the Q table is read, updated or saved.
table_lock = Lock()

Error: A module (gym_k8s_real) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`

## Create historical states csv file if it doesn't exist

In [6]:
# Create the history CSV with its header row only if it does not exist yet.
# Mode 'x' (exclusive creation) raises FileExistsError when the file is already
# there, so an existing history is never truncated and there is no window
# between checking for the file and creating it.
try:
    with open('k8s_historical_states_discrete.csv', 'x') as f:
        f.write('current_app_name,timestep,state,action,next_state,reward,'
                'done,number_of_pods,cpu_util,throughput_violation,throughput,hpa_threshold,info\n')
    print('File not present. Created successfully!')
except FileExistsError:
    print('File already present.')

File already present.


# Train the agent

## Stress functions
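We use a vegeta script to stress our application (the function below is currently commented out, so this notebook does not generate any load itself):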

In [7]:
# def stress_function(endpoint):
#     rate_per_second = 0.5
#     rate_per_minute = int(rate_per_second * 60)

#     vegeta_process = subprocess.Popen('stress-test/vegeta-stress.sh {} {} {}'
#                    .format(endpoint, rate_per_minute, timestep_duration * 2), shell=True)
#     return vegeta_process

## Agent training
This function trains our agent:

In [8]:
def train_agent(num_service):
    """Train the Q table with epsilon-greedy tabular Q-learning.

    For each of ``total_epochs`` epochs the environment is reset and up to
    ``steps_per_epoch`` steps are taken. Every transition is appended to
    ``k8s_historical_states_discrete.csv`` and used to update the module-level
    ``q_table`` while holding ``table_lock``. The table is saved to
    ``q_table_file`` at the end of every epoch, so an interrupted run keeps
    the progress of all completed epochs.

    Args:
        num_service: Not used by the function body; accepted so that existing
            callers keep working.

    Returns:
        None.
    """
    # Hyperparameters
    alpha = 0.1          # learning rate
    gamma = 0.9          # discount factor
    epsilon_init = 0.97  # base of the exponential epsilon decay
    epsilon_min = 0.2    # lower bound for the exploration probability

    history_file = 'k8s_historical_states_discrete.csv'
    current_app_name = app_name

    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=current_app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name
    )

    for epoch in range(total_epochs):
        state, _ = env.reset()
        print(state)

        done = False

        for step in range(steps_per_epoch):
            if done:
                # The previous step ended the episode: wait two minutes, then
                # leave this epoch so the next one starts from a reset.
                time.sleep(2 * 60)
                break

            current_timestep = epoch * steps_per_epoch + step

            # Epsilon decays with the global timestep and stops at epsilon_min
            epsilon = max(pow(epsilon_init, current_timestep), epsilon_min)

            if random.uniform(0, 1) < epsilon:
                # Explore action space, non-greedy (NG) action selection
                action = env.action_space.sample()
            else:
                # Exploit learned values, greedy (G) action selection
                action = np.argmax(q_table[state])

            # NOTE(review): the result is unpacked as (raw observation, reward,
            # done, next discrete state), which is not the usual gym ordering;
            # confirm against the gym_k8s_real environment.
            real_ob, reward, done, next_state = env.step(action)

            # Wall-clock timestamp stored in the last CSV column
            info = {
                'datetime': datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')
            }

            (pod_cpu_util,
             cpu_threshold,
             number_of_pods,
             throughput) = real_ob

            # Throughput violation is 1 if the throughput fell below the SLA,
            # otherwise it's 0
            throughput_violation = int(throughput < sla_throughput)

            print('======ROUND{}=======\n'
                  'app: {}, action: {}'.format(step, current_app_name, action))

            # Save historical tuple; the field order matches the CSV header
            with open(history_file, 'a') as f:
                f.write(
                    '{},{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                        current_app_name, current_timestep, state, action,
                        next_state, reward, done, number_of_pods,
                        pod_cpu_util, throughput_violation, throughput,
                        cpu_threshold, info
                    )
                )

            with table_lock:
                old_value = q_table[state, action]
                # Q-table update is always greedy (np.max): Q-learning is
                # off-policy, so the action actually taken may have been a
                # random (non-greedy) one.
                next_max = np.max(q_table[next_state])
                new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
                q_table[state, action] = new_value

            print('app: {}, state: {}, action: {}, new_value: {}, next_state: {}, reward: {}'
                  .format(current_app_name, state, action, new_value, next_state, reward))

            state = next_state

        # Checkpoint the table after every epoch
        with table_lock:
            np.save(q_table_file, q_table)

    # Printed once, after the last epoch (it used to be printed after every epoch)
    print('Training finished.\n')

In [9]:
# Start training; num_of_services is passed as train_agent's num_service parameter.
train_agent(num_service=num_of_services)

Error: A module (gym_k8s_real) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`