In [1]:
!pip install -e ../gym_k8s_real

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///home/li/rl-autoscaler-k8s/gym_k8s_real
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: gym-k8s-real
  Attempting uninstall: gym-k8s-real
    Found existing installation: gym-k8s-real 0.0.1
    Uninstalling gym-k8s-real-0.0.1:
      Successfully uninstalled gym-k8s-real-0.0.1
  Running setup.py develop for gym-k8s-real
Successfully installed gym-k8s-real
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [1]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime

In [2]:
import gym_k8s_real

## Info about the kubernetes environment we deploy

In [3]:
# --- Target application ---------------------------------------------------
app_name = 'proxy'
# Not referenced by the other cells shown in this notebook; presumably the pod
# resource requests of the deployed app -- TODO confirm.
cpu_req = '80m'
memory_req = '128Mi'
num_of_services = 1

# --- Environment and metrics ----------------------------------------------
gym_env = 'gym_k8s_real:k8s-env-discrete-state-discrete-action-v0'
# Length of one timestep in minutes: how long we wait for an action to be
# enforced before the next step.
timestep_duration = 1.5
prometheus_host = 'http://145.100.135.52:30013'
# Nginx ingress latency metric.
# Alternative metric kept for reference: '(sum(http_requests_total) by (app))'
prometheus_throughput_metric_name = 'latency'
# SLA threshold; train_agent counts a violation when the observed metric
# value is greater than this.
sla_throughput = 2.6

# --- Q-learning -------------------------------------------------------------
q_table_file = 'Q-env-discrete-state-discrete-action-data.npy'
# Value every Q-table entry starts with when a new table is created.
q_table_init_value = 12.5
total_epochs = 100
steps_per_epoch = 15

## Load or create the Q table
If our Q table file is present, we load it into memory. Otherwise we create a gym environment and, using the sizes of its observation and action spaces, create a new table and save it to disk.

In [4]:
# Reuse the persisted Q-table when there is one; otherwise build a fresh table
# sized from the environment's discrete observation and action spaces.
try:
    q_table = np.load(q_table_file)
    print('File present. Loading done!')

except OSError:
    # The table could not be read from disk: an environment is created here
    # only to learn how many states and actions the table must cover.
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name,
    )

    # One row per state, one column per action, all set to the initial value.
    q_table = np.full(
        shape=(env.observation_space.n, env.action_space.n),
        fill_value=q_table_init_value,
    )
    np.save(q_table_file, q_table)

    print('File not present. Created successfully!')

# Lock used by train_agent around Q-table updates and saves.
table_lock = Lock()

File present. Loading done!


## Create historical states csv file if it doesn't exist

In [5]:
# Create the history log with its header row only if it does not exist yet.
# Mode 'x' (exclusive creation) fails with FileExistsError when the file is
# already there, so an existing log is never truncated -- not even when it
# exists but cannot be opened for reading.
try:
    with open('k8s_historical_states_discrete.csv', 'x') as f:
        f.write('current_app_name,timestep,state,action,next_state,reward,'
                'done,number_of_pods,cpu_util,latency_violation,latency,hpa_threshold,info\n')
except FileExistsError:
    print('File already present.')
else:
    print('File not present. Created successfully!')

File already present.


# Train the agent

## Agent training
This function trains our agent:

In [6]:
def decode(i):
    """Split a flat state index into its four mixed-radix digits.

    The index is taken apart from the least-significant end using the bases
    7, 5 and 5; whatever is left over becomes the leading digit.

    Returns:
        An iterator over the four digits, most-significant first. train_agent
        reads them, in order, as cpu utilization, hpa cpu threshold, number of
        pods and latency.
    """
    i, base7_digit = divmod(i, 7)
    i, lower_base5_digit = divmod(i, 5)
    leading_digit, upper_base5_digit = divmod(i, 5)
    return reversed([base7_digit, lower_base5_digit, upper_base5_digit, leading_digit])

In [9]:
def train_agent(num_service):
    """Train the tabular Q-learning agent against the gym environment.

    Runs ``total_epochs`` epochs of at most ``steps_per_epoch`` steps. Each
    step picks an epsilon-greedy action, applies it with ``env.step``, appends
    the transition to ``k8s_historical_states_discrete.csv`` and writes the
    updated Q-table back to ``q_table_file``.

    The function works on its own local ``q_table`` (reloaded from
    ``q_table_file``); it does not modify the notebook-level ``q_table``.

    Args:
        num_service: Not used by the function body; kept so existing calls
            keep working.

    Returns:
        None.
    """
    # Hyperparameters
    alpha = 0.1          # learning rate
    gamma = 0.9          # discount factor
    epsilon_init = 0.97  # base of the exponential exploration decay
    epsilon_min = 0.2    # exploration never drops below this

    current_app_name = app_name
    current_throughput_metric_name = prometheus_throughput_metric_name

    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=current_app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=current_throughput_metric_name
    )

    for epoch in range(total_epochs):
        state, _ = env.reset()
        decoded_state = list(decode(state))
        print('======EPOCH{}=======\n'
              'Training started with cpu utilization: {}, hpa cpu threshold: {}, number of pods: {}, latency:{}'
              .format(epoch, decoded_state[0], decoded_state[1], decoded_state[2], decoded_state[3]))

        done = False

        for step in range(steps_per_epoch):
            # Stop the epoch before doing any work once the environment has
            # reported that the episode is over.
            if done:
                break

            current_timestep = epoch * steps_per_epoch + step

            # Pick up the latest persisted table for action selection.
            q_table = np.load(q_table_file)

            # Epsilon keeps getting smaller and stops when it reaches epsilon_min
            epsilon = max(pow(epsilon_init, current_timestep), epsilon_min)

            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # Explore action space, non-greedy (NG) action selection
            else:
                action = np.argmax(q_table[state])  # Exploit learned values, greedy (G) action selection

            decoded_state = list(decode(state))
            print('======ROUND{}=======\n'
                  'app: {}, pod_cpu_util: {}, cpu_threshold: {}, number_of_pods: {}, latency: {}, latency_violation: {}, action: {}'
                  .format(step, current_app_name, decoded_state[0], decoded_state[1], decoded_state[2],
                          decoded_state[3], int(decoded_state[3] >= 5), action))

            # The environment returns the raw observation, the reward, the
            # done flag and the next discrete state index, in that order.
            real_ob, reward, done, next_state = env.step(action)

            # NOTE(review): the fixed +2h shift looks like a manual timezone
            # adjustment -- confirm before relying on these timestamps.
            now = datetime.datetime.now() + datetime.timedelta(hours=2)
            info = {
                'datetime': now.strftime('%d/%m/%Y %H:%M:%S')
            }

            (pod_cpu_util,
             cpu_threshold,
             number_of_pods,
             throughput) = real_ob

            # The violation flag becomes 1 if the observed metric exceeded the
            # SLA threshold, otherwise it's 0
            throughput_violation = int(throughput > sla_throughput)

            # Save historical tuple (column order matches the CSV header)
            with open('k8s_historical_states_discrete.csv', 'a') as f:
                f.write(
                    '{},{},{},{},'.format(current_app_name, current_timestep, state, action) +
                    '{},{},{},{},'.format(next_state, reward, done, number_of_pods) +
                    '{},{},{},{},{}'.format(pod_cpu_util, throughput_violation, throughput, cpu_threshold, info) +
                    '\n'
                )

            # Reload, update and persist in one critical section. env.step
            # blocks for a whole timestep, so the copy loaded before it may be
            # stale if another trainer saved in the meantime; updating that
            # stale copy would discard the other trainer's changes.
            with table_lock:
                q_table = np.load(q_table_file)
                old_value = q_table[state, action]
                # Q-table update is always greedy (np.max)
                # Q-learning is off-policy since the action taken can be of a different policy (non-greedy, random) (NG)
                next_max = np.max(q_table[next_state])
                new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
                q_table[state, action] = new_value
                np.save(q_table_file, q_table)

            print('reward: {}, new_value: {}'.format(reward, new_value))

            print('new cpu utilization: {}, new hpa cpu threshold: {}, new number of pods: {}, new latency: {}, new latency_violation: {}'
                  .format(pod_cpu_util, cpu_threshold, number_of_pods, throughput, throughput_violation))

            state = next_state

        # The table is already persisted after every step, so no extra save is
        # needed at the end of the epoch.
        print('Training finished.\n')

In [11]:
# Run the training loop. The argument is passed through, but train_agent does
# not read it.
train_agent(num_of_services)

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 0, hpa cpu threshold: 0, number of pods: 0, latency:1
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 0, latency: 1, latency_violation: 0, action: 2
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 15.834861732875037, new_value: 13.958486173287504
new cpu utilization: 80, new hpa cpu threshold: 60, new number of pods: 3, new latency: 1.0772640705108643, new latency_violation: 0
app: proxy, pod_cpu_util: 4, cpu_threshold: 2, number_of_pods: 1, latency: 2, latency_violation: 0, action: 1
reward: 13.10015313775055, new_value: 14.996324683647948
new cpu utilization: 52, new hpa cpu threshold: 60, new number of pods: 5, new latency: 1.1911786397298176, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 2
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 16.76

reward: 16.496338527847875, new_value: 16.975287962781252
new cpu utilization: 77, new hpa cpu threshold: 100, new number of pods: 3, new latency: 0.9404997825622559, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 4, number_of_pods: 1, latency: 1, latency_violation: 0, action: 1
reward: 16.177905327245313, new_value: 18.020549699227658
new cpu utilization: 86, new hpa cpu threshold: 100, new number of pods: 3, new latency: 1.004992405573527, new latency_violation: 0
app: proxy, pod_cpu_util: 4, cpu_threshold: 4, number_of_pods: 1, latency: 1, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 16.01260635205729, new_value: 14.141245400083791
new cpu utilization: 68, new hpa cpu threshold: 80, new number of pods: 3, new latency: 1.0394412676493328, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 3, number_of_pods: 1, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizon

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 15.766177366939774, new_value: 19.776080412348634
new cpu utilization: 55, new hpa cpu threshold: 60, new number of pods: 4, new latency: 0.866371234258016, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 2, hpa cpu threshold: 2, number of pods: 2, latency:2
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 10.924552920644842, new_value: 26.851530520168254
new cpu utilization: 43, new hpa cpu threshold: 40, new number of pods: 7, new latency: 1.1805004278818767, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 7.66290063291652, new_value:

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 13.584192428644204, new_value: 33.08800080785826
new cpu utilization: 44, new hpa cpu threshold: 60, new number of pods: 5, new latency: 1.0833947658538818, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 12.68585508925246, new_value: 27.95130922057397
new cpu utilization: 45, new hpa cpu threshold: 40, new number of pods: 6, new latency: 1.0380563735961914, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 8.771218662834317, new_value: 21.977463264568247
new cpu utilization: 28, new hpa cpu threshold: 20, new number of pods: 10, new latency: 0.9300623734792074, new latency_violation: 0
app: prox

reward: 7.631817736323979, new_value: 20.954068618582394
new cpu utilization: 30, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.1712551911671956, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 2, latency_violation: 0, action: 2
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 13.050968991373475, new_value: 33.6932673539693
new cpu utilization: 45, new hpa cpu threshold: 60, new number of pods: 5, new latency: 1.202515443166097, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 11.879532437302615, new_value: 28.43836881191557
new cpu utilization: 40, new hpa cpu threshold: 40, new number of pods: 6, new latency: 1.2165222962697346, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Training

reward: 11.806801255992067, new_value: 14.892997366634352
new cpu utilization: 19, new hpa cpu threshold: 20, new number of pods: 7, new latency: 0.9899245897928873, new latency_violation: 0
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 3, latency: 1, latency_violation: 0, action: 1
reward: 12.243692442378942, new_value: 17.23444101369332
new cpu utilization: 20, new hpa cpu threshold: 20, new number of pods: 7, new latency: 0.9027818044026693, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 3, latency: 1, latency_violation: 0, action: 1
reward: 11.750648527546607, new_value: 16.129862173957978
new cpu utilization: 19, new hpa cpu threshold: 20, new number of pods: 7, new latency: 1.001449426015218, new latency_violation: 0
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 3, latency: 1, latency_violation: 0, action: 1
reward: 10.086541168656517, new_value: 17.760214142332032
new cpu utilization: 14, new hpa cpu thre

KeyboardInterrupt: 

In [25]:
# train_agent() works on its own local copy of the table and writes it to
# q_table_file after every step, so the file on disk holds the latest values.
# Refresh the notebook-level q_table from that file; saving the notebook-level
# array instead would overwrite the trained values with the table that was
# loaded before training.
with table_lock:
    q_table = np.load(q_table_file)

In [27]:
# Show the Q-table row for the discrete state with index 382; the table is
# created with one column per action.
print(q_table[382]) 

[13.96471286 21.37426449 15.42381685]
