In [1]:
!pip install -e ../gym_k8s_real

^C
Traceback (most recent call last):
  File "/home/li/.local/bin/pip", line 5, in <module>
    from pip._internal.cli.main import main
  File "/home/li/.local/lib/python3.9/site-packages/pip/_internal/cli/main.py", line 9, in <module>
    from pip._internal.cli.autocompletion import autocomplete
  File "/home/li/.local/lib/python3.9/site-packages/pip/_internal/cli/autocompletion.py", line 10, in <module>
    from pip._internal.cli.main_parser import create_main_parser
  File "/home/li/.local/lib/python3.9/site-packages/pip/_internal/cli/main_parser.py", line 8, in <module>
    from pip._internal.cli import cmdoptions
  File "/home/li/.local/lib/python3.9/site-packages/pip/_internal/cli/cmdoptions.py", line 23, in <module>
    from pip._internal.cli.parser import ConfigOptionParser
  File "/home/li/.local/lib/python3.9/site-packages/pip/_internal/cli/parser.py", line 12, in <module>
    from pip._internal.configuration import Configuration, ConfigurationError
  File "/hom

In [1]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime

In [2]:
import gym_k8s_real

## Info about the Kubernetes environment we deploy

In [3]:
# ---- Application under control ----
app_name = 'proxy'
memory_req = '128Mi'
cpu_req = '80m'
num_of_services = 1

# ---- Environment timing and SLA ----
# Timestep duration in minutes:
# we wait this many minutes for our actions to be enforced
timestep_duration = 1.5
# SLA bound; the training loop flags a violation when the observed metric exceeds it
sla_throughput = 2.6

# ---- Prometheus ----
prometheus_host = 'http://145.100.135.52:30013'
# Nginx ingress latency metric
# (previous choice: prometheus_throughput_metric_name = '(sum(http_requests_total) by (app))')
prometheus_throughput_metric_name = 'latency'

# ---- Gym environment id ----
gym_env = 'gym_k8s_real:k8s-env-discrete-state-discrete-action-v0'

# ---- Q-learning bookkeeping ----
q_table_file = 'Q-env-discrete-state-discrete-action-data.npy'
# Value every cell of a freshly created Q table starts with
q_table_init_value = 12.5
total_epochs = 100
steps_per_epoch = 15

## Load or create the Q table
If our Q table file is present, we load it into memory. Otherwise, we create a gym environment and use the dimensions of its observation and action spaces to create a new table.

In [4]:
# Reuse the persisted Q table when it can be loaded; otherwise build a new one
# sized from the environment's observation/action spaces and persist it.
try:
    q_table = np.load(q_table_file)
except IOError:
    # The environment is only instantiated here to learn the table dimensions.
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name
    )

    # One row per discrete state, one column per discrete action.
    q_table = np.full(
        shape=(env.observation_space.n, env.action_space.n),
        fill_value=q_table_init_value
    )
    np.save(q_table_file, q_table)

    print('File not present. Created successfully!')
else:
    print('File present. Loading done!')

# Lock taken by the cells below whenever they update or save the Q table.
table_lock = Lock()

File present. Loading done!


## Create historical states csv file if it doesn't exist

In [5]:
# CSV file that accumulates one row per training step.
history_csv_path = 'k8s_historical_states_discrete_testing_with_bursts.csv'

try:
    # Mode 'x' creates the file only if it does not exist yet. Unlike probing
    # with open(..., 'r') and then re-opening with 'w', it can never truncate an
    # existing history file, and "check + create" happens in a single step.
    history_csv = open(history_csv_path, 'x')
except FileExistsError:
    print('File already present.')
else:
    with history_csv:
        # Header row; column order matches the rows appended during training.
        history_csv.write('current_app_name,timestep,state,action,next_state,reward,'
                          'done,number_of_pods,cpu_util,latency_violation,latency,hpa_threshold,info\n')
    print('File not present. Created successfully!')

File already present.


# Train the agent

## Agent training
This function trains our agent:

In [6]:
def decode(i):
    """Split a flat discrete state index into its four mixed-radix digits.

    The index is peeled from the least significant side with radices 7, 5
    and 5; whatever remains is the leading digit. The digits are returned
    most-significant first.

    In this notebook the callers label the four values, in order, as
    cpu utilization, HPA cpu threshold, number of pods and latency.

    Args:
        i: Non-negative integer state index.

    Returns:
        A reverse iterator over the four digits (wrap it in ``list(...)``
        to index into it).
    """
    remainder, base7_digit = divmod(i, 7)
    remainder, low_base5_digit = divmod(remainder, 5)
    leading_digit, high_base5_digit = divmod(remainder, 5)
    return reversed([base7_digit, low_base5_digit, high_base5_digit, leading_digit])

In [7]:
def train_agent(num_service):
    """Run tabular Q-learning against the Kubernetes gym environment.

    For each of ``total_epochs`` epochs the environment is reset and up to
    ``steps_per_epoch`` steps are taken with an epsilon-greedy policy. Every
    step:

    * reloads the Q table from ``q_table_file``,
    * picks an action and applies it with ``env.step``,
    * appends one row to the historical-states CSV,
    * applies the Q-learning update and saves the table back to
      ``q_table_file``.

    Args:
        num_service: Currently unused; kept so existing callers keep working.

    Returns:
        None. Results are persisted to ``q_table_file`` and to the CSV file,
        and progress is printed.
    """
    # Hyperparameters
    alpha = 0.1         # learning rate
    gamma = 0.9         # discount factor
    epsilon_init = 0.4  # base of the exploration decay
    epsilon_min = 0.2   # exploration never drops below this

    current_app_name = app_name
    current_throughput_metric_name = prometheus_throughput_metric_name

    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=current_app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=current_throughput_metric_name
    )

    for epoch in range(0, total_epochs):
        # reset() of this environment is unpacked as (state, <ignored>).
        state, _ = env.reset()
        decoded_state = list(decode(state))
        print('======EPOCH{}=======\n'
              'Training started with cpu utilization: {}, hpa cpu threshold: {}, number of pods: {}, latency:{}'
              .format(epoch, decoded_state[0], decoded_state[1], decoded_state[2], decoded_state[3]))

        done = False

        for step in range(steps_per_epoch):
            # Stop the epoch before doing any further work (including the
            # disk read below) once the environment reported termination.
            if done:
                break

            current_timestep = epoch * steps_per_epoch + step
            q_table = np.load(q_table_file)

            # Epsilon is epsilon_init ** timestep, floored at epsilon_min:
            # 1.0 at timestep 0, 0.4 at timestep 1, then the floor applies.
            epsilon = max(pow(epsilon_init, current_timestep), epsilon_min)

            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # Explore action space, non-greedy (NG) action selection
            else:
                action = np.argmax(q_table[state])  # Exploit learned values, greedy (G) action selection

            decoded_state = list(decode(state))
            print('======ROUND{}=======\n'
                  'app: {}, pod_cpu_util: {}, cpu_threshold: {}, number_of_pods: {}, latency: {}, latency_violation: {}, action: {}'
                  .format(step, current_app_name, decoded_state[0], decoded_state[1], decoded_state[2],
                          decoded_state[3], int(decoded_state[3] >= 5), action))

            # step() of this environment is unpacked as
            # (raw observation, reward, done flag, next discrete state).
            real_ob, reward, done, next_state = env.step(action)

            # Timestamp stored with the row; local time shifted by a fixed +2 hours.
            now = datetime.datetime.now() + datetime.timedelta(hours=2)
            info = {'datetime': now.strftime('%d/%m/%Y %H:%M:%S')}

            (pod_cpu_util,
             cpu_threshold,
             number_of_pods,
             throughput) = real_ob

            # The violation flag becomes 1 if the observed metric exceeds the SLA,
            # otherwise it's 0
            throughput_violation = int(throughput > sla_throughput)

            # Save historical tuple
            with open('k8s_historical_states_discrete_testing_with_bursts.csv', 'a') as f:
                f.write(
                    '{},{},{},{},'.format(current_app_name, current_timestep, state, action) +
                    '{},{},{},{},'.format(next_state, reward, done, number_of_pods) +
                    '{},{},{},{},{}'.format(pod_cpu_util, throughput_violation, throughput, cpu_threshold, info) +
                    '\n'
                )

            # Update and persist under a single lock acquisition so the saved
            # file always contains the update that was just computed.
            # NOTE(review): the table was loaded before env.step(); if several
            # agents ever share q_table_file, updates written in between would
            # be overwritten here - confirm single-writer usage.
            with table_lock:
                old_value = q_table[state, action]
                # Q-table update is always greedy (np.max)
                # Q-learning is off-policy since the action taken can be of a different policy (non-greedy, random) (NG)
                next_max = np.max(q_table[next_state])
                new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
                q_table[state, action] = new_value
                np.save(q_table_file, q_table)

            print('reward: {}, new_value: {}'.format(reward, new_value))

            print('new cpu utilization: {}, new hpa cpu threshold: {}, new number of pods: {}, new latency: {}, new latency_violation: {}'
                  .format(pod_cpu_util, cpu_threshold, number_of_pods, throughput, throughput_violation))

            state = next_state

        # No extra save is needed here: every step above already persisted the
        # table, and an epoch with zero steps has no table to save.
        print('Training finished.\n')

In [None]:
# Start training: runs up to total_epochs * steps_per_epoch environment steps.
# NOTE(review): each step presumably blocks for about timestep_duration minutes
# inside the environment (per the config comment), so this cell is long-running - confirm.
train_agent(num_of_services)

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 0, hpa cpu threshold: 3, number of pods: 0, latency:1
app: proxy, pod_cpu_util: 0, cpu_threshold: 3, number_of_pods: 0, latency: 1, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 17.25473485841023, new_value: 21.95558846630707
new cpu utilization: 113, new hpa cpu threshold: 60, new number of pods: 2, new latency: 1.0120807488759358, new latency_violation: 0
app: proxy, pod_cpu_util: 5, cpu_threshold: 2, number_of_pods: 1, latency: 1, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 8.243223727227807, new_value: 13.199322372722781
new cpu utilization: 57, new hpa cpu threshold: 40, new number of pods: 6, new latency: 2.3805087407430015, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 4, latency_violation: 0, action: 0
Created new na

reward: 6.9754062894085385, new_value: 15.259237428027337
new cpu utilization: 30, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.3271433512369792, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 2, latency_violation: 0, action: 2
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 12.077808619577873, new_value: 36.87613638595297
new cpu utilization: 42, new hpa cpu threshold: 60, new number of pods: 6, new latency: 1.170904000600179, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 3, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 10.271828323945062, new_value: 29.79303255000361
new cpu utilization: 43, new hpa cpu threshold: 40, new number of pods: 7, new latency: 1.3363398710886638, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 5.628325763447743, new_value: 34.55406845036048
new cpu utilization: 37, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.6990795135498047, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 3, latency_violation: 0, action: 2
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 9.872713750631217, new_value: 32.514112487928664
new cpu utilization: 56, new hpa cpu threshold: 60, new number of pods: 6, new latency: 1.7618117332458496, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 2, hpa cpu threshold: 2, number of pods: 3, latency:2
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 3, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 8.020085798706683, new_value:

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 11.721078029525074, new_value: 34.45481480149076
new cpu utilization: 86, new hpa cpu threshold: 100, new number of pods: 4, new latency: 1.8855005900065105, new latency_violation: 0
app: proxy, pod_cpu_util: 4, cpu_threshold: 4, number_of_pods: 2, latency: 3, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 9.930764355182848, new_value: 28.39161953184532
new cpu utilization: 74, new hpa cpu threshold: 80, new number of pods: 5, new latency: 2.1356329917907715, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 3, number_of_pods: 2, latency: 4, latency_violation: 0, action: 1
reward: 10.694053172379258, new_value: 31.323565152662535
new cpu utilization: 74, new hpa cpu threshold: 80, new number of pods: 5, new latency: 1.85690172513326, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 3, number_of_pods: 2, latency: 3,

reward: 7.204779370728978, new_value: 27.351344692203078
new cpu utilization: 55, new hpa cpu threshold: 60, new number of pods: 7, new latency: 2.347664992014567, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 3, latency: 4, latency_violation: 0, action: 1
reward: 8.858281810866938, new_value: 27.588008620165045
new cpu utilization: 63, new hpa cpu threshold: 60, new number of pods: 6, new latency: 2.120396614074707, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 2, number_of_pods: 3, latency: 4, latency_violation: 0, action: 1
reward: 7.426666067739975, new_value: 24.08528954354464
new cpu utilization: 48, new hpa cpu threshold: 60, new number of pods: 7, new latency: 2.251063903172811, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 2, hpa cpu threshold: 2, number of pods: 3, latency:4
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, numbe

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 13.79773020532484, new_value: 33.05690740604209
new cpu utilization: 68, new hpa cpu threshold: 80, new number of pods: 4, new latency: 1.2888890107472737, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 3, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 9.481261296485446, new_value: 33.42510789416443
new cpu utilization: 76, new hpa cpu threshold: 60, new number of pods: 5, new latency: 2.323540369669596, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 2, number_of_pods: 2, latency: 4, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 7.6353114048166155, new_value: 21.392789952395994
new cpu utilization: 37, new hpa cpu threshold: 40, new number of pods: 10, new latency: 1.170461893081665, new latency_violation: 0
app: proxy,

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 5.613605593446332, new_value: 28.981873382678206
new cpu utilization: 63, new hpa cpu threshold: 40, new number of pods: 10, new latency: 1.7036187648773193, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 1, number_of_pods: 4, latency: 3, latency_violation: 0, action: 1
reward: 4.704764929304463, new_value: 14.033483160782126
new cpu utilization: 58, new hpa cpu threshold: 40, new number of pods: 10, new latency: 2.0097556114196777, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 3, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 5.388238199389445, new_value: 26.595160622369313
new cpu utilization: 39, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.7746416727701824, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Trai

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 13.411831425175048, new_value: 32.84592705523946
new cpu utilization: 58, new hpa cpu threshold: 80, new number of pods: 4, new latency: 1.385322093963623, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 3, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 11.926545021526671, new_value: 29.18839405183573
new cpu utilization: 55, new hpa cpu threshold: 60, new number of pods: 5, new latency: 1.4842453797658284, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 10.396453719427646, new_value: 37.18578368940281
new cpu utilization: 40, new hpa cpu threshold: 40, new number of pods: 8, new latency: 1.0522433916727703, new latency_violation: 0
app: proxy

In [25]:
# Write the notebook-level q_table to q_table_file while holding table_lock.
# NOTE(review): train_agent() rebinds its own local q_table and persists it to
# q_table_file on every step; it never updates this global. Unless another cell
# refreshed the global, this save writes the table loaded earlier in the
# notebook over the trained values on disk - confirm this is intended.
with table_lock:
    np.save(q_table_file, q_table)

In [27]:
# Show the Q-values (one per action) stored for discrete state 382,
# which decode() maps to [2, 0, 4, 4].
print(q_table[382]) 

[13.96471286 21.37426449 15.42381685]
