In [None]:
!pip install -e ../gym_k8s_real

In [3]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime

In [4]:
import gym_k8s_real

## Information about the Kubernetes environment we deploy

In [5]:
# --- Target application -----------------------------------------------------
app_name = 'proxy'
memory_req, cpu_req = '128Mi', '80m'

# --- Environment timing and SLA ---------------------------------------------
# Timestep duration in minutes:
# we wait this many minutes for our actions to be enforced
timestep_duration = 1.5
# Threshold the observed metric is compared against; readings above it are
# recorded as a violation by the training loop
sla_throughput = 2.6

# --- Prometheus -------------------------------------------------------------
prometheus_host = 'http://145.100.135.52:30013'
# Nginx ingress latency metric
# (alternative: prometheus_throughput_metric_name = '(sum(http_requests_total) by (app))')
prometheus_throughput_metric_name = 'latency'

# --- Gym environment and Q-table persistence --------------------------------
gym_env = 'gym_k8s_real:k8s-env-discrete-state-discrete-action-v1'
q_table_file = 'Q-env-discrete-state-discrete-action-data-V1.npy'
# Value every Q-table entry starts from when a new table is created
q_table_init_value = 12.5

# --- Training schedule ------------------------------------------------------
total_epochs, steps_per_epoch = 100, 15
num_of_services = 1

## Load or create the Q table
If our Q table file is present, we load it into memory. Otherwise we create a gym environment and, using the dimensions of the environment's observation and action spaces, we create a new table.

In [6]:
try:
    # Reuse the Q-table persisted by an earlier run, if there is one
    q_table = np.load(q_table_file)
except IOError:
    # No saved table yet: build the environment only to read the sizes of
    # its observation and action spaces, which give the table's shape
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name,
    )

    # One row per discrete state, one column per discrete action
    q_table = np.full(
        shape=(env.observation_space.n, env.action_space.n),
        fill_value=q_table_init_value,
    )
    np.save(q_table_file, q_table)

    print('File not present. Created successfully!')
else:
    print('File present. Loading done!')

# Lock the training code holds while updating and saving the Q-table
table_lock = Lock()

File not present. Created successfully!


## Create historical states csv file if it doesn't exist

In [7]:
try:
    # Probe for the history file by opening it read-only; nothing is read
    with open('k8s_historical_states_discrete_V1.csv', 'r'):
        pass
except IOError:
    # The file could not be opened: start a new one containing only the header
    with open('k8s_historical_states_discrete_V1.csv', 'w') as f:
        f.write('current_app_name,timestep,state,action,next_state,reward,'
                'done,number_of_pods,cpu_util,latency_violation,latency,hpa_threshold,info\n')
    print('File not present. Created successfully!')
else:
    print('File already present.')

File not present. Created successfully!


# Train the agent

## Agent training
This function trains our agent:

In [8]:
def decode(i):
    """Split an encoded discrete state index into its four components.

    The index is read as a mixed-radix number: the lowest digit is base 7,
    the next two digits are base 5, and whatever remains is the leading
    component.

    Returns:
        A reverse iterator over the components, leading component first and
        the base-7 digit last. The training loop in this notebook labels
        them, in that order, as CPU utilization, HPA CPU threshold, number
        of pods and latency.
    """
    remainder, base7_digit = divmod(i, 7)
    remainder, lower_base5_digit = divmod(remainder, 5)
    leading, upper_base5_digit = divmod(remainder, 5)
    return reversed([base7_digit, lower_base5_digit, upper_base5_digit, leading])

In [9]:
def train_agent(num_service):
    """Train the tabular Q-learning agent against the Kubernetes gym environment.

    Runs ``total_epochs`` epochs of at most ``steps_per_epoch`` steps each.
    Every step picks an epsilon-greedy action, applies it with ``env.step``,
    appends the observed transition to
    ``k8s_historical_states_discrete_V1.csv`` and applies the Q-learning
    update to the table stored in ``q_table_file``.

    The Q-table lives on disk. It is read from and written back to
    ``q_table_file`` while holding ``table_lock``, and each update is a
    single load-modify-save critical section so concurrent holders of the
    lock cannot overwrite each other's updates. The notebook-level
    ``q_table`` variable is not modified by this function.

    Args:
        num_service: Currently unused; kept so existing calls such as
            ``train_agent(num_of_services)`` keep working.

    Returns:
        None.
    """
    # Hyperparameters
    alpha = 0.1          # learning rate of the Q-value update
    gamma = 0.9          # discount applied to the best next-state value
    epsilon_init = 0.97  # base of the exponential epsilon decay
    epsilon_min = 0.2    # exploration probability never drops below this

    current_app_name = app_name
    current_throughput_metric_name = prometheus_throughput_metric_name

    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=current_app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=current_throughput_metric_name
    )

    for epoch in range(total_epochs):
        state, _ = env.reset()
        decoded_state = list(decode(state))
        print('======EPOCH{}=======\n'
              'Training started with cpu utilization: {}, hpa cpu threshold: {}, number of pods: {}, latency:{}'
              .format(epoch, decoded_state[0], decoded_state[1], decoded_state[2], decoded_state[3]))

        done = False

        for step in range(steps_per_epoch):
            # Stop the epoch early once the environment reports it is done;
            # checked first so no table load is wasted on a finished epoch.
            if done:
                break

            current_timestep = epoch * steps_per_epoch + step

            # Epsilon keeps getting smaller and stops when it reaches epsilon_min
            epsilon = max(pow(epsilon_init, current_timestep), epsilon_min)

            # Load under the lock so a save in progress elsewhere is never
            # read half-written.
            with table_lock:
                q_table = np.load(q_table_file)

            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # Explore action space, non-greedy (NG) action selection
            else:
                action = np.argmax(q_table[state])  # Exploit learned values, greedy (G) action selection

            decoded_state = list(decode(state))
            # NOTE(review): the `>= 5` cut-off for the discretized latency
            # level is hard-coded here; confirm it matches the environment.
            print('======ROUND{}=======\n'
                  'app: {}, pod_cpu_util: {}, cpu_threshold: {}, number_of_pods: {}, latency: {}, latency_violation: {}, action: {}'
                  .format(step, current_app_name, decoded_state[0], decoded_state[1], decoded_state[2],
                          decoded_state[3], int(decoded_state[3] >= 5), action))

            real_ob, reward, done, next_state = env.step(action)

            # NOTE(review): the local clock is shifted by +2 hours,
            # presumably a manual timezone correction -- confirm.
            now = datetime.datetime.now() + datetime.timedelta(hours=2)
            info = {'datetime': now.strftime('%d/%m/%Y %H:%M:%S')}

            (pod_cpu_util,
             cpu_threshold,
             number_of_pods,
             throughput) = real_ob

            # Violation flag is 1 when the observed metric exceeds the SLA
            # value, otherwise 0
            throughput_violation = int(throughput > sla_throughput)

            # Save historical tuple
            with open('k8s_historical_states_discrete_V1.csv', 'a') as f:
                f.write(
                    '{},{},{},{},'.format(current_app_name, current_timestep, state, action) +
                    '{},{},{},{},'.format(next_state, reward, done, number_of_pods) +
                    '{},{},{},{},{}'.format(pod_cpu_util, throughput_violation, throughput, cpu_threshold, info) +
                    '\n'
                )

            with table_lock:
                # Re-read the table before updating: the copy used for action
                # selection was loaded before env.step and may be stale if
                # another holder of the lock saved in the meantime. Loading,
                # updating and saving in one critical section prevents lost
                # updates.
                q_table = np.load(q_table_file)
                old_value = q_table[state, action]
                # Q-table update is always greedy (np.max)
                # Q-learning is off-policy since the action taken can be of a different policy (non-greedy, random) (NG)
                next_max = np.max(q_table[next_state])
                new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
                q_table[state, action] = new_value
                np.save(q_table_file, q_table)

            print('reward: {}, new_value: {}'.format(reward, new_value))

            print('new cpu utilization: {}, new hpa cpu threshold: {}, new number of pods: {}, new latency: {}, new latency_violation: {}'
                  .format(pod_cpu_util, cpu_threshold, number_of_pods, throughput, throughput_violation))

            state = next_state

        # The table is already persisted after every step, so nothing is
        # saved again here.
        print('Training finished.\n')

In [None]:
# Start training. The argument is passed through for interface compatibility;
# train_agent does not currently read it.
train_agent(num_of_services)

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 0, hpa cpu threshold: 0, number of pods: 0, latency:2
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 0, latency: 2, latency_violation: 0, action: 1
reward: 0, new_value: 12.375
new cpu utilization: 0, new hpa cpu threshold: 20, new number of pods: 1, new latency: 0.8955121040344238, new latency_violation: 0
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 0, latency: 1, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 25, new_value: 14.875
new cpu utilization: 0, new hpa cpu threshold: 60, new number of pods: 1, new latency: 0.9296979109446207, new latency_violation: 0
app: proxy, pod_cpu_util: 0, cpu_threshold: 2, number_of_pods: 0, latency: 1, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 25, new_value: 14.875
new cpu utilization: 0, new hpa cpu thres

reward: 3.877147423655815, new_value: 13.372589298652846
new cpu utilization: 44, new hpa cpu threshold: 40, new number of pods: 10, new latency: 2.345114231109619, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 4, latency_violation: 0, action: 0
reward: 0, new_value: 12.448830307804988
new cpu utilization: 44, new hpa cpu threshold: 40, new number of pods: 10, new latency: 2.1337684790293374, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 4, latency_violation: 0, action: 1
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 3.8508930198020432, new_value: 13.576925416908836
new cpu utilization: 44, new hpa cpu threshold: 20, new number of pods: 10, new latency: 2.3568915526072183, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 0, number_of_pods: 4, latency: 4, latency_violation: 0, action: 0
reward: 0, new_value: 12.364188310123918
new c

Created new namespaced_horizontal_pod_autoscaler without exception
reward: 4.433291348729618, new_value: 14.845010909241145
new cpu utilization: 42, new hpa cpu threshold: 40, new number of pods: 10, new latency: 2.1127737363179526, new latency_violation: 0
Training finished.

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 2, hpa cpu threshold: 1, number of pods: 4, latency:3
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 3, latency_violation: 0, action: 0
reward: 0, new_value: 12.569836664740514
new cpu utilization: 45, new hpa cpu threshold: 40, new number of pods: 10, new latency: 2.180327892303467, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 4, latency_violation: 0, action: 1
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 4.26989403920787, new_value: 14.961407033157622
new cpu utilization: 42, new hpa cpu threshold: 20, new number 

In [25]:
# Reload the trained table from disk. train_agent only updates its own local
# copy and the file, so the notebook-level `q_table` is still the table from
# the load/create cell; saving it here would overwrite the training results.
with table_lock:
    q_table = np.load(q_table_file)

In [27]:
# Show the Q-values of every action for discrete state index 382
# (decode(382) yields the components 2, 0, 4, 4).
print(q_table[382]) 

[13.96471286 21.37426449 15.42381685]
