In [2]:
!pip install -e ../gym_k8s_real

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///home/li/rl-autoscaler-k8s/gym_k8s_real
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: gym-k8s-real
  Attempting uninstall: gym-k8s-real
    Found existing installation: gym-k8s-real 0.0.1
    Uninstalling gym-k8s-real-0.0.1:
      Successfully uninstalled gym-k8s-real-0.0.1
  Running setup.py develop for gym-k8s-real
Successfully installed gym-k8s-real
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [3]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime

In [4]:
import gym_k8s_real

## Information about the Kubernetes environment we deploy

In [4]:
# --- Training schedule -------------------------------------------------------
# Length of one timestep, in minutes: how long we wait for an action to be enforced
timestep_duration = 1.5
total_epochs = 100
steps_per_epoch = 15
num_of_services = 1

# --- Target application ------------------------------------------------------
app_name = 'proxy'
cpu_req = '80m'
memory_req = '128Mi'

# --- Monitoring / SLA --------------------------------------------------------
prometheus_host = 'http://145.100.135.52:30013'
# Nginx ingress latency metric
# prometheus_throughput_metric_name = '(sum(http_requests_total) by (app))'
# NOTE(review): the variable is named "throughput" but is set to the 'latency'
# metric; train_agent counts an observation above sla_throughput as a violation.
prometheus_throughput_metric_name = 'latency'
sla_throughput = 2.6

# --- Environment and Q-table -------------------------------------------------
gym_env = 'gym_k8s_real:k8s-env-discrete-state-five-action-v0'
q_table_file = 'Q-env-discrete-state-discrete-five-action-data.npy'
# Every cell of a freshly created Q-table starts at this value
q_table_init_value = 12.5

## Load or create the Q table
If our Q table file is present, we load it into memory. Otherwise we create a gym environment and, using the dimensions of the environment's observation and action spaces, we create a new Q table and save it to disk.

In [5]:
# Reuse a previously saved Q-table when one can be read from disk; otherwise
# build a fresh table sized from the environment's spaces and persist it.
try:
    q_table = np.load(q_table_file)
except IOError:
    # The environment is only instantiated on this path, to learn the number
    # of discrete states and actions.
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=prometheus_throughput_metric_name
    )

    q_table = np.full(
        shape=(env.observation_space.n, env.action_space.n),
        fill_value=q_table_init_value
    )
    np.save(q_table_file, q_table)

    print('File not present. Created successfully!')
else:
    print('File present. Loading done!')

# Lock used by train_agent and the manual save cell around Q-table updates/saves
table_lock = Lock()

File not present. Created successfully!


## Create historical states csv file if it doesn't exist

In [6]:
# Create the historical-states CSV with its header row unless it already exists.
# Exclusive-create mode ('x') fails with FileExistsError when the file is
# present, so an existing history can never be truncated. Any other I/O error
# (e.g. missing permissions) now propagates instead of being mistaken for
# "file not present".
try:
    with open('k8s_historical_discrete_states_five_actions.csv', 'x') as f:
        f.write('current_app_name,timestep,state,action,next_state,reward,'
                'done,number_of_pods,cpu_util,latency_violation,latency,hpa_threshold,info\n')
    print('File not present. Created successfully!')
except FileExistsError:
    print('File already present.')

File already present.


# Train the agent

## Agent training
This function trains our agent:

In [7]:
def decode(i):
    """Split an encoded state index into its four components.

    The index is read as a mixed-radix number whose three least-significant
    digits use bases 7, 5 and 5; whatever remains is the leading component.

    Returns an iterator over the components, most-significant first. In
    train_agent these are labelled cpu utilization, hpa cpu threshold,
    number of pods and latency.
    """
    remaining = i
    digits = []
    # Peel off the digits from least- to most-significant
    for base in (7, 5, 5):
        remaining, digit = divmod(remaining, base)
        digits.append(digit)
    digits.append(remaining)
    return reversed(digits)

In [8]:
def train_agent(num_service):
    """Train the tabular Q-learning agent against the Kubernetes gym environment.

    Runs ``total_epochs`` epochs of at most ``steps_per_epoch`` steps each.
    Every step picks an epsilon-greedy action, applies it with ``env.step``,
    appends the transition to the historical-states CSV and applies the
    Q-learning update to the table stored in ``q_table_file``.

    The Q-table lives on disk. Each update reloads, modifies and saves it
    inside a single ``table_lock`` critical section, so callers sharing the
    lock and the file cannot overwrite each other's updates.

    Args:
        num_service: Currently unused; kept so existing callers keep working.

    Returns:
        None.
    """
    # Hyperparameters
    alpha = 0.1           # learning rate
    gamma = 0.9           # discount factor
    epsilon_init = 0.97   # base of the exponential exploration decay
    epsilon_min = 0.2     # exploration never drops below this

    current_app_name = app_name
    current_throughput_metric_name = prometheus_throughput_metric_name

    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_name=current_app_name,
        sla_throughput=sla_throughput,
        prometheus_host=prometheus_host,
        prometheus_throughput_metric_name=current_throughput_metric_name
    )

    for epoch in range(0, total_epochs):
        state, _ = env.reset()
        decoded_state = list(decode(state))
        print('======EPOCH{}=======\n'
              'Training started with cpu utilization: {}, hpa cpu threshold: {}, number of pods: {}, latency:{}'
              .format(epoch, decoded_state[0], decoded_state[1], decoded_state[2], decoded_state[3]))

        done = False

        for step in range(steps_per_epoch):
            # Stop the epoch as soon as the environment reports it is done
            if done:
                break

            current_timestep = epoch * steps_per_epoch + step

            # Epsilon keeps getting smaller and stops when it reaches epsilon_min
            epsilon = max(pow(epsilon_init, current_timestep), epsilon_min)

            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()  # Explore action space, non-greedy (NG) action selection
            else:
                # Read the latest table under the lock so a concurrent save
                # cannot hand us a half-written file
                with table_lock:
                    q_table = np.load(q_table_file)
                action = np.argmax(q_table[state])  # Exploit learned values, greedy (G) action selection

            decoded_state = list(decode(state))
            print('======ROUND{}=======\n'
                  'app: {}, pod_cpu_util: {}, cpu_threshold: {}, number_of_pods: {}, latency: {}, latency_violation: {}, action: {}'
                  .format(step, current_app_name, decoded_state[0], decoded_state[1], decoded_state[2],
                          decoded_state[3], int(decoded_state[3] >= 5), action))

            # This environment's step() result is unpacked as
            # (raw observation, reward, done flag, next discrete state)
            real_ob, reward, done, next_state = env.step(action)

            # Timestamp shifted by two hours — presumably to convert the host
            # clock to local time; TODO confirm
            now = datetime.datetime.now() + datetime.timedelta(hours=2)
            info = {'datetime': now.strftime('%d/%m/%Y %H:%M:%S')}

            pod_cpu_util, cpu_threshold, number_of_pods, throughput = real_ob

            # 1 if the observed metric exceeds the SLA bound, otherwise 0
            throughput_violation = int(throughput > sla_throughput)

            # Save historical tuple
            with open('k8s_historical_discrete_states_five_actions.csv', 'a') as f:
                f.write(
                    '{},{},{},{},'.format(current_app_name, current_timestep, state, action) +
                    '{},{},{},{},'.format(next_state, reward, done, number_of_pods) +
                    '{},{},{},{},{}'.format(pod_cpu_util, throughput_violation, throughput, cpu_threshold, info) +
                    '\n'
                )

            # Reload, update and persist in one critical section: env.step()
            # takes a long time, so a table read before it may be stale by now
            with table_lock:
                q_table = np.load(q_table_file)
                old_value = q_table[state, action]
                # Q-table update is always greedy (np.max)
                # Q-learning is off-policy since the action taken can be of a different policy (non-greedy, random) (NG)
                next_max = np.max(q_table[next_state])
                new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
                q_table[state, action] = new_value
                np.save(q_table_file, q_table)

            print('reward: {}, new_value: {}'.format(reward, new_value))

            print('new cpu utilization: {}, new hpa cpu threshold: {}, new number of pods: {}, new latency: {}, new latency_violation: {}'
                  .format(pod_cpu_util, cpu_threshold, number_of_pods, throughput, throughput_violation))

            state = next_state

        print('Training finished.\n')

In [9]:
train_agent(num_of_services)

Created new namespaced_horizontal_pod_autoscaler
Training started with cpu utilization: 0, hpa cpu threshold: 0, number of pods: 0, latency:1
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 0, latency: 1, latency_violation: 0, action: 1
reward: 0, new_value: 12.375
new cpu utilization: 0, new hpa cpu threshold: 20, new number of pods: 1, new latency: 0.9118185838063558, new latency_violation: 0
app: proxy, pod_cpu_util: 0, cpu_threshold: 0, number_of_pods: 0, latency: 1, latency_violation: 0, action: 3
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 10.02196482996812, new_value: 13.377196482996812
new cpu utilization: 133, new hpa cpu threshold: 40, new number of pods: 4, new latency: 2.2089010874430337, new latency_violation: 0
app: proxy, pod_cpu_util: 5, cpu_threshold: 1, number_of_pods: 2, latency: 4, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 7.398192563903438, new_value: 13.1

reward: 0, new_value: 12.375
new cpu utilization: 43, new hpa cpu threshold: 40, new number of pods: 9, new latency: 1.6869866053263347, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 3, latency_violation: 0, action: 2
reward: 6.724213331851527, new_value: 13.047421333185152
new cpu utilization: 39, new hpa cpu threshold: 40, new number of pods: 9, new latency: 1.3169920444488525, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 1, number_of_pods: 4, latency: 2, latency_violation: 0, action: 2
reward: 5.313676585529188, new_value: 12.955635578539583
new cpu utilization: 41, new hpa cpu threshold: 40, new number of pods: 9, new latency: 1.818633238474528, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 3, latency_violation: 0, action: 2
reward: 5.523377665359534, new_value: 13.461024168471154
new cpu utilization: 39, new hpa cpu threshold: 40, new number of pods: 

reward: 0, new_value: 12.375
new cpu utilization: 41, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.5563313961029053, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 0, number_of_pods: 4, latency: 3, latency_violation: 0, action: 1
reward: 0, new_value: 12.375
new cpu utilization: 41, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.691678762435913, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 0, number_of_pods: 4, latency: 3, latency_violation: 0, action: 2
reward: 4.632602995104991, new_value: 12.838260299510498
new cpu utilization: 41, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.649765412012736, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 0, number_of_pods: 4, latency: 3, latency_violation: 0, action: 1
reward: 0, new_value: 12.292943426955945
new cpu utilization: 46, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.7304558753967285, new l

reward: 0, new_value: 12.375
new cpu utilization: 56, new hpa cpu threshold: 40, new number of pods: 5, new latency: 1.2000865936279297, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 2, latency: 2, latency_violation: 0, action: 2
reward: 6.104032009770045, new_value: 13.285411180568728
new cpu utilization: 42, new hpa cpu threshold: 40, new number of pods: 9, new latency: 1.5199349721272786, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 4, latency: 2, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 11.965803215431173, new_value: 16.57166011746035
new cpu utilization: 70, new hpa cpu threshold: 80, new number of pods: 4, new latency: 1.416762113571167, new latency_violation: 0
app: proxy, pod_cpu_util: 3, cpu_threshold: 3, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without excep

reward: 0, new_value: 12.438394274320045
new cpu utilization: 32, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.129205624262492, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 2, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 10.626947678922349, new_value: 14.120749142796196
new cpu utilization: 58, new hpa cpu threshold: 60, new number of pods: 5, new latency: 1.4929049015045166, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 2, number_of_pods: 2, latency: 2, latency_violation: 0, action: 0
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 5.829451290877921, new_value: 13.59492886897455
new cpu utilization: 32, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.2514385382334392, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 2, latency_viola

reward: 0, new_value: 12.597420840650493
new cpu utilization: 41, new hpa cpu threshold: 40, new number of pods: 7, new latency: 1.4940991401672363, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 2, latency_violation: 0, action: 0
reward: 0, new_value: 12.471446632243987
new cpu utilization: 45, new hpa cpu threshold: 40, new number of pods: 7, new latency: 1.2366832892100017, new latency_violation: 0
app: proxy, pod_cpu_util: 2, cpu_threshold: 1, number_of_pods: 3, latency: 2, latency_violation: 0, action: 1
Created new namespaced_horizontal_pod_autoscaler without exception
reward: 5.582977101173977, new_value: 13.380394663005724
new cpu utilization: 32, new hpa cpu threshold: 20, new number of pods: 10, new latency: 1.3263197739919026, new latency_violation: 0
app: proxy, pod_cpu_util: 1, cpu_threshold: 0, number_of_pods: 4, latency: 2, latency_violation: 0, action: 4
Created new namespaced_horizontal_pod_autoscaler without excepti

KeyboardInterrupt: 

In [25]:
# Persist the in-memory Q-table to disk while holding the table lock.
# NOTE(review): train_agent assigns its own local `q_table`, so the global
# `q_table` saved here is whatever the load/create cell (or an earlier kernel
# session) left in memory. Confirm it is the table you mean to keep, because
# this overwrites q_table_file.
table_lock.acquire()
try:
    np.save(q_table_file, q_table)
finally:
    table_lock.release()

In [27]:
# Show the action values stored for state 382 (decode(382) yields [2, 0, 4, 4]).
# NOTE(review): the saved output below lists three values although gym_env names
# a five-action environment, so it looks stale; re-run after training to confirm.
print(q_table[382]) 

[13.96471286 21.37426449 15.42381685]
