## Install Gym environments to interact with k8s

In [1]:
!pip install -e ../../gym_k8s_real

Defaulting to user installation because normal site-packages is not writeable
Obtaining file:///home/li/EPI-kube-scaling/HEURIST-MAL-k8s/gym_k8s_real
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: gym-k8s-real
  Attempting uninstall: gym-k8s-real
    Found existing installation: gym-k8s-real 0.0.1
    Uninstalling gym-k8s-real-0.0.1:
      Successfully uninstalled gym-k8s-real-0.0.1
  Running setup.py develop for gym-k8s-real
Successfully installed gym-k8s-real
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m

In [6]:
import gym
import random
import subprocess
import time
import numpy as np
from threading import Lock, Thread
import datetime as dtime
import gym_k8s_real

## Packages to build DQN

In [13]:
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import random

## Define DQN class - without Q-value

In [31]:
 class DQNAgent():
    """Deep Q-Network agent with an experience-replay buffer and a target network.

    The online network (``self.model``) is trained on sampled transitions; the
    target network (``self.target_model``) provides the bootstrap value in
    ``learning()`` and is moved towards the online network by a soft update.

    States are expected to be 2-D array-likes with a single row: the code feeds
    ``state`` straight to ``predict`` and uses ``state[0]`` as the feature vector.

    Parameters
    ----------
    env : environment object
        Must expose ``action_space.sample()``; ``observation_space.n`` and
        ``action_space.n`` are read only when the sizes below are ``None``.
    path : str
        Prefix prepended (plain string concatenation) to the model file name.
    episodes : int
        Stored as ``self.C``: ``learning()`` performs an update once every
        ``C`` calls.
    max_env_steps : int
        Written to ``env._max_episode_steps``.
    epsilon_decay : float
        Factor applied to ``epsilon`` at the end of ``replay()``.
    state_size, action_size : int, optional
        Network input / output widths.
    epsilon, epsilon_min : float
        Exploration rate is ``max(epsilon ** step, epsilon_min)``.
    gamma : float
        Discount factor.
    alpha, alpha_decay : float
        Initial learning rate and its inverse-time decay rate.
    batch_size : int
        Number of transitions sampled by ``learning()``.
    prints : bool
        Stored on the instance; not read by this class.
    step : int
        Initial value of the step counter used by the exploration schedule.
    tau : float
        Soft-update coefficient for the target network
        (``target = tau * online + (1 - tau) * target``).
    """

    def __init__(self, env, path, episodes, max_env_steps, epsilon_decay,
                 state_size=None, action_size=None, epsilon=0.97, epsilon_min=0.1,
                 gamma=1, alpha=.01, alpha_decay=.01, batch_size=16, prints=False, step=0,
                 tau=0.125):
        self.memory = deque(maxlen=100000)
        self.env = env
        self.t = 0
        self.C = episodes

        if state_size is None:
            self.state_size = self.env.observation_space.n
        else:
            self.state_size = state_size

        if action_size is None:
            self.action_size = self.env.action_space.n
        else:
            self.action_size = action_size

        self.step = step
        self.env._max_episode_steps = max_env_steps
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.gamma = gamma
        self.alpha = alpha
        self.alpha_decay = alpha_decay
        self.batch_size = batch_size
        self.tau = tau
        self.path = path                     # location where the model is saved to
        self.prints = prints                 # if true, the agent will print his scores

        self.model = self._build_model()
        # learning() and train_target() need a separate target network; it
        # starts out as an exact copy of the online network.
        self.target_model = self._build_model()
        self.target_model.set_weights(self.model.get_weights())

    # Build network model
    def _build_model(self):
        """Create and compile one Q-network (state_size -> 24 -> 48 -> action_size)."""
        # Imported here so the notebook's import cell does not have to change.
        from tensorflow.keras.optimizers.schedules import InverseTimeDecay

        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='tanh'))
        model.add(Dense(48, activation='tanh'))
        model.add(Dense(self.action_size, activation='linear'))
        # lr / (1 + decay * iterations): the same schedule the legacy `decay`
        # argument applied, expressed in a form current Keras optimizers accept.
        learning_rate = InverseTimeDecay(initial_learning_rate=self.alpha,
                                         decay_steps=1,
                                         decay_rate=self.alpha_decay)
        model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
        return model

    # Generate one action
    def generate_action(self, state):
        """Return an action for ``state`` using an epsilon-greedy policy."""
        # Exploration rate shrinks with the step counter and is floored at epsilon_min.
        eps = max(pow(self.epsilon, self.step), self.epsilon_min)
        if np.random.random() < eps:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state, verbose=0)[0])

    # Add states into memory
    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def _build_training_batch(self, samples, bootstrap_model):
        """Turn sampled transitions into (inputs, target Q-values) arrays.

        ``bootstrap_model`` supplies max-Q of the next state for non-terminal
        transitions; terminal transitions use the bare reward.
        """
        states = np.array([sample[0][0] for sample in samples])
        targets = self.model.predict(states, verbose=0)

        # Next states are only evaluated for non-terminal transitions.
        open_rows = [row for row, sample in enumerate(samples) if not sample[4]]
        next_q = {}
        if open_rows:
            next_states = np.array([samples[row][3][0] for row in open_rows])
            predictions = bootstrap_model.predict(next_states, verbose=0)
            next_q = {row: np.max(q) for row, q in zip(open_rows, predictions)}

        for row, (_, action, reward, _, done) in enumerate(samples):
            if done:
                targets[row][action] = reward
            else:
                targets[row][action] = reward + self.gamma * next_q[row]
        return states, targets

    # Replay memory to train
    def replay(self, batch_size):
        """Fit the online network on up to ``batch_size`` remembered transitions.

        Bootstraps from the online network itself, then decays ``epsilon``.
        Does nothing when there is nothing to sample.
        """
        sample_size = min(len(self.memory), batch_size)
        if sample_size == 0:
            return
        minibatch = random.sample(self.memory, sample_size)
        x_batch, y_batch = self._build_training_batch(minibatch, self.model)

        self.model.fit(x_batch, y_batch, batch_size=len(x_batch), verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    # Actual training process
    def learning(self):
        """Advance the step counter and, every ``C`` calls, train on a sampled batch."""
        self.step += 1
        self.t = (self.t + 1) % self.C

        # Update every C calls, and only once the buffer holds a full batch.
        if self.t != 0 or len(self.memory) < self.batch_size:
            return

        samples = random.sample(self.memory, self.batch_size)
        states, targets = self._build_training_batch(samples, self.target_model)

        # batch learning to train the model Q
        self.model.fit(states, targets, epochs=1, verbose=0)
        self.train_target()

    # soft update to target model Q_hat from model Q
    def train_target(self):
        """Blend the online weights into the target network using ``tau``."""
        online_weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        blended = [self.tau * online + (1 - self.tau) * target
                   for online, target in zip(online_weights, target_weights)]
        self.target_model.set_weights(blended)

    # Save model
    def save_model(self, name='DQN_Model_ML'):
        """Save the online network to ``self.path + name``."""
        self.model.save(self.path + name)

    def load_model(self, name='DQN_Model_ML'):
        """Load the online network from ``self.path + name`` and re-sync the target."""
        self.model = load_model(self.path + name)
        # Assumes the stored model has the same layer shapes as _build_model().
        self.target_model.set_weights(self.model.get_weights())

## Configuration of the Kubernetes environment we deploy

In [32]:
# --- Timing ---------------------------------------------------------------
# Length of one timestep in minutes; this is how long we wait for an action
# to be enforced before observing again.
timestep_duration = 1

# --- Applications and clusters ---------------------------------------------
ml_app_names = ["firewall", "encrypt"]
ml_cluster_names = ["cluster1-cntx", "cluster2-cntx", "cluster3-cntx"]

# start: don't need to change
memory_req = '128Mi'
cpu_req = '80m'
app_dict = {app: 500 for app in ml_app_names}
# end

# --- SLA monitoring ---------------------------------------------------------
sla_latency = 2.6
# link: http://145.100.135.89:6088/latency
sla_host = 'http://145.100.135.89:6088/'
# latency metric
sla_metric_name = 'latency'

# --- Gym environment and training schedule ----------------------------------
gym_env = 'gym_k8s_real:k8s-env-dqn-ml-v0'
total_epochs = 10
num_of_services = 1
steps_per_epoch = 100
config_path = ""

# dict describes the connection between clusters
ml_connection = {cluster: [0, 1, 2] for cluster in range(3)}

# --- Reward weights ---------------------------------------------------------
# reward = ml_pod_weight * Rres + ml_latency_weight * Rperf
# ml_pod_weight + ml_latency_weight = 2.5
ml_pod_weight = 1        # resource rewards
ml_latency_weight = 1.5  # performance rewards

## Create the historical-states CSV file if it doesn't exist

In [3]:
# Probe for the history file by opening it for reading; when that fails,
# create it and write the CSV header row.
try:
    with open('k8s_DQN_ML_historical_states.csv', 'r'):
        pass
except IOError:
    with open('k8s_DQN_ML_historical_states.csv', 'w') as f:
        f.write('app_name,timestep,reward,'
                'cpu-pods,cpu-podm,cpu-podl,'
                'noPod-cluster1,noPod-cluster2,noPod-cluster3,'
                'latency,latency_violation,datetime\n')
        print('File not present. Created successfully!')
else:
    print('File already present.')

File not present. Created successfully!


# Train the agent

## Agent training
This function trains our agent:

In [34]:
def train_agent():
    """Train a DQNAgent against the k8s gym environment and log every step.

    Builds the environment from the notebook's configuration globals, runs
    ``total_epochs`` epochs of at most ``steps_per_epoch`` steps each, appends
    one CSV row per application and step to
    ``k8s_DQN_ML_historical_states.csv``, and saves the model after each epoch.
    """
    # Create the environment from the configuration cell's values.
    env = gym.make(
        gym_env,
        timestep_duration=timestep_duration,
        app_names=ml_app_names,
        app_configs=['s', 'm', 'l'],
        cluster_names=ml_cluster_names,
        sla_latency=sla_latency,
        sla_host=sla_host,
        sla_latency_metric_name=sla_metric_name,
        max_pods=20,
        min_pods=1,
        app_dict=app_dict,
        config_path=config_path,
        changes=[0, 1, 2],
        connection=ml_connection,
        pod_weight=ml_pod_weight,
        latency_weight=ml_latency_weight
    )

    # Define the DQN agent.
    #   path:        prefix of the file the model is saved to / loaded from
    #   episodes:    number of learning() calls between two model updates
    #   epsilon:     base of the exploration schedule (epsilon ** step)
    #   epsilon_min: lower bound of the exploration probability
    #   gamma:       discount factor; alpha / alpha_decay: learning rate and its decay
    #   batch_size:  number of remembered steps sampled for one update
    # NOTE(review): epsilon_decay is only read by DQNAgent.replay(), which this
    # loop never calls; a factor of 10 would grow epsilon there - confirm intent.
    agent = DQNAgent(env, path="", episodes=20, max_env_steps=10, epsilon_decay=10,
                     state_size=None, action_size=None, epsilon=0.97, epsilon_min=0.1,
                     gamma=1, alpha=.01, alpha_decay=.01, batch_size=16, prints=False)

    for epoch in range(total_epochs):
        # Each epoch is a fresh training run, so the environment is reset first.
        state, _ = env.reset()
        done = False

        # steps_per_epoch: how many actions the agent performs in each epoch
        for step in range(steps_per_epoch):
            if done:
                break
            current_timestep = epoch * steps_per_epoch + step

            action = agent.generate_action(state)

            # Let the environment take one step; this environment returns the
            # next state in the fourth position of the tuple.
            real_ob, reward, done, next_state = env.step(action)
            print("state right now:", real_ob)

            # Local time shifted by two hours.
            # NOTE(review): presumably a timezone correction - confirm.
            now = dtime.datetime.now() + dtime.timedelta(hours=2)
            dt_string = now.strftime('%d/%m/%Y %H:%M:%S')

            # Save historical data: one row per application. The datetime
            # column holds the plain timestamp string (not a dict repr).
            with open('k8s_DQN_ML_historical_states.csv', 'a') as f:
                for i, app_name in enumerate(ml_app_names):
                    f.write(
                        '{},{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                            app_name, current_timestep, reward,
                            real_ob[0][i][0], real_ob[0][i][1], real_ob[0][i][2],
                            real_ob[1][i][0], real_ob[1][i][1], real_ob[1][i][2],
                            real_ob[3], int(real_ob[3] > sla_latency), dt_string
                        )
                    )

            # Store this step in the agent's replay memory.
            agent.remember(state, action, reward, next_state, done)

            # learning() decides internally whether an update is due.
            agent.learning()

            # Move to the latest state.
            state = next_state

        # Save the model at the end of each epoch so progress is not lost.
        agent.save_model()
        print('One epoch of training finished.\n')

In [None]:
# Start training; runs total_epochs epochs against the configured environment.
train_agent()