In [1]:
# Code credit - partially based on https://github.com/gsurma/cartpole/blob/master/cartpole.py
import random
import gym
import gym_sdwan
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
# --- Environment -----------------------------------------------------------
# Gym id handed to gym.make() when the environment is created.
ENV_NAME = "Sdwan-v0"

# --- Replay buffer ---------------------------------------------------------
# Upper bound on stored transitions (deque maxlen) and the number of
# transitions sampled on every replay pass.
MEMORY_SIZE = 10 ** 6
BATCH_SIZE = 20

# --- Learning --------------------------------------------------------------
# GAMMA discounts the best next-state Q-value in the replay target;
# LEARNING_RATE is forwarded to the Adam optimizer.
LEARNING_RATE = 1e-3
GAMMA = 0.95

# --- Epsilon-greedy exploration --------------------------------------------
# The exploration rate starts at EXPLORATION_MAX, is multiplied by
# EXPLORATION_DECAY after each replay pass, and never drops below
# EXPLORATION_MIN.
EXPLORATION_MIN = 0.01
EXPLORATION_MAX = 1.0
EXPLORATION_DECAY = 0.995

In [3]:
class DQNSolver:
    """Epsilon-greedy deep Q-learning agent backed by a small dense network.

    The agent keeps a bounded replay memory of transitions and, on each
    call to ``experience_replay``, fits the network on a random sample of
    them one transition at a time.
    """

    def __init__(self, observation_space, action_space):
        """Build the Q-network and an empty replay memory.

        Args:
            observation_space: Number of features in one observation; used
                as the input width of the first dense layer.
            action_space: Number of discrete actions; used as the width of
                the output layer and as the range for random actions.
        """
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        # Oldest transitions are dropped automatically once maxlen is hit.
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Two hidden ReLU layers, linear output: one Q-value per action.
        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` using an epsilon-greedy policy.

        With probability ``self.exploration_rate`` a uniformly random action
        index is returned; otherwise the index of the largest predicted
        Q-value is returned.

        Args:
            state: Observation passed straight to ``self.model.predict``;
                only row 0 of the prediction is used, so a single-row batch
                is expected.

        Returns:
            int: The selected action index in ``range(self.action_space)``.
        """
        if np.random.rand() < self.exploration_rate:
            # Draw the action BEFORE logging it: printing `action` ahead of
            # its assignment raised UnboundLocalError on every random step.
            action = random.randrange(self.action_space)
            print ("Taking random action", action)
            return action
        q_values = self.model.predict(state)
        # Cast to a plain int so both branches return the same type.
        action = int(np.argmax(q_values[0]))
        print ("Taking predicted  action", action)
        return action

    def experience_replay(self):
        """Train on a random mini-batch of remembered transitions.

        Does nothing until at least ``BATCH_SIZE`` transitions are stored.
        For each sampled transition the target for the taken action is the
        reward, plus the discounted best next-state Q-value when the
        transition is not terminal. The exploration rate is decayed once per
        call and clamped at ``EXPLORATION_MIN``.
        """
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal:
                # Bootstrap from the network's own estimate of the next state.
                q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
            # Only the taken action's Q-value is moved toward the target;
            # the other outputs keep their current predictions.
            q_values = self.model.predict(state)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)


In [6]:
# Build the environment and an agent sized to its observation/action spaces.
env = gym.make(ENV_NAME)
#score_logger = ScoreLogger(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)

# Train indefinitely; this loop has no exit condition and only stops when
# the kernel is interrupted.
run = 0
while True:
    run += 1
    # Reset the reward total for every episode so the "score" printed below
    # describes this run only rather than a running sum across all runs.
    score = 0
    state = env.reset()
    # The network consumes a single-row batch, hence the (1, n) reshape.
    state = np.reshape(state, [1, observation_space])
    step = 0
    while True:
        step += 1
        #env.render()
        action = dqn_solver.act(state)
        state_next, reward, terminal, info = env.step(action)
        #reward = reward if not terminal else -reward
        state_next = np.reshape(state_next, [1, observation_space])
        score += reward
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
        if terminal:
            print ("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(score))
            #score_logger.add_score(step, run)
            break
        # Learn from past transitions after every non-terminal step.
        dqn_solver.experience_replay()

2018-12-10 17:43:19,945 - root - INFO - SdwanEnv - Version 0.1.0


  result = entry_point.load(False)


Taking random action
2018-12-10 17:44:06,675 - root - INFO - current bw:8.58, sla bw:6.0
Taking random action
2018-12-10 17:44:21,744 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 17:44:36,799 - root - INFO - current bw:7.22, sla bw:6.0
Taking random action
2018-12-10 17:44:51,855 - root - INFO - current bw:8.40, sla bw:6.0
Taking random action
2018-12-10 17:45:06,897 - root - INFO - current bw:7.55, sla bw:6.0
Taking random action
2018-12-10 17:45:21,939 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 17:45:36,992 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 17:45:52,045 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 17:46:07,108 - root - INFO - current bw:6.00, sla bw:6.0
Taking random action
2018-12-10 17:46:22,164 - root - INFO - current bw:8.76, sla bw:6.0
Taking random action
2018-12-10 17:46:37,222 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action


Taking random action
2018-12-10 18:04:20,967 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 18:04:36,167 - root - INFO - current bw:6.87, sla bw:6.0
Taking predicted  action 0
2018-12-10 18:04:51,325 - root - INFO - current bw:9.37, sla bw:6.0
Taking random action
2018-12-10 18:05:06,544 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 18:05:21,707 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 18:05:36,893 - root - INFO - current bw:9.83, sla bw:6.0
Taking random action
2018-12-10 18:05:52,050 - root - INFO - current bw:10.0, sla bw:6.0
Taking random action
2018-12-10 18:06:07,298 - root - INFO - current bw:11.1, sla bw:6.0
Taking random action
2018-12-10 18:06:22,503 - root - INFO - current bw:7.55, sla bw:6.0
Taking random action
current link failure
2018-12-10 18:06:37,655 - root - INFO - current bw:5.22, sla bw:6.0
2018-12-10 18:06:37,663 - root - INFO - BW is less than SLA
Taking predicted  actio

KeyboardInterrupt: 

In [7]:
# Run after the endless training loop above has been interrupted.
# NOTE(review): cleanup() comes from the gym_sdwan environment and is not
# defined in this notebook — presumably it tears down the environment's
# resources; confirm against the gym_sdwan source.
env.cleanup()