<a href="https://colab.research.google.com/github/diogomgsimoes/DRL-Network-Path-Selection-For-Multimedia-Traffic-in-SDNs/blob/main/DRL_routing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import networkx as nx
from itertools import islice
import copy
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import time
from collections import deque
import torch
from matplotlib import pylab as plt

In [23]:
# File the network topology is read from.
TOPOLOGY_FILE_NAME = "topology.txt"

# Hosts are named H1..H<NUMBER_OF_HOSTS>; up to NUMBER_OF_PATHS candidate
# paths are kept for every ordered host pair.
NUMBER_OF_HOSTS = 8
NUMBER_OF_PATHS = 5

# Number of (source, destination, path) entries in one state tensor; the
# summed per-entry reward is divided by this value.
REWARD_SCALE = NUMBER_OF_PATHS * NUMBER_OF_HOSTS ** 2

In [24]:
class DRLEngine():
    """Topology and bandwidth bookkeeping used by the routing environment.

    On construction the topology is read from ``TOPOLOGY_FILE_NAME``, a graph
    is built from it, and up to ``NUMBER_OF_PATHS`` simple paths are cached for
    every ordered pair of hosts ``H1..H<NUMBER_OF_HOSTS>``.  Afterwards the
    instance tracks the bandwidth left on each switch-to-switch link while
    reservations are made.

    Link dictionaries are keyed by ``(src_switch_id, dst_switch_id)`` tuples of
    strings (the switch name with the ``S`` removed) and hold both directions.
    """

    # Host pairs served during one episode; each reservation consumes one.
    DEFAULT_HOST_PAIRS = (
        ('H1', 'H4'), ('H1', 'H5'), ('H2', 'H5'), ('H2', 'H8'),
        ('H3', 'H6'), ('H3', 'H7'), ('H4', 'H5'), ('H4', 'H8'),
    )

    def __init__(self):
        self.graph = nx.Graph()
        self.link_bw_capacity = {}   # link -> capacity read from the file
        self.current_link_bw = {}    # link -> bandwidth still available
        self.hosts = {}              # host name -> id of its attached switch
        self.paths = {}              # (src host, dst host) -> list of paths
        self.host_pairs = list(self.DEFAULT_HOST_PAIRS)
        self.requests_bw = [5, 10, 15, 20, 25, 30]

        self.upload_topology()
        self.build_graph()
        self.calculate_paths()

    def upload_topology(self):
        """Read hosts and link capacities from the topology file.

        Rows whose first token contains ``H`` map a host to its switch; rows
        whose first token contains ``S`` describe a link with its capacity in
        the third column, stored for both directions.  Blank lines are
        skipped.
        """
        with open(TOPOLOGY_FILE_NAME, 'r') as topo:
            for row in topo:
                row_data = row.split()
                if not row_data:
                    continue
                if 'H' in row_data[0]:
                    self.hosts[row_data[0]] = row_data[1].replace("S", "")
                elif 'S' in row_data[0]:
                    src_id = row_data[0].replace("S", "")
                    dst_id = row_data[1].replace("S", "")
                    capacity = int(row_data[2])
                    self.link_bw_capacity[(src_id, dst_id)] = capacity
                    self.link_bw_capacity[(dst_id, src_id)] = capacity

        self.current_link_bw = copy.deepcopy(self.link_bw_capacity)

    def build_graph(self):
        """Add one undirected edge per non-blank row of the topology file."""
        with open(TOPOLOGY_FILE_NAME, 'r') as topo:
            for line in topo:
                nodes = line.split()[:2]
                if not nodes:
                    continue
                # add_edge creates any endpoint that is not in the graph yet.
                self.graph.add_edge(nodes[0], nodes[1])

    def k_shortest_paths(self, graph, source, target, k):
        """Return up to ``k`` simple paths from ``source`` to ``target``.

        Paths come from ``nx.shortest_simple_paths`` in the order it yields
        them.  An empty list is returned when no path exists.
        """
        try:
            return list(islice(nx.shortest_simple_paths(graph, source, target), k))
        except nx.NetworkXNoPath:
            return []

    def calculate_paths(self):
        """Cache the candidate paths for every ordered host pair.

        Switch nodes (names containing ``S``) are stored as integers with the
        ``S`` removed; host names are kept as strings.
        """
        for src_host_id in range(1, NUMBER_OF_HOSTS + 1):
            src = "H{}".format(src_host_id)
            for dst_host_id in range(1, NUMBER_OF_HOSTS + 1):
                dst = "H{}".format(dst_host_id)
                found = self.k_shortest_paths(self.graph, src, dst, NUMBER_OF_PATHS)
                self.paths[(src, dst)] = [
                    [int(node.replace("S", "")) if "S" in node else node
                     for node in path]
                    for path in found
                ]

    def make_reservation(self, path_id):
        """Reserve a random bandwidth amount for a random pending host pair.

        One pair is removed from ``self.host_pairs``, one value is drawn from
        ``self.requests_bw``, and that amount is subtracted from both
        directions of every switch-to-switch link on path ``path_id`` of the
        pair.

        Raises:
            IndexError: if no host pair is left, or if the pair has no path
                with index ``path_id``.
        """
        random.shuffle(self.host_pairs)
        pair = self.host_pairs.pop(0)
        # Drop the two hosts at the ends; only switches remain.
        path = self.paths[(pair[0], pair[1])][path_id][1:-1]
        # Sample from the list itself so it can change length safely.
        request_bw = random.choice(self.requests_bw)

        for s1, s2 in zip(path[:-1], path[1:]):
            for link in ((str(s1), str(s2)), (str(s2), str(s1))):
                if link not in self.current_link_bw:
                    continue
                remaining = self.current_link_bw[link] - request_bw
                # An exactly exhausted link is recorded as 1, not 0, which
                # keeps it in the "bw > 0" band of RoutingEnv.step's reward.
                # Oversubscribed links are left negative.
                self.current_link_bw[link] = 1 if remaining == 0 else remaining

    def _path_bottleneck(self, path):
        """Return the smallest remaining bandwidth along one path.

        ``path`` includes the hosts at both ends; only links between
        consecutive switches are looked up.  Links missing from
        ``self.current_link_bw`` are ignored.  ``inf`` is returned when the
        path has no known switch-to-switch link.
        """
        switches = path[1:-1]
        bottleneck = float('Inf')
        for s1, s2 in zip(switches[:-1], switches[1:]):
            bw = self.current_link_bw.get((str(s1), str(s2)))
            if bw is not None and float(bw) < bottleneck:
                bottleneck = float(bw)
        return bottleneck

    def build_state(self):
        """Build the observation tensor from the current link bandwidths.

        Returns:
            A float32 array of shape ``(NUMBER_OF_HOSTS, NUMBER_OF_HOSTS,
            NUMBER_OF_PATHS, 1)``.  Entry ``[s, d, p]`` is the bottleneck
            bandwidth of path ``p`` from host ``s+1`` to host ``d+1``; slots
            without a path hold ``1``.
        """
        state = np.empty(
            (NUMBER_OF_HOSTS, NUMBER_OF_HOSTS, NUMBER_OF_PATHS, 1),
            dtype=np.float32,
        )

        for src in range(1, NUMBER_OF_HOSTS + 1):
            h_src = "H{}".format(src)
            for dst in range(1, NUMBER_OF_HOSTS + 1):
                h_dst = "H{}".format(dst)
                paths = self.paths[(h_src, h_dst)]

                if len(paths) == 1:
                    # NOTE(review): a pair with exactly one path is always
                    # reported as 100 regardless of link load.  This looks
                    # intended for src == dst; confirm for other pairs.
                    state[src-1, dst-1, 0] = 100
                    filled = 1
                else:
                    # The bottleneck is computed per path; it must not carry
                    # over from one candidate path to the next.
                    for idx, path in enumerate(paths):
                        state[src-1, dst-1, idx] = self._path_bottleneck(path)
                    filled = len(paths)

                # Pad the slots for which no path exists.
                for idx in range(filled, NUMBER_OF_PATHS):
                    state[src-1, dst-1, idx] = 1

        return state

    def reset(self):
        """Restore the pending host pairs and the full link capacities.

        The graph and the cached paths are kept; they only depend on the
        topology file.
        """
        self.host_pairs = list(self.DEFAULT_HOST_PAIRS)
        self.current_link_bw = copy.deepcopy(self.link_bw_capacity)

In [25]:
class RoutingEnv(Env):
    """Gym environment in which each action picks a path for one request.

    An episode consists of a random number of reservation requests.  The
    observation is the per-path bandwidth tensor produced by
    ``DRLEngine.build_state`` and an action is the index of the candidate path
    to reserve on.
    """

    def __init__(self):
        super().__init__()
        self.engine = DRLEngine()

        self.requests = 0
        # An episode can never need more requests than there are host pairs.
        self.max_requests = random.randint(1, len(self.engine.host_pairs))
        # self.max_requests = 8
        self.done = False

        shape = (NUMBER_OF_HOSTS, NUMBER_OF_HOSTS, NUMBER_OF_PATHS, 1)
        self.observation_space = Box(
            low=np.zeros(shape, dtype=np.float32),
            high=np.full(shape, 100, dtype=np.float32),
            dtype=np.float32,
        )
        self.action_space = Discrete(NUMBER_OF_PATHS)
        self.state = self._initial_state()

    @staticmethod
    def _initial_state():
        """Return the all-100 observation used before the first step."""
        return np.full(
            (NUMBER_OF_HOSTS, NUMBER_OF_HOSTS, NUMBER_OF_PATHS, 1),
            100,
            dtype=np.float32,
        )

    def step(self, action):
        """Reserve bandwidth on path ``action`` and score the resulting state.

        Every state entry contributes to the reward according to its value:
        ``> 75`` adds 20, ``> 50`` adds 10, ``> 25`` adds 0, ``> 0``
        subtracts 10 and anything else subtracts 50.  The sum is divided by
        ``REWARD_SCALE``.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is a float32
            array, ``reward`` a float, and ``info`` an empty dict.
        """
        self.engine.make_reservation(action)
        self.requests += 1

        # Same dtype as the observation returned by reset().
        self.state = self.engine.build_state().astype(np.float32)
        # print(self.state)

        bw = self.state
        # np.select uses the first matching condition, like the if/elif chain.
        per_entry = np.select(
            [bw > 75, bw > 50, bw > 25, bw > 0],
            [20, 10, 0, -10],
            default=-50,
        )
        reward = float(per_entry.sum())

        if self.requests >= self.max_requests:
            self.done = True

        # print("Action:", action)
        # print("Reward:", reward)
        # print("Requests:", self.max_requests)

        return self.state, reward/REWARD_SCALE, self.done, {}

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass

    def get_state(self):
        """Return the most recent observation."""
        return self.state

    def reset(self):
        """Start a new episode and return its initial observation.

        The engine's bandwidths and host pairs are restored and a new episode
        length is drawn.
        """
        self.engine.reset()
        self.done = False
        self.requests = 0
        self.max_requests = random.randint(1, len(self.engine.host_pairs))
        # NOTE(review): the initial observation is a fixed all-100 tensor, not
        # engine.build_state(); confirm that this placeholder is intended.
        self.state = self._initial_state()

        return self.state

In [21]:
# Smoke test: run a few episodes with a uniformly random policy.
env = RoutingEnv()
episodes = 10

for episode in range(1, episodes + 1):
    env.reset()
    done = False
    score = 0

    while not done:
        # Sample over every valid path index instead of a hard-coded range.
        random_action = random.randint(0, NUMBER_OF_PATHS - 1)
        _, reward, done, _ = env.step(random_action)
        score += reward

    print('Episode: {}, Score: {}'.format(episode, score))

NameError: ignored

In [71]:
# DQN training with experience replay and a periodically synced target network.
env = RoutingEnv()

# Layer widths: l1 is the flattened observation size, l4 the number of actions.
l1 = 320
l2 = 160
l3 = 80
l4 = 5

model = torch.nn.Sequential(
    torch.nn.Linear(l1, l2),
    torch.nn.ReLU(),
    torch.nn.Linear(l2, l3),
    torch.nn.ReLU(),
    torch.nn.Linear(l3, l4)
)

# Target network; deepcopy already carries over the online network's weights.
model2 = copy.deepcopy(model)

gamma = 0.9
epsilon = 0.3
learning_rate = 1e-3

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

losses = []
total_reward_list = []
epochs = 5000
mem_size = 512
batch_size = 32
sync_freq = 300
replay = deque(maxlen=mem_size)

# Steps taken across all episodes. The per-episode counter `cnt` stays far
# below sync_freq, so the target-network sync has to use this one.
total_steps = 0

for i in range(epochs):
    print("Starting training, epoch:", i)
    cnt = 0
    total_reward = 0
    # Reset first so the episode starts from its own initial observation
    # rather than from the last observation of the previous episode.
    _state = env.reset()
    state1 = torch.from_numpy(_state.astype(np.float32)).reshape(1, l1)
    done = False

    while not done:
        print("Step:", cnt+1)
        cnt += 1
        total_steps += 1

        # Action selection needs no gradients.
        with torch.no_grad():
            qval_ = model(state1).numpy()

        if random.random() < epsilon:
            # The upper bound is exclusive, so l4 covers every action index.
            action_ = np.random.randint(0, l4)
        else:
            action_ = np.argmax(qval_)

        state, reward, done, _ = env.step(action_)
        state2 = torch.from_numpy(state.astype(np.float32)).reshape(1, l1)

        exp = (state1, action_, reward, state2, done)
        replay.append(exp)
        state1 = state2

        if len(replay) > batch_size:
            minibatch = random.sample(replay, batch_size)
            state1_batch = torch.cat([s1 for (s1, a, r, s2, d) in minibatch])
            action_batch = torch.Tensor([a for (s1, a, r, s2, d) in minibatch])
            reward_batch = torch.Tensor([r for (s1, a, r, s2, d) in minibatch])
            state2_batch = torch.cat([s2 for (s1, a, r, s2, d) in minibatch])
            done_batch = torch.Tensor([d for (s1, a, r, s2, d) in minibatch])

            Q1 = model(state1_batch)
            with torch.no_grad():
                Q2 = model2(state2_batch)

            # Bootstrapped target; terminal transitions keep only the reward.
            Y = reward_batch + gamma * ((1-done_batch) * torch.max(Q2, dim=1)[0])
            X = Q1.gather(dim=1, index=action_batch.long().unsqueeze(dim=1)).squeeze(dim=1)
            loss = loss_fn(X, Y.detach())
            print(i, loss.item())
            optimizer.zero_grad()
            loss.backward()
            losses.append(loss.item())
            optimizer.step()

            if total_steps % sync_freq == 0:
                model2.load_state_dict(model.state_dict())

        total_reward += reward

    total_reward_list.append(total_reward)
    print("Episode reward:", total_reward)

    # Linear epsilon decay, floored at zero.
    epsilon = max(0.0, epsilon - (1/epochs))

print(total_reward_list)

print('Plotting losses ...')
plt.figure(figsize=(10,7))
plt.plot(losses)
plt.xlabel("Epochs",fontsize=22)
plt.ylabel("Loss",fontsize=22)
plt.savefig('avg_loss.png')

print('Plotting rewards ...')
plt.figure(figsize=(10,7))
plt.plot(total_reward_list)
plt.xlabel("Epochs",fontsize=22)
plt.ylabel("Return",fontsize=22)
plt.savefig('avg_return.png')

sizes = [50, 100, 200]
for size in sizes:
    avg = []
    for idx in range(0, len(total_reward_list), size):
        chunk = total_reward_list[idx:idx + size]
        # Divide by the real chunk length so a short final chunk is not biased.
        avg.append(sum(chunk) / len(chunk))

    plt.figure(figsize=(10,7))
    plt.plot(avg)
    plt.xlabel("Epochs",fontsize=22)
    plt.ylabel("Return",fontsize=22)
    plt.savefig('avg_return_{}.png'.format(size))

torch.save(model.state_dict(), 'dqn_model_exp_replay_target_network.pt')

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
340 33.247371673583984
Step: 3
340 27.963165283203125
Step: 4
340 36.37363815307617
Episode reward: 9.75
Starting training, epoch: 341
Step: 1
341 110.20429229736328
Step: 2
341 27.89902687072754
Episode reward: 28.0
Starting training, epoch: 342
Step: 1
342 47.120967864990234
Step: 2
342 38.707008361816406
Step: 3
342 50.04098892211914
Step: 4
342 60.28755569458008
Step: 5
342 35.34297561645508
Step: 6
342 30.79100799560547
Episode reward: 46.0
Starting training, epoch: 343
Step: 1
343 36.32905578613281
Step: 2
343 41.600242614746094
Step: 3
343 25.340744018554688
Step: 4
343 44.4360466003418
Step: 5
343 36.57091522216797
Step: 6
343 63.59545135498047
Episode reward: 18.25
Starting training, epoch: 344
Step: 1
344 29.94817352294922
Step: 2
344 33.3510627746582
Step: 3
344 35.62351608276367
Episode reward: 9.5
Starting training, epoch: 345
Step: 1
345 37.75789260864258
Step: 2
345 41.833892822265625
Episode reward

KeyboardInterrupt: ignored

In [72]:
def test_model(model):
    """Run one greedy episode with ``model`` and print what happened.

    A fresh ``RoutingEnv`` is created, the action with the highest predicted
    Q-value is taken at every step, and the chosen action, the done flag and
    finally the summed reward are printed.

    Args:
        model: callable mapping a ``(1, N)`` float tensor (the flattened
            observation) to a tensor of per-action values.
    """
    test_env = RoutingEnv()
    _state = test_env.reset()
    # Flatten to a single row whatever the observation size is.
    state = torch.from_numpy(_state.astype(np.float32)).reshape(1, -1)
    done = False
    rewards = []
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        while not done:
            qval_ = model(state).numpy()
            action = np.argmax(qval_)
            _state, reward, done, _ = test_env.step(action)
            print("Action:", action)
            print("Done:", done)
            state = torch.from_numpy(_state.astype(np.float32)).reshape(1, -1)
            rewards.append(reward)
    print("Reward sum:", sum(rewards))


In [79]:
# Layer widths; they must match the network whose weights were saved.
l1, l2, l3, l4 = 320, 160, 80, 5

# Same layer order as the trained model so the saved parameter names line up.
model_test = torch.nn.Sequential(
    torch.nn.Linear(in_features=l1, out_features=l2),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=l2, out_features=l3),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=l3, out_features=l4),
)

# Load the saved weights, then play one greedy episode.
model_test.load_state_dict(
    torch.load("dqn_model_exp_replay_target_network.pt")
)
test_model(model_test)

Action: 1
Done: True
Reward sum: 14.0
