https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/config.py


In [1]:
import numpy as np
np.random.seed(14)
import math, random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F
import copy
import os,sys
from torch.utils import tensorboard as tb

In [2]:
# hidden_dim = 64
# max_step = 5000 #originally 500
# GAMMA = 0.99
# n_episode = 1000 #originally 800
# i_episode = 0
# buffer_size = 65000 #change back to 65000
# batch_size = 64 #change back to 64
# n_epoch = 100 #orginally 25
# epsilon = 0.7 #originally 0.9
# score = 0
# tau = 0.98

# Side length of the square gridworld.
GRID_DIM = 50 # TODO: Tune this
# Number of destination tasks placed in the gridworld.
NUM_TASKS = 2 # TODO: Tune this
# Distance threshold intended for a thresholded (hard) adjacency between tasks.
ADJ_THRESHOLD = GRID_DIM / 4 # TODO: Tune this
# True when a CUDA device is available to torch.
USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Build an autograd.Variable and move it to the GPU when USE_CUDA is set."""
    wrapped = autograd.Variable(*args, **kwargs)
    return wrapped.cuda() if USE_CUDA else wrapped

def is_legal(x, y, grid_dim=None):
    """
    Check whether the cell (x, y) lies inside the square grid.

    Params:
    x: first coordinate of the cell
    y: second coordinate of the cell
    grid_dim: side length of the grid; defaults to the module-level GRID_DIM

    Returns:
    truthy value when 0 <= x < grid_dim and 0 <= y < grid_dim
    """
    if grid_dim is None:
        grid_dim = GRID_DIM
    # Both upper bounds are exclusive. The previous `y <= GRID_DIM` let the
    # agent step onto y == GRID_DIM, one cell outside the grid.
    return (x >= 0) & (x < grid_dim) & (y >= 0) & (y < grid_dim)

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/buffer.py

In [3]:
class ReplayBufferGCare(object):
    """
    Fixed-capacity circular store of transitions.

    Each slot holds (obs, action, reward, next_obs, matrix, next_matrix, done).
    Once the buffer is full, new transitions overwrite the oldest slots.
    """

    def __init__(self, buffer_size, obs_space, n_action, n_tasks):
        """
        Allocate zero-filled storage for every field.

        Params:
        buffer_size: maximum number of transitions kept
        obs_space: length of the last axis of each stored observation
        n_action: accepted for interface compatibility; not used here
        n_tasks: number of rows stored per observation and the side length
                 of each stored adjacency matrix
        """
        self.buffer_size = buffer_size
        self.n_tasks = n_tasks
        self.pointer = 0  # next slot to write
        self.len = 0      # number of valid slots

        capacity = self.buffer_size
        obs_shape = (capacity, n_tasks, obs_space)
        adj_shape = (capacity, self.n_tasks, self.n_tasks)

        self.actions = np.zeros((capacity, 1), dtype=np.int32)
        self.rewards = np.zeros((capacity, 1))
        self.dones = np.zeros((capacity, 1))
        self.obs = np.zeros(obs_shape)
        self.next_obs = np.zeros(obs_shape)
        self.matrix = np.zeros(adj_shape)
        self.next_matrix = np.zeros(adj_shape)

    def getBatch(self, batch_size):
        """
        Draw batch_size distinct stored transitions uniformly at random.

        Params:
        batch_size: number of transitions to draw; sampling is without
                    replacement, so it must not exceed the number stored

        Returns:
        tuple (obs, action, reward, next_obs, matrix, next_matrix, done),
        each indexed along the first axis by the same sampled slots
        """
        picked = np.random.choice(self.len, batch_size, replace=False)
        fields = (
            self.obs,
            self.actions,
            self.rewards,
            self.next_obs,
            self.matrix,
            self.next_matrix,
            self.dones,
        )
        return tuple(store[picked] for store in fields)

    def add(self, obs, action, reward, next_obs, matrix, next_matrix, done):
        """
        Write one transition at the current pointer and advance it.

        Params:
        obs: observation before the action
        action: action taken
        reward: reward received
        next_obs: observation after the action
        matrix: adjacency matrix before the action
        next_matrix: adjacency matrix after the action
        done: termination flag
        """
        slot = self.pointer
        writes = (
            (self.obs, obs),
            (self.actions, action),
            (self.rewards, reward),
            (self.next_obs, next_obs),
            (self.matrix, matrix),
            (self.next_matrix, next_matrix),
            (self.dones, done),
        )
        for store, value in writes:
            store[slot] = value

        # Wrap around so the oldest entry is overwritten once the buffer is full.
        self.pointer = (slot + 1) % self.buffer_size
        if self.len < self.buffer_size:
            self.len += 1

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/model.py

In [4]:
class MTRL_ATT(nn.Module):
    """
    Small MLP (din -> 64 -> 64 -> 1) with ReLU hidden activations and a
    sigmoid on the single output unit.
    """
    def __init__(self, din):
        """
        Params:
        din: size of the last axis of the input
        """
        super().__init__()
        self.fc1 = nn.Linear(din, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """
        Params:
        x: input tensor whose last axis has size din

        Returns:
        tensor with last axis of size 1 and values in (0, 1)
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return torch.sigmoid(self.fc3(hidden))

class MTRL_Encoder(nn.Module): # TODO: Need to make it a CNN for higher dim obs space like MetaWorld
    """
    Single linear layer followed by tanh, mapping observations to embeddings.
    """
    def __init__(self, din=32, hidden_dim=128):
        """
        Params:
        din: size of the last axis of the input
        hidden_dim: size of the produced embedding
        """
        super().__init__()
        self.fc = nn.Linear(din, hidden_dim)

    def forward(self, x):
        """
        Params:
        x: input tensor whose last axis has size din

        Returns:
        embedding tensor whose last axis has size hidden_dim, values in [-1, 1]
        """
        return torch.tanh(self.fc(x))

class MTRL_AttModel(nn.Module):
    """
    Single-head masked dot-product attention over the node axis.

    Queries, keys and values are tanh-activated linear projections of the
    input; the attention weights are a softmax over masked query-key scores.
    """
    def __init__(self, n_node, din, hidden_dim, dout):
        """
        Params:
        n_node: accepted for interface compatibility; not used here
        din: size of the last axis of the input
        hidden_dim: size of the query/key/value projections
        dout: output size of the fcout layer (fcout is created but not
              applied in forward)
        """
        super().__init__()
        self.fcv = nn.Linear(din, hidden_dim)
        self.fck = nn.Linear(din, hidden_dim)
        self.fcq = nn.Linear(din, hidden_dim)
        self.fcout = nn.Linear(hidden_dim, dout)

    def forward(self, x, mask):
        """
        Params:
        x: 3-D tensor (batch, nodes, din)
        mask: 3-D tensor (batch, nodes, nodes) multiplied into the scores

        Returns:
        3-D tensor (batch, nodes, hidden_dim) of attention-weighted values
        """
        values = torch.tanh(self.fcv(x))
        queries = torch.tanh(self.fcq(x))
        keys_t = torch.tanh(self.fck(x)).transpose(1, 2)

        scores = torch.bmm(queries, keys_t)
        # Note: Order of applying adj matrix is different than that in paper. Don't get confused!
        # NOTE(review): every mask entry below 1 receives a logit of about
        # -9e15 * (1 - mask), so with fractional (soft) adjacency values only
        # entries equal to 1 keep non-negligible attention weight -- confirm
        # this is the intended behaviour for soft masks.
        masked_scores = scores * mask - 9e15 * (1 - mask)
        weights = F.softmax(masked_scores, dim=2)
        return torch.bmm(weights, values)

class MTRL_Q_Net(nn.Module):
    """
    Q-value head: one linear layer followed by ReLU, so outputs are >= 0.
    """
    def __init__(self, hidden_dim, dout):
        """
        Params:
        hidden_dim: size of ONE attention-layer output; the layer input is
                    twice this size
        dout: number of Q-values produced
        """
        super().__init__()
        # NOTE: The input is the concatenation of the h vectors of both
        # attention layers, hence hidden_dim*2. Originally only the h vector
        # of the last layer was used and the input dim was hidden_dim.
        self.fc = nn.Linear(hidden_dim*2, dout)

    def forward(self, x):
        """
        Params:
        x: tensor whose last axis has size hidden_dim*2

        Returns:
        tensor whose last axis has size dout
        """
        return torch.relu(self.fc(x))

    
class MTRL_DGN(nn.Module):
    """
    Full network: encoder -> two stacked attention layers -> Q-value head.

    The outputs of both attention layers are concatenated along the last
    axis before being passed to the Q-value head.
    """
    def __init__(self,n_tasks,num_inputs,hidden_dim,num_actions):
        """
        Params:
        n_tasks: forwarded to the attention layers as their node count
        num_inputs: size of the last axis of the observations
        hidden_dim: embedding size used by the encoder and attention layers
        num_actions: number of Q-values produced per node
        """
        super().__init__()

        # TODO: Try both single encoder and mix of encoder settings
        self.encoder = MTRL_Encoder(num_inputs, hidden_dim)
        # The attention layers will remain the same for MTRL
        self.att_1 = MTRL_AttModel(n_tasks, hidden_dim, hidden_dim, hidden_dim)
        self.att_2 = MTRL_AttModel(n_tasks, hidden_dim, hidden_dim, hidden_dim)
        # Q Net remains same for MTRL
        self.q_net = MTRL_Q_Net(hidden_dim, num_actions)

    def forward(self, x, mask):
        """
        Params:
        x: observation tensor passed to the encoder
        mask: adjacency tensor passed to both attention layers

        Returns:
        Q-values produced by the Q-value head
        """
        encoded = self.encoder(x)
        first_hop = self.att_1(encoded, mask)
        second_hop = self.att_2(first_hop, mask)

        # Both attention outputs are concatenated (dim=2) for the Q head.
        combined = torch.cat((first_hop, second_hop), dim=2)
        return self.q_net(combined)

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/surviving.py

In [5]:
class GridWorldWithCare(object):
    """
    Single-agent gridworld with several destination "tasks".

    The agent moves one cell per step; a task is completed the first time the
    agent stands on its destination cell.
    """

    def __init__(self, n_tasks):
        """
        Initialize the gridworld

        Params:
        n_tasks: number of destination tasks placed in the grid
        """
        super().__init__()
        self.n_action = 4
        self.n_tasks = n_tasks
        # TODO: maybe include food as part of task, reach dest with > 0 food or something
        self.tasks = [0 for _ in range(self.n_tasks)]
        self.agent = [-1, -1]
        self.build_env()

        # One flag per task; set to 1 once the task has been completed.
        self.dones = np.zeros(self.n_tasks)
        self.steps = 0
        # Agent position (2 values) plus one 2-D offset per task.
        self.len_obs = 2 * (self.n_tasks + 1)

    def reset(self):
        """
        Reset the gridworld: resample all positions and clear progress.

        Returns:
        obs: observation list as produced by get_obs
        adj: adjacency matrix as produced by get_adj
        """
        self.build_env()
        self.dones = np.zeros(self.n_tasks)
        self.steps = 0
        observation = self.get_obs()
        adjacency = self.get_adj()
        return observation, adjacency

    def build_env(self):
        """
        Sample a destination for every task and a start cell for the agent,
        each coordinate drawn from np.random.randint(0, GRID_DIM).
        """
        for task_id in range(self.n_tasks):
            dest_x = np.random.randint(0, GRID_DIM)
            dest_y = np.random.randint(0, GRID_DIM)
            self.tasks[task_id] = [dest_x, dest_y]
            print("TASK NUMBER ", task_id, " DEST: ", dest_x, dest_y)
        for axis in (0, 1):
            self.agent[axis] = np.random.randint(0, GRID_DIM)

    def get_obs(self):
        """
        Get observations

        Returns:
        obs: flat list [x/G, y/G, dx_0/G, dy_0/G, dx_1/G, ...] where (x, y)
             is the agent position, (dx_i, dy_i) the offset from the agent to
             task i, and G is GRID_DIM
        """
        # TODO: change this for MTRL
        agent_x, agent_y = self.agent
        obs = [agent_x / GRID_DIM, agent_y / GRID_DIM]

        for dest_x, dest_y in self.tasks:
            obs.append((dest_x - agent_x) / GRID_DIM)
            obs.append((dest_y - agent_y) / GRID_DIM)

        # TODO: 1. if we include maze state or not, and if we do, we would need to figure out
        # how to effectively send that along with task destinations
        return obs

    def get_adj(self): # TODO: Change this to use task description encoding.
        # In this case task description is the location of the destination.
        """
        Get adjacency matrix

        Returns:
        adj: (n_tasks, n_tasks) array with
             adj[i, j] = 1 - euclidean_distance(task_i, task_j) / GRID_DIM
        """
        agent_x, agent_y = self.agent
        # Task positions relative to the agent. The agent offset cancels in
        # the pairwise differences below, so the result depends only on the
        # task destinations.
        offsets = [(dest_x - agent_x, dest_y - agent_y) for dest_x, dest_y in self.tasks]

        # SOFT ATTENTION: closer task pairs get values nearer to 1. A
        # thresholded (hard) alternative would set entries to 1 when the
        # distance is at most ADJ_THRESHOLD.
        adj = np.zeros((self.n_tasks, self.n_tasks))
        for i, (xi, yi) in enumerate(offsets):
            for j, (xj, yj) in enumerate(offsets):
                gap = math.sqrt((xj - xi)**2 + (yj - yi)**2)
                # The distance is symmetric, so one write per (i, j) pair
                # yields the same matrix as writing both [i, j] and [j, i].
                adj[i, j] = 1 - float(gap)/GRID_DIM

        return adj

    def step(self, action):
        """
        Take one step in the gridworld according to the given action

        Params:
        action: 0 = up (x-1), 1 = down (x+1), 2 = left (y-1), 3 = right (y+1);
                any other value leaves the agent in place

        Returns:
        obs: observation after the move
        adj: adjacency matrix after the move
        reward: +1 for each task completed on this step, plus 1/distance for
                every task whose destination the agent is not standing on
        all_tasks_done: True once every task has been completed
        """
        self.steps += 1

        if action == 0:
            dx, dy = -1, 0
        elif action == 1:
            dx, dy = 1, 0
        elif action == 2:
            dx, dy = 0, -1
        elif action == 3:
            dx, dy = 0, 1
        else:
            dx, dy = 0, 0

        # Only move when the action is known and the target cell is legal.
        if (dx, dy) != (0, 0) and is_legal(self.agent[0] + dx, self.agent[1] + dy):
            self.agent[0] += dx
            self.agent[1] += dy

        total_reward = 0
        here_x, here_y = self.agent
        for task_id, (dest_x, dest_y) in enumerate(self.tasks):
            if dest_x == here_x and dest_y == here_y:
                # Completion bonus is paid only the first time the cell is reached.
                if self.dones[task_id] == 0:
                    self.dones[task_id] = 1
                    total_reward += 1
                    print("Task ", task_id, " completed at step ", self.steps)
                continue
            # Shaping term: inverse Euclidean distance (never zero here).
            total_reward += 1.0/float((math.sqrt((dest_x-here_x)**2 + (dest_y-here_y)**2)))

        # Only if all the tasks are done, then the episode is done
        all_tasks_done = 0 not in self.dones

        return self.get_obs(), self.get_adj(), total_reward, all_tasks_done

In [None]:
# Class for making the training with different set of hyper parameters
class Training:
    """
    Train the MTRL DGN agent, optionally sweeping a grid of hyper parameters.

    For every tryout (one hyper-parameter combination) the class trains a
    fresh model and writes one line to the results file containing the
    hyper parameters and the highest score observed in that tryout.

    params:
    file_name: name of the file to hold the results (truncated on creation)
    vizier: boolean to indicate whether to do hyper parameter search

    Call close() when finished to release the results file handle.
    """
    def __init__(self, file_name, vizier=False): # vizier indicates whether to do hyper param research
        self.file_name = file_name
        self.file = open(file_name, "w+")
        self.vizier = vizier

        if self.vizier:
            # Dictionary to give us all options for hyper parameters
            self.hyper_params = {
                'hidden_dim': [64, 128], # 2
                'max_step': [10000], # 1
                'gamma': [0.99], # 1
                'n_episode': [1000], # 1
                'buffer_size': [65000, 100000, 10000000], # 3
                'batch_size': [128], # 1
                'n_epoch': [75, 150], # 2
                'epsilon': [0.5, 0.7, 0.9], # 3
                'tau': [0.75, 0.95], # 2
                'learning_rate': [0.0005, 0.001, 0.005] # 3
            }

            self.tryouts = self._expand_grid(self.hyper_params)
            print('len(tryouts): {}'.format(len(self.tryouts)))
            self.num_tryout = len(self.tryouts)

        else:
            self.hidden_dim = 64
            self.max_step = 5000 #originally 500
            self.gamma = 0.99
            self.n_episode = 1000 #originally 800
            self.buffer_size = 65000 #change back to 65000
            self.batch_size = 64 #change back to 64
            self.n_epoch = 100 #orginally 25
            self.epsilon = 0.7 #originally 0.9
            self.tau = 0.98
            self.learning_rate = 0.001

            self.num_tryout = 1

        # Highest score seen in each tryout; needed in both modes because
        # train() reads and reports it unconditionally.
        self.highest_scores = [0] * self.num_tryout

    @staticmethod
    def _expand_grid(hyper_params):
        """Return every combination of the given options as a list of dicts.

        The first key varies fastest, so index k of the result matches the
        tryout numbering of the original nested-copy construction.
        """
        if not hyper_params:
            return []
        tryouts = [{}]
        for key, values in hyper_params.items():
            tryouts = [{**combo, key: value} for value in values for combo in tryouts]
        return tryouts

    def close(self):
        """Close the results file; safe to call more than once."""
        if not self.file.closed:
            self.file.close()

    def _run_episode(self, env, model, buff, epsilon, n_actions, device):
        """Play one episode of max_step steps with an epsilon-greedy policy.

        Every transition is added to buff. Returns the summed reward.
        """
        n_tasks, obs_dim = env.n_tasks, env.len_obs
        obs, adj = env.reset()
        # The flat observation is tiled so that every task row sees the same vector.
        obs = np.resize(obs, (n_tasks, obs_dim))
        episode_reward = 0

        for _ in range(self.max_step):
            if np.random.rand() < epsilon:
                action = np.random.randint(n_actions)
            else:
                # Acting needs no gradients; the Q-values of row 0 drive the choice.
                with torch.no_grad():
                    q = model(
                        torch.as_tensor(np.array([obs]), dtype=torch.float32, device=device),
                        torch.as_tensor(np.array([adj]), dtype=torch.float32, device=device),
                    )[0, 0, :]
                action = q.argmax().item()

            next_obs, next_adj, reward, terminated = env.step(action)
            next_obs = np.resize(next_obs, (n_tasks, obs_dim))
            buff.add(np.array(obs), action, reward, np.array(next_obs), adj, next_adj, terminated)

            obs, adj = next_obs, next_adj
            episode_reward += reward

        return episode_reward

    def _fit(self, model, model_tar, optimizer, buff, device, writer):
        """Run n_epoch Q-learning updates on batches sampled from buff.

        The target network is softly updated every 10 epochs. When writer is
        not None the loss of every epoch is logged under the tag 'Loss'.
        """
        # Sampling is without replacement, so a full batch must be available.
        if buff.len < self.batch_size:
            return

        rows = torch.arange(self.batch_size, device=device)
        for e in range(self.n_epoch):
            O, A, R, Next_O, Matrix, Next_Matrix, D = buff.getBatch(self.batch_size)
            O = torch.as_tensor(O, dtype=torch.float32, device=device)
            Matrix = torch.as_tensor(Matrix, dtype=torch.float32, device=device)
            Next_O = torch.as_tensor(Next_O, dtype=torch.float32, device=device)
            Next_Matrix = torch.as_tensor(Next_Matrix, dtype=torch.float32, device=device)

            q_values = model(O, Matrix)[:, 0, :]

            with torch.no_grad():
                target_q_values = model_tar(Next_O, Next_Matrix).max(dim=2)[0][:, 0]
                actions = torch.as_tensor(A[:, 0], dtype=torch.long, device=device)
                rewards = torch.as_tensor(R[:, 0], dtype=torch.float32, device=device)
                not_done = 1.0 - torch.as_tensor(D[:, 0], dtype=torch.float32, device=device)
                # Only the entry of the taken action is replaced by the TD
                # target; all other entries equal the prediction and so
                # contribute zero loss.
                expected_q = q_values.detach().clone()
                expected_q[rows, actions] = rewards + not_done * self.gamma * target_q_values

            loss = (q_values - expected_q).pow(2).mean()
            if writer is not None:
                writer.add_scalar('Loss', loss.item(), e)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if e % 10 == 0:
                with torch.no_grad():
                    for p, p_targ in zip(model.parameters(), model_tar.parameters()):
                        p_targ.mul_(self.tau)
                        p_targ.add_((1 - self.tau) * p)

    def train(self):
        """
        Train one model per tryout and append one result line per tryout to
        the results file.

        In non-vizier mode a TensorBoard writer is created for every episode
        (under ./TB-Logs/) and closed when the episode's updates are done; in
        vizier mode nothing is logged to TensorBoard.
        """
        env = GridWorldWithCare(NUM_TASKS)
        observation_space = env.len_obs
        n_actions = env.n_action
        n_tasks = env.n_tasks
        # Fall back to the CPU when no CUDA device is present.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        for tryout_index in range(self.num_tryout):
            # Set the hyper parameters
            if self.vizier:
                for name, value in self.tryouts[tryout_index].items():
                    setattr(self, name, value)

            print('-------\nTRYOUT[{}]: (hidden_dim={}, max_step={}, gamma={}, n_episode={}, buffer_size={}, batch_size={}, n_epoch={}, epsilon={}, tau={}, learning_rate={})'.format(
                tryout_index, self.hidden_dim, self.max_step, self.gamma, self.n_episode, self.buffer_size, self.batch_size, self.n_epoch, self.epsilon, self.tau, self.learning_rate
            ))

            buff = ReplayBufferGCare(self.buffer_size, observation_space, n_actions, n_tasks)
            model = MTRL_DGN(n_tasks, observation_space, self.hidden_dim, n_actions).to(device)
            model_tar = MTRL_DGN(n_tasks, observation_space, self.hidden_dim, n_actions).to(device)
            optimizer = optim.Adam(model.parameters(), lr = self.learning_rate)

            # Decay a local copy so self.epsilon keeps the configured value
            # and the result line reports the actual hyper parameter.
            epsilon = self.epsilon
            score = 0

            for i_episode in range(1, self.n_episode + 1):
                # Exploration decays from episode 42 onwards, floored at 0.01.
                if i_episode > 41:
                    epsilon = max(0.01, epsilon - 0.001)

                writer = None
                if not self.vizier:
                    log_file_name = self.file_name + str(i_episode) + "Episode"
                    writer = tb.SummaryWriter(log_dir='./TB-Logs/' + log_file_name)

                try:
                    score += self._run_episode(env, model, buff, epsilon, n_actions, device)

                    # The score is accumulated over 20 episodes, then reported and reset.
                    if i_episode % 20 == 0:
                        print(score)
                        if writer is not None:
                            writer.add_scalar("Score/Episode", score, i_episode)
                        if score > self.highest_scores[tryout_index]:
                            self.highest_scores[tryout_index] = score
                        score = 0

                    # Train the model
                    self._fit(model, model_tar, optimizer, buff, device, writer)
                finally:
                    if writer is not None:
                        writer.close()

            # Print the highest score to the file
            self.file.write('Tryout[{}]: (hidden_dim={}, max_step={}, gamma={}, n_episode={}, buffer_size={}, batch_size={}, n_epoch={}, epsilon={}, tau={}, learning_rate={}): \t ---> \t Highest Score: {}\n'.format(
                tryout_index, self.hidden_dim, self.max_step, self.gamma, self.n_episode, self.buffer_size, self.batch_size, self.n_epoch, self.epsilon, self.tau, self.learning_rate, self.highest_scores[tryout_index]
            ))

            self.file.flush()

In [30]:
# Launch the hyper-parameter sweep; one result line per tryout is written to
# VIZIER_OUTPUT.txt.
training = Training('VIZIER_OUTPUT.txt', vizier=True)
training.train()

len(tryouts): 104976
TASK NUMBER  0  DEST:  36 24
TASK NUMBER  1  DEST:  27 4
-------
TRYOUT[0]: (hidden_dim=32, max_step=500, gamma=0.99, n_episode=1000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  5 26
TASK NUMBER  1  DEST:  28 12
10.798059814414934
TASK NUMBER  0  DEST:  33 48
TASK NUMBER  1  DEST:  41 24
7.736252950302205
TASK NUMBER  0  DEST:  16 48
TASK NUMBER  1  DEST:  5 4
18.924235715113483
TASK NUMBER  0  DEST:  31 33
TASK NUMBER  1  DEST:  30 18
9.222761257257707
TASK NUMBER  0  DEST:  22 39
TASK NUMBER  1  DEST:  6 30
13.937725752974556
TASK NUMBER  0  DEST:  19 7
TASK NUMBER  1  DEST:  36 6
14.771769176326092
TASK NUMBER  0  DEST:  30 15
TASK NUMBER  1  DEST:  1 44
7.430241715576467
TASK NUMBER  0  DEST:  29 39
TASK NUMBER  1  DEST:  7 28
7.693890422074836
TASK NUMBER  0  DEST:  47 36
TASK NUMBER  1  DEST:  22 49
19.410906983987868
TASK NUMBER  0  DEST:  12 29
TASK NUMBER  1  DEST:  31 41
26.5389376686621

TASK NUMBER  0  DEST:  3 15
TASK NUMBER  1  DEST:  49 19
17.782958504894243
TASK NUMBER  0  DEST:  38 11
TASK NUMBER  1  DEST:  37 35
14.120024327235349
TASK NUMBER  0  DEST:  9 12
TASK NUMBER  1  DEST:  39 21
28.08103104665062
TASK NUMBER  0  DEST:  23 12
TASK NUMBER  1  DEST:  5 10
11.072118042642707
TASK NUMBER  0  DEST:  0 32
TASK NUMBER  1  DEST:  5 11
7.400956404482855
-------
TRYOUT[9]: (hidden_dim=32, max_step=10000, gamma=0.99, n_episode=1000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  17 36
TASK NUMBER  1  DEST:  27 21
7.937045840450337
TASK NUMBER  0  DEST:  12 2
TASK NUMBER  1  DEST:  34 22
8.807365145655956
TASK NUMBER  0  DEST:  24 1
TASK NUMBER  1  DEST:  10 47
8.68409066157631
TASK NUMBER  0  DEST:  18 33
TASK NUMBER  1  DEST:  29 37
32.73556375749102
TASK NUMBER  0  DEST:  5 32
TASK NUMBER  1  DEST:  36 25
15.093109716863092
TASK NUMBER  0  DEST:  2 10
TASK NUMBER  1  DEST:  49 10
12.513019612736606


TASK NUMBER  0  DEST:  40 13
TASK NUMBER  1  DEST:  21 41
8.645335769173053
TASK NUMBER  0  DEST:  31 21
TASK NUMBER  1  DEST:  21 36
26.391422231603865
TASK NUMBER  0  DEST:  5 17
TASK NUMBER  1  DEST:  12 17
13.269305045744474
TASK NUMBER  0  DEST:  6 28
TASK NUMBER  1  DEST:  33 11
14.08874388101183
TASK NUMBER  0  DEST:  21 10
TASK NUMBER  1  DEST:  43 40
9.78764337181089
TASK NUMBER  0  DEST:  31 27
TASK NUMBER  1  DEST:  44 29
11.608514963860191
TASK NUMBER  0  DEST:  42 21
TASK NUMBER  1  DEST:  12 35
22.38514700943878
TASK NUMBER  0  DEST:  1 30
TASK NUMBER  1  DEST:  13 36
17.803500167817337
-------
TRYOUT[18]: (hidden_dim=32, max_step=5000, gamma=0.999, n_episode=1000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  24 5
TASK NUMBER  1  DEST:  38 14
5.6511815705460595
TASK NUMBER  0  DEST:  25 19
TASK NUMBER  1  DEST:  17 48
9.974069033206813
TASK NUMBER  0  DEST:  15 18
TASK NUMBER  1  DEST:  12 28
19.411358717

TASK NUMBER  0  DEST:  21 31
TASK NUMBER  1  DEST:  11 13
20.245787328990193
TASK NUMBER  0  DEST:  21 40
TASK NUMBER  1  DEST:  41 36
16.515698083158988
TASK NUMBER  0  DEST:  42 18
TASK NUMBER  1  DEST:  17 33
9.821570742187063
TASK NUMBER  0  DEST:  25 42
TASK NUMBER  1  DEST:  2 22
12.425951682380472
TASK NUMBER  0  DEST:  38 23
TASK NUMBER  1  DEST:  18 28
11.173763662027964
TASK NUMBER  0  DEST:  6 6
TASK NUMBER  1  DEST:  30 40
8.986805701915987
TASK NUMBER  0  DEST:  7 11
TASK NUMBER  1  DEST:  6 7
10.541921680371145
TASK NUMBER  0  DEST:  18 48
TASK NUMBER  1  DEST:  16 25
5.487949368487205
TASK NUMBER  0  DEST:  19 21
TASK NUMBER  1  DEST:  26 6
6.532282785160202
-------
TRYOUT[27]: (hidden_dim=32, max_step=1000, gamma=0.9999, n_episode=1000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  43 12
TASK NUMBER  1  DEST:  17 20
9.001482260504728
TASK NUMBER  0  DEST:  43 33
TASK NUMBER  1  DEST:  28 41
15.1471158319

-------
TRYOUT[35]: (hidden_dim=128, max_step=10000, gamma=0.9999, n_episode=1000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  23 14
TASK NUMBER  1  DEST:  16 21
8.019814468643709
TASK NUMBER  0  DEST:  28 11
TASK NUMBER  1  DEST:  39 22
9.1750569018488
TASK NUMBER  0  DEST:  11 25
TASK NUMBER  1  DEST:  3 22
8.100542746920567
TASK NUMBER  0  DEST:  46 33
TASK NUMBER  1  DEST:  48 22
45.24256234094425
TASK NUMBER  0  DEST:  29 32
TASK NUMBER  1  DEST:  48 38
8.320554296565929
TASK NUMBER  0  DEST:  23 43
TASK NUMBER  1  DEST:  34 6
11.606130943434312
TASK NUMBER  0  DEST:  29 21
TASK NUMBER  1  DEST:  34 40
16.592347782063335
TASK NUMBER  0  DEST:  38 2
TASK NUMBER  1  DEST:  6 12
6.250610908827896
TASK NUMBER  0  DEST:  24 5
TASK NUMBER  1  DEST:  8 5
22.98554627094043
TASK NUMBER  0  DEST:  0 38
TASK NUMBER  1  DEST:  32 26
10.789530224736177
-------
TRYOUT[36]: (hidden_dim=32, max_step=500, gamma=0.99, n_episode=20

8.302628056076655
TASK NUMBER  0  DEST:  31 48
TASK NUMBER  1  DEST:  3 6
6.863129205696584
TASK NUMBER  0  DEST:  39 32
TASK NUMBER  1  DEST:  4 10
12.95101299527751
-------
TRYOUT[44]: (hidden_dim=128, max_step=5000, gamma=0.99, n_episode=2000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  20 24
TASK NUMBER  1  DEST:  31 0
23.66595441387116
TASK NUMBER  0  DEST:  13 22
TASK NUMBER  1  DEST:  32 18
11.682919849853786
TASK NUMBER  0  DEST:  34 30
TASK NUMBER  1  DEST:  24 4
6.686493226689904
TASK NUMBER  0  DEST:  32 36
TASK NUMBER  1  DEST:  12 32
16.875570544513565
TASK NUMBER  0  DEST:  30 38
TASK NUMBER  1  DEST:  28 48
7.2730867347052195
TASK NUMBER  0  DEST:  9 44
TASK NUMBER  1  DEST:  43 13
14.52236115564451
TASK NUMBER  0  DEST:  33 41
TASK NUMBER  1  DEST:  20 40
12.327799316551669
TASK NUMBER  0  DEST:  28 11
TASK NUMBER  1  DEST:  20 4
19.980812184419335
TASK NUMBER  0  DEST:  12 1
TASK NUMBER  1  DEST:  25 

7.5913714356101645
TASK NUMBER  0  DEST:  7 34
TASK NUMBER  1  DEST:  35 25
16.460639888270805
TASK NUMBER  0  DEST:  37 18
TASK NUMBER  1  DEST:  22 34
12.966423669746229
TASK NUMBER  0  DEST:  35 24
TASK NUMBER  1  DEST:  2 7
12.381741188417875
TASK NUMBER  0  DEST:  16 19
TASK NUMBER  1  DEST:  13 44
8.121734771741513
-------
TRYOUT[53]: (hidden_dim=128, max_step=1000, gamma=0.999, n_episode=2000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  47 28
TASK NUMBER  1  DEST:  7 10
9.50385567902437
TASK NUMBER  0  DEST:  2 36
TASK NUMBER  1  DEST:  17 7
5.239555378957874
TASK NUMBER  0  DEST:  21 18
TASK NUMBER  1  DEST:  17 40
17.478179874607306
TASK NUMBER  0  DEST:  44 44
TASK NUMBER  1  DEST:  30 10
6.978104486363177
TASK NUMBER  0  DEST:  38 47
TASK NUMBER  1  DEST:  32 31
7.250145943690546
TASK NUMBER  0  DEST:  24 17
TASK NUMBER  1  DEST:  38 35
9.233428647241013
TASK NUMBER  0  DEST:  41 11
TASK NUMBER  1  DEST:  1

7.884811115614954
TASK NUMBER  0  DEST:  49 18
TASK NUMBER  1  DEST:  41 18
8.904136831469486
TASK NUMBER  0  DEST:  10 22
TASK NUMBER  1  DEST:  35 2
7.987145264026186
TASK NUMBER  0  DEST:  6 28
TASK NUMBER  1  DEST:  7 24
7.840668506141436
TASK NUMBER  0  DEST:  47 10
TASK NUMBER  1  DEST:  19 24
Task  1  completed at step  65
23.3691128300997
TASK NUMBER  0  DEST:  27 3
TASK NUMBER  1  DEST:  0 25
5.636545648265042
TASK NUMBER  0  DEST:  16 39
TASK NUMBER  1  DEST:  2 36
5.887596387934137
TASK NUMBER  0  DEST:  46 33
TASK NUMBER  1  DEST:  15 15
Task  0  completed at step  92
31.909457612414098
-------
TRYOUT[62]: (hidden_dim=128, max_step=500, gamma=0.9999, n_episode=2000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  28 7
TASK NUMBER  1  DEST:  26 33
13.059545320555735
TASK NUMBER  0  DEST:  39 35
TASK NUMBER  1  DEST:  29 36
21.635818651185236
TASK NUMBER  0  DEST:  40 26
TASK NUMBER  1  DEST:  5 2
8.481806186611

20.35527902231965
-------
TRYOUT[70]: (hidden_dim=64, max_step=10000, gamma=0.9999, n_episode=2000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  18 41
TASK NUMBER  1  DEST:  44 37
18.5482945379188
TASK NUMBER  0  DEST:  31 26
TASK NUMBER  1  DEST:  44 17
10.056192659815643
TASK NUMBER  0  DEST:  36 31
TASK NUMBER  1  DEST:  10 13
11.179877493160626
TASK NUMBER  0  DEST:  41 36
TASK NUMBER  1  DEST:  39 24
16.156902706166186
TASK NUMBER  0  DEST:  37 3
TASK NUMBER  1  DEST:  3 46
12.104104591552177
TASK NUMBER  0  DEST:  7 0
TASK NUMBER  1  DEST:  49 0
5.936743277696898
TASK NUMBER  0  DEST:  29 37
TASK NUMBER  1  DEST:  39 10
8.333986232331885
TASK NUMBER  0  DEST:  33 0
TASK NUMBER  1  DEST:  30 16
8.088793720525489
TASK NUMBER  0  DEST:  5 39
TASK NUMBER  1  DEST:  10 4
11.26680961386783
TASK NUMBER  0  DEST:  12 33
TASK NUMBER  1  DEST:  23 2
10.547516514790926
-------
TRYOUT[71]: (hidden_dim=128, max_step=10000, ga

14.651170893126965
TASK NUMBER  0  DEST:  20 15
TASK NUMBER  1  DEST:  7 48
13.957525679060998
TASK NUMBER  0  DEST:  37 31
TASK NUMBER  1  DEST:  0 49
5.777685177134026
-------
TRYOUT[79]: (hidden_dim=64, max_step=5000, gamma=0.99, n_episode=5000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  31 49
TASK NUMBER  1  DEST:  38 20
14.779272427857162
TASK NUMBER  0  DEST:  18 23
TASK NUMBER  1  DEST:  41 47
12.902052921732185
TASK NUMBER  0  DEST:  1 32
TASK NUMBER  1  DEST:  2 0
5.180915811696496
TASK NUMBER  0  DEST:  4 13
TASK NUMBER  1  DEST:  36 3
4.902394117591553
TASK NUMBER  0  DEST:  21 22
TASK NUMBER  1  DEST:  41 47
9.518814497535017
TASK NUMBER  0  DEST:  46 6
TASK NUMBER  1  DEST:  8 5
10.05763703253637
TASK NUMBER  0  DEST:  25 36
TASK NUMBER  1  DEST:  20 36
12.001672477484869
TASK NUMBER  0  DEST:  12 35
TASK NUMBER  1  DEST:  27 15
15.124387919996186
TASK NUMBER  0  DEST:  44 41
TASK NUMBER  1  DEST:  40 33

41.550544240214485
TASK NUMBER  0  DEST:  6 34
TASK NUMBER  1  DEST:  16 49
Task  1  completed at step  80
37.20049228618771
TASK NUMBER  0  DEST:  42 27
TASK NUMBER  1  DEST:  22 6
18.037352784830865
TASK NUMBER  0  DEST:  3 21
TASK NUMBER  1  DEST:  45 6
6.24997488100928
TASK NUMBER  0  DEST:  37 11
TASK NUMBER  1  DEST:  12 46
18.212811442501547
-------
TRYOUT[88]: (hidden_dim=64, max_step=1000, gamma=0.999, n_episode=5000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  49 11
TASK NUMBER  1  DEST:  41 30
7.43545613843581
TASK NUMBER  0  DEST:  21 25
TASK NUMBER  1  DEST:  11 6
9.579325291478083
TASK NUMBER  0  DEST:  4 2
TASK NUMBER  1  DEST:  48 20
13.455798196229933
TASK NUMBER  0  DEST:  25 4
TASK NUMBER  1  DEST:  4 17
12.396128047666059
TASK NUMBER  0  DEST:  44 11
TASK NUMBER  1  DEST:  28 16
6.831988690396425
TASK NUMBER  0  DEST:  32 46
TASK NUMBER  1  DEST:  16 31
5.892758119256468
TASK NUMBER  0  DEST:  4 25

9.80791190429704
TASK NUMBER  0  DEST:  45 3
TASK NUMBER  1  DEST:  28 45
10.376794527660248
TASK NUMBER  0  DEST:  24 22
TASK NUMBER  1  DEST:  44 46
14.399710238076132
TASK NUMBER  0  DEST:  44 21
TASK NUMBER  1  DEST:  10 42
13.453999430701536
TASK NUMBER  0  DEST:  47 6
TASK NUMBER  1  DEST:  25 19
7.192049538930243
TASK NUMBER  0  DEST:  21 22
TASK NUMBER  1  DEST:  13 37
12.454904451391139
TASK NUMBER  0  DEST:  43 21
TASK NUMBER  1  DEST:  20 32
10.231270890371276
TASK NUMBER  0  DEST:  35 44
TASK NUMBER  1  DEST:  30 2
7.531262184936885
TASK NUMBER  0  DEST:  23 18
TASK NUMBER  1  DEST:  5 13
13.910503713546056
-------
TRYOUT[97]: (hidden_dim=64, max_step=500, gamma=0.9999, n_episode=5000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  22 29
TASK NUMBER  1  DEST:  25 46
6.4703788218965395
TASK NUMBER  0  DEST:  19 28
TASK NUMBER  1  DEST:  16 22
7.648646897726035
TASK NUMBER  0  DEST:  27 32
TASK NUMBER  1  DEST:

13.750231282929967
TASK NUMBER  0  DEST:  16 43
TASK NUMBER  1  DEST:  38 4
11.13115576122387
TASK NUMBER  0  DEST:  11 22
TASK NUMBER  1  DEST:  11 5
16.935298874712487
TASK NUMBER  0  DEST:  23 30
TASK NUMBER  1  DEST:  33 25
12.423900333735228
TASK NUMBER  0  DEST:  39 2
TASK NUMBER  1  DEST:  1 4
11.37801726915309
TASK NUMBER  0  DEST:  18 21
TASK NUMBER  1  DEST:  34 49
7.80468676111139
TASK NUMBER  0  DEST:  41 40
TASK NUMBER  1  DEST:  34 35
10.705977305962573
TASK NUMBER  0  DEST:  16 4
TASK NUMBER  1  DEST:  38 4
6.582645385979365
TASK NUMBER  0  DEST:  4 4
TASK NUMBER  1  DEST:  13 24
32.558437870469234
TASK NUMBER  0  DEST:  2 16
TASK NUMBER  1  DEST:  48 37
11.600209781082972
-------
TRYOUT[106]: (hidden_dim=64, max_step=10000, gamma=0.9999, n_episode=5000, buffer_size=65000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  38 11
TASK NUMBER  1  DEST:  30 46
5.711118133231565
TASK NUMBER  0  DEST:  1 32
TASK NUMBER  1  DEST:  22 

12.438561621995449
TASK NUMBER  0  DEST:  43 23
TASK NUMBER  1  DEST:  22 16
8.78864038855017
TASK NUMBER  0  DEST:  12 16
TASK NUMBER  1  DEST:  33 27
21.80954037475913
TASK NUMBER  0  DEST:  24 10
TASK NUMBER  1  DEST:  20 34
Task  0  completed at step  48
24.67826486924419
TASK NUMBER  0  DEST:  27 27
TASK NUMBER  1  DEST:  34 11
9.132442419463198
TASK NUMBER  0  DEST:  44 25
TASK NUMBER  1  DEST:  31 44
5.628604152974789
TASK NUMBER  0  DEST:  29 22
TASK NUMBER  1  DEST:  7 16
31.801082911299172
TASK NUMBER  0  DEST:  30 9
TASK NUMBER  1  DEST:  32 18
8.65300928607204
TASK NUMBER  0  DEST:  31 26
TASK NUMBER  1  DEST:  27 8
16.88617828481274
TASK NUMBER  0  DEST:  49 45
TASK NUMBER  1  DEST:  21 33
7.641540961234563
-------
TRYOUT[115]: (hidden_dim=64, max_step=5000, gamma=0.99, n_episode=1000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  4 1
TASK NUMBER  1  DEST:  33 26
21.57447174349044
TASK NUMBER  0  DEST:  7 

TASK NUMBER  0  DEST:  27 44
TASK NUMBER  1  DEST:  2 11
9.483218878230728
-------
TRYOUT[123]: (hidden_dim=32, max_step=1000, gamma=0.999, n_episode=1000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  2 13
TASK NUMBER  1  DEST:  21 25
8.004577919570368
TASK NUMBER  0  DEST:  24 9
TASK NUMBER  1  DEST:  46 6
6.329121489275901
TASK NUMBER  0  DEST:  6 36
TASK NUMBER  1  DEST:  42 45
17.503576722014085
TASK NUMBER  0  DEST:  6 15
TASK NUMBER  1  DEST:  8 39
13.18386671288865
TASK NUMBER  0  DEST:  8 41
TASK NUMBER  1  DEST:  29 3
6.732671129841714
TASK NUMBER  0  DEST:  42 33
TASK NUMBER  1  DEST:  37 44
6.659233280494884
TASK NUMBER  0  DEST:  43 45
TASK NUMBER  1  DEST:  22 42
5.205206853679937
TASK NUMBER  0  DEST:  40 31
TASK NUMBER  1  DEST:  48 17
7.299412376495655
TASK NUMBER  0  DEST:  10 25
TASK NUMBER  1  DEST:  6 34
Task  0  completed at step  14
37.81820863105957
TASK NUMBER  0  DEST:  29 42
TASK NUMBER  1  D

TASK NUMBER  0  DEST:  20 32
TASK NUMBER  1  DEST:  29 14
Task  0  completed at step  61
25.77444000864088
TASK NUMBER  0  DEST:  35 28
TASK NUMBER  1  DEST:  18 37
9.719071318702342
TASK NUMBER  0  DEST:  1 19
TASK NUMBER  1  DEST:  44 40
10.119832852159389
TASK NUMBER  0  DEST:  24 41
TASK NUMBER  1  DEST:  36 47
6.5349000480529424
TASK NUMBER  0  DEST:  24 34
TASK NUMBER  1  DEST:  30 45
11.426864951401535
-------
TRYOUT[132]: (hidden_dim=32, max_step=500, gamma=0.9999, n_episode=1000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  36 21
TASK NUMBER  1  DEST:  44 26
9.273270691695773
TASK NUMBER  0  DEST:  44 49
TASK NUMBER  1  DEST:  48 29
8.783754140197527
TASK NUMBER  0  DEST:  45 43
TASK NUMBER  1  DEST:  12 45
11.10326453610728
TASK NUMBER  0  DEST:  45 35
TASK NUMBER  1  DEST:  31 44
9.640359010747197
TASK NUMBER  0  DEST:  19 22
TASK NUMBER  1  DEST:  4 9
22.292514773880143
TASK NUMBER  0  DEST:  22 3
TASK NUM

TASK NUMBER  0  DEST:  49 1
TASK NUMBER  1  DEST:  47 5
Task  0  completed at step  77
53.290484855252146
TASK NUMBER  0  DEST:  48 31
TASK NUMBER  1  DEST:  7 46
12.779513358177528
TASK NUMBER  0  DEST:  1 23
TASK NUMBER  1  DEST:  47 41
7.844940959524424
TASK NUMBER  0  DEST:  16 0
TASK NUMBER  1  DEST:  39 47
9.903640490189542
TASK NUMBER  0  DEST:  17 27
TASK NUMBER  1  DEST:  34 19
21.51303543056964
TASK NUMBER  0  DEST:  10 25
TASK NUMBER  1  DEST:  23 35
20.217486379023526
TASK NUMBER  0  DEST:  20 15
TASK NUMBER  1  DEST:  34 4
17.16287028794884
TASK NUMBER  0  DEST:  46 1
TASK NUMBER  1  DEST:  15 13
12.340789677117778
TASK NUMBER  0  DEST:  0 48
TASK NUMBER  1  DEST:  47 44
5.3764803967721
-------
TRYOUT[141]: (hidden_dim=32, max_step=10000, gamma=0.9999, n_episode=1000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  34 5
TASK NUMBER  1  DEST:  31 29
9.002359905801073
TASK NUMBER  0  DEST:  38 39
TASK NUMBER  

-------
TRYOUT[149]: (hidden_dim=128, max_step=1000, gamma=0.99, n_episode=2000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  7 40
TASK NUMBER  1  DEST:  44 22
16.503879491221856
TASK NUMBER  0  DEST:  2 47
TASK NUMBER  1  DEST:  27 17
12.07361725178518
TASK NUMBER  0  DEST:  24 48
TASK NUMBER  1  DEST:  20 18
8.411397440175062
TASK NUMBER  0  DEST:  42 28
TASK NUMBER  1  DEST:  13 33
11.196037203425227
TASK NUMBER  0  DEST:  35 14
TASK NUMBER  1  DEST:  5 48
Task  0  completed at step  11
19.417374881600335
TASK NUMBER  0  DEST:  36 15
TASK NUMBER  1  DEST:  37 45
11.549834476806017
TASK NUMBER  0  DEST:  31 26
TASK NUMBER  1  DEST:  23 18
19.734159256574525
TASK NUMBER  0  DEST:  37 4
TASK NUMBER  1  DEST:  18 30
14.00621329579799
TASK NUMBER  0  DEST:  26 29
TASK NUMBER  1  DEST:  21 49
10.696392430193574
TASK NUMBER  0  DEST:  31 11
TASK NUMBER  1  DEST:  28 8
11.164625425173515
-------
TRYOUT[150]: (hidden_dim=32

TASK NUMBER  0  DEST:  1 6
TASK NUMBER  1  DEST:  30 34
9.554887433268174
TASK NUMBER  0  DEST:  4 20
TASK NUMBER  1  DEST:  31 1
15.849617888355171
TASK NUMBER  0  DEST:  24 32
TASK NUMBER  1  DEST:  35 8
15.254413480037403
-------
TRYOUT[158]: (hidden_dim=128, max_step=500, gamma=0.999, n_episode=2000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  7 36
TASK NUMBER  1  DEST:  7 44
35.78097380143305
TASK NUMBER  0  DEST:  28 4
TASK NUMBER  1  DEST:  39 3
9.172927337095329
TASK NUMBER  0  DEST:  12 45
TASK NUMBER  1  DEST:  39 46
8.692120468192508
TASK NUMBER  0  DEST:  35 6
TASK NUMBER  1  DEST:  9 49
8.655756665650589
TASK NUMBER  0  DEST:  35 6
TASK NUMBER  1  DEST:  8 10
9.573468166273601
TASK NUMBER  0  DEST:  24 9
TASK NUMBER  1  DEST:  24 11
11.021319535847205
TASK NUMBER  0  DEST:  1 15
TASK NUMBER  1  DEST:  7 36
10.912673041136525
TASK NUMBER  0  DEST:  2 4
TASK NUMBER  1  DEST:  47 26
31.96923468400284
TASK N

TASK NUMBER  0  DEST:  13 32
TASK NUMBER  1  DEST:  19 22
10.300496957142022
TASK NUMBER  0  DEST:  45 27
TASK NUMBER  1  DEST:  19 39
35.206969268502434
TASK NUMBER  0  DEST:  15 49
TASK NUMBER  1  DEST:  31 38
17.242225994408532
TASK NUMBER  0  DEST:  43 47
TASK NUMBER  1  DEST:  25 6
10.908265258918576
TASK NUMBER  0  DEST:  38 10
TASK NUMBER  1  DEST:  2 37
11.72733620642139
-------
TRYOUT[167]: (hidden_dim=128, max_step=10000, gamma=0.999, n_episode=2000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  43 38
TASK NUMBER  1  DEST:  22 10
9.649665219071862
TASK NUMBER  0  DEST:  25 49
TASK NUMBER  1  DEST:  11 17
25.436628779325424
TASK NUMBER  0  DEST:  39 0
TASK NUMBER  1  DEST:  0 17
8.948538978466097
TASK NUMBER  0  DEST:  11 31
TASK NUMBER  1  DEST:  3 38
16.977563027192886
TASK NUMBER  0  DEST:  3 18
TASK NUMBER  1  DEST:  2 19
7.422135112096128
TASK NUMBER  0  DEST:  31 4
TASK NUMBER  1  DEST:  12 22
14.3538214

TASK NUMBER  0  DEST:  6 0
TASK NUMBER  1  DEST:  38 5
10.328625358576128
TASK NUMBER  0  DEST:  15 34
TASK NUMBER  1  DEST:  43 43
20.74176692123872
TASK NUMBER  0  DEST:  26 35
TASK NUMBER  1  DEST:  21 26
12.009848831875045
TASK NUMBER  0  DEST:  16 41
TASK NUMBER  1  DEST:  30 29
18.126464826664286
TASK NUMBER  0  DEST:  18 5
TASK NUMBER  1  DEST:  35 30
17.658001539810964
TASK NUMBER  0  DEST:  48 47
TASK NUMBER  1  DEST:  38 31
11.412830684467474
TASK NUMBER  0  DEST:  29 8
TASK NUMBER  1  DEST:  14 1
6.9675340467220455
TASK NUMBER  0  DEST:  45 7
TASK NUMBER  1  DEST:  13 21
36.27051450566944
TASK NUMBER  0  DEST:  33 13
TASK NUMBER  1  DEST:  42 0
5.875176711082375
-------
TRYOUT[176]: (hidden_dim=128, max_step=5000, gamma=0.9999, n_episode=2000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  18 35
TASK NUMBER  1  DEST:  28 39
14.62586305941573
TASK NUMBER  0  DEST:  15 9
TASK NUMBER  1  DEST:  5 34
5.2966369562

TASK NUMBER  0  DEST:  16 49
TASK NUMBER  1  DEST:  42 40
5.0204187137854985
TASK NUMBER  0  DEST:  29 38
TASK NUMBER  1  DEST:  12 2
27.667759898594447
TASK NUMBER  0  DEST:  9 10
TASK NUMBER  1  DEST:  12 20
7.375037692413562
TASK NUMBER  0  DEST:  20 37
TASK NUMBER  1  DEST:  40 20
10.427044446664208
TASK NUMBER  0  DEST:  27 36
TASK NUMBER  1  DEST:  43 22
8.189749205671676
TASK NUMBER  0  DEST:  46 11
TASK NUMBER  1  DEST:  3 29
6.254692465047345
TASK NUMBER  0  DEST:  42 48
TASK NUMBER  1  DEST:  30 2
6.436429112154755
TASK NUMBER  0  DEST:  6 17
TASK NUMBER  1  DEST:  19 1
6.33867853157599
TASK NUMBER  0  DEST:  40 13
TASK NUMBER  1  DEST:  7 18
Task  1  completed at step  3
17.860751053725583
-------
TRYOUT[185]: (hidden_dim=128, max_step=1000, gamma=0.99, n_episode=5000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  23 25
TASK NUMBER  1  DEST:  37 49
7.5833951747089685
TASK NUMBER  0  DEST:  23 34
TASK NUMBER 

-------
TRYOUT[193]: (hidden_dim=64, max_step=500, gamma=0.999, n_episode=5000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  40 46
TASK NUMBER  1  DEST:  15 18
Task  1  completed at step  31
19.92255893415952
TASK NUMBER  0  DEST:  47 33
TASK NUMBER  1  DEST:  20 17
9.750747005435699
TASK NUMBER  0  DEST:  19 3
TASK NUMBER  1  DEST:  49 30
7.545280246012037
TASK NUMBER  0  DEST:  5 38
TASK NUMBER  1  DEST:  16 17
18.989325986289696
TASK NUMBER  0  DEST:  31 18
TASK NUMBER  1  DEST:  16 37
13.184671284147857
TASK NUMBER  0  DEST:  8 39
TASK NUMBER  1  DEST:  27 48
13.784946420683747
TASK NUMBER  0  DEST:  4 2
TASK NUMBER  1  DEST:  13 40
Task  0  completed at step  56
30.388741948247226
TASK NUMBER  0  DEST:  24 6
TASK NUMBER  1  DEST:  34 46
8.271325262858083
TASK NUMBER  0  DEST:  42 19
TASK NUMBER  1  DEST:  47 41
8.820162345047077
TASK NUMBER  0  DEST:  24 3
TASK NUMBER  1  DEST:  48 28
16.600530335364304
-------
T

TASK NUMBER  0  DEST:  3 38
TASK NUMBER  1  DEST:  38 36
12.965894522330105
TASK NUMBER  0  DEST:  36 22
TASK NUMBER  1  DEST:  5 34
14.794820362643122
TASK NUMBER  0  DEST:  13 23
TASK NUMBER  1  DEST:  11 0
8.98796738712198
TASK NUMBER  0  DEST:  23 47
TASK NUMBER  1  DEST:  39 33
20.956779054700018
TASK NUMBER  0  DEST:  13 34
TASK NUMBER  1  DEST:  45 1
7.544414382574254
-------
TRYOUT[202]: (hidden_dim=64, max_step=10000, gamma=0.999, n_episode=5000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  12 14
TASK NUMBER  1  DEST:  15 44
22.059432273618917
TASK NUMBER  0  DEST:  1 36
TASK NUMBER  1  DEST:  2 44
7.48228830225071
TASK NUMBER  0  DEST:  25 9
TASK NUMBER  1  DEST:  36 34
10.880900877512701
TASK NUMBER  0  DEST:  0 29
TASK NUMBER  1  DEST:  12 42
7.6336347394817645
TASK NUMBER  0  DEST:  8 41
TASK NUMBER  1  DEST:  18 42
6.96797843330447
TASK NUMBER  0  DEST:  3 17
TASK NUMBER  1  DEST:  12 44
10.4576996599561

TASK NUMBER  0  DEST:  29 35
TASK NUMBER  1  DEST:  15 26
23.036605392728216
TASK NUMBER  0  DEST:  11 6
TASK NUMBER  1  DEST:  5 0
29.41404628089745
TASK NUMBER  0  DEST:  13 45
TASK NUMBER  1  DEST:  34 48
5.6494108328384405
TASK NUMBER  0  DEST:  48 47
TASK NUMBER  1  DEST:  15 36
6.479464718723183
TASK NUMBER  0  DEST:  2 14
TASK NUMBER  1  DEST:  25 25
18.645128918273016
TASK NUMBER  0  DEST:  30 18
TASK NUMBER  1  DEST:  6 14
22.139131178397374
TASK NUMBER  0  DEST:  13 26
TASK NUMBER  1  DEST:  45 31
8.628193023932358
TASK NUMBER  0  DEST:  35 0
TASK NUMBER  1  DEST:  21 20
19.153664755168634
TASK NUMBER  0  DEST:  39 9
TASK NUMBER  1  DEST:  2 40
10.15120843967605
-------
TRYOUT[211]: (hidden_dim=64, max_step=5000, gamma=0.9999, n_episode=5000, buffer_size=100000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  49 42
TASK NUMBER  1  DEST:  35 38
26.594876727518837
TASK NUMBER  0  DEST:  19 35
TASK NUMBER  1  DEST:  43 42
25.60528898

-------
TRYOUT[219]: (hidden_dim=32, max_step=1000, gamma=0.99, n_episode=1000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  16 39
TASK NUMBER  1  DEST:  44 47
15.36950779166516
TASK NUMBER  0  DEST:  37 24
TASK NUMBER  1  DEST:  41 33
8.63101625505436
TASK NUMBER  0  DEST:  10 45
TASK NUMBER  1  DEST:  19 38
5.221992901483714
TASK NUMBER  0  DEST:  25 12
TASK NUMBER  1  DEST:  4 45
9.860615641168696
TASK NUMBER  0  DEST:  49 34
TASK NUMBER  1  DEST:  3 13
9.329620364280583
TASK NUMBER  0  DEST:  49 15
TASK NUMBER  1  DEST:  49 5
14.95319979722902
TASK NUMBER  0  DEST:  37 5
TASK NUMBER  1  DEST:  17 9
6.7832548203953795
TASK NUMBER  0  DEST:  12 33
TASK NUMBER  1  DEST:  17 44
9.584067307660241
TASK NUMBER  0  DEST:  14 19
TASK NUMBER  1  DEST:  25 28
16.663157448786695
TASK NUMBER  0  DEST:  37 47
TASK NUMBER  1  DEST:  36 28
20.156249564764355
-------
TRYOUT[220]: (hidden_dim=64, max_step=1000, gamma=0.99, n_epis

TASK NUMBER  0  DEST:  47 41
TASK NUMBER  1  DEST:  2 7
Task  1  completed at step  13
58.128190235604734
TASK NUMBER  0  DEST:  17 43
TASK NUMBER  1  DEST:  1 14
13.836232806616497
TASK NUMBER  0  DEST:  46 37
TASK NUMBER  1  DEST:  24 34
14.983354736488733
-------
TRYOUT[228]: (hidden_dim=32, max_step=500, gamma=0.999, n_episode=1000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  48 15
TASK NUMBER  1  DEST:  40 9
36.537044122888645
TASK NUMBER  0  DEST:  32 3
TASK NUMBER  1  DEST:  37 44
12.923113236954542
TASK NUMBER  0  DEST:  42 15
TASK NUMBER  1  DEST:  14 4
6.502560398025513
TASK NUMBER  0  DEST:  44 11
TASK NUMBER  1  DEST:  34 46
21.325107430417933
TASK NUMBER  0  DEST:  28 32
TASK NUMBER  1  DEST:  14 5
9.663890662141498
TASK NUMBER  0  DEST:  49 46
TASK NUMBER  1  DEST:  7 20
11.851956849536702
TASK NUMBER  0  DEST:  34 4
TASK NUMBER  1  DEST:  4 3
Task  0  completed at step  104
23.62420594969677
TASK NUM

TASK NUMBER  0  DEST:  18 24
TASK NUMBER  1  DEST:  27 7
11.715189055480764
TASK NUMBER  0  DEST:  30 12
TASK NUMBER  1  DEST:  34 49
7.721286216439564
TASK NUMBER  0  DEST:  28 7
TASK NUMBER  1  DEST:  8 21
12.704563228962103
TASK NUMBER  0  DEST:  23 24
TASK NUMBER  1  DEST:  9 20
10.22681112317349
TASK NUMBER  0  DEST:  48 17
TASK NUMBER  1  DEST:  29 20
10.848018237581972
TASK NUMBER  0  DEST:  33 24
TASK NUMBER  1  DEST:  29 26
10.782891868088742
TASK NUMBER  0  DEST:  15 16
TASK NUMBER  1  DEST:  28 6
20.750913442245977
-------
TRYOUT[237]: (hidden_dim=32, max_step=10000, gamma=0.999, n_episode=1000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  47 37
TASK NUMBER  1  DEST:  37 28
7.692909987183743
TASK NUMBER  0  DEST:  39 40
TASK NUMBER  1  DEST:  34 13
6.061705183118459
TASK NUMBER  0  DEST:  10 39
TASK NUMBER  1  DEST:  14 25
6.68130253016708
TASK NUMBER  0  DEST:  43 43
TASK NUMBER  1  DEST:  40 26
20.86432

TASK NUMBER  0  DEST:  31 13
TASK NUMBER  1  DEST:  30 43
13.552544522282071
TASK NUMBER  0  DEST:  15 23
TASK NUMBER  1  DEST:  40 30
14.465545581018418
TASK NUMBER  0  DEST:  42 47
TASK NUMBER  1  DEST:  37 41
Task  0  completed at step  97
29.380253716352303
TASK NUMBER  0  DEST:  32 36
TASK NUMBER  1  DEST:  28 24
9.893537483168133
TASK NUMBER  0  DEST:  19 24
TASK NUMBER  1  DEST:  21 1
7.610476245062545
TASK NUMBER  0  DEST:  36 31
TASK NUMBER  1  DEST:  45 37
6.742403264301869
TASK NUMBER  0  DEST:  24 39
TASK NUMBER  1  DEST:  13 4
16.304271138714885
TASK NUMBER  0  DEST:  16 30
TASK NUMBER  1  DEST:  24 9
10.426262291299546
TASK NUMBER  0  DEST:  41 23
TASK NUMBER  1  DEST:  33 39
6.866225893697497
-------
TRYOUT[246]: (hidden_dim=32, max_step=5000, gamma=0.9999, n_episode=1000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  34 6
TASK NUMBER  1  DEST:  7 24
11.128955105059246
TASK NUMBER  0  DEST:  39 26
TASK

TASK NUMBER  0  DEST:  23 3
TASK NUMBER  1  DEST:  44 40
11.037196566261525
TASK NUMBER  0  DEST:  23 6
TASK NUMBER  1  DEST:  24 34
9.536720890258248
TASK NUMBER  0  DEST:  43 2
TASK NUMBER  1  DEST:  28 48
10.431022333838925
TASK NUMBER  0  DEST:  34 4
TASK NUMBER  1  DEST:  7 46
10.983108921082927
TASK NUMBER  0  DEST:  8 15
TASK NUMBER  1  DEST:  29 9
11.562237009699412
TASK NUMBER  0  DEST:  27 16
TASK NUMBER  1  DEST:  32 4
12.971072121585165
TASK NUMBER  0  DEST:  29 17
TASK NUMBER  1  DEST:  7 37
29.33671005824356
TASK NUMBER  0  DEST:  13 43
TASK NUMBER  1  DEST:  6 44
11.101694796168225
TASK NUMBER  0  DEST:  1 33
TASK NUMBER  1  DEST:  16 29
9.007217454756622
-------
TRYOUT[255]: (hidden_dim=32, max_step=1000, gamma=0.99, n_episode=2000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  21 24
TASK NUMBER  1  DEST:  9 15
17.22260858097619
TASK NUMBER  0  DEST:  44 30
TASK NUMBER  1  DEST:  21 38
6.9514601775077

-------
TRYOUT[263]: (hidden_dim=128, max_step=10000, gamma=0.99, n_episode=2000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  15 39
TASK NUMBER  1  DEST:  49 27
13.134503210563999
TASK NUMBER  0  DEST:  38 18
TASK NUMBER  1  DEST:  40 6
6.564447198412913
TASK NUMBER  0  DEST:  36 5
TASK NUMBER  1  DEST:  28 14
8.709483900564436
TASK NUMBER  0  DEST:  14 24
TASK NUMBER  1  DEST:  13 20
5.938559271449124
TASK NUMBER  0  DEST:  0 21
TASK NUMBER  1  DEST:  7 2
4.509795784368694
TASK NUMBER  0  DEST:  42 2
TASK NUMBER  1  DEST:  17 42
6.872372089811401
TASK NUMBER  0  DEST:  0 11
TASK NUMBER  1  DEST:  47 34
18.083153715043967
TASK NUMBER  0  DEST:  39 40
TASK NUMBER  1  DEST:  34 30
7.7964867464468455
TASK NUMBER  0  DEST:  38 10
TASK NUMBER  1  DEST:  45 29
7.053773434304683
TASK NUMBER  0  DEST:  43 49
TASK NUMBER  1  DEST:  16 35
5.834621957223687
-------
TRYOUT[264]: (hidden_dim=32, max_step=500, gamma=0.999, n_epi

TASK NUMBER  0  DEST:  13 10
TASK NUMBER  1  DEST:  8 13
5.764316898200395
TASK NUMBER  0  DEST:  22 45
TASK NUMBER  1  DEST:  49 35
14.995307557802061
TASK NUMBER  0  DEST:  27 43
TASK NUMBER  1  DEST:  45 39
6.155598804642999
-------
TRYOUT[272]: (hidden_dim=128, max_step=5000, gamma=0.999, n_episode=2000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  31 14
TASK NUMBER  1  DEST:  15 19
14.72643047069225
TASK NUMBER  0  DEST:  37 40
TASK NUMBER  1  DEST:  5 0
11.14199054378505
TASK NUMBER  0  DEST:  31 47
TASK NUMBER  1  DEST:  14 16
12.046219175584415
TASK NUMBER  0  DEST:  31 49
TASK NUMBER  1  DEST:  1 29
11.308621249595953
TASK NUMBER  0  DEST:  23 23
TASK NUMBER  1  DEST:  36 10
7.455936808427629
TASK NUMBER  0  DEST:  18 16
TASK NUMBER  1  DEST:  45 22
10.00110206389452
TASK NUMBER  0  DEST:  35 33
TASK NUMBER  1  DEST:  40 49
9.217965429327363
TASK NUMBER  0  DEST:  4 12
TASK NUMBER  1  DEST:  24 22
9.1463735

TASK NUMBER  0  DEST:  34 4
TASK NUMBER  1  DEST:  14 42
10.541402219972271
TASK NUMBER  0  DEST:  49 16
TASK NUMBER  1  DEST:  28 8
6.559295769863136
TASK NUMBER  0  DEST:  46 6
TASK NUMBER  1  DEST:  8 44
23.610289362392802
TASK NUMBER  0  DEST:  29 40
TASK NUMBER  1  DEST:  22 4
17.674477579426732
TASK NUMBER  0  DEST:  2 22
TASK NUMBER  1  DEST:  1 41
28.324115963201997
-------
TRYOUT[281]: (hidden_dim=128, max_step=1000, gamma=0.9999, n_episode=2000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  22 28
TASK NUMBER  1  DEST:  42 44
19.04255484527183
TASK NUMBER  0  DEST:  14 4
TASK NUMBER  1  DEST:  42 1
5.357023298639383
TASK NUMBER  0  DEST:  15 41
TASK NUMBER  1  DEST:  1 13
16.021358737286487
TASK NUMBER  0  DEST:  22 33
TASK NUMBER  1  DEST:  11 32
Task  1  completed at step  13
25.145877425947333
TASK NUMBER  0  DEST:  29 43
TASK NUMBER  1  DEST:  24 40
6.134765772356323
TASK NUMBER  0  DEST:  14 25
TASK NUM

TASK NUMBER  0  DEST:  30 0
TASK NUMBER  1  DEST:  18 43
9.210750857666056
TASK NUMBER  0  DEST:  47 27
TASK NUMBER  1  DEST:  4 15
10.734521294181166
TASK NUMBER  0  DEST:  33 20
TASK NUMBER  1  DEST:  44 36
15.50656260598376
TASK NUMBER  0  DEST:  44 13
TASK NUMBER  1  DEST:  39 8
15.614231487030555
TASK NUMBER  0  DEST:  7 48
TASK NUMBER  1  DEST:  24 30
13.296293611927911
TASK NUMBER  0  DEST:  38 7
TASK NUMBER  1  DEST:  25 14
10.2143900343608
TASK NUMBER  0  DEST:  17 37
TASK NUMBER  1  DEST:  24 33
12.151177974899264
TASK NUMBER  0  DEST:  48 29
TASK NUMBER  1  DEST:  2 32
12.035793878551269
-------
TRYOUT[290]: (hidden_dim=128, max_step=500, gamma=0.99, n_episode=5000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  21 20
TASK NUMBER  1  DEST:  16 3
8.160493405200913
TASK NUMBER  0  DEST:  20 34
TASK NUMBER  1  DEST:  37 18
10.518358755223472
TASK NUMBER  0  DEST:  13 30
TASK NUMBER  1  DEST:  9 5
6.84310587895

TASK NUMBER  0  DEST:  4 5
TASK NUMBER  1  DEST:  2 2
60.72144554800064
TASK NUMBER  0  DEST:  12 0
TASK NUMBER  1  DEST:  38 33
20.005115048757666
TASK NUMBER  0  DEST:  37 7
TASK NUMBER  1  DEST:  22 47
20.428024026622307
TASK NUMBER  0  DEST:  30 24
TASK NUMBER  1  DEST:  40 10
10.43804366477927
TASK NUMBER  0  DEST:  31 25
TASK NUMBER  1  DEST:  40 20
19.666040963628177
TASK NUMBER  0  DEST:  20 14
TASK NUMBER  1  DEST:  27 13
25.534475590820197
TASK NUMBER  0  DEST:  25 44
TASK NUMBER  1  DEST:  3 17
7.568427043811406
TASK NUMBER  0  DEST:  34 7
TASK NUMBER  1  DEST:  27 2
6.080687711571985
TASK NUMBER  0  DEST:  8 41
TASK NUMBER  1  DEST:  16 44
5.245466057102794
-------
TRYOUT[299]: (hidden_dim=128, max_step=10000, gamma=0.99, n_episode=5000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  43 34
TASK NUMBER  1  DEST:  43 1
6.170252916306759
TASK NUMBER  0  DEST:  12 32
TASK NUMBER  1  DEST:  24 17
8.868801291010

-------
TRYOUT[307]: (hidden_dim=64, max_step=5000, gamma=0.999, n_episode=5000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  24 7
TASK NUMBER  1  DEST:  23 48
10.945337806295239
TASK NUMBER  0  DEST:  45 38
TASK NUMBER  1  DEST:  30 32
18.464884244838995
TASK NUMBER  0  DEST:  15 49
TASK NUMBER  1  DEST:  43 38
6.073819487774455
TASK NUMBER  0  DEST:  12 37
TASK NUMBER  1  DEST:  5 17
14.555258929279043
TASK NUMBER  0  DEST:  21 19
TASK NUMBER  1  DEST:  26 36
11.12238502382928
TASK NUMBER  0  DEST:  3 17
TASK NUMBER  1  DEST:  49 39
18.75915924876356
TASK NUMBER  0  DEST:  41 2
TASK NUMBER  1  DEST:  13 34
9.174998101512404
TASK NUMBER  0  DEST:  38 11
TASK NUMBER  1  DEST:  34 14
34.715953682081285
TASK NUMBER  0  DEST:  48 43
TASK NUMBER  1  DEST:  5 12
9.541516104586002
TASK NUMBER  0  DEST:  36 4
TASK NUMBER  1  DEST:  24 23
7.188873375721978
-------
TRYOUT[308]: (hidden_dim=128, max_step=5000, gamma=0.999, n_

TASK NUMBER  0  DEST:  4 26
TASK NUMBER  1  DEST:  42 39
22.11290250510204
TASK NUMBER  0  DEST:  9 2
TASK NUMBER  1  DEST:  35 19
17.255662599750057
TASK NUMBER  0  DEST:  24 26
TASK NUMBER  1  DEST:  38 5
8.43922015145697
TASK NUMBER  0  DEST:  37 42
TASK NUMBER  1  DEST:  36 46
13.604887371839604
TASK NUMBER  0  DEST:  11 17
TASK NUMBER  1  DEST:  23 9
8.561738911207925
-------
TRYOUT[316]: (hidden_dim=64, max_step=1000, gamma=0.9999, n_episode=5000, buffer_size=10000000, batch_size=32, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  15 29
TASK NUMBER  1  DEST:  7 47
9.252206851181004
TASK NUMBER  0  DEST:  30 0
TASK NUMBER  1  DEST:  8 5
5.428648679552684
TASK NUMBER  0  DEST:  43 20
TASK NUMBER  1  DEST:  7 14
6.859672699800531
TASK NUMBER  0  DEST:  44 39
TASK NUMBER  1  DEST:  45 24
9.73265216179457
TASK NUMBER  0  DEST:  42 24
TASK NUMBER  1  DEST:  47 0
22.16382316451842
TASK NUMBER  0  DEST:  41 41
TASK NUMBER  1  DEST:  13 4
6.415011894471541
T

TASK NUMBER  0  DEST:  0 43
TASK NUMBER  1  DEST:  15 22
9.599956916612204
TASK NUMBER  0  DEST:  48 13
TASK NUMBER  1  DEST:  16 8
5.535212928518281
TASK NUMBER  0  DEST:  46 44
TASK NUMBER  1  DEST:  28 42
21.615219933592694
TASK NUMBER  0  DEST:  46 49
TASK NUMBER  1  DEST:  34 23
9.465796884585606
TASK NUMBER  0  DEST:  16 41
TASK NUMBER  1  DEST:  12 18
Task  1  completed at step  37
23.841623496641315
TASK NUMBER  0  DEST:  11 34
TASK NUMBER  1  DEST:  12 48
36.34011494280606
TASK NUMBER  0  DEST:  35 16
TASK NUMBER  1  DEST:  1 22
11.893335181576608
TASK NUMBER  0  DEST:  20 6
TASK NUMBER  1  DEST:  41 15
8.857185451347275
TASK NUMBER  0  DEST:  25 3
TASK NUMBER  1  DEST:  34 5
13.330662538260684
-------
TRYOUT[325]: (hidden_dim=64, max_step=500, gamma=0.99, n_episode=1000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  27 36
TASK NUMBER  1  DEST:  11 22
17.623509049782793
TASK NUMBER  0  DEST:  0 22
TASK NUMBER  

-------
TRYOUT[333]: (hidden_dim=32, max_step=10000, gamma=0.99, n_episode=1000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  35 12
TASK NUMBER  1  DEST:  42 49
6.197945751502849
TASK NUMBER  0  DEST:  7 16
TASK NUMBER  1  DEST:  10 6
6.182426409757132
TASK NUMBER  0  DEST:  22 18
TASK NUMBER  1  DEST:  45 44
8.845860004911632
TASK NUMBER  0  DEST:  42 8
TASK NUMBER  1  DEST:  1 16
8.9739584914877
TASK NUMBER  0  DEST:  16 15
TASK NUMBER  1  DEST:  43 29
9.272069808196315
TASK NUMBER  0  DEST:  47 44
TASK NUMBER  1  DEST:  33 38
9.065843909744993
TASK NUMBER  0  DEST:  10 38
TASK NUMBER  1  DEST:  33 3
11.088781713908082
TASK NUMBER  0  DEST:  40 41
TASK NUMBER  1  DEST:  46 47
8.428610460950066
TASK NUMBER  0  DEST:  31 12
TASK NUMBER  1  DEST:  12 25
18.34115909129276
TASK NUMBER  0  DEST:  37 9
TASK NUMBER  1  DEST:  5 23
14.33252649794037
-------
TRYOUT[334]: (hidden_dim=64, max_step=10000, gamma=0.99, n_episode=10

TASK NUMBER  0  DEST:  4 30
TASK NUMBER  1  DEST:  47 18
11.593840493857742
TASK NUMBER  0  DEST:  32 9
TASK NUMBER  1  DEST:  13 48
13.10795001334328
TASK NUMBER  0  DEST:  18 0
TASK NUMBER  1  DEST:  46 38
7.129429788605362
TASK NUMBER  0  DEST:  20 14
TASK NUMBER  1  DEST:  13 24
34.715915745843844
-------
TRYOUT[342]: (hidden_dim=32, max_step=5000, gamma=0.999, n_episode=1000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  9 25
TASK NUMBER  1  DEST:  0 7
6.363401872518878
TASK NUMBER  0  DEST:  4 13
TASK NUMBER  1  DEST:  48 37
8.033237161114828
TASK NUMBER  0  DEST:  24 40
TASK NUMBER  1  DEST:  13 1
13.213476557082911
TASK NUMBER  0  DEST:  6 0
TASK NUMBER  1  DEST:  33 25
6.7165231727787695
TASK NUMBER  0  DEST:  43 22
TASK NUMBER  1  DEST:  8 4
6.464376544357119
TASK NUMBER  0  DEST:  34 29
TASK NUMBER  1  DEST:  22 31
9.63202017672553
TASK NUMBER  0  DEST:  40 41
TASK NUMBER  1  DEST:  22 29
16.78660207253865
TA

TASK NUMBER  0  DEST:  9 31
TASK NUMBER  1  DEST:  23 42
5.526526862915029
TASK NUMBER  0  DEST:  17 28
TASK NUMBER  1  DEST:  2 35
8.251456986052212
TASK NUMBER  0  DEST:  11 12
TASK NUMBER  1  DEST:  4 15
6.887881912328899
TASK NUMBER  0  DEST:  0 12
TASK NUMBER  1  DEST:  38 46
6.538747743113347
TASK NUMBER  0  DEST:  24 13
TASK NUMBER  1  DEST:  48 30
12.57330210562816
TASK NUMBER  0  DEST:  8 1
TASK NUMBER  1  DEST:  30 14
30.477494444844137
TASK NUMBER  0  DEST:  43 28
TASK NUMBER  1  DEST:  26 47
22.753008788199534
-------
TRYOUT[351]: (hidden_dim=32, max_step=1000, gamma=0.9999, n_episode=1000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  28 29
TASK NUMBER  1  DEST:  47 47
5.653789028840541
TASK NUMBER  0  DEST:  31 9
TASK NUMBER  1  DEST:  5 47
9.429491472127486
TASK NUMBER  0  DEST:  6 40
TASK NUMBER  1  DEST:  42 24
12.745072266260173
TASK NUMBER  0  DEST:  20 44
TASK NUMBER  1  DEST:  18 29
7.93594859462461

TASK NUMBER  0  DEST:  22 46
TASK NUMBER  1  DEST:  29 10
8.89877745675424
TASK NUMBER  0  DEST:  42 40
TASK NUMBER  1  DEST:  6 46
6.906617915385977
TASK NUMBER  0  DEST:  45 40
TASK NUMBER  1  DEST:  6 41
5.265391842148964
TASK NUMBER  0  DEST:  9 12
TASK NUMBER  1  DEST:  3 44
8.59764527821648
TASK NUMBER  0  DEST:  39 39
TASK NUMBER  1  DEST:  17 19
10.389970356098884
TASK NUMBER  0  DEST:  16 16
TASK NUMBER  1  DEST:  41 20
13.18795822271619
TASK NUMBER  0  DEST:  16 8
TASK NUMBER  1  DEST:  20 14
12.527391818248635
TASK NUMBER  0  DEST:  47 15
TASK NUMBER  1  DEST:  10 40
17.781321416541665
TASK NUMBER  0  DEST:  35 23
TASK NUMBER  1  DEST:  17 4
9.700701728623072
-------
TRYOUT[360]: (hidden_dim=32, max_step=500, gamma=0.99, n_episode=2000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  0 35
TASK NUMBER  1  DEST:  28 1
14.056480472804461
TASK NUMBER  0  DEST:  21 41
TASK NUMBER  1  DEST:  34 11
8.689810106387531
T

TASK NUMBER  0  DEST:  3 30
TASK NUMBER  1  DEST:  43 26
14.102717444508528
TASK NUMBER  0  DEST:  12 31
TASK NUMBER  1  DEST:  39 32
11.205326023229139
TASK NUMBER  0  DEST:  39 49
TASK NUMBER  1  DEST:  25 15
13.23264391409986
TASK NUMBER  0  DEST:  36 22
TASK NUMBER  1  DEST:  30 15
13.627500498306532
TASK NUMBER  0  DEST:  4 22
TASK NUMBER  1  DEST:  17 9
5.116558165958368
TASK NUMBER  0  DEST:  49 18
TASK NUMBER  1  DEST:  7 20
12.68723021574542
TASK NUMBER  0  DEST:  19 6
TASK NUMBER  1  DEST:  21 22
8.537702538085211
TASK NUMBER  0  DEST:  38 12
TASK NUMBER  1  DEST:  2 12
8.575462303770891
TASK NUMBER  0  DEST:  35 6
TASK NUMBER  1  DEST:  23 46
13.182838657213868
-------
TRYOUT[369]: (hidden_dim=32, max_step=10000, gamma=0.99, n_episode=2000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  9 4
TASK NUMBER  1  DEST:  12 12
6.114214060071878
TASK NUMBER  0  DEST:  5 20
TASK NUMBER  1  DEST:  2 19
7.18385078415026
T

TASK NUMBER  0  DEST:  15 40
TASK NUMBER  1  DEST:  23 32
6.565894257870812
TASK NUMBER  0  DEST:  4 40
TASK NUMBER  1  DEST:  31 4
9.155062687671563
TASK NUMBER  0  DEST:  41 18
TASK NUMBER  1  DEST:  8 29
15.67121651853717
TASK NUMBER  0  DEST:  39 4
TASK NUMBER  1  DEST:  30 6
37.508841570406815
TASK NUMBER  0  DEST:  41 34
TASK NUMBER  1  DEST:  22 45
11.407801434302229
TASK NUMBER  0  DEST:  14 34
TASK NUMBER  1  DEST:  41 31
13.425839870069806
TASK NUMBER  0  DEST:  37 33
TASK NUMBER  1  DEST:  18 4
16.05190341318071
TASK NUMBER  0  DEST:  23 47
TASK NUMBER  1  DEST:  39 38
10.05673091206924
TASK NUMBER  0  DEST:  25 37
TASK NUMBER  1  DEST:  15 37
26.896086093864604
-------
TRYOUT[378]: (hidden_dim=32, max_step=5000, gamma=0.999, n_episode=2000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  49 13
TASK NUMBER  1  DEST:  20 17
8.995466477123518
TASK NUMBER  0  DEST:  8 2
TASK NUMBER  1  DEST:  16 37
16.178162318066

TASK NUMBER  0  DEST:  38 13
TASK NUMBER  1  DEST:  27 46
11.105414229525826
TASK NUMBER  0  DEST:  8 22
TASK NUMBER  1  DEST:  41 35
9.067685283781442
TASK NUMBER  0  DEST:  29 29
TASK NUMBER  1  DEST:  9 45
13.58315346285267
TASK NUMBER  0  DEST:  6 46
TASK NUMBER  1  DEST:  9 28
11.408072293279712
TASK NUMBER  0  DEST:  6 6
TASK NUMBER  1  DEST:  30 27
13.999398305961043
TASK NUMBER  0  DEST:  18 32
TASK NUMBER  1  DEST:  14 34
15.359243124146051
TASK NUMBER  0  DEST:  9 19
TASK NUMBER  1  DEST:  20 7
24.502646999463305
TASK NUMBER  0  DEST:  48 39
TASK NUMBER  1  DEST:  13 43
12.838373295438386
TASK NUMBER  0  DEST:  37 26
TASK NUMBER  1  DEST:  8 4
13.783421921930255
-------
TRYOUT[387]: (hidden_dim=32, max_step=1000, gamma=0.9999, n_episode=2000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  43 45
TASK NUMBER  1  DEST:  40 0
6.88708087755085
TASK NUMBER  0  DEST:  24 15
TASK NUMBER  1  DEST:  5 45
12.6920662544269

-------
TRYOUT[395]: (hidden_dim=128, max_step=10000, gamma=0.9999, n_episode=2000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  12 13
TASK NUMBER  1  DEST:  19 24
6.7959490144344885
TASK NUMBER  0  DEST:  33 16
TASK NUMBER  1  DEST:  13 9
18.540176837098446
TASK NUMBER  0  DEST:  45 14
TASK NUMBER  1  DEST:  22 20
12.505955827170101
TASK NUMBER  0  DEST:  7 46
TASK NUMBER  1  DEST:  26 10
17.0577810198154
TASK NUMBER  0  DEST:  37 19
TASK NUMBER  1  DEST:  17 24
17.786126045642472
TASK NUMBER  0  DEST:  28 4
TASK NUMBER  1  DEST:  33 48
10.925033969546146
TASK NUMBER  0  DEST:  21 47
TASK NUMBER  1  DEST:  35 8
40.54980654264463
TASK NUMBER  0  DEST:  9 32
TASK NUMBER  1  DEST:  6 46
5.201919301848264
TASK NUMBER  0  DEST:  38 41
TASK NUMBER  1  DEST:  45 26
18.318644785996213
TASK NUMBER  0  DEST:  20 32
TASK NUMBER  1  DEST:  41 28
7.983090398975612
-------
TRYOUT[396]: (hidden_dim=32, max_step=500, gamma=0.99, n_ep

TASK NUMBER  0  DEST:  45 30
TASK NUMBER  1  DEST:  31 0
Task  1  completed at step  34
66.11933875966317
TASK NUMBER  0  DEST:  7 37
TASK NUMBER  1  DEST:  8 27
10.0709125978085
TASK NUMBER  0  DEST:  30 5
TASK NUMBER  1  DEST:  29 17
6.399745626767811
-------
TRYOUT[404]: (hidden_dim=128, max_step=5000, gamma=0.99, n_episode=5000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  44 28
TASK NUMBER  1  DEST:  35 18
8.983915771700985
TASK NUMBER  0  DEST:  22 37
TASK NUMBER  1  DEST:  6 15
19.46527567345572
TASK NUMBER  0  DEST:  15 34
TASK NUMBER  1  DEST:  49 9
9.645742398519003
TASK NUMBER  0  DEST:  40 7
TASK NUMBER  1  DEST:  41 44
8.637226507411482
TASK NUMBER  0  DEST:  12 34
TASK NUMBER  1  DEST:  18 45
40.669608318617605
TASK NUMBER  0  DEST:  1 10
TASK NUMBER  1  DEST:  29 43
19.6831578528173
TASK NUMBER  0  DEST:  17 24
TASK NUMBER  1  DEST:  48 33
11.147619248919868
TASK NUMBER  0  DEST:  32 12
TASK NUMBER  1  D

TASK NUMBER  0  DEST:  21 40
TASK NUMBER  1  DEST:  10 33
8.845046586271
TASK NUMBER  0  DEST:  0 25
TASK NUMBER  1  DEST:  14 21
7.2368330137145795
TASK NUMBER  0  DEST:  11 10
TASK NUMBER  1  DEST:  29 18
20.458649453777447
TASK NUMBER  0  DEST:  10 31
TASK NUMBER  1  DEST:  9 20
38.109122431882696
TASK NUMBER  0  DEST:  30 39
TASK NUMBER  1  DEST:  29 35
12.98703606235852
TASK NUMBER  0  DEST:  7 39
TASK NUMBER  1  DEST:  37 10
7.681466345547992
TASK NUMBER  0  DEST:  40 40
TASK NUMBER  1  DEST:  8 13
6.3526902712921745
-------
TRYOUT[413]: (hidden_dim=128, max_step=1000, gamma=0.999, n_episode=5000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  40 39
TASK NUMBER  1  DEST:  17 41
6.0931971262317015
TASK NUMBER  0  DEST:  37 35
TASK NUMBER  1  DEST:  27 43
8.065826988755896
TASK NUMBER  0  DEST:  33 15
TASK NUMBER  1  DEST:  32 20
23.07888639953441
TASK NUMBER  0  DEST:  34 2
TASK NUMBER  1  DEST:  17 41
Task  0  comp

TASK NUMBER  0  DEST:  1 31
TASK NUMBER  1  DEST:  8 6
5.514312420905139
TASK NUMBER  0  DEST:  2 37
TASK NUMBER  1  DEST:  48 38
10.791739431345666
TASK NUMBER  0  DEST:  21 24
TASK NUMBER  1  DEST:  32 18
12.40097331701426
TASK NUMBER  0  DEST:  5 3
TASK NUMBER  1  DEST:  16 34
6.155220033554268
TASK NUMBER  0  DEST:  45 3
TASK NUMBER  1  DEST:  41 36
29.26000114615592
TASK NUMBER  0  DEST:  39 23
TASK NUMBER  1  DEST:  24 19
18.077990847793295
TASK NUMBER  0  DEST:  28 0
TASK NUMBER  1  DEST:  24 48
8.55915226804483
TASK NUMBER  0  DEST:  22 35
TASK NUMBER  1  DEST:  2 26
7.666074368079581
TASK NUMBER  0  DEST:  17 38
TASK NUMBER  1  DEST:  38 0
7.36249345001634
-------
TRYOUT[422]: (hidden_dim=128, max_step=500, gamma=0.9999, n_episode=5000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  42 36
TASK NUMBER  1  DEST:  48 30
5.749295060665031
TASK NUMBER  0  DEST:  29 42
TASK NUMBER  1  DEST:  43 7
6.788465231688471
TAS

-------
TRYOUT[430]: (hidden_dim=64, max_step=10000, gamma=0.9999, n_episode=5000, buffer_size=65000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  36 20
TASK NUMBER  1  DEST:  13 23
18.490758516883236
TASK NUMBER  0  DEST:  23 9
TASK NUMBER  1  DEST:  47 42
12.588751288577962
TASK NUMBER  0  DEST:  49 34
TASK NUMBER  1  DEST:  0 32
14.836616607600657
TASK NUMBER  0  DEST:  49 44
TASK NUMBER  1  DEST:  48 25
8.688931866161312
TASK NUMBER  0  DEST:  9 25
TASK NUMBER  1  DEST:  2 45
24.865632997190026
TASK NUMBER  0  DEST:  39 46
TASK NUMBER  1  DEST:  7 40
10.926799744094177
TASK NUMBER  0  DEST:  0 23
TASK NUMBER  1  DEST:  9 7
23.910185144996706
TASK NUMBER  0  DEST:  47 31
TASK NUMBER  1  DEST:  14 18
10.828877299904605
TASK NUMBER  0  DEST:  17 24
TASK NUMBER  1  DEST:  5 0
9.96631200246442
TASK NUMBER  0  DEST:  9 27
TASK NUMBER  1  DEST:  35 46
Task  0  completed at step  28
28.64316884617327
-------
TRYOUT[431]: (hidden_dim=128, max

TASK NUMBER  0  DEST:  46 27
TASK NUMBER  1  DEST:  18 25
14.604413544644537
TASK NUMBER  0  DEST:  13 21
TASK NUMBER  1  DEST:  39 21
11.192903212704984
TASK NUMBER  0  DEST:  20 5
TASK NUMBER  1  DEST:  0 11
17.075945170653927
-------
TRYOUT[439]: (hidden_dim=64, max_step=5000, gamma=0.99, n_episode=1000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  37 49
TASK NUMBER  1  DEST:  1 35
15.362251063705141
TASK NUMBER  0  DEST:  0 14
TASK NUMBER  1  DEST:  32 49
11.396270473651395
TASK NUMBER  0  DEST:  10 24
TASK NUMBER  1  DEST:  8 46
5.582515830586701
TASK NUMBER  0  DEST:  4 26
TASK NUMBER  1  DEST:  8 14
5.2078399223592315
TASK NUMBER  0  DEST:  11 10
TASK NUMBER  1  DEST:  0 16
6.94382446878717
TASK NUMBER  0  DEST:  47 46
TASK NUMBER  1  DEST:  33 26
5.902696076950162
TASK NUMBER  0  DEST:  22 6
TASK NUMBER  1  DEST:  28 35
16.340954305867772
TASK NUMBER  0  DEST:  3 0
TASK NUMBER  1  DEST:  20 4
19.03319134371231

TASK NUMBER  0  DEST:  10 29
TASK NUMBER  1  DEST:  39 4
21.96772980894376
TASK NUMBER  0  DEST:  38 5
TASK NUMBER  1  DEST:  48 18
38.69942489063619
TASK NUMBER  0  DEST:  21 13
TASK NUMBER  1  DEST:  16 8
14.172426728112967
TASK NUMBER  0  DEST:  46 29
TASK NUMBER  1  DEST:  32 5
12.820869803679333
TASK NUMBER  0  DEST:  1 8
TASK NUMBER  1  DEST:  4 3
5.681140036659035
TASK NUMBER  0  DEST:  24 40
TASK NUMBER  1  DEST:  0 8
13.125972159810688
-------
TRYOUT[448]: (hidden_dim=64, max_step=1000, gamma=0.999, n_episode=1000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  25 18
TASK NUMBER  1  DEST:  10 25
22.994217279504458
TASK NUMBER  0  DEST:  19 19
TASK NUMBER  1  DEST:  42 39
17.683676155347033
TASK NUMBER  0  DEST:  20 36
TASK NUMBER  1  DEST:  15 5
15.577075408181797
TASK NUMBER  0  DEST:  44 8
TASK NUMBER  1  DEST:  26 44
24.399288586221978
TASK NUMBER  0  DEST:  45 45
TASK NUMBER  1  DEST:  8 44
5.40827309395891

TASK NUMBER  0  DEST:  7 49
TASK NUMBER  1  DEST:  39 16
19.05219538060244
TASK NUMBER  0  DEST:  47 24
TASK NUMBER  1  DEST:  4 46
5.551035755727561
TASK NUMBER  0  DEST:  17 21
TASK NUMBER  1  DEST:  13 29
7.779138870810186
TASK NUMBER  0  DEST:  18 39
TASK NUMBER  1  DEST:  11 2
12.396017391341015
TASK NUMBER  0  DEST:  36 7
TASK NUMBER  1  DEST:  43 7
8.523826444984609
TASK NUMBER  0  DEST:  25 39
TASK NUMBER  1  DEST:  2 12
8.182769399668397
TASK NUMBER  0  DEST:  43 24
TASK NUMBER  1  DEST:  1 10
9.035978524558315
TASK NUMBER  0  DEST:  49 49
TASK NUMBER  1  DEST:  20 18
11.776546641033221
TASK NUMBER  0  DEST:  11 34
TASK NUMBER  1  DEST:  2 44
21.933309793216395
-------
TRYOUT[457]: (hidden_dim=64, max_step=500, gamma=0.9999, n_episode=1000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  2 9
TASK NUMBER  1  DEST:  27 9
9.337567930003539
TASK NUMBER  0  DEST:  11 11
TASK NUMBER  1  DEST:  49 28
8.35437601713832
T

TASK NUMBER  0  DEST:  36 26
TASK NUMBER  1  DEST:  17 44
15.07563458510094
-------
TRYOUT[465]: (hidden_dim=32, max_step=10000, gamma=0.9999, n_episode=1000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  1 31
TASK NUMBER  1  DEST:  33 5
8.62247844361777
TASK NUMBER  0  DEST:  32 39
TASK NUMBER  1  DEST:  42 18
5.420698923659137
TASK NUMBER  0  DEST:  4 19
TASK NUMBER  1  DEST:  13 46
15.511182683378674
TASK NUMBER  0  DEST:  35 4
TASK NUMBER  1  DEST:  36 0
9.8494365621001
TASK NUMBER  0  DEST:  8 22
TASK NUMBER  1  DEST:  5 34
13.12274300151814
TASK NUMBER  0  DEST:  47 23
TASK NUMBER  1  DEST:  31 6
7.699610337376272
TASK NUMBER  0  DEST:  36 4
TASK NUMBER  1  DEST:  39 4
12.066764329320327
TASK NUMBER  0  DEST:  35 13
TASK NUMBER  1  DEST:  16 2
6.937997163677848
TASK NUMBER  0  DEST:  39 12
TASK NUMBER  1  DEST:  49 24
6.270436788703379
TASK NUMBER  0  DEST:  33 41
TASK NUMBER  1  DEST:  12 25
11.114779578826427
-

TASK NUMBER  0  DEST:  47 37
TASK NUMBER  1  DEST:  27 2
9.587878502317134
TASK NUMBER  0  DEST:  13 31
TASK NUMBER  1  DEST:  36 37
13.434562389563228
TASK NUMBER  0  DEST:  34 7
TASK NUMBER  1  DEST:  35 0
9.841458887855506
TASK NUMBER  0  DEST:  12 40
TASK NUMBER  1  DEST:  40 49
8.478001198655729
-------
TRYOUT[474]: (hidden_dim=32, max_step=5000, gamma=0.99, n_episode=2000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  22 14
TASK NUMBER  1  DEST:  36 28
9.523743323648205
TASK NUMBER  0  DEST:  28 22
TASK NUMBER  1  DEST:  36 39
18.088725263332595
TASK NUMBER  0  DEST:  6 22
TASK NUMBER  1  DEST:  36 33
13.227644032806934
TASK NUMBER  0  DEST:  35 42
TASK NUMBER  1  DEST:  43 4
14.337788639965433
TASK NUMBER  0  DEST:  1 1
TASK NUMBER  1  DEST:  10 4
5.165419053963379
TASK NUMBER  0  DEST:  47 35
TASK NUMBER  1  DEST:  28 10
11.377922306810278
TASK NUMBER  0  DEST:  41 26
TASK NUMBER  1  DEST:  38 30
13.22905618460

TASK NUMBER  0  DEST:  4 9
TASK NUMBER  1  DEST:  1 7
20.328305062873568
TASK NUMBER  0  DEST:  6 43
TASK NUMBER  1  DEST:  36 1
19.21617256744966
TASK NUMBER  0  DEST:  24 23
TASK NUMBER  1  DEST:  31 27
9.824564468004974
TASK NUMBER  0  DEST:  30 6
TASK NUMBER  1  DEST:  47 40
12.625121543416268
TASK NUMBER  0  DEST:  27 7
TASK NUMBER  1  DEST:  12 41
15.112947465785945
TASK NUMBER  0  DEST:  12 15
TASK NUMBER  1  DEST:  36 44
12.630006005323043
-------
TRYOUT[483]: (hidden_dim=32, max_step=1000, gamma=0.999, n_episode=2000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  41 16
TASK NUMBER  1  DEST:  11 32
11.977180051688359
TASK NUMBER  0  DEST:  30 18
TASK NUMBER  1  DEST:  35 6
20.97435419568518
TASK NUMBER  0  DEST:  21 18
TASK NUMBER  1  DEST:  46 43
Task  1  completed at step  100
46.8721402721906
TASK NUMBER  0  DEST:  39 15
TASK NUMBER  1  DEST:  32 10
11.653438050846608
TASK NUMBER  0  DEST:  30 29
TASK NUMBER

TASK NUMBER  0  DEST:  18 41
TASK NUMBER  1  DEST:  18 0
18.398982841950893
TASK NUMBER  0  DEST:  12 0
TASK NUMBER  1  DEST:  45 14
5.942101668778245
TASK NUMBER  0  DEST:  27 21
TASK NUMBER  1  DEST:  31 15
14.976202903374148
TASK NUMBER  0  DEST:  37 7
TASK NUMBER  1  DEST:  39 28
6.471244222689961
TASK NUMBER  0  DEST:  45 9
TASK NUMBER  1  DEST:  21 16
11.752283791102885
TASK NUMBER  0  DEST:  39 17
TASK NUMBER  1  DEST:  6 44
8.779253633063519
TASK NUMBER  0  DEST:  49 47
TASK NUMBER  1  DEST:  42 45
Task  0  completed at step  82
51.283636341327174
TASK NUMBER  0  DEST:  21 43
TASK NUMBER  1  DEST:  6 23
9.907388358492403
TASK NUMBER  0  DEST:  43 46
TASK NUMBER  1  DEST:  8 33
5.5241826789509325
-------
TRYOUT[492]: (hidden_dim=32, max_step=500, gamma=0.9999, n_episode=2000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  29 33
TASK NUMBER  1  DEST:  49 45
6.965904222132513
TASK NUMBER  0  DEST:  21 32
TASK NUMBE

-------
TRYOUT[500]: (hidden_dim=128, max_step=5000, gamma=0.9999, n_episode=2000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  4 8
TASK NUMBER  1  DEST:  17 40
8.109516886604414
TASK NUMBER  0  DEST:  24 17
TASK NUMBER  1  DEST:  15 34
15.70131781587671
TASK NUMBER  0  DEST:  39 10
TASK NUMBER  1  DEST:  7 24
12.531109286719579
TASK NUMBER  0  DEST:  7 17
TASK NUMBER  1  DEST:  9 21
6.884363575037716
TASK NUMBER  0  DEST:  4 18
TASK NUMBER  1  DEST:  5 15
6.74963878387253
TASK NUMBER  0  DEST:  47 29
TASK NUMBER  1  DEST:  49 31
7.357642705850002
TASK NUMBER  0  DEST:  27 5
TASK NUMBER  1  DEST:  9 22
Task  1  completed at step  8
25.227061508885587
TASK NUMBER  0  DEST:  25 21
TASK NUMBER  1  DEST:  41 43
12.21914051127992
TASK NUMBER  0  DEST:  27 1
TASK NUMBER  1  DEST:  1 15
12.894636406681501
TASK NUMBER  0  DEST:  40 38
TASK NUMBER  1  DEST:  20 10
13.047475531898307
-------
TRYOUT[501]: (hidden_dim=32, max_ste

15.511511529462911
TASK NUMBER  0  DEST:  33 33
TASK NUMBER  1  DEST:  47 3
5.670579168651056
TASK NUMBER  0  DEST:  26 4
TASK NUMBER  1  DEST:  37 4
16.45653785516461
TASK NUMBER  0  DEST:  29 26
TASK NUMBER  1  DEST:  10 38
8.15801211499031
TASK NUMBER  0  DEST:  25 11
TASK NUMBER  1  DEST:  25 12
10.146941036608403
-------
TRYOUT[509]: (hidden_dim=128, max_step=1000, gamma=0.99, n_episode=5000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  25 20
TASK NUMBER  1  DEST:  9 26
14.318121352394854
TASK NUMBER  0  DEST:  42 37
TASK NUMBER  1  DEST:  17 6
11.065027468164347
TASK NUMBER  0  DEST:  19 16
TASK NUMBER  1  DEST:  49 46
29.544553617055637
TASK NUMBER  0  DEST:  18 43
TASK NUMBER  1  DEST:  47 30
13.273630102778199
TASK NUMBER  0  DEST:  10 23
TASK NUMBER  1  DEST:  18 15
7.306139970424259
TASK NUMBER  0  DEST:  4 2
TASK NUMBER  1  DEST:  49 21
9.651995441660686
TASK NUMBER  0  DEST:  5 40
TASK NUMBER  1  DEST:  3

11.664254494754962
TASK NUMBER  0  DEST:  11 48
TASK NUMBER  1  DEST:  16 4
13.013519557171472
TASK NUMBER  0  DEST:  24 16
TASK NUMBER  1  DEST:  33 11
13.331574408953227
TASK NUMBER  0  DEST:  47 21
TASK NUMBER  1  DEST:  43 44
11.280594426914526
TASK NUMBER  0  DEST:  3 18
TASK NUMBER  1  DEST:  33 32
17.644358556487113
TASK NUMBER  0  DEST:  10 2
TASK NUMBER  1  DEST:  28 35
21.07544244783321
TASK NUMBER  0  DEST:  21 48
TASK NUMBER  1  DEST:  9 43
8.77886474357813
TASK NUMBER  0  DEST:  21 45
TASK NUMBER  1  DEST:  14 4
11.389586559252667
-------
TRYOUT[518]: (hidden_dim=128, max_step=500, gamma=0.999, n_episode=5000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  10 40
TASK NUMBER  1  DEST:  45 46
6.376985964449476
TASK NUMBER  0  DEST:  36 2
TASK NUMBER  1  DEST:  4 30
7.522299886583035
TASK NUMBER  0  DEST:  33 8
TASK NUMBER  1  DEST:  3 37
22.50737815418517
TASK NUMBER  0  DEST:  6 9
TASK NUMBER  1  DEST:  34 0

TASK NUMBER  0  DEST:  30 35
TASK NUMBER  1  DEST:  1 16
Task  1  completed at step  49
20.552163604480143
TASK NUMBER  0  DEST:  22 44
TASK NUMBER  1  DEST:  49 4
8.178816068114513
TASK NUMBER  0  DEST:  24 5
TASK NUMBER  1  DEST:  6 3
21.428119583297637
TASK NUMBER  0  DEST:  31 3
TASK NUMBER  1  DEST:  49 10
6.711967613314381
TASK NUMBER  0  DEST:  25 20
TASK NUMBER  1  DEST:  32 3
8.801888623505937
TASK NUMBER  0  DEST:  47 3
TASK NUMBER  1  DEST:  38 22
35.95639315390381
TASK NUMBER  0  DEST:  13 27
TASK NUMBER  1  DEST:  33 19
Task  1  completed at step  75
31.354328804904572
TASK NUMBER  0  DEST:  48 13
TASK NUMBER  1  DEST:  2 8
15.702461073179299
TASK NUMBER  0  DEST:  28 44
TASK NUMBER  1  DEST:  10 14
Task  1  completed at step  18
25.240765450380636
-------
TRYOUT[527]: (hidden_dim=128, max_step=10000, gamma=0.999, n_episode=5000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  45 28
TASK NUMBER  1  DEST:  7 

TASK NUMBER  0  DEST:  15 7
TASK NUMBER  1  DEST:  12 8
19.517707426379545
-------
TRYOUT[535]: (hidden_dim=64, max_step=5000, gamma=0.9999, n_episode=5000, buffer_size=100000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  27 14
TASK NUMBER  1  DEST:  32 18
7.431825380133313
TASK NUMBER  0  DEST:  1 7
TASK NUMBER  1  DEST:  16 14
9.286347651228226
TASK NUMBER  0  DEST:  45 28
TASK NUMBER  1  DEST:  10 35
11.87677738648868
TASK NUMBER  0  DEST:  44 6
TASK NUMBER  1  DEST:  19 26
15.164368109839158
TASK NUMBER  0  DEST:  4 40
TASK NUMBER  1  DEST:  6 39
9.819987159649779
TASK NUMBER  0  DEST:  26 39
TASK NUMBER  1  DEST:  13 16
16.6506487527527
TASK NUMBER  0  DEST:  19 28
TASK NUMBER  1  DEST:  17 5
13.395093050810264
TASK NUMBER  0  DEST:  24 9
TASK NUMBER  1  DEST:  22 18
13.354440578190406
TASK NUMBER  0  DEST:  19 8
TASK NUMBER  1  DEST:  5 21
21.072046060366528
TASK NUMBER  0  DEST:  41 22
TASK NUMBER  1  DEST:  28 23
16.0612697550209

TASK NUMBER  0  DEST:  4 36
TASK NUMBER  1  DEST:  23 18
11.837549453205098
TASK NUMBER  0  DEST:  12 18
TASK NUMBER  1  DEST:  32 7
11.43814385400601
TASK NUMBER  0  DEST:  15 18
TASK NUMBER  1  DEST:  15 38
25.822514902580895
TASK NUMBER  0  DEST:  42 40
TASK NUMBER  1  DEST:  31 41
16.766327721705668
-------
TRYOUT[544]: (hidden_dim=64, max_step=1000, gamma=0.99, n_episode=1000, buffer_size=10000000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  16 17
TASK NUMBER  1  DEST:  40 6
10.135490336498052
TASK NUMBER  0  DEST:  10 17
TASK NUMBER  1  DEST:  6 34
6.100727761958986
TASK NUMBER  0  DEST:  19 44
TASK NUMBER  1  DEST:  19 49
7.761194321011543
TASK NUMBER  0  DEST:  27 27
TASK NUMBER  1  DEST:  37 25
16.041382182345696
TASK NUMBER  0  DEST:  43 15
TASK NUMBER  1  DEST:  46 29
17.969706288066625
TASK NUMBER  0  DEST:  11 32
TASK NUMBER  1  DEST:  38 15
10.09416123975634
TASK NUMBER  0  DEST:  1 7
TASK NUMBER  1  DEST:  21 40
8.5347041

TASK NUMBER  0  DEST:  45 11
TASK NUMBER  1  DEST:  46 32
11.476122564965413
TASK NUMBER  0  DEST:  2 15
TASK NUMBER  1  DEST:  4 6
13.517836466032065
TASK NUMBER  0  DEST:  2 30
TASK NUMBER  1  DEST:  15 39
6.009490867142882
TASK NUMBER  0  DEST:  19 29
TASK NUMBER  1  DEST:  39 12
18.70368581127187
TASK NUMBER  0  DEST:  42 7
TASK NUMBER  1  DEST:  19 6
28.336107503907492
TASK NUMBER  0  DEST:  6 42
TASK NUMBER  1  DEST:  8 1
5.447264754162835
-------
TRYOUT[553]: (hidden_dim=64, max_step=500, gamma=0.999, n_episode=1000, buffer_size=10000000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  40 14
TASK NUMBER  1  DEST:  10 20
13.819888876114446
TASK NUMBER  0  DEST:  19 30
TASK NUMBER  1  DEST:  39 17
9.293840004815756
TASK NUMBER  0  DEST:  36 42
TASK NUMBER  1  DEST:  20 36
23.488919332965153
TASK NUMBER  0  DEST:  19 41
TASK NUMBER  1  DEST:  13 21
14.904834094179476
TASK NUMBER  0  DEST:  5 21
TASK NUMBER  1  DEST:  31 46
24.7361393077

TASK NUMBER  0  DEST:  7 31
TASK NUMBER  1  DEST:  30 29
25.4221127504646
TASK NUMBER  0  DEST:  41 0
TASK NUMBER  1  DEST:  29 17
15.72713649300129
TASK NUMBER  0  DEST:  1 33
TASK NUMBER  1  DEST:  36 47
5.80999272157272
TASK NUMBER  0  DEST:  19 26
TASK NUMBER  1  DEST:  12 27
7.7165444949804325
TASK NUMBER  0  DEST:  34 46
TASK NUMBER  1  DEST:  49 16
13.307337245038303
TASK NUMBER  0  DEST:  4 14
TASK NUMBER  1  DEST:  25 37
14.346950792957278
TASK NUMBER  0  DEST:  3 19
TASK NUMBER  1  DEST:  25 46
9.808733910936986
TASK NUMBER  0  DEST:  41 3
TASK NUMBER  1  DEST:  27 22
13.495322735453618
-------
TRYOUT[562]: (hidden_dim=64, max_step=10000, gamma=0.999, n_episode=1000, buffer_size=10000000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  6 28
TASK NUMBER  1  DEST:  11 32
8.11505488990855
TASK NUMBER  0  DEST:  23 19
TASK NUMBER  1  DEST:  11 26
7.950984056621362
TASK NUMBER  0  DEST:  42 32
TASK NUMBER  1  DEST:  18 15
24.1674474931

TASK NUMBER  0  DEST:  43 20
TASK NUMBER  1  DEST:  9 25
19.107538565627138
TASK NUMBER  0  DEST:  35 27
TASK NUMBER  1  DEST:  16 34
11.491240239418522
TASK NUMBER  0  DEST:  25 4
TASK NUMBER  1  DEST:  24 10
12.091014302744673
TASK NUMBER  0  DEST:  18 28
TASK NUMBER  1  DEST:  29 1
8.0541089545755
TASK NUMBER  0  DEST:  8 4
TASK NUMBER  1  DEST:  9 16
21.17548017681801
TASK NUMBER  0  DEST:  32 32
TASK NUMBER  1  DEST:  9 27
33.23416217998395
TASK NUMBER  0  DEST:  9 40
TASK NUMBER  1  DEST:  27 27
12.069152741816616
TASK NUMBER  0  DEST:  24 21
TASK NUMBER  1  DEST:  15 41
26.912499973818683
TASK NUMBER  0  DEST:  29 20
TASK NUMBER  1  DEST:  38 8
Task  0  completed at step  50
27.11342653589691
-------
TRYOUT[571]: (hidden_dim=64, max_step=5000, gamma=0.9999, n_episode=1000, buffer_size=10000000, batch_size=64, n_epoch=25, epsilon=0.5, tau=0.5, learning_rate=0.0005)
TASK NUMBER  0  DEST:  15 25
TASK NUMBER  1  DEST:  19 26
12.58868342833885
TASK NUMBER  0  DEST:  28 46
TASK NUMBER

KeyboardInterrupt: 

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/main.py

In [None]:
# ---------------------------------------------------------------------------
# Training driver for the multi-task DGN agent on GridWorldWithCare.
#
# Relies on names defined in earlier notebook cells: GridWorldWithCare,
# ReplayBufferGCare, MTRL_DGN, NUM_TASKS and the hyper-parameters
# buffer_size, hidden_dim, batch_size, n_episode, i_episode, max_step,
# n_epoch, epsilon, score, tau and GAMMA.
#
# Per episode:
#   1. roll out `max_step` epsilon-greedy steps and store every transition,
#   2. every 20 episodes print/log the accumulated score and reset it,
#   3. run `n_epoch` DQN-style updates on sampled mini-batches, blending the
#      online weights into the target network on every 10th update.
# ---------------------------------------------------------------------------
env = GridWorldWithCare(NUM_TASKS)
observation_space = env.len_obs
n_actions = env.n_action
n_tasks = env.n_tasks

# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs (the original hard-coded .cuda() and failed without CUDA).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

buff = ReplayBufferGCare(buffer_size, observation_space, n_actions, n_tasks)
model = MTRL_DGN(n_tasks, observation_space, hidden_dim, n_actions).to(device)
model_tar = MTRL_DGN(n_tasks, observation_space, hidden_dim, n_actions).to(device)
# NOTE(review): lr is hard-coded to 0.001 here, while the sweep logs in this
# notebook report learning_rate=0.0005 -- confirm which value is intended.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Disabled ATOC attention unit (kept for reference):
# att = MTRL_ATT(observation_space).cuda()
# att_tar = MTRL_ATT(observation_space).cuda()
# att_tar.load_state_dict(att.state_dict())
# optimizer_att = optim.Adam(att.parameters(), lr = 0.001)
criterion = nn.BCELoss()  # only referenced by the disabled attention code

# Batched identity / zero adjacency matrices; only the disabled attention
# code below reads them.
M_Null = torch.Tensor(np.array([np.eye(n_tasks)] * batch_size)).to(device)
M_ZERO = torch.Tensor(np.zeros((batch_size, n_tasks, n_tasks))).to(device)
# threshold = float(sys.argv[1]) TODO: figure this out
# f = open(sys.argv[1]+'-'+sys.argv[2]+'.txt','w+')

# The context manager guarantees the score log is flushed and closed even
# when the run is interrupted (e.g. KeyboardInterrupt from the notebook).
with open("TRIAL-9-SoftAttention-Concat.txt", "w+") as f:
    while i_episode < n_episode:
        # Linear epsilon decay after the first 40 episodes, floored at 0.01.
        if i_episode > 40:
            epsilon = max(epsilon - 0.001, 0.01)
        i_episode += 1
        steps = 0
        obs, adj = env.reset()
        obs = np.resize(obs, (n_tasks, observation_space))

        # ------------------------- rollout ---------------------------------
        # NOTE(review): `terminated` is stored in the buffer but never ends
        # the episode; the loop always runs `max_step` steps -- confirm this
        # is intended.
        while steps < max_step:
            steps += 1
            # Disabled ATOC neighbour pruning (kept for reference):
            #     cost_all += adj.sum()
            #     v_a = np.array(att(torch.Tensor(np.array([obs])).cuda())[0].cpu().data)
            #     for i in range(n_tasks):
            #         if np.random.rand() < epsilon:
            #             adj[i] = adj[i]*0 if np.random.rand() < 0.5 else adj[i]*1
            #         else:
            #             adj[i] = adj[i]*0 if v_a[i][0] < threshold else adj[i]*1
            # Note: the loop above is epsilon-greedy exploration that gives
            # less importance to observations that fall below a threshold.
            # It may not be needed with a single encoder but could be useful
            # with a mixture of encoders (pruning "less important"
            # neighbours whose observations fall below the threshold).
            #     n_adj = adj*comm_flag
            #     cost_comm += n_adj.sum()
            #     n_adj = n_adj + np.eye(n_tasks)

            # Action selection needs no gradients, so skip graph building.
            with torch.no_grad():
                obs_batch = torch.Tensor(np.array([obs])).to(device)
                adj_batch = torch.Tensor(np.array([adj])).to(device)
                # Only row 0 of the (single-sample) model output drives the
                # action choice.
                q = model(obs_batch, adj_batch)[0, 0, :]

            # Epsilon-greedy: random action with probability epsilon.
            if np.random.rand() < epsilon:
                action = np.random.randint(n_actions)
            else:
                action = q.argmax().item()

            next_obs, next_adj, reward, terminated = env.step(action)
            next_obs = np.resize(next_obs, (n_tasks, observation_space))

            buff.add(np.array(obs), action, reward, np.array(next_obs),
                     adj, next_adj, terminated)

            obs = next_obs
            adj = next_adj
            score += reward

        # ------------------------- logging ---------------------------------
        # `score` accumulates over 20 episodes before being reported/reset.
        if i_episode % 20 == 0:
            print(score)
            f.write(str(score) + '\n')
            # Cost (neighbors in adj matrix) after pruning / cost before pruning
            f.flush()
            score = 0

        # Disabled warm-up (training currently starts from the first episode):
        #     if i_episode < 40:
        #         continue

        # ------------------------- learning --------------------------------
        for e in range(n_epoch):
            O, A, R, Next_O, Matrix, Next_Matrix, D = buff.getBatch(batch_size)
            O = torch.Tensor(O).to(device)
            Matrix = torch.Tensor(Matrix).to(device)
            Next_O = torch.Tensor(Next_O).to(device)
            Next_Matrix = torch.Tensor(Next_Matrix).to(device)

            # Disabled ATOC attention training (kept for reference):
            #     label = model(Next_O, Next_Matrix+M_Null).max(dim = 2)[0] - model(Next_O, M_Null).max(dim = 2)[0]
            #     label = (label - label.mean())/(label.std()+0.000001) + 0.5
            #     label = torch.clamp(label, 0, 1).unsqueeze(-1).detach()
            #     loss = criterion(a(Next_O), label)
            #     optimizer_att.zero_grad()
            #     loss.backward()
            #     optimizer_att.step()
            # Basically att is learning which observations from the maze
            # help return the max Q value.
            #     V_A_D = att_tar(Next_O).expand(-1,-1,n_ant)
            #     Next_Matrix = torch.where(V_A_D > threshold, Next_Matrix, M_ZERO)
            #     Next_Matrix = Next_Matrix*comm_flag + M_Null

            # Single forward pass (the original ran the model twice and threw
            # the first result away); keep row 0 of every sample.
            q_values = model(O, Matrix)[:, 0, :]

            # Bootstrap value: max over actions of the target network's
            # row-0 output. No gradient flows through the target.
            with torch.no_grad():
                target_q_values = model_tar(Next_O, Next_Matrix).max(dim=2)[0][:, 0]
            target_q_values = target_q_values.cpu().numpy()

            # Start from the current predictions so only the taken action's
            # entry contributes to the loss. The explicit copy matters on
            # CPU, where .numpy() would otherwise alias q_values' storage.
            expected_q = q_values.detach().cpu().numpy().copy()
            for j in range(batch_size):
                expected_q[j][A[j][0]] = R[j][0] + (1 - D[j][0]) * GAMMA * target_q_values[j]

            loss = (q_values - torch.Tensor(expected_q).to(device)).pow(2).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Soft (Polyak) target update on every 10th epoch:
            # target <- tau * target + (1 - tau) * online.
            if e % 10 == 0:
                with torch.no_grad():
                    for p, p_targ in zip(model.parameters(), model_tar.parameters()):
                        p_targ.data.mul_(tau)
                        p_targ.data.add_((1 - tau) * p.data)
                    # Disabled: same update for the attention unit.
                    #     for p, p_targ in zip(att.parameters(), att_tar.parameters()):
                    #         p_targ.data.mul_(tau)
                    #         p_targ.data.add_((1 - tau) * p.data)

TASK NUMBER  0  DEST:  27 20
TASK NUMBER  1  DEST:  19 16
TASK NUMBER  0  DEST:  26 3
TASK NUMBER  1  DEST:  38 3
TASK NUMBER  0  DEST:  8 25
TASK NUMBER  1  DEST:  30 13
TASK NUMBER  0  DEST:  14 31
TASK NUMBER  1  DEST:  32 8
TASK NUMBER  0  DEST:  2 31
TASK NUMBER  1  DEST:  11 3
TASK NUMBER  0  DEST:  39 35
TASK NUMBER  1  DEST:  30 8
TASK NUMBER  0  DEST:  32 1
TASK NUMBER  1  DEST:  29 31
TASK NUMBER  0  DEST:  21 0
TASK NUMBER  1  DEST:  29 46
TASK NUMBER  0  DEST:  21 37
TASK NUMBER  1  DEST:  38 11
TASK NUMBER  0  DEST:  33 35
TASK NUMBER  1  DEST:  20 17
TASK NUMBER  0  DEST:  34 35
TASK NUMBER  1  DEST:  49 5
TASK NUMBER  0  DEST:  28 5
TASK NUMBER  1  DEST:  10 24
TASK NUMBER  0  DEST:  42 27
TASK NUMBER  1  DEST:  7 10
TASK NUMBER  0  DEST:  6 17
TASK NUMBER  1  DEST:  16 29
Task  0  completed at step  148
TASK NUMBER  0  DEST:  33 13
TASK NUMBER  1  DEST:  23 46
Task  1  completed at step  559
TASK NUMBER  0  DEST:  35 44
TASK NUMBER  1  DEST:  19 16
TASK NUMBER  0  DEST:

Task  1  completed at step  1501
TASK NUMBER  0  DEST:  32 29
TASK NUMBER  1  DEST:  29 26
Task  0  completed at step  1775
Task  1  completed at step  1859
TASK NUMBER  0  DEST:  49 42
TASK NUMBER  1  DEST:  48 49
TASK NUMBER  0  DEST:  20 46
TASK NUMBER  1  DEST:  27 15
TASK NUMBER  0  DEST:  31 30
TASK NUMBER  1  DEST:  26 49
TASK NUMBER  0  DEST:  30 11
TASK NUMBER  1  DEST:  38 46
Task  1  completed at step  108
TASK NUMBER  0  DEST:  0 46
TASK NUMBER  1  DEST:  38 23
TASK NUMBER  0  DEST:  49 24
TASK NUMBER  1  DEST:  25 16
Task  1  completed at step  2980
TASK NUMBER  0  DEST:  25 22
TASK NUMBER  1  DEST:  6 47
TASK NUMBER  0  DEST:  41 40
TASK NUMBER  1  DEST:  36 33
TASK NUMBER  0  DEST:  42 47
TASK NUMBER  1  DEST:  21 21
TASK NUMBER  0  DEST:  48 27
TASK NUMBER  1  DEST:  5 10
Task  0  completed at step  371
TASK NUMBER  0  DEST:  17 43
TASK NUMBER  1  DEST:  14 39
TASK NUMBER  0  DEST:  11 28
TASK NUMBER  1  DEST:  42 32
TASK NUMBER  0  DEST:  1 12
TASK NUMBER  1  DEST:  45

TASK NUMBER  0  DEST:  23 38
TASK NUMBER  1  DEST:  24 46
Task  1  completed at step  291
TASK NUMBER  0  DEST:  35 0
TASK NUMBER  1  DEST:  8 33
Task  1  completed at step  692
TASK NUMBER  0  DEST:  14 40
TASK NUMBER  1  DEST:  30 32
Task  1  completed at step  700
TASK NUMBER  0  DEST:  29 5
TASK NUMBER  1  DEST:  21 48
Task  1  completed at step  201
TASK NUMBER  0  DEST:  1 30
TASK NUMBER  1  DEST:  0 0
TASK NUMBER  0  DEST:  16 41
TASK NUMBER  1  DEST:  14 13
Task  1  completed at step  195
TASK NUMBER  0  DEST:  23 3
TASK NUMBER  1  DEST:  45 41
Task  1  completed at step  139
TASK NUMBER  0  DEST:  11 48
TASK NUMBER  1  DEST:  28 1
Task  1  completed at step  55
TASK NUMBER  0  DEST:  14 47
TASK NUMBER  1  DEST:  12 8
Task  1  completed at step  96
TASK NUMBER  0  DEST:  7 14
TASK NUMBER  1  DEST:  39 24
Task  1  completed at step  62
43745.12080061297
TASK NUMBER  0  DEST:  46 28
TASK NUMBER  1  DEST:  15 14
TASK NUMBER  0  DEST:  43 35
TASK NUMBER  1  DEST:  38 27
Task  1  co