https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/config.py


In [1]:
import numpy as np
np.random.seed(14)
import math, random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F
import copy
import os,sys
from torch.utils import tensorboard as tb



In [2]:
# Side length of the square grid world; task and agent coordinates are
# sampled from [0, GRID_DIM) in GridWorldWithCare.build_env.
GRID_DIM = 50 # TODO: Tune this
# Number of destination cells ("tasks") the environment is created with.
NUM_TASKS = 5 # TODO: Tune this
# Number of MTRL_AttModel layers built by MTRL_DGN. They are applied one after
# another and every intermediate output is concatenated for the Q-network.
NUM_ATTENTION_HEADS = 5
# Distance threshold for the hard-attention adjacency variant; that variant is
# currently disabled in GridWorldWithCare.get_adj.
ADJ_THRESHOLD = GRID_DIM / 4 # TODO: Tune this
# True: an episode runs until every task has been completed.
# False: an episode always runs for `max_step` steps.
WAIT_TILL_ALL_TASKS_DONE = False
# True: observations hold one Euclidean distance per task.
# False: observations hold the (dx, dy) offset to every task.
USE_OBS_DIST = False
# Whether a CUDA device is available to PyTorch in this process.
USE_CUDA = torch.cuda.is_available()
# Helper that builds an autograd.Variable and moves it to the GPU when USE_CUDA is set.
Variable = lambda *args, **kwargs: autograd.Variable(*args, **kwargs).cuda() if USE_CUDA else autograd.Variable(*args, **kwargs)

def is_legal(x, y, grid_dim=None):
    """
    Tell whether the cell (x, y) lies inside the square grid.

    Params:
    x: row index (or array of indices)
    y: column index (or array of indices)
    grid_dim: side length of the grid; defaults to the module-level GRID_DIM

    Returns:
    True when 0 <= x < grid_dim and 0 <= y < grid_dim
    """
    if grid_dim is None:
        grid_dim = GRID_DIM
    # Both axes use an exclusive upper bound: valid indices are 0 .. grid_dim - 1.
    # `&` (rather than `and`) is kept so the check also works element-wise on arrays.
    return (x >= 0) & (x < grid_dim) & (y >= 0) & (y < grid_dim)

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/buffer.py

In [3]:
class ReplayBufferGCare(object):
    """
    Fixed-capacity ring buffer of transitions, backed by pre-allocated NumPy arrays.

    Once `buffer_size` transitions have been stored, new ones overwrite the oldest.
    """

    def __init__(self, buffer_size, obs_space, n_action, n_tasks):
        """
        Allocate zero-filled storage for `buffer_size` transitions

        Params:
        buffer_size: maximum number of transitions kept
        obs_space: length of one observation vector
        n_action: accepted for interface compatibility; not used by the buffer
        n_tasks: number of tasks (rows of an observation / side of an adjacency matrix)
        """
        self.buffer_size = buffer_size
        self.n_tasks = n_tasks
        self.pointer = 0  # slot the next transition is written to
        self.len = 0      # number of valid transitions currently stored

        per_step = (buffer_size, 1)
        per_obs = (buffer_size, n_tasks, obs_space)
        per_adj = (buffer_size, n_tasks, n_tasks)

        self.actions = np.zeros(per_step, dtype=np.int32)
        self.rewards = np.zeros(per_step)
        self.dones = np.zeros(per_step)
        self.obs = np.zeros(per_obs)
        self.next_obs = np.zeros(per_obs)
        self.matrix = np.zeros(per_adj)
        self.next_matrix = np.zeros(per_adj)

    def getBatch(self, batch_size):
        """
        Draw `batch_size` distinct stored transitions uniformly at random

        Sampling is without replacement, so NumPy raises ValueError when fewer
        than `batch_size` transitions are stored.

        Params:
        batch_size: number of transitions to draw

        Returns:
        tuple (obs, action, reward, next_obs, matrix, next_matrix, done), each
        indexed along the first axis by the same sampled positions
        """
        picked = np.random.choice(self.len, batch_size, replace=False)
        fields = (
            self.obs,
            self.actions,
            self.rewards,
            self.next_obs,
            self.matrix,
            self.next_matrix,
            self.dones,
        )
        return tuple(store[picked] for store in fields)

    def add(self, obs, action, reward, next_obs, matrix, next_matrix, done):
        """
        Store one transition at the write pointer, overwriting the oldest when full

        Params:
        obs: observation before the step
        action: action taken
        reward: reward received
        next_obs: observation after the step
        matrix: adjacency matrix before the step
        next_matrix: adjacency matrix after the step
        done: termination flag
        """
        slot = self.pointer
        writes = (
            (self.obs, obs),
            (self.actions, action),
            (self.rewards, reward),
            (self.next_obs, next_obs),
            (self.matrix, matrix),
            (self.next_matrix, next_matrix),
            (self.dones, done),
        )
        for store, value in writes:
            store[slot] = value

        # Advance circularly and grow the valid length up to the capacity.
        self.pointer = (slot + 1) % self.buffer_size
        self.len = min(self.len + 1, self.buffer_size)

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/model.py

In [4]:
class MTRL_ATT(nn.Module):
    """
    Small MLP (din -> 64 -> 64 -> 1) whose output is squashed into (0, 1) by a sigmoid.
    """
    def __init__(self, din):
        super().__init__()
        self.fc1 = nn.Linear(din, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        """
        Params:
        x: tensor whose last dimension has size `din`

        Returns:
        tensor with the same leading dimensions and a last dimension of size 1
        """
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return torch.sigmoid(self.fc3(hidden))

class MTRL_Encoder(nn.Module): # TODO: Need to make it a CNN for higher dim obs space like MetaWorld
    """
    Single linear layer followed by tanh, used to embed raw observations.
    """
    def __init__(self, din=32, hidden_dim=128):
        super().__init__()
        self.fc = nn.Linear(din, hidden_dim)

    def forward(self, x):
        """
        Params:
        x: tensor whose last dimension has size `din`

        Returns:
        embedding with last dimension `hidden_dim`, values in (-1, 1)
        """
        return torch.tanh(self.fc(x))

class MTRL_AttModel(nn.Module):
    """
    One masked dot-product attention layer over the node (task) dimension.

    `n_node` is accepted but unused, and `fcout` is created but never applied
    in `forward`, so the output width is `hidden_dim` regardless of `dout`.
    """
    def __init__(self, n_node, din, hidden_dim, dout):
        super().__init__()
        self.fcv = nn.Linear(din, hidden_dim)
        self.fck = nn.Linear(din, hidden_dim)
        self.fcq = nn.Linear(din, hidden_dim)
        self.fcout = nn.Linear(hidden_dim, dout)

    def forward(self, x, mask):
        """
        Params:
        x: node features; used with batched matmul, so expected as (batch, nodes, din)
        mask: adjacency weights broadcastable against the (batch, nodes, nodes) scores

        Returns:
        attention-weighted values of shape (batch, nodes, hidden_dim)
        """
        values = torch.tanh(self.fcv(x))
        queries = torch.tanh(self.fcq(x))
        keys_t = torch.tanh(self.fck(x)).transpose(1, 2)

        # Note: Order of applying adj matrix is different than that in paper. Don't get confused!
        # Scores are scaled by the mask, then entries with mask < 1 are pushed
        # towards -inf before the softmax.
        # NOTE(review): for mask values strictly between 0 and 1 the 9e15 penalty
        # still dominates, so only entries with mask == 1 keep a non-negligible
        # weight; confirm this is intended for soft adjacency matrices.
        scores = torch.bmm(queries, keys_t) * mask - 9e15 * (1 - mask)
        weights = torch.softmax(scores, dim=2)
        return torch.bmm(weights, values)

class MTRL_Q_Net(nn.Module):
    """
    Linear Q-value head followed by a ReLU, so the produced Q-values are non-negative.
    """
    def __init__(self, hidden_dim, dout):
        super().__init__()
        # NOTE: The input is the concatenation of the encoder output and the
        # output of every attention layer (originally only the last layer's
        # h vector was used, giving an input width of hidden_dim).
        in_features = hidden_dim * (NUM_ATTENTION_HEADS + 1)
        self.fc = nn.Linear(in_features, dout)

    def forward(self, x):
        """
        Params:
        x: tensor whose last dimension is hidden_dim * (NUM_ATTENTION_HEADS + 1)

        Returns:
        tensor with last dimension `dout`
        """
        return torch.relu(self.fc(x))

    
class MTRL_DGN(nn.Module):
    """
    Graph network: encoder -> stack of attention layers -> Q-value head.

    The encoder output and the output of every attention layer are
    concatenated along the feature dimension and fed to the Q-network.
    """
    def __init__(self,n_tasks,num_inputs,hidden_dim,num_actions):
        """
        Params:
        n_tasks: number of nodes (tasks) in the graph
        num_inputs: length of one observation vector
        hidden_dim: width of the embedding and of every attention layer
        num_actions: number of Q-values produced per node
        """
        super(MTRL_DGN, self).__init__()

        self.encoder = MTRL_Encoder(num_inputs,hidden_dim)
        # The layers must live in an nn.ModuleList: a plain Python list hides
        # their parameters from model.parameters(), so they would never be
        # optimised, never be copied into the target network, and never follow
        # the model when it is moved with .cuda()/.to().
        self.attention_heads = nn.ModuleList(
            [MTRL_AttModel(n_tasks,hidden_dim,hidden_dim,hidden_dim) for _ in range(NUM_ATTENTION_HEADS)]
        )
        self.q_net = MTRL_Q_Net(hidden_dim,num_actions)

    def forward(self, x, mask):
        """
        Params:
        x: observations, expected as (batch, n_tasks, num_inputs)
        mask: adjacency matrices, expected as (batch, n_tasks, n_tasks)

        Returns:
        Q-values with last dimension `num_actions`
        """
        # Follow the input's device instead of forcing CUDA, so the model also
        # runs on CPU-only machines.
        mask = mask.to(x.device)
        h = self.encoder(x)
        layer_outputs = [h]
        # The layers are chained: each one consumes the previous layer's output.
        for attention in self.attention_heads:
            h = attention(h, mask)
            layer_outputs.append(h)

        h = torch.cat(layer_outputs, dim=-1)
        q = self.q_net(h)
        return q

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/surviving.py

In [5]:
class GridWorldWithCare(object):
    """
    Grid world with one agent and `n_tasks` destination cells ("tasks").

    A task is completed the first time the agent steps on its cell.
    """

    def __init__(self, n_tasks):
        """
        Create the world and place the tasks and the agent at random

        Params:
        n_tasks: number of destination cells
        """
        super().__init__()
        self.n_action = 4
        self.n_tasks = n_tasks
        # TODO: maybe include food as part of task, reach dest with > 0 food or something
        self.tasks = [0 for _ in range(self.n_tasks)]
        self.agent = [-1, -1]
        self.build_env()

        # One flag per task; 1 once the task has been completed (drives the rewards).
        self.dones = np.zeros(self.n_tasks)
        self.steps = 0
        # Observation length: agent (x, y) plus either one distance or one
        # (dx, dy) pair per task.
        self.len_obs = self.n_tasks + 2 if USE_OBS_DIST else 2 * (self.n_tasks + 1)

    def reset(self):
        """
        Re-randomise the world and clear the episode state

        Returns:
        obs: initial observation (see get_obs)
        adj: initial adjacency matrix (see get_adj)
        """
        self.build_env()
        self.dones = np.zeros(self.n_tasks)
        self.steps = 0
        return self.get_obs(), self.get_adj()

    def build_env(self):
        """
        Sample a destination for every task, then a start cell for the agent
        """
        for task_id in range(self.n_tasks):
            dest_x = np.random.randint(0, GRID_DIM)
            dest_y = np.random.randint(0, GRID_DIM)
            self.tasks[task_id] = [dest_x, dest_y]
            print("TASK NUMBER ", task_id, " DEST: ", dest_x, dest_y)
        self.agent[0] = np.random.randint(0, GRID_DIM)
        self.agent[1] = np.random.randint(0, GRID_DIM)

    def get_obs(self):
        """
        Build the observation vector for the current state

        Returns:
        obs: list starting with the agent position scaled by GRID_DIM, followed
        per task by either its scaled Euclidean distance to the agent
        (USE_OBS_DIST) or its scaled (dx, dy) offset from the agent
        """
        # TODO: change this for MTRL
        agent_x, agent_y = self.agent[0], self.agent[1]
        obs = [agent_x / GRID_DIM, agent_y / GRID_DIM]

        for task_id in range(self.n_tasks):
            dx = self.tasks[task_id][0] - agent_x
            dy = self.tasks[task_id][1] - agent_y
            if USE_OBS_DIST:
                obs.append(math.sqrt(dx**2 + dy**2) / GRID_DIM)
            else:
                obs.append(dx / GRID_DIM)
                obs.append(dy / GRID_DIM)

        # TODO: 1. if we include maze state or not, and if we do, we would need to figure out
        # how to effectively send that along with task destinations
        return obs

    def get_adj(self): # TODO: Change this to use task description encoding.
        # In this case task description is the location of the destination.
        """
        Build the soft adjacency matrix between tasks

        Entry (i, j) is 1 - distance(task_i, task_j) / GRID_DIM, so closer tasks
        get larger weights. A hard-attention variant (1 when the distance is at
        most ADJ_THRESHOLD, else 0) is currently disabled.

        Returns:
        adj: (n_tasks, n_tasks) array
        """
        agent_x, agent_y = self.agent[0], self.agent[1]
        # Task positions relative to the agent, computed once per task.
        offsets = [
            (self.tasks[k][0] - agent_x, self.tasks[k][1] - agent_y)
            for k in range(self.n_tasks)
        ]

        adj = np.zeros((self.n_tasks, self.n_tasks))
        for i, (xi, yi) in enumerate(offsets):
            for j, (xj, yj) in enumerate(offsets):
                task_dist = math.sqrt((xj - xi)**2 + (yj - yi)**2)
                closeness = 1 - float(task_dist) / GRID_DIM
                adj[i, j] = closeness
                adj[j, i] = closeness
        return adj

    def step(self, action):
        """
        Advance the world by one agent action

        Params:
        action: 0 = up (x - 1), 1 = down (x + 1), 2 = left (y - 1), 3 = right (y + 1);
        the agent stays in place when the target cell is not legal

        Returns:
        obs: observation after the move
        adj: adjacency matrix after the move
        reward: 10 for every task completed by this step, else 0
        all_tasks_done: True once every task has been completed
        score: fraction of tasks completed so far
        """
        self.steps += 1

        # (action code, coordinate index, change)
        moves = ((0, 0, -1), (1, 0, 1), (2, 1, -1), (3, 1, 1))
        for code, axis, delta in moves:
            if action == code:
                target = [self.agent[0], self.agent[1]]
                target[axis] += delta
                if is_legal(target[0], target[1]):
                    self.agent[axis] += delta
                break

        # Reward every task whose cell was reached for the first time.
        here_x, here_y = self.agent[0], self.agent[1]
        total_reward = 0
        for task_id in range(self.n_tasks):
            on_target = self.tasks[task_id][0] == here_x and self.tasks[task_id][1] == here_y
            if on_target and self.dones[task_id] == 0:
                self.dones[task_id] = 1
                total_reward += 10
                print("Task ", task_id, " completed at step ", self.steps)
        # TODO: a soft reward (inverse distance to each unfinished task) was
        # considered here but is disabled.

        # The episode is finished only when no task is left undone.
        all_tasks_done = 0 not in self.dones
        score = sum(self.dones) / len(self.dones)

        return self.get_obs(), self.get_adj(), total_reward, all_tasks_done, score

In [6]:
# Class for making the training with different set of hyper parameters
class Training:
    """
    Train MTRL_DGN on GridWorldWithCare, optionally over a grid of hyper parameters.

    For every tryout (one hyper parameter combination) the highest 20-episode
    average score is recorded and written to the results file as
    tryout_index -> highest_score.

    params:
    file_name: name of the file that receives the results
    vizier: boolean to indicate whether to do hyper parameter search
    """
    def __init__(self, file_name, vizier=False):
        self.file_name = file_name
        self.file = open(file_name, "w+")
        self.vizier = vizier

        if self.vizier:
            # Every option for every hyper parameter; the keys are also the
            # attribute names set on `self` for each tryout.
            self.hyper_params = {
                'hidden_dim': [64],
                'max_step': [5000],
                'gamma': [0.99],
                'n_episode': [800],
                'buffer_size': [100000, 10000000],
                'batch_size': [128],
                'n_epoch': [100],
                'epsilon': [0.5, 0.7, 0.9],
                'tau': [0.95],
                'learning_rate': [0.0005, 0.001, 0.005]
            }

            # Cartesian product of all options; earlier keys vary fastest.
            tryouts = [{}]
            for key, values in self.hyper_params.items():
                tryouts = [{**combo, key: value} for value in values for combo in tryouts]
            self.tryouts = tryouts

            print('len(tryouts): {}'.format(len(self.tryouts)))
            print('tryouts: {}'.format(self.tryouts))
            self.num_tryout = len(self.tryouts)

        else:
            self.hidden_dim = 64
            self.max_step = 5000 #originally 500
            self.gamma = 0.99
            self.n_episode = 800 #originally 800
            self.buffer_size = 1000000 #change back to 65000
            self.batch_size = 64 #change back to 64
            self.n_epoch = 100 #orginally 25
            self.epsilon = 0.7 #originally 0.9
            self.tau = 0.95
            self.learning_rate = 0.00005

            self.num_tryout = 1

        # Highest 20-episode average score reached in each tryout.
        self.highest_scores = [0] * self.num_tryout

    def close(self):
        """Close the results file; safe to call more than once."""
        if not self.file.closed:
            self.file.close()

    def _run_episode(self, env, model, buff, device, writer, global_step_count):
        """
        Roll out one epsilon-greedy episode and store every transition in `buff`

        Returns:
        (sum of the per-step scores, updated global step count)
        """
        n_tasks = env.n_tasks
        observation_space = env.len_obs
        obs, adj = env.reset()
        # The flat observation is repeated so that every task row sees it.
        obs = np.resize(obs, (n_tasks, observation_space))

        steps = 0
        terminated = False
        episode_score = 0
        # Either run until all tasks are done or for a fixed number of steps.
        while (not terminated) if WAIT_TILL_ALL_TASKS_DONE else (steps < self.max_step):
            steps += 1
            global_step_count += 1
            # Action selection needs no gradients.
            with torch.no_grad():
                q = model(torch.Tensor(np.array([obs])).to(device), torch.Tensor(np.array([adj])).to(device))[0,0,:]
            if np.random.rand() < self.epsilon:
                action = np.random.randint(env.n_action)
            else:
                action = q.argmax().item()
            next_obs, next_adj, reward, terminated, step_score = env.step(action)
            if writer is not None:
                writer.add_scalar("Reward on one step", reward, global_step_count)
            next_obs = np.resize(next_obs, (n_tasks, observation_space))
            buff.add(np.array(obs),action,reward,np.array(next_obs),adj,next_adj,terminated)

            obs = next_obs
            adj = next_adj
            episode_score += step_score

        return episode_score, global_step_count

    def _fit_on_replay(self, model, model_tar, optimizer, buff, device):
        """
        Run `n_epoch` Q-learning updates on batches sampled from `buff`

        Returns:
        sum of the batch losses as a Python float (0.0 when the buffer does not
        yet hold a full batch)
        """
        # Sampling is without replacement, so a full batch must be available.
        if buff.len < self.batch_size:
            return 0.0

        rows = np.arange(self.batch_size)
        total_loss = 0.0
        for e in range(self.n_epoch):
            O,A,R,Next_O,Matrix,Next_Matrix,D = buff.getBatch(self.batch_size)
            O = torch.Tensor(O).to(device)
            Matrix = torch.Tensor(Matrix).to(device)
            Next_O = torch.Tensor(Next_O).to(device)
            Next_Matrix = torch.Tensor(Next_Matrix).to(device)

            q_values = model(O, Matrix)[:,0, :]
            with torch.no_grad():
                target_q_values = model_tar(Next_O, Next_Matrix).max(dim = 2)[0][:,0]
            target_q_values = target_q_values.cpu().numpy()
            # Copy: the target must not share memory with q_values.
            expected_q = q_values.detach().cpu().numpy().copy()

            # Only the Q-value of the action actually taken gets a TD target;
            # the others keep their current value and contribute zero loss.
            expected_q[rows, A[:, 0]] = R[:, 0] + (1 - D[:, 0]) * self.gamma * target_q_values

            loss = (q_values - torch.Tensor(expected_q).to(device)).pow(2).mean()
            # .item() avoids keeping every batch's autograd graph alive.
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Soft update of the target network every 10 batches.
            if e%10 == 0:
                with torch.no_grad():
                    for p, p_targ in zip(model.parameters(), model_tar.parameters()):
                        p_targ.data.mul_(self.tau)
                        p_targ.data.add_((1 - self.tau) * p.data)

        return total_loss

    def train(self):
        """
        Run every tryout and append one result line per tryout to the results file
        """
        env = GridWorldWithCare(NUM_TASKS)
        observation_space = env.len_obs
        n_actions = env.n_action
        n_tasks = env.n_tasks
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        for tryout_index in range(self.num_tryout):
            if self.vizier:
                # The tryout keys are exactly the hyper parameter attribute names.
                for name, value in self.tryouts[tryout_index].items():
                    setattr(self, name, value)

            print('-------\nTRYOUT[{}]: (hidden_dim={}, max_step={}, gamma={}, n_episode={}, buffer_size={}, batch_size={}, n_epoch={}, epsilon={}, tau={}, learning_rate={})'.format(
                tryout_index, self.hidden_dim, self.max_step, self.gamma, self.n_episode, self.buffer_size, self.batch_size, self.n_epoch, self.epsilon, self.tau, self.learning_rate
            ))

            buff = ReplayBufferGCare(self.buffer_size, observation_space, n_actions, n_tasks)
            model = MTRL_DGN(n_tasks, observation_space, self.hidden_dim, n_actions).to(device)
            model_tar = MTRL_DGN(n_tasks, observation_space, self.hidden_dim, n_actions).to(device)
            optimizer = optim.Adam(model.parameters(), lr = self.learning_rate)

            # TensorBoard logging is only done for single (non-search) runs.
            tb_summary_writer = None
            if not self.vizier:
                tb_summary_writer = tb.SummaryWriter(log_dir = "./TB-Logs/"+self.file_name.split(".txt")[0])
            # Always initialised: the rollout counts steps in both modes.
            global_step_count = 0
            score = 0

            for episode in range(self.n_episode):
                # Linear epsilon decay after the first episodes, floored at 0.01.
                if episode > 40:
                    self.epsilon = max(self.epsilon - 0.001, 0.01)
                i_episode = episode + 1

                episode_score, global_step_count = self._run_episode(
                    env, model, buff, device, tb_summary_writer, global_step_count
                )
                score += episode_score

                if i_episode%20==0:
                    average_score = score/20
                    print(average_score)
                    if tb_summary_writer is not None:
                        tb_summary_writer.add_scalar("Score", average_score, global_step_count)
                    # Tracked in both modes because it is always written to the file.
                    if average_score > self.highest_scores[tryout_index]:
                        self.highest_scores[tryout_index] = average_score
                    score = 0

                episode_loss = self._fit_on_replay(model, model_tar, optimizer, buff, device)

                if tb_summary_writer is not None:
                    tb_summary_writer.add_scalar('Loss', episode_loss, global_step_count)

            if tb_summary_writer is not None:
                tb_summary_writer.close()

            # Print the highest score to the file
            self.file.write('Tryout[{}]: (hidden_dim={}, max_step={}, gamma={}, n_episode={}, buffer_size={}, batch_size={}, n_epoch={}, epsilon={}, tau={}, learning_rate={}): \t ---> \t Highest Score: {}\n'.format(
                tryout_index, self.hidden_dim, self.max_step, self.gamma, self.n_episode, self.buffer_size, self.batch_size, self.n_epoch, self.epsilon, self.tau, self.learning_rate, self.highest_scores[tryout_index]
            ))

            self.file.flush()

In [None]:
# The run configuration is encoded in the log file name so that results of
# different settings do not overwrite each other.
log_file_name = "{}DIM_{}TASKS_{}ATT_{}WAITALLTASKS_{}USEOBSDIST.txt".format(
    GRID_DIM,
    NUM_TASKS,
    NUM_ATTENTION_HEADS,
    int(WAIT_TILL_ALL_TASKS_DONE),
    int(USE_OBS_DIST),
)
# Second argument False: single run with the default hyper parameters (no search).
training = Training(log_file_name, False)
training.train()

TASK NUMBER  0  DEST:  43 24
TASK NUMBER  1  DEST:  12 22
TASK NUMBER  2  DEST:  7 38
TASK NUMBER  3  DEST:  42 28
TASK NUMBER  4  DEST:  10 16
-------
TRYOUT[0]: (hidden_dim=64, max_step=5000, gamma=0.99, n_episode=800, buffer_size=1000000, batch_size=64, n_epoch=100, epsilon=0.7, tau=0.95, learning_rate=0.0005)
TASK NUMBER  0  DEST:  26 25
TASK NUMBER  1  DEST:  23 43
TASK NUMBER  2  DEST:  32 4
TASK NUMBER  3  DEST:  13 10
TASK NUMBER  4  DEST:  10 0




Task  4  completed at step  177
TASK NUMBER  0  DEST:  17 38
TASK NUMBER  1  DEST:  44 33
TASK NUMBER  2  DEST:  29 27
TASK NUMBER  3  DEST:  11 1
TASK NUMBER  4  DEST:  34 39
Task  3  completed at step  84
TASK NUMBER  0  DEST:  3 25
TASK NUMBER  1  DEST:  46 49
TASK NUMBER  2  DEST:  14 17
TASK NUMBER  3  DEST:  32 36
TASK NUMBER  4  DEST:  17 27
TASK NUMBER  0  DEST:  24 43
TASK NUMBER  1  DEST:  12 45
TASK NUMBER  2  DEST:  6 45
TASK NUMBER  3  DEST:  22 16
TASK NUMBER  4  DEST:  10 33
Task  3  completed at step  169
TASK NUMBER  0  DEST:  41 16
TASK NUMBER  1  DEST:  49 38
TASK NUMBER  2  DEST:  0 39
TASK NUMBER  3  DEST:  20 0
TASK NUMBER  4  DEST:  15 45
TASK NUMBER  0  DEST:  11 15
TASK NUMBER  1  DEST:  1 15
TASK NUMBER  2  DEST:  26 40
TASK NUMBER  3  DEST:  13 19
TASK NUMBER  4  DEST:  18 30
TASK NUMBER  0  DEST:  21 36
TASK NUMBER  1  DEST:  6 32
TASK NUMBER  2  DEST:  6 6
TASK NUMBER  3  DEST:  44 7
TASK NUMBER  4  DEST:  26 6
TASK NUMBER  0  DEST:  16 33
TASK NUMBER  1  D

https://github.com/jiechuanjiang/pytorch_DGN/blob/main/Surviving/DGN%2BATOC/main.py