In [1]:
# ROBOTICS assignment 2022-2023, PG Applied AI
# Suggesting interesting walks

# FRANK VAN GAAL, student number 202206412

In [2]:
import random
import numpy as np
import osmnx as ox
import networkx as nx
import gymnasium as gym
import stable_baselines3 as sb3

# needed to capture user input

import os
import keyboard

In [3]:
class MyEnv(gym.Env):

    
    def __init__(self):
        super().__init__()

        self.USER_RATING = False    # set to True when user has to rate walks for additional reward, otherwise False
        self.USER_INFO = True       # set to True when user wants to see walks (nodes) and obs features
        
        self.MIN_WALK_DIST = 417    # distance in meters
        self.MAX_WALK_DIST = 4167   # distance in meters
        
        self.MIN_WALK_TIME = self.MIN_WALK_DIST / 1000 * 1/5 * 60   # time in minutes v=5km/h
        self.MAX_WALK_TIME = self.MAX_WALK_DIST / 1000 * 1/5 * 60   # time in minutes v=5km/h

        # action space       
        
        self.action_space = gym.spaces.Discrete(3)
        
        # observation space
        
        self.observation_space = gym.spaces.Box(low=float('-inf'), high=float('Inf'), shape=(8,),
                                                dtype=np.float32)

        # location
        
        self.location = "Onafhankelijkheidsstraat 42, Edegem"
        
        # graph of walkable pathways in 500 m distance, dist can be made larger
        
        graph = ox.graph_from_address(self.location, dist=500, network_type='walk')

        # projected graph (includes coordinate system & measures)
        
        self.G = ox.project_graph(graph)

        # starting point of the walk (node)
        
        self.starting_point = 36242661
        
        # candidate nodes = all nodes from the graph
        
        self.candidate_nodes = list(self.G.nodes)
        
        # interesting nodes = nodes according to amenities (cafe, bar, restaurant, pub, library) 
        # these nodes are collected in advance using a seperate python script using osmnx
        # option 1: within a distance of 30 m of a walking node
        
        self.candidate_inodes = [30349181, 436368334, 295899939, 450940987, 36242668, 36508334, 451908623, 5128521073, 36508336,
                                 451914645, 8534199865, 81026594, 2572086168, 451915681, 451915858, 8489963209, 8534199864]
        
        # interesting nodes = nodes according to amenities 
        # option 2: within a distance of 50 m
        
        # [2572086150, 180170248, 7520932877, 451908623, 34094868, 451914645, 2572086168, 180168857, 2572086176, 451915681,
        # 81026594, 295899939, 8534199847, 450940973, 36508334, 8534199853, 36508336, 8534199858, 7524270519, 8534199864, 
        # 8534199865, 8534199866, 450940987, 5206243901, 5128546494, 451915971, 8489963209, 8489995722, 295899723, 8489963212, 
        # 436368334, 8489963214, 451915858, 451914977, 5128548962, 5128548963, 5128548965, 1592630249, 451914987, 36242668, 
        # 36242669, 5128521073, 30349180, 30349181, 451917438]
        
        # delete inodes from nodes resulting in a seperate list with nodes and a list with inodes without overlap
        
        self.candidate_nodes = list(set(self.candidate_nodes) - set(self.candidate_inodes))
        
        # log of actions to monitor, not part of the obs space
        
        self.log_actions = []        
        
    def current_state(self):
        # Based on the concept of a 'pure' MDP: agent can only see the current state!
        # This means there are no features in the obs that rely on previous states
        
        # dist = current walking distance of walk at state (m or min)
        # diff = difference from desired distance (m or min)
        # n_nodes = number of nodes of walk at state (#)
        # avg_dist = dist / n_nodes (m or min) 
        # n_inodes = number of interesting nodes (#)
        # avg_dist_inodes = (m or min)
        # ratio_inodes (%) = number of inodes w.r.t. number of candidate inodes
        # ratio_inodes_nodes (%) = number of inodes w.r.t. the total number of nodes in the walk
        
        if len(self.current_loop) > 1:
            
            # make route starting from first node to last node and close by returning from last node to first node
            
            self.route = []
            for point in range(0, len(self.current_loop) - 1):
                if len(self.route) > 0:
                    del self.route[-1]
                self.route += nx.shortest_path(self.G, source=self.current_loop[point], target=self.current_loop[point + 1], weight='length')
            if len(self.route) > 0:
                    del self.route[-1]
            self.route += nx.shortest_path(self.G, source=self.current_loop[-1], target=self.current_loop[0], weight='length')
            
            # the distance calculus is based on a loop starting at the first node and ending at the first node
            # the distance is obtained by a custom method (not with nx loop function)
            # taking shortest distances from node to node and closing at the end by returning to the first node
            
            self.dist = 0.0
            for point in range(0, len(self.current_loop) - 1):
                self.dist += nx.shortest_path_length(self.G, self.current_loop[point], self.current_loop[point + 1], weight='length')
            self.dist += nx.shortest_path_length(self.G, self.current_loop[-1], self.current_loop[0], weight='length')
            
            # when working with time goal, the distance is time based
            
            self.dist = self.dist / 1000 * 1/5 * 60
            
            self.diff = self.dist - self.desired_distance
            self.n_nodes = len(self.current_loop)
            
            if self.n_nodes > 0:
                self.avg_dist = self.dist / self.n_nodes
            else:
                self.avg_dist = 0.0
            
            # number of selected interesting nodes = length of the intersection 
            # of the current_loop and the candidate interesting nodes
            
            self.n_inodes = len(list(set(self.candidate_inodes).intersection(set(self.current_loop))))
            
            # other obs, these are ratio's so we must take care of 0 values
            
            if self.n_inodes > 0:
                self.avg_dist_inodes = self.dist / self.n_inodes
            else:
                self.avg_dist_inodes = 0.0
            
            if self.n_inodes > 0 and len(self.candidate_inodes) > 0:
                self.ratio_inodes = round(self.n_inodes / len(self.candidate_inodes), 2)
            else:
                self.ratio_inodes = 0.0
            
            if self.n_inodes > 0 and self.n_nodes > 0:
                self.ratio_inodes_nodes = round(self.n_inodes / self.n_nodes, 2)
            else:
                self.ratio_inodes_nodes = 0.0
        else:
            self.route = []
            self.dist = 0.0
            self.diff = 0.0
            self.n_nodes = 0.0
            self.avg_dist = 0.0
            self.n_inodes = 0.0
            self.avg_dist_inodes = 0.0
            self.ratio_inodes = 0.0
            self.ratio_inodes_nodes = 0.0
 
        # filling the obs space for the agent
        # basic version = obs space with 8 float variables or features
        # different options: which features are useful for the agent? (see report)
    
        curr_state = np.zeros(8)
        curr_state[0] = self.dist
        curr_state[1] = self.diff
        curr_state[2] = self.n_nodes
        curr_state[3] = self.avg_dist
        curr_state[4] = self.n_inodes
        curr_state[5] = self.avg_dist_inodes
        curr_state[6] = self.ratio_inodes
        curr_state[7] = self.ratio_inodes_nodes
        return curr_state

    
    def reset(self):

        # desired distance or duration of the walk (meters or minutes)
        # learn different walks
        
        self.desired_distance = random.uniform(self.MIN_WALK_TIME, self.MAX_WALK_TIME)
        
        # route of the current walk, not part of the obs space  
        
        self.route = []
        
        # current walk (list of selected node_ids by agent)
        
        self.current_loop = []
        self.current_loop.append(self.starting_point)

        self.timestep = 0

        self.dist = 0.0
        self.diff = 0.0
        self.n_nodes = 0.0
        self.avg_dist = 0.0
        self.n_inodes = 0.0
        self.avg_dist_inodes = 0.0
        self.ratio_inodes = 0.0
        self.ratio_inodes_nodes = 0.0

        self.reward = 0

        # just for logging, not part of spaces
        
        self.total_reward = 0

        # just for logging, not part of obs space
        
        self.log_actions = []
        
        return self.current_state(), {}

    
    def step(self, action):
        
        # action the choose from by the agent
        
        if action == 0:
            
            # delete the last node, respect starting node
            
            if len(self.current_loop) > 1:
                self.current_loop.remove(self.current_loop[-1])
            
            self.log_actions.append("A0")
        
        if action == 1:
            
            # add a node from candidate nodes (random)
            # option 1: add random node from list
            
            node_to_add = random.choice(self.candidate_nodes)
            self.current_loop.append(node_to_add)
            
            self.log_actions.append("A1")
            
            # option 2: add a random node from a list of neighbour nodes based on the edges. 
            # this can also be implemented as a seperate action. 
            
            # _, edges = ox.graph_to_gdfs(self.G)
            # next_pos_nodes = edges.loc[self.current_loop[-1], slice(None), slice(None)]
            # pos = random.randint(0, len(next_pos_nodes)-1)
            # self.current_loop.append(next_pos_nodes.iloc[pos].name[0])
        
        if action == 2:
            
            # add a node from candidate inodes (random)
            
            node_to_add = random.choice(self.candidate_inodes)
            self.current_loop.append(node_to_add)
            
            self.log_actions.append("A2")
        
        done = False
        truncated = False
        self.timestep += 1
        if self.timestep >= 250:        # there are +/- 250 nodes to choose from, if after 250 trials no result just truncate
            truncated = True
        else:
                        
            # reward function
            
            reward = 0
            
            # reward for reaching the target desired_distance defined as a range of +/- 1%
            
            cur_dist = self.dist
            min_dist = self.desired_distance * 0.99
            max_dist = self.desired_distance * 1.01
            if min_dist <= cur_dist <= max_dist:
                self.reward = 10
                done = True
                
                # when info is required
                
                if self.USER_INFO:
                    print("************************ NEW WALK MADE! **************************************************")
                    print(f"Desired Dist (min)   : {self.desired_distance}, min: {min_dist}, max: {max_dist}")
                    print(f"Current Dist (min)   : {cur_dist}")
                    print(f"Current Walk (nodes) : {self.current_loop}")
                    print(f"Number of nodes (#)  : {self.n_nodes}")
                    print(f"Number of inodes (#) : {self.n_inodes}")
                    print(f"% used inodes        : {self.ratio_inodes}")
                    print(f"% inodes in walk     : {self.ratio_inodes_nodes}")
                    print(f"Detailed route       : {self.route}")
                    print(f"reward: {env.reward}, total_reward: {self.total_reward}")
                    print(f"Actions by agent     : {self.log_actions}")
                
                # when user wants to rate suggested walks
                
                if self.USER_RATING:
                    ox.plot_graph_route(self.G, self.route)
                    print("Please rate the walk by pressing <a> for reward or other for no reward: ")
                    #os.system("cls")
                    if keyboard.read_key() == 'a':
                        self.reward += 5
                        print("Additional reward!")
                    print("Thx for rating. Agent will continue...")

            else:
                
                # negative reward for planning too long walks
                
                if cur_dist > self.desired_distance:
                    self.reward = -1
                
                # reward for making the walk longer
                
                if cur_dist < self.desired_distance:
                    self.reward = 1
                    
                    # if the last node added is an inode then bonus
                    # to do: using a % and some tresholds to get extra rewards?
                    
                    if self.current_loop[-1] in self.candidate_inodes:
                        self.reward += 1

            self.total_reward += self.reward

        return self.current_state(), self.reward, done, truncated, {}
    
    
    def render(self, mode="human"):
        print("location: ", self.location)
        print("starting point: ", self.starting_point)
        print("candidate nodes", self.candidate_nodes)
        print("length candidate nodes", len(self.candidate_nodes))
        print("candidate inodes", self.candidate_inodes)
        print("length candidate inodes", len(self.candidate_inodes))
        print("====================================================")
        print("desired duration: ", self.desired_distance)
        print("current loop:", self.current_loop)
        print("====================================================")
        print("timestep: ", self.timestep)
        print("current loop:", self.current_loop)
        print("detailed route: ", self.route)
        print("current_state", self.current_state())
        print(f"reward: {env.reward}, total_reward: {self.total_reward}")
        print(f"actions by agent     : {self.log_actions}")
        
        return None
    
    
    

In [4]:
# TEST THE ENVIRONMENT BEFORE RUNNING AGENT

env = MyEnv()
env.reset()
env.render()
env.step(1)
env.render()
env.step(1)
env.render()
env.step(2)
env.render()
env.step(2)
env.render()
env.step(0)
env.render()


location:  Onafhankelijkheidsstraat 42, Edegem
starting point:  36242661
candidate nodes [7520932868, 2797280265, 7520932875, 295899149, 7520932877, 5206248463, 5206248468, 6239396885, 6345026583, 5206248474, 5206248475, 5206248483, 5206248484, 180166695, 450940973, 5206248494, 8622275641, 5206248508, 5206248509, 5206248517, 5206248518, 5206248519, 8622275657, 8622275662, 8622275664, 8622275666, 8622275668, 8622275669, 8622275670, 8622275671, 8622275672, 8622275681, 8622275682, 8622275683, 8622275684, 8622275685, 8622275686, 60056679, 60056680, 8622275689, 8622281920, 8560427115, 8622275691, 8622275693, 8622275694, 8622275695, 8622275692, 8622275696, 8622275697, 8622275698, 8622275699, 8622275700, 8622275702, 8622275703, 8622275704, 8622275705, 8622275701, 8622275706, 8622275707, 8622275708, 8622275710, 8622275709, 8622275711, 8622275712, 8622275714, 8622275713, 8622275715, 8622275716, 5801761933, 5801761936, 180168857, 5206248621, 247277743, 5206248623, 5206248625, 5206248635, 5206248

In [5]:
# check action space and obs space

print(env.action_space)
print(env.observation_space)

Discrete(3)
Box(-inf, inf, (8,), float32)


In [6]:
# RUNNING AGENT
agent = sb3.PPO('MlpPolicy', env=env, tensorboard_log="\LOG", verbose=1)
agent.learn(total_timesteps=60_000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to \LOG\PPO_14
************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 34.68449359694337, min: 34.33764866097393, max: 35.031338532912805
Current Dist (min)   : 34.469376000000004
Current Walk (nodes) : [36242661, 5128521073, 8622275705, 451915858, 8706816413, 36508334, 36508334]
Number of nodes (#)  : 8
Number of inodes (#) : 3
% used inodes        : 0.18
% inodes in walk     : 0.38
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 5128521073, 36508334, 36242668, 36242669, 36242667, 60056679, 36242662, 36242682, 36242679, 36242680, 8622275662, 8621999376, 8622275657, 8622275691, 8622275692, 8622275694, 8622275697, 8622275700, 8622275707, 8622275708, 8622275705, 8622275708, 8622275707, 8622275700, 8622275697, 8622275694, 8622275692, 8622275691, 8622275657, 8621999376, 8622275662, 36242680, 36242679, 3624

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 46.110369360771784, min: 45.649265667164066, max: 46.5714730543795
Current Dist (min)   : 45.696600000000004
Current Walk (nodes) : [36242661, 4935936620, 8622281921, 450933176, 436368334, 2572086168, 2572086168]
Number of nodes (#)  : 6
Number of inodes (#) : 2
% used inodes        : 0.12
% inodes in walk     : 0.33
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915858, 451915971, 451915973, 451914977, 8560427115, 8560449918, 8534199881, 8534199896, 8534199887, 8534199909, 34564928, 4935936620, 34564928, 8534199909, 5128552424, 8534199871, 1592630249, 8534199865, 36508336, 295899971, 295899959, 36242667, 60056679, 36242662, 36242682, 36242679, 36242680, 8622275662, 8622275641, 8622275702, 8622281928, 8622281918, 8622281921, 8622281918, 8622281928, 8622275702, 8622275641, 8622275662, 8621999376, 8622275671, 8622275669, 8622275685, 8

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 9.799392360508712, min: 9.701398436903625, max: 9.897386284113798
Current Dist (min)   : 9.829367999999999
Current Walk (nodes) : [36242661]
Number of nodes (#)  : 2
Number of inodes (#) : 1
% used inodes        : 0.06
% inodes in walk     : 0.5
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915681, 451914645, 36508336, 8534199865, 36508336, 451914645, 451915681, 451908623, 36508334, 36242668, 36242669, 295900371, 36242661]
reward: 10, total_reward: 55
Actions by agent     : ['A0', 'A1', 'A0', 'A1', 'A2', 'A0', 'A0', 'A1', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A0', 'A1', 'A0', 'A1', 'A0', 'A1', 'A0', 'A0', 'A2', 'A2', 'A0', 'A0', 'A0', 'A1', 'A2', 'A0', 'A1', 'A2', 'A0', 'A2', 'A2', 'A0', 'A1', 'A0', 'A1', 'A0', 'A0', 'A0', 'A2', 'A0', 'A0', 'A0', 'A2', 'A0', 'A0', 'A1', 'A0', 'A0', 'A1', 'A0', 'A0', 'A0', 'A0', 'A2', 'A0', 'A2', 'A2

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 15.623109088718426, min: 15.466877997831242, max: 15.77934017960561
Current Dist (min)   : 15.703800000000001
Current Walk (nodes) : [36242661]
Number of nodes (#)  : 2
Number of inodes (#) : 0
% used inodes        : 0.0
% inodes in walk     : 0.0
Detailed route       : [36242661, 8573220359, 295900706, 6328539741, 36242659, 36242656, 800044371, 36242653, 36242654, 36242655, 2572086150, 36242655, 36242654, 36242653, 800044371, 36242656, 36242659, 6328539741, 295900706, 8573220359, 36242661]
reward: 10, total_reward: 53
Actions by agent     : ['A0', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A0', 'A1', 'A0', 'A1', 'A0', 'A0', 'A2', 'A0', 'A0', 'A2', 'A1', 'A0', 'A0', 'A1', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A0', 'A1', 'A0', 'A1', 'A0', 'A2', 'A1', 'A1', 'A0', 'A0', '

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 24.525332307210704, min: 24.280078984138598, max: 24.77058563028281
Current Dist (min)   : 24.667752000000004
Current Walk (nodes) : [36242661, 8706816414]
Number of nodes (#)  : 3
Number of inodes (#) : 0
% used inodes        : 0.0
% inodes in walk     : 0.0
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915858, 451915971, 451915973, 451914977, 8560427115, 8560449918, 8534199881, 8534199896, 8534199887, 8534199909, 34564928, 4935936620, 5128551738, 8706816413, 8706816414, 8706816413, 5128551738, 4935936620, 34564928, 8534199909, 8534199887, 8534199896, 8534199881, 8560449918, 8560427115, 451914977, 451915973, 451915971, 451915858, 451908623, 36508334, 450940987, 450940973, 34094859, 6328539753, 36242670, 36242658, 308580750, 81042780, 5206248644, 5206248668, 5206248644, 81042780, 308580750, 36242658, 36242659, 6328539741, 295900706

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 11.419050536085816, min: 11.304860030724958, max: 11.533241041446674
Current Dist (min)   : 11.482536
Current Walk (nodes) : [36242661]
Number of nodes (#)  : 2
Number of inodes (#) : 0
% used inodes        : 0.0
% inodes in walk     : 0.0
Detailed route       : [36242661, 8573220359, 295900706, 6328539741, 36242659, 36242658, 308580750, 81042780, 5206248644, 5206248668, 5206248637, 5206248635, 5206248637, 5206248668, 5206248644, 81042780, 308580750, 36242658, 36242659, 6328539741, 295900706, 8573220359, 36242661]
reward: 10, total_reward: 63
Actions by agent     : ['A1', 'A0', 'A1', 'A0', 'A2', 'A0', 'A1', 'A0', 'A2', 'A0', 'A2', 'A0', 'A1', 'A0', 'A2', 'A0', 'A1', 'A0', 'A1', 'A0', 'A2', 'A0', 'A1', 'A0', 'A1', 'A0', 'A0', 'A2', 'A0', 'A1', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A1', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A1', 'A0', 'A2', 'A0', 'A0', 'A0', 'A1', '

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 16.748875626316174, min: 16.581386870053013, max: 16.916364382579335
Current Dist (min)   : 16.770432
Current Walk (nodes) : [36242661]
Number of nodes (#)  : 2
Number of inodes (#) : 0
% used inodes        : 0.0
% inodes in walk     : 0.0
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915858, 451915971, 451916271, 451917250, 451917249, 7520932877, 295899723, 5206243901, 295899723, 7520932877, 451917249, 451917250, 451916271, 451915971, 451915858, 451908623, 36508334, 36242668, 36242669, 295900371, 36242661]
reward: 10, total_reward: 230
Actions by agent     : ['A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A2', 'A1', 'A0', 'A0', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A0', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A0', 'A0', 'A0', 'A2', 'A0', 'A0', 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 197        |
|    ep_rew_mean          | 161        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 11         |
|    time_elapsed         | 388        |
|    total_timesteps      | 22528      |
| train/                  |            |
|    approx_kl            | 0.00986148 |
|    clip_fraction        | 0.0535     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.807     |
|    explained_variance   | 0.236      |
|    learning_rate        | 0.0003     |
|    loss                 | 65.4       |
|    n_updates            | 100        |
|    policy_gradient_loss | -0.00705   |
|    value_loss           | 143        |
----------------------------------------
************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 22.225534514358834, min: 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 197         |
|    ep_rew_mean          | 204         |
| time/                   |             |
|    fps                  | 62          |
|    iterations           | 12          |
|    time_elapsed         | 395         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.010021635 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.768      |
|    explained_variance   | 0.0129      |
|    learning_rate        | 0.0003      |
|    loss                 | 66.5        |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.0125     |
|    value_loss           | 162         |
-----------------------------------------
************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 32.0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 202         |
|    ep_rew_mean          | 274         |
| time/                   |             |
|    fps                  | 69          |
|    iterations           | 14          |
|    time_elapsed         | 411         |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.006857304 |
|    clip_fraction        | 0.0711      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.65       |
|    explained_variance   | 0.272       |
|    learning_rate        | 0.0003      |
|    loss                 | 55.9        |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.00507    |
|    value_loss           | 147         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 212     

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 22.573254886532432, min: 22.347522337667108, max: 22.798987435397756
Current Dist (min)   : 22.645836000000003
Current Walk (nodes) : [36242661, 450940987, 451908623, 451908623]
Number of nodes (#)  : 5
Number of inodes (#) : 3
% used inodes        : 0.18
% inodes in walk     : 0.6
Detailed route       : [36242661, 295900371, 36242669, 36242668, 450940987, 36508334, 451908623, 451915681, 451914645, 36508336, 295899971, 36508234, 30349180, 5128548965, 5128548962, 30349181, 436368334, 436368333, 34564878, 8621999373, 450933176, 30349183, 30349184, 8489995739, 8489995729, 8489963212, 8489963209, 8489963212, 8489995729, 8489995739, 30349184, 30349183, 450933176, 8621999373, 34564878, 10684421728, 10684421726, 10684421750, 36242679, 36242682, 36242662, 60056680, 36242661]
reward: 10, total_reward: 337
Actions by agent     : ['A2', 'A0', 'A2', 'A2', 'A2', 'A0', 'A0', 'A2', 'A2', 

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 6.489657866137223, min: 6.42476128747585, max: 6.554554444798595
Current Dist (min)   : 6.4530720000000015
Current Walk (nodes) : [36242661]
Number of nodes (#)  : 2
Number of inodes (#) : 1
% used inodes        : 0.06
% inodes in walk     : 0.5
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915858, 451908623, 36508334, 36242668, 36242669, 295900371, 36242661]
reward: 10, total_reward: 96
Actions by agent     : ['A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A1', 'A0', 'A2', 'A0', 'A2', 'A

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 22.906507529668108, min: 22.677442454371427, max: 23.13557260496479
Current Dist (min)   : 22.892280000000003
Current Walk (nodes) : [36242661, 36242668, 451914645, 451915858]
Number of nodes (#)  : 5
Number of inodes (#) : 4
% used inodes        : 0.24
% inodes in walk     : 0.8
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915681, 451914645, 451915858, 451914645, 36508336, 295899971, 36508234, 30349180, 5128548965, 5128548962, 30349181, 436368334, 436368333, 34564878, 8621999373, 450933176, 30349183, 30349184, 8489995739, 8489995729, 8489963212, 8489963209, 8489963212, 8489995729, 8489995739, 30349184, 30349183, 450933176, 8621999373, 34564878, 10684421728, 10684421726, 10684421750, 36242679, 36242682, 36242662, 60056680, 36242661]
reward: 10, total_reward: 316
Actions by agent     : ['A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A2

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 219         |
|    ep_rew_mean          | 380         |
| time/                   |             |
|    fps                  | 106         |
|    iterations           | 24          |
|    time_elapsed         | 462         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.008686172 |
|    clip_fraction        | 0.0825      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.407      |
|    explained_variance   | 0.412       |
|    learning_rate        | 0.0003      |
|    loss                 | 219         |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.00674    |
|    value_loss           | 149         |
-----------------------------------------
************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 11.5

************************ NEW WALK MADE! **************************************************
Desired Dist (min)   : 21.27507193284081, min: 21.0623212135124, max: 21.487822652169218
Current Dist (min)   : 21.463007999999995
Current Walk (nodes) : [36242661, 36242668, 451915858]
Number of nodes (#)  : 4
Number of inodes (#) : 3
% used inodes        : 0.18
% inodes in walk     : 0.75
Detailed route       : [36242661, 295900371, 36242669, 36242668, 36508334, 451908623, 451915858, 451908623, 36508334, 450940987, 450940973, 34094859, 6328539753, 36242670, 36242658, 36242657, 180170248, 180168857, 81026594, 180168857, 180170248, 36242657, 36242656, 36242659, 6328539741, 295900706, 8573220359, 36242661]
reward: 10, total_reward: 91
Actions by agent     : ['A2', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A2', 'A2', 'A0', 'A0', 'A2', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A2', 'A2', 'A0', 'A2', 'A0', 'A0', 'A2

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 210          |
|    ep_rew_mean          | 381          |
| time/                   |              |
|    fps                  | 123          |
|    iterations           | 29           |
|    time_elapsed         | 480          |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0022621143 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.431       |
|    explained_variance   | 0.713        |
|    learning_rate        | 0.0003       |
|    loss                 | 20.9         |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.000724    |
|    value_loss           | 85.3         |
------------------------------------------
************************ NEW WALK MADE! **************************************************
Desire

<stable_baselines3.ppo.ppo.PPO at 0x24297c41f90>

In [8]:
# save model and close env

agent.save("WalksPPO")
env.close()

In [None]:
# End