## 1. Import Dependencies

In [3]:
import gym 
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete 
import numpy as np
import random
import os
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO

## 2. Building an Environment

In [12]:
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box

class Hot_or_Cold_Env(Env):
    """Grid-world "hot or cold" search task (old gym API: 4-tuple ``step``).

    The agent stands on an integer ``(x, y)`` cell of a square grid and moves
    one cell per step. At every reset the agent and a target are placed on two
    different random cells.

    Actions (``Discrete(4)``):
        0 -> y - 1 (backward), 1 -> x + 1 (right),
        2 -> y + 1 (forward),  3 -> x - 1 (left).
        Moves that would leave the grid are clamped to the border.

    Observation (``Box`` of shape ``(2,)``, dtype ``int32``):
        The agent's own ``(x, y)`` position.

    Reward per step:
        -0.1 always; +100 more when the target cell is reached; otherwise
        +1 when the move reduced the Euclidean distance to the target and
        -0.5 when it did not (which includes bumping into a wall).

    An episode ends when the target is reached or after ``period_duration``
    steps.

    NOTE(review): the observation contains only the agent position, never the
    target, and the target is re-drawn at every reset. The policy can therefore
    only infer the target from the reward signal. Consider exposing the target
    (or the previous hot/cold signal) in the observation -- not done here
    because it would change the observation shape.

    Args:
        grid_size: Number of cells along each axis (default 100). Must be at
            least 2 so that agent and target can occupy different cells.
        period_duration: Maximum number of steps per episode (default 60).

    Raises:
        ValueError: If ``grid_size < 2`` or ``period_duration < 1``.
    """

    def __init__(self, grid_size=100, period_duration=60):
        if grid_size < 2:
            raise ValueError("grid_size must be at least 2 so the agent and target can differ")
        if period_duration < 1:
            raise ValueError("period_duration must be at least 1 step")

        self.grid_size = int(grid_size)
        # Remembered separately because self.period_duration is the live countdown.
        self._initial_period_duration = int(period_duration)

        # Actions we can take: backward, right, forward, left
        self.action_space = Discrete(4)

        # Observation space: the agent's (x, y) cell on the grid
        self.observation_space = Box(low=0, high=self.grid_size - 1, shape=(2,), dtype=np.int32)

        # Place agent and target and initialise all per-episode counters
        self._begin_episode()

    def _random_cell(self):
        """Return a uniformly random grid cell as an int32 array of shape (2,)."""
        # dtype matches observation_space so observation_space.contains(state) holds
        return np.random.randint(0, self.grid_size, size=2, dtype=np.int32)

    def _begin_episode(self):
        """Draw new agent/target positions and reset the per-episode bookkeeping."""
        self.state = self._random_cell()
        self.target = self._random_cell()

        # Ensure the target is not in the same position as the agent
        while np.array_equal(self.state, self.target):
            self.target = self._random_cell()

        # Remaining steps in this episode (counts down to 0)
        self.period_duration = self._initial_period_duration

        # Number of steps taken so far in this episode
        self.steps_taken = 0

        # Seed with the real starting distance so that the very first move
        # already receives hot/cold feedback (an infinite seed would make the
        # first comparison meaningless).
        self.previous_distance_to_target = np.linalg.norm(self.state - self.target)

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Args:
            action: Integer in ``{0, 1, 2, 3}``; any other value leaves the
                agent where it is.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is a copy of the agent position, ``reward`` is a float, ``done``
            is True when the target was reached or the step budget is used
            up, and ``info`` is an empty dict.
        """
        upper = self.grid_size - 1

        # Apply action, clamping to the grid borders
        if action == 0:  # backward: y - 1, lower bound
            self.state[1] = max(0, self.state[1] - 1)
        elif action == 1:  # right: x + 1, upper bound
            self.state[0] = min(upper, self.state[0] + 1)
        elif action == 2:  # forward: y + 1, upper bound
            self.state[1] = min(upper, self.state[1] + 1)
        elif action == 3:  # left: x - 1, lower bound
            self.state[0] = max(0, self.state[0] - 1)

        # One step of the episode budget has been used
        self.period_duration -= 1
        self.steps_taken += 1

        # Small negative reward for each step taken
        reward = -0.1

        distance_to_target = np.linalg.norm(self.state - self.target)

        if np.array_equal(self.state, self.target):
            reward += 100  # Large positive reward for reaching the target
            done = True
        else:
            done = False
            if distance_to_target < self.previous_distance_to_target:
                reward += 1  # "hotter": got closer
            else:
                reward -= 0.5  # "colder": further away, or no progress (wall)

        # Remember the distance for the next comparison
        self.previous_distance_to_target = distance_to_target

        # Out of time
        if self.period_duration <= 0:
            done = True

        info = {}

        # Return a copy: self.state is mutated in place on the next step, and
        # callers must not see earlier observations change under them.
        return self.state.copy(), reward, done, info

    def reset(self):
        """Start a new episode and return a copy of the agent's initial position."""
        self._begin_episode()
        return self.state.copy()

## 3. Test Environment

In [None]:
# Uses the Hot_or_Cold_Env class defined in section 2 (run that cell first).

# Test function for the Hot or Cold environment
def test_hot_or_cold_env():
    """Smoke-test the environment by taking up to ten random actions.

    Prints the initial state and target, then for every step the chosen
    action, the resulting state, the step reward, the running reward total
    and the Euclidean distance to the target. Stops early as soon as the
    environment reports ``done``.
    """
    environment = Hot_or_Cold_Env()
    initial_state = environment.reset()

    print("Initial State:", initial_state)
    print("Target:", environment.target)

    cumulative_reward = 0
    for step_number in range(1, 11):  # at most 10 steps, numbered from 1
        action = np.random.choice(4)  # uniform random action in {0, 1, 2, 3}
        next_state, reward, done, _info = environment.step(action)
        cumulative_reward += reward

        distance = np.linalg.norm(next_state - environment.target)
        report_lines = (
            f"Step {step_number}:",
            f"  Action: {action}",
            f"  New State: {next_state}",
            f"  Reward: {reward}",
            f"  Total Reward: {cumulative_reward}",
            f"  Distance to Target: {distance}",
        )
        print("\n".join(report_lines))

        if not done:
            continue

        # Episode finished: either the target was hit or the step budget ran out
        print("Agent reached the target or time has run out.")
        break

# Run the smoke test once; it prints each random transition to the cell output
test_hot_or_cold_env()

## 4. Model Training with PPO / MlpPolicy

In [13]:
# Directory (relative to the notebook) that receives the TensorBoard training logs
log_path = os.path.join("Training", "Logs")

In [14]:
# Create a single instance of the custom environment to train on
env = Hot_or_Cold_Env()

In [27]:
# Create the PPO model with the "MlpPolicy" policy on the custom environment;
# verbose=1 prints the training tables and tensorboard_log points at log_path
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [16]:
# Train the PPO model for one million environment steps
model.learn(total_timesteps=1_000_000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 58.9     |
|    ep_rew_mean     | 10.9     |
| time/              |          |
|    fps             | 7768     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.5        |
|    ep_rew_mean          | 10.6        |
| time/                   |             |
|    fps                  | 5661        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.017111693 |
|    clip_fraction        | 0.267       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | -0.00584    |
|    learning_rate        | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 14.3        |
| time/                   |             |
|    fps                  | 4640        |
|    iterations           | 11          |
|    time_elapsed         | 4           |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.010049952 |
|    clip_fraction        | 0.0548      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.23       |
|    explained_variance   | 0.314       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.49        |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00538    |
|    value_loss           | 15.7        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.2         |
| time/                   |              |
|    fps                  | 4563         |
|    iterations           | 21           |
|    time_elapsed         | 9            |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0058709756 |
|    clip_fraction        | 0.0646       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.21        |
|    explained_variance   | 0.675        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.81         |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00408     |
|    value_loss           | 13.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.1         |
| time/                   |              |
|    fps                  | 4515         |
|    iterations           | 31           |
|    time_elapsed         | 14           |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0064889845 |
|    clip_fraction        | 0.069        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.625        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.4          |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.00579     |
|    value_loss           | 12.2         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.9        |
|    ep_rew_mean          | 17.7        |
| time/                   |             |
|    fps                  | 4474        |
|    iterations           | 41          |
|    time_elapsed         | 18          |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.008330135 |
|    clip_fraction        | 0.148       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.16       |
|    explained_variance   | 0.65        |
|    learning_rate        | 0.0003      |
|    loss                 | 6.23        |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.00281    |
|    value_loss           | 12.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.9  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 19.8         |
| time/                   |              |
|    fps                  | 4437         |
|    iterations           | 51           |
|    time_elapsed         | 23           |
|    total_timesteps      | 104448       |
| train/                  |              |
|    approx_kl            | 0.0077857487 |
|    clip_fraction        | 0.0953       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.01        |
|    explained_variance   | 0.626        |
|    learning_rate        | 0.0003       |
|    loss                 | 3.09         |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.00563     |
|    value_loss           | 10.6         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.4        |
| time/                   |             |
|    fps                  | 4405        |
|    iterations           | 61          |
|    time_elapsed         | 28          |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.010457354 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | 0.321       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.03        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.00613    |
|    value_loss           | 18          |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 21.3        |
| time/                   |             |
|    fps                  | 4381        |
|    iterations           | 71          |
|    time_elapsed         | 33          |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.010811519 |
|    clip_fraction        | 0.0887      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | 0.626       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.86        |
|    n_updates            | 700         |
|    policy_gradient_loss | 9.78e-05    |
|    value_loss           | 14.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.2        |
| time/                   |             |
|    fps                  | 4361        |
|    iterations           | 81          |
|    time_elapsed         | 38          |
|    total_timesteps      | 165888      |
| train/                  |             |
|    approx_kl            | 0.005548371 |
|    clip_fraction        | 0.0965      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.989      |
|    explained_variance   | 0.212       |
|    learning_rate        | 0.0003      |
|    loss                 | 18.1        |
|    n_updates            | 800         |
|    policy_gradient_loss | -0.00196    |
|    value_loss           | 48.9        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.4        |
| time/                   |             |
|    fps                  | 4343        |
|    iterations           | 91          |
|    time_elapsed         | 42          |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.018495984 |
|    clip_fraction        | 0.144       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.291       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.99        |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.00411    |
|    value_loss           | 19          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.5  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.6        |
| time/                   |             |
|    fps                  | 4326        |
|    iterations           | 101         |
|    time_elapsed         | 47          |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.009233354 |
|    clip_fraction        | 0.0852      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.995      |
|    explained_variance   | 0.526       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.05        |
|    n_updates            | 1000        |
|    policy_gradient_loss | -0.00373    |
|    value_loss           | 15.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.4        |
|    ep_rew_mean          | 20.2        |
| time/                   |             |
|    fps                  | 4313        |
|    iterations           | 111         |
|    time_elapsed         | 52          |
|    total_timesteps      | 227328      |
| train/                  |             |
|    approx_kl            | 0.008664322 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.1        |
|    explained_variance   | 0.539       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.98        |
|    n_updates            | 1100        |
|    policy_gradient_loss | -0.00616    |
|    value_loss           | 17.5        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 59.4    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.1        |
| time/                   |             |
|    fps                  | 4301        |
|    iterations           | 121         |
|    time_elapsed         | 57          |
|    total_timesteps      | 247808      |
| train/                  |             |
|    approx_kl            | 0.011019772 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.994      |
|    explained_variance   | 0.514       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.6        |
|    n_updates            | 1200        |
|    policy_gradient_loss | -0.0054     |
|    value_loss           | 18.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 21.2         |
| time/                   |              |
|    fps                  | 4288         |
|    iterations           | 131          |
|    time_elapsed         | 62           |
|    total_timesteps      | 268288       |
| train/                  |              |
|    approx_kl            | 0.0080750035 |
|    clip_fraction        | 0.105        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.06        |
|    explained_variance   | 0.366        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.39         |
|    n_updates            | 1300         |
|    policy_gradient_loss | -0.00316     |
|    value_loss           | 14           |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 20.3        |
| time/                   |             |
|    fps                  | 4279        |
|    iterations           | 141         |
|    time_elapsed         | 67          |
|    total_timesteps      | 288768      |
| train/                  |             |
|    approx_kl            | 0.007612466 |
|    clip_fraction        | 0.0879      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.999      |
|    explained_variance   | 0.614       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.02        |
|    n_updates            | 1400        |
|    policy_gradient_loss | -0.00445    |
|    value_loss           | 13.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.3        |
| time/                   |             |
|    fps                  | 4270        |
|    iterations           | 151         |
|    time_elapsed         | 72          |
|    total_timesteps      | 309248      |
| train/                  |             |
|    approx_kl            | 0.011101801 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.744       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.98        |
|    n_updates            | 1500        |
|    policy_gradient_loss | -0.00507    |
|    value_loss           | 11.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.2        |
| time/                   |             |
|    fps                  | 4256        |
|    iterations           | 161         |
|    time_elapsed         | 77          |
|    total_timesteps      | 329728      |
| train/                  |             |
|    approx_kl            | 0.008171749 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.994      |
|    explained_variance   | 0.62        |
|    learning_rate        | 0.0003      |
|    loss                 | 7.09        |
|    n_updates            | 1600        |
|    policy_gradient_loss | -0.00209    |
|    value_loss           | 15.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.4        |
|    ep_rew_mean          | 21.2        |
| time/                   |             |
|    fps                  | 4242        |
|    iterations           | 171         |
|    time_elapsed         | 82          |
|    total_timesteps      | 350208      |
| train/                  |             |
|    approx_kl            | 0.017664805 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.91       |
|    explained_variance   | 0.462       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.28        |
|    n_updates            | 1700        |
|    policy_gradient_loss | -0.00339    |
|    value_loss           | 21.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.4  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.5        |
| time/                   |             |
|    fps                  | 4236        |
|    iterations           | 181         |
|    time_elapsed         | 87          |
|    total_timesteps      | 370688      |
| train/                  |             |
|    approx_kl            | 0.008289275 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.942      |
|    explained_variance   | 0.712       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.43        |
|    n_updates            | 1800        |
|    policy_gradient_loss | 2.44e-06    |
|    value_loss           | 11.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18           |
| time/                   |              |
|    fps                  | 4227         |
|    iterations           | 191          |
|    time_elapsed         | 92           |
|    total_timesteps      | 391168       |
| train/                  |              |
|    approx_kl            | 0.0068010627 |
|    clip_fraction        | 0.0835       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.915       |
|    explained_variance   | 0.553        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.9         |
|    n_updates            | 1900         |
|    policy_gradient_loss | -0.00288     |
|    value_loss           | 19.1         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 17           |
| time/                   |              |
|    fps                  | 4216         |
|    iterations           | 201          |
|    time_elapsed         | 97           |
|    total_timesteps      | 411648       |
| train/                  |              |
|    approx_kl            | 0.0068379827 |
|    clip_fraction        | 0.0766       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.998       |
|    explained_variance   | 0.467        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.8         |
|    n_updates            | 2000         |
|    policy_gradient_loss | -0.00277     |
|    value_loss           | 19.8         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 23.1        |
| time/                   |             |
|    fps                  | 4204        |
|    iterations           | 211         |
|    time_elapsed         | 102         |
|    total_timesteps      | 432128      |
| train/                  |             |
|    approx_kl            | 0.012325495 |
|    clip_fraction        | 0.156       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.955      |
|    explained_variance   | 0.579       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.44        |
|    n_updates            | 2100        |
|    policy_gradient_loss | -0.00879    |
|    value_loss           | 16.9        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.3         |
| time/                   |              |
|    fps                  | 4192         |
|    iterations           | 221          |
|    time_elapsed         | 107          |
|    total_timesteps      | 452608       |
| train/                  |              |
|    approx_kl            | 0.0057269097 |
|    clip_fraction        | 0.136        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1           |
|    explained_variance   | 0.577        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.46         |
|    n_updates            | 2200         |
|    policy_gradient_loss | 0.000504     |
|    value_loss           | 14           |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.3        |
| time/                   |             |
|    fps                  | 4181        |
|    iterations           | 231         |
|    time_elapsed         | 113         |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.011679988 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.974      |
|    explained_variance   | 0.582       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.83        |
|    n_updates            | 2300        |
|    policy_gradient_loss | -0.00891    |
|    value_loss           | 15.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 21.1         |
| time/                   |              |
|    fps                  | 4175         |
|    iterations           | 241          |
|    time_elapsed         | 118          |
|    total_timesteps      | 493568       |
| train/                  |              |
|    approx_kl            | 0.0039806142 |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.99        |
|    explained_variance   | 0.21         |
|    learning_rate        | 0.0003       |
|    loss                 | 11.9         |
|    n_updates            | 2400         |
|    policy_gradient_loss | -0.00172     |
|    value_loss           | 57.1         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 17.5        |
| time/                   |             |
|    fps                  | 4171        |
|    iterations           | 251         |
|    time_elapsed         | 123         |
|    total_timesteps      | 514048      |
| train/                  |             |
|    approx_kl            | 0.008805963 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.996      |
|    explained_variance   | 0.556       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.59        |
|    n_updates            | 2500        |
|    policy_gradient_loss | -0.00372    |
|    value_loss           | 17.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 20.4        |
| time/                   |             |
|    fps                  | 4161        |
|    iterations           | 261         |
|    time_elapsed         | 128         |
|    total_timesteps      | 534528      |
| train/                  |             |
|    approx_kl            | 0.009025125 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.91       |
|    explained_variance   | 0.558       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.18        |
|    n_updates            | 2600        |
|    policy_gradient_loss | -0.00337    |
|    value_loss           | 17.7        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 60         |
|    ep_rew_mean          | 18.6       |
| time/                   |            |
|    fps                  | 4151       |
|    iterations           | 271        |
|    time_elapsed         | 133        |
|    total_timesteps      | 555008     |
| train/                  |            |
|    approx_kl            | 0.01855236 |
|    clip_fraction        | 0.146      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.9       |
|    explained_variance   | 0.569      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.66       |
|    n_updates            | 2700       |
|    policy_gradient_loss | -0.00411   |
|    value_loss           | 14.2       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 59.9       |
|    ep_rew_mean

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.8         |
| time/                   |              |
|    fps                  | 4146         |
|    iterations           | 281          |
|    time_elapsed         | 138          |
|    total_timesteps      | 575488       |
| train/                  |              |
|    approx_kl            | 0.0071772616 |
|    clip_fraction        | 0.0651       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.819       |
|    explained_variance   | 0.407        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.8         |
|    n_updates            | 2800         |
|    policy_gradient_loss | -0.00181     |
|    value_loss           | 25.3         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.8        |
|    ep_rew_mean          | 16.7        |
| time/                   |             |
|    fps                  | 4143        |
|    iterations           | 291         |
|    time_elapsed         | 143         |
|    total_timesteps      | 595968      |
| train/                  |             |
|    approx_kl            | 0.015240723 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.897      |
|    explained_variance   | 0.432       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.2        |
|    n_updates            | 2900        |
|    policy_gradient_loss | -0.00454    |
|    value_loss           | 23.2        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 59.8    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.8        |
| time/                   |             |
|    fps                  | 4140        |
|    iterations           | 301         |
|    time_elapsed         | 148         |
|    total_timesteps      | 616448      |
| train/                  |             |
|    approx_kl            | 0.011259743 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.809      |
|    explained_variance   | 0.321       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.9        |
|    n_updates            | 3000        |
|    policy_gradient_loss | -0.00773    |
|    value_loss           | 27          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.9        |
|    ep_rew_mean          | 16          |
| time/                   |             |
|    fps                  | 4138        |
|    iterations           | 311         |
|    time_elapsed         | 153         |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.008492393 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.826      |
|    explained_variance   | 0.443       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.19        |
|    n_updates            | 3100        |
|    policy_gradient_loss | -0.00279    |
|    value_loss           | 23.2        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 59.9

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.1        |
| time/                   |             |
|    fps                  | 4133        |
|    iterations           | 321         |
|    time_elapsed         | 159         |
|    total_timesteps      | 657408      |
| train/                  |             |
|    approx_kl            | 0.006338096 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.809      |
|    explained_variance   | 0.505       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.7        |
|    n_updates            | 3200        |
|    policy_gradient_loss | -0.0064     |
|    value_loss           | 24.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.3         |
| time/                   |              |
|    fps                  | 4128         |
|    iterations           | 331          |
|    time_elapsed         | 164          |
|    total_timesteps      | 677888       |
| train/                  |              |
|    approx_kl            | 0.0072658933 |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.917       |
|    explained_variance   | 0.668        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.9          |
|    n_updates            | 3300         |
|    policy_gradient_loss | 0.000721     |
|    value_loss           | 11.2         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.7        |
| time/                   |             |
|    fps                  | 4124        |
|    iterations           | 341         |
|    time_elapsed         | 169         |
|    total_timesteps      | 698368      |
| train/                  |             |
|    approx_kl            | 0.011425358 |
|    clip_fraction        | 0.13        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.886      |
|    explained_variance   | 0.624       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.79        |
|    n_updates            | 3400        |
|    policy_gradient_loss | -0.00438    |
|    value_loss           | 16.4        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 60      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.6        |
| time/                   |             |
|    fps                  | 4120        |
|    iterations           | 351         |
|    time_elapsed         | 174         |
|    total_timesteps      | 718848      |
| train/                  |             |
|    approx_kl            | 0.014094022 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.927      |
|    explained_variance   | 0.658       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.37        |
|    n_updates            | 3500        |
|    policy_gradient_loss | -0.00213    |
|    value_loss           | 12.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.9        |
| time/                   |             |
|    fps                  | 4116        |
|    iterations           | 361         |
|    time_elapsed         | 179         |
|    total_timesteps      | 739328      |
| train/                  |             |
|    approx_kl            | 0.008338102 |
|    clip_fraction        | 0.0865      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.882      |
|    explained_variance   | 0.147       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.55        |
|    n_updates            | 3600        |
|    policy_gradient_loss | -0.00139    |
|    value_loss           | 18.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 21.8        |
| time/                   |             |
|    fps                  | 4113        |
|    iterations           | 371         |
|    time_elapsed         | 184         |
|    total_timesteps      | 759808      |
| train/                  |             |
|    approx_kl            | 0.011380976 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.777      |
|    explained_variance   | 0.768       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.6         |
|    n_updates            | 3700        |
|    policy_gradient_loss | -0.00303    |
|    value_loss           | 12.5        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 21.7        |
| time/                   |             |
|    fps                  | 4107        |
|    iterations           | 381         |
|    time_elapsed         | 189         |
|    total_timesteps      | 780288      |
| train/                  |             |
|    approx_kl            | 0.009199824 |
|    clip_fraction        | 0.0917      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.766      |
|    explained_variance   | 0.622       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.48        |
|    n_updates            | 3800        |
|    policy_gradient_loss | 0.000305    |
|    value_loss           | 22.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 16.8        |
| time/                   |             |
|    fps                  | 4099        |
|    iterations           | 391         |
|    time_elapsed         | 195         |
|    total_timesteps      | 800768      |
| train/                  |             |
|    approx_kl            | 0.015656872 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.779      |
|    explained_variance   | 0.541       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.8        |
|    n_updates            | 3900        |
|    policy_gradient_loss | -0.00146    |
|    value_loss           | 27.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 60         |
|    ep_rew_mean          | 14.3       |
| time/                   |            |
|    fps                  | 4089       |
|    iterations           | 401        |
|    time_elapsed         | 200        |
|    total_timesteps      | 821248     |
| train/                  |            |
|    approx_kl            | 0.00846412 |
|    clip_fraction        | 0.155      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.737     |
|    explained_variance   | 0.366      |
|    learning_rate        | 0.0003     |
|    loss                 | 10.2       |
|    n_updates            | 4000       |
|    policy_gradient_loss | 0.00182    |
|    value_loss           | 23.4       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.1        |
| time/                   |             |
|    fps                  | 4076        |
|    iterations           | 411         |
|    time_elapsed         | 206         |
|    total_timesteps      | 841728      |
| train/                  |             |
|    approx_kl            | 0.008098305 |
|    clip_fraction        | 0.0956      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.814      |
|    explained_variance   | 0.645       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.31        |
|    n_updates            | 4100        |
|    policy_gradient_loss | -0.0051     |
|    value_loss           | 13.4        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.3         |
| time/                   |              |
|    fps                  | 4064         |
|    iterations           | 421          |
|    time_elapsed         | 212          |
|    total_timesteps      | 862208       |
| train/                  |              |
|    approx_kl            | 0.0074174628 |
|    clip_fraction        | 0.0792       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.74        |
|    explained_variance   | 0.477        |
|    learning_rate        | 0.0003       |
|    loss                 | 10.7         |
|    n_updates            | 4200         |
|    policy_gradient_loss | -0.00192     |
|    value_loss           | 24.3         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 20.1        |
| time/                   |             |
|    fps                  | 4047        |
|    iterations           | 431         |
|    time_elapsed         | 218         |
|    total_timesteps      | 882688      |
| train/                  |             |
|    approx_kl            | 0.008031955 |
|    clip_fraction        | 0.0993      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.725      |
|    explained_variance   | 0.419       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.76        |
|    n_updates            | 4300        |
|    policy_gradient_loss | -0.00118    |
|    value_loss           | 23.8        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 59.8

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 59.8       |
|    ep_rew_mean          | 20.9       |
| time/                   |            |
|    fps                  | 4028       |
|    iterations           | 441        |
|    time_elapsed         | 224        |
|    total_timesteps      | 903168     |
| train/                  |            |
|    approx_kl            | 0.01796164 |
|    clip_fraction        | 0.12       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.749     |
|    explained_variance   | 0.542      |
|    learning_rate        | 0.0003     |
|    loss                 | 8.71       |
|    n_updates            | 4400       |
|    policy_gradient_loss | -0.00342   |
|    value_loss           | 17.2       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.8        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 19.3        |
| time/                   |             |
|    fps                  | 4009        |
|    iterations           | 451         |
|    time_elapsed         | 230         |
|    total_timesteps      | 923648      |
| train/                  |             |
|    approx_kl            | 0.009155864 |
|    clip_fraction        | 0.0913      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.807      |
|    explained_variance   | 0.347       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.6        |
|    n_updates            | 4500        |
|    policy_gradient_loss | -0.00453    |
|    value_loss           | 24.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 59.5  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 59.6         |
|    ep_rew_mean          | 19.3         |
| time/                   |              |
|    fps                  | 3985         |
|    iterations           | 461          |
|    time_elapsed         | 236          |
|    total_timesteps      | 944128       |
| train/                  |              |
|    approx_kl            | 0.0059584053 |
|    clip_fraction        | 0.0752       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.712       |
|    explained_variance   | 0.637        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.97         |
|    n_updates            | 4600         |
|    policy_gradient_loss | -0.00252     |
|    value_loss           | 21.3         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | 18.7        |
| time/                   |             |
|    fps                  | 3963        |
|    iterations           | 471         |
|    time_elapsed         | 243         |
|    total_timesteps      | 964608      |
| train/                  |             |
|    approx_kl            | 0.011978266 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.8        |
|    explained_variance   | 0.544       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.47        |
|    n_updates            | 4700        |
|    policy_gradient_loss | -0.0019     |
|    value_loss           | 14          |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 59.6

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 60           |
|    ep_rew_mean          | 18.2         |
| time/                   |              |
|    fps                  | 3941         |
|    iterations           | 481          |
|    time_elapsed         | 249          |
|    total_timesteps      | 985088       |
| train/                  |              |
|    approx_kl            | 0.0056273337 |
|    clip_fraction        | 0.0869       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.752       |
|    explained_variance   | 0.403        |
|    learning_rate        | 0.0003       |
|    loss                 | 11           |
|    n_updates            | 4800         |
|    policy_gradient_loss | 0.000103     |
|    value_loss           | 20.9         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

<stable_baselines3.ppo.ppo.PPO at 0x30f75ef90>

In [22]:
# Evaluate the trained policy on `env` over 1000 episodes with rendering enabled.
# The call is the cell's last expression, so its return value (mean episode
# reward and its standard deviation) is displayed as the cell output.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=1000,
    render=True,
)

(19.06729859870672, 23.313152556109294)

In [28]:
# Build the checkpoint location under the 'Saved Models' directory, then
# persist the trained PPO model there.
ppo_checkpoint_path = os.path.join('Saved Models', 'PPO_1MT')
model.save(ppo_checkpoint_path)

