In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Eligible'])
    y = data['Eligible']
    return X, y

def train_ridge_model(X_train, y_train):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    ridge_model = Ridge()
    ridge_model.fit(X_train_scaled, y_train)
    return ridge_model, scaler

# Load and train on VehicleTrainingDataset.csv
X_train, y_train = load_and_preprocess_data('VehicleTrainingDataset_Noisy_0.1.csv')
ridge_model, scaler = train_ridge_model(X_train, y_train)

# Predict eligibility scores on 1000VehicleDataset.csv
vehicles_df = pd.read_csv('1000VehicleDataset_Noisy_0.1.csv')
X_test = vehicles_df.drop(columns=['Eligible'])
X_test_scaled = scaler.transform(X_test)
predicted_scores = ridge_model.predict(X_test_scaled)

# Assuming you have access to actual scores, replace this line with the actual score loading logic if available
y_actual = vehicles_df['Eligible']  # This would be prior to overwriting with predictions if you run this block again

# Replace actual scores with predicted ones
vehicles_df['Eligible'] = predicted_scores  

# Calculate metrics
mae = mean_absolute_error(y_actual, predicted_scores)
rmse = np.sqrt(mean_squared_error(y_actual, predicted_scores))
r_squared = r2_score(y_actual, predicted_scores)
rae = np.sum(np.abs(y_actual - predicted_scores)) / np.sum(np.abs(y_actual - np.mean(y_actual)))

# Output the results
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")
print(f"RAE: {rae}")


MAE: 0.8967544998516832
RMSE: 1.1358026956536529
R-squared: 0.991274663358528
RAE: 0.09365725311592739


In [3]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback

# Load datasets
tasks_df = pd.read_csv('RandomTasks400.csv')


# Rename columns for consistency
tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)

# Define the Gym environment for task allocation
class TaskAllocationEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, vehicles, tasks):
        super(TaskAllocationEnv, self).__init__()
        self.vehicles = vehicles
        self.tasks = tasks
        self.current_task = 0
        self.successful_assignments = 0
        self.successful_history = []  # Added to track successful assignments
        self.seed()

        num_features = tasks.shape[1]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_features,), dtype=np.float32)
        self.eligible_vehicle_indices = []
        self.update_action_space()

    def seed(self, seed=42):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]        
        
    def update_action_space(self):
        task_eligibility = self.tasks.iloc[self.current_task]['MinEligibility']
        eligible_vehicles = self.vehicles[self.vehicles['Eligible'] >= task_eligibility]
        self.eligible_vehicle_indices = eligible_vehicles.index.tolist()
        if len(self.eligible_vehicle_indices) == 0:
            self.action_space = spaces.Discrete(1)  # Prevents invalid action space of size 0
        else:
            self.action_space = spaces.Discrete(len(self.eligible_vehicle_indices))

    def reset(self):
        self.current_task = 0
        self.successful_assignments = 0
        self.update_action_space()
        #print("Resetting environment. Starting new episode.")
        return self.tasks.iloc[self.current_task].values.astype(np.float32)

    def step(self, action):
        task = self.tasks.iloc[self.current_task]
        vehicle = self.vehicles.iloc[action]
        reward = 0

        # Check if the vehicle meets all the task requirements
        meets_requirements = (
            vehicle['RAM'] >= task['RAM'] and
            vehicle['storage'] >= task['storage'] and
            vehicle['Trustfactor'] >= task['Trustfactor'] and
            vehicle['Distance'] <= task['Distance'] and
            vehicle['TransmissionRate'] >= task['TransmissionRate']
        )
        reward = 1 if meets_requirements else -1

        self.successful_assignments += reward > 0
        self.current_task += 1
        done = self.current_task >= len(self.tasks)

        if not done:
            self.update_action_space()
            next_state = self.tasks.iloc[self.current_task].values.astype(np.float32)
        else:
            next_state = np.zeros(self.observation_space.shape[0])
            self.successful_history.append(self.successful_assignments)
            #print(f"Episode completed. Successful assignments: {self.successful_assignments}.")
            self.successful_assignments = 0  # Reset for next episode

        
        
        #three lines below can be uncommented for more detailed output
        #print(f"Task Details: {task.to_dict()}")
        #print(f"Vehicle Details: {vehicle.to_dict()}")
        #print(f"Step: Task {self.current_task}, Action {action}, Reward {reward}")

        return next_state, reward, done, {}

    def get_average_success(self):
        return np.mean(self.successful_history) if self.successful_history else 0


    def render(self, mode='human'):
        pass

    def close(self):
        pass



# Custom callback for logging
class CustomCallback(BaseCallback):
    def __init__(self, env, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.env = env
        self.total_rewards = 0
        self.total_assignments = 0
        self.num_episodes = 0

    def _on_step(self):
        return True

    def _on_rollout_end(self):
        mean_reward, std_reward = evaluate_policy(self.model, self.model.get_env(), n_eval_episodes=10)
        average_assignments = self.env.get_attr('get_average_success')[0]()
        self.total_rewards += mean_reward
        self.total_assignments += average_assignments
        self.num_episodes += 1

        print("-------- Rollout Summary --------")
        print(f"Total mean reward: {mean_reward}")
        print(f"Standard deviation of reward: {std_reward}")
        print(f"Average successful assignments: {average_assignments}")
        print("All assignments history:", self.env.envs[0].successful_history)
        self.env.envs[0].successful_history = []  # Reset history after each iteration

    def _on_training_end(self):
        average_total_reward = self.total_rewards / self.num_episodes
        average_total_assignments = self.total_assignments / self.num_episodes
        print("-------- Training Summary --------")
        print(f"Overall Average Mean Reward: {average_total_reward}")
        print(f"Overall Average Successful Assignments: {average_total_assignments}")


# Prepare the environment
env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, tasks_df), n_envs=1)

# Initialize and train the PPO model
model = PPO("MlpPolicy", env, verbose=1,
            n_steps=1024, batch_size=128, n_epochs=10, learning_rate=0.00018,
            gamma=0.96, gae_lambda=0.87, clip_range=0.15, ent_coef=0.07)

callback = CustomCallback(env)  # Use custom callback for detailed tracking and logging

# Train the model with the custom callback
model.learn(total_timesteps=1024*100, callback=callback)

# Save the model
model.save("ppo_task_allocation_model")


# Load new task dataset for testing
new_tasks_df = pd.read_csv('RandomTasks400Test.csv')
new_tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)
test_env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, new_tasks_df), n_envs=1)

# Evaluate the model on the new test environment
mean_reward, std_reward = evaluate_policy(model, test_env, n_eval_episodes=10)
# Extract the successful assignments history from the test environment
successful_assignments = test_env.envs[0].env.get_average_success()

print("---- Testing Summary ----")
print(f"Mean Reward: {mean_reward}")
print(f"Standard Deviation of Reward: {std_reward}")
print(f"Average Successful Assignments in Testing: {successful_assignments}")




Using cpu device
-------- Rollout Summary --------
Total mean reward: -378.0
Standard deviation of reward: 0.0
Average successful assignments: 11.75
All assignments history: [16, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 400      |
|    ep_rew_mean     | -369     |
| time/              |          |
|    fps             | 211      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1024     |
---------------------------------


  logger.warn(
  logger.warn(


-------- Rollout Summary --------
Total mean reward: -356.0
Standard deviation of reward: 0.0
Average successful assignments: 16.333333333333332
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -369        |
| time/                   |             |
|    fps                  | 194         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.006658865 |
|    clip_fraction        | 0.0593      |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.84       |
|    explained_variance   | 0.0857      |
|    learning_rate        | 0.00018     |
|    loss                 | 3.18        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0388     |
|    value_loss           | 1

-------- Rollout Summary --------
Total mean reward: -64.0
Standard deviation of reward: 0.0
Average successful assignments: 115.80833333333334
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -365        |
| time/                   |             |
|    fps                  | 183         |
|    iterations           | 10          |
|    time_elapsed         | 55          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.009982504 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.78       |
|    explained_variance   | 0.0131      |
|    learning_rate        | 0.00018     |
|    loss                 | 2.15        |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0409     |
|    value_loss           | 4.

-------- Rollout Summary --------
Total mean reward: 46.0
Standard deviation of reward: 0.0
Average successful assignments: 145.46296296296296
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -356         |
| time/                   |              |
|    fps                  | 187          |
|    iterations           | 18           |
|    time_elapsed         | 98           |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0100790635 |
|    clip_fraction        | 0.203        |
|    clip_range           | 0.15         |
|    entropy_loss         | -5.66        |
|    explained_variance   | 0.217        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.31         |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.0448      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 38.0
Standard deviation of reward: 0.0
Average successful assignments: 159.99679487179486
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -343        |
| time/                   |             |
|    fps                  | 176         |
|    iterations           | 26          |
|    time_elapsed         | 150         |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.008026913 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.4        |
|    explained_variance   | 0.298       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.18        |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0429     |
|    value_loss           | 3.0

-------- Rollout Summary --------
Total mean reward: 48.0
Standard deviation of reward: 0.0
Average successful assignments: 168.49754901960785
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -323        |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 34          |
|    time_elapsed         | 205         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.009748245 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.15        |
|    entropy_loss         | -4.92       |
|    explained_variance   | 0.541       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.878       |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0483     |
|    value_loss           | 2.6

-------- Rollout Summary --------
Total mean reward: 90.0
Standard deviation of reward: 0.0
Average successful assignments: 176.28968253968253
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -300        |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 42          |
|    time_elapsed         | 258         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.008560588 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.15        |
|    entropy_loss         | -4.37       |
|    explained_variance   | 0.494       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.52        |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0456     |
|    value_loss           | 3.3

-------- Rollout Summary --------
Total mean reward: 110.0
Standard deviation of reward: 0.0
Average successful assignments: 185.15833333333333
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -273         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 50           |
|    time_elapsed         | 311          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0059831324 |
|    clip_fraction        | 0.113        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.89        |
|    explained_variance   | 0.459        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.04         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.0385      |
|    value_

-------- Rollout Summary --------
Total mean reward: 128.0
Standard deviation of reward: 0.0
Average successful assignments: 193.20114942528735
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -230        |
| time/                   |             |
|    fps                  | 163         |
|    iterations           | 58          |
|    time_elapsed         | 362         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.007237655 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.64       |
|    explained_variance   | 0.368       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.12        |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0374     |
|    value_loss           | 3.

-------- Rollout Summary --------
Total mean reward: 134.0
Standard deviation of reward: 0.0
Average successful assignments: 199.9179292929293
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -186        |
| time/                   |             |
|    fps                  | 163         |
|    iterations           | 66          |
|    time_elapsed         | 412         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.006617892 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.4        |
|    explained_variance   | 0.322       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.09        |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.0401     |
|    value_loss           | 2.8

-------- Rollout Summary --------
Total mean reward: 146.0
Standard deviation of reward: 0.0
Average successful assignments: 206.0259009009009
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -142         |
| time/                   |              |
|    fps                  | 163          |
|    iterations           | 74           |
|    time_elapsed         | 464          |
|    total_timesteps      | 75776        |
| train/                  |              |
|    approx_kl            | 0.0058606463 |
|    clip_fraction        | 0.0979       |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.25        |
|    explained_variance   | 0.442        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.65         |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.0344      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 152.0
Standard deviation of reward: 0.0
Average successful assignments: 211.1788617886179
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -102         |
| time/                   |              |
|    fps                  | 164          |
|    iterations           | 82           |
|    time_elapsed         | 509          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0063859313 |
|    clip_fraction        | 0.0988       |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.11        |
|    explained_variance   | 0.546        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.48         |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.0324      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 146.0
Standard deviation of reward: 0.0
Average successful assignments: 215.48611111111111
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -71.5       |
| time/                   |             |
|    fps                  | 169         |
|    iterations           | 90          |
|    time_elapsed         | 544         |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.006519531 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.15        |
|    entropy_loss         | -2.98       |
|    explained_variance   | 0.631       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.28        |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.0365     |
|    value_loss           | 3.

-------- Rollout Summary --------
Total mean reward: 152.0
Standard deviation of reward: 0.0
Average successful assignments: 219.37755102040816
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -48.1        |
| time/                   |              |
|    fps                  | 173          |
|    iterations           | 98           |
|    time_elapsed         | 578          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0059548346 |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.15         |
|    entropy_loss         | -2.97        |
|    explained_variance   | 0.685        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.8          |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.0359      |
|    value_



---- Testing Summary ----
Mean Reward: 154.0
Standard Deviation of Reward: 0.0
Average Successful Assignments in Testing: 277.0


In [4]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback

# Load datasets
tasks_df = pd.read_csv('RandomTasks400.csv')


# Rename columns for consistency
tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)

# Define the Gym environment for task allocation
class TaskAllocationEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, vehicles, tasks):
        super(TaskAllocationEnv, self).__init__()
        self.vehicles = vehicles
        self.tasks = tasks
        self.current_task = 0
        self.successful_assignments = 0
        self.successful_history = []  # Added to track successful assignments
        self.seed()

        num_features = tasks.shape[1]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_features,), dtype=np.float32)
        self.eligible_vehicle_indices = []
        self.update_action_space()

    def seed(self, seed=42):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]        
        
    def update_action_space(self):
        task_eligibility = self.tasks.iloc[self.current_task]['MinEligibility']
        eligible_vehicles = self.vehicles[self.vehicles['Eligible'] >= task_eligibility]
        self.eligible_vehicle_indices = eligible_vehicles.index.tolist()
        if len(self.eligible_vehicle_indices) == 0:
            self.action_space = spaces.Discrete(1)  # Prevents invalid action space of size 0
        else:
            self.action_space = spaces.Discrete(len(self.eligible_vehicle_indices))

    def reset(self):
        self.current_task = 0
        self.successful_assignments = 0
        self.update_action_space()
        #print("Resetting environment. Starting new episode.")
        return self.tasks.iloc[self.current_task].values.astype(np.float32)

    def step(self, action):
        task = self.tasks.iloc[self.current_task]
        vehicle = self.vehicles.iloc[action]
        reward = 0

        # Check if the vehicle meets all the task requirements
        meets_requirements = (
            vehicle['RAM'] >= task['RAM'] and
            vehicle['storage'] >= task['storage'] and
            vehicle['Trustfactor'] >= task['Trustfactor'] and
            vehicle['Distance'] <= task['Distance'] and
            vehicle['TransmissionRate'] >= task['TransmissionRate']
        )
        reward = 1 if meets_requirements else -1

        self.successful_assignments += reward > 0
        self.current_task += 1
        done = self.current_task >= len(self.tasks)

        if not done:
            self.update_action_space()
            next_state = self.tasks.iloc[self.current_task].values.astype(np.float32)
        else:
            next_state = np.zeros(self.observation_space.shape[0])
            self.successful_history.append(self.successful_assignments)
            #print(f"Episode completed. Successful assignments: {self.successful_assignments}.")
            self.successful_assignments = 0  # Reset for next episode

        
        
        #three lines below can be uncommented for more detailed output
        #print(f"Task Details: {task.to_dict()}")
        #print(f"Vehicle Details: {vehicle.to_dict()}")
        #print(f"Step: Task {self.current_task}, Action {action}, Reward {reward}")

        return next_state, reward, done, {}

    def get_average_success(self):
        return np.mean(self.successful_history) if self.successful_history else 0


    def render(self, mode='human'):
        pass

    def close(self):
        pass



# Custom callback for logging
class CustomCallback(BaseCallback):
    def __init__(self, env, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.env = env
        self.total_rewards = 0
        self.total_assignments = 0
        self.num_episodes = 0

    def _on_step(self):
        return True

    def _on_rollout_end(self):
        mean_reward, std_reward = evaluate_policy(self.model, self.model.get_env(), n_eval_episodes=10)
        average_assignments = self.env.get_attr('get_average_success')[0]()
        self.total_rewards += mean_reward
        self.total_assignments += average_assignments
        self.num_episodes += 1

        print("-------- Rollout Summary --------")
        print(f"Total mean reward: {mean_reward}")
        print(f"Standard deviation of reward: {std_reward}")
        print(f"Average successful assignments: {average_assignments}")
        print("All assignments history:", self.env.envs[0].successful_history)
        self.env.envs[0].successful_history = []  # Reset history after each iteration

    def _on_training_end(self):
        average_total_reward = self.total_rewards / self.num_episodes
        average_total_assignments = self.total_assignments / self.num_episodes
        print("-------- Training Summary --------")
        print(f"Overall Average Mean Reward: {average_total_reward}")
        print(f"Overall Average Successful Assignments: {average_total_assignments}")


# Prepare the environment
env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, tasks_df), n_envs=1)

# Initialize and train the PPO model
model = PPO("MlpPolicy", env, verbose=1,
            n_steps=1024, batch_size=128, n_epochs=10, learning_rate=0.00018,
            gamma=0.96, gae_lambda=0.87, clip_range=0.15, ent_coef=0.07)

callback = CustomCallback(env)  # Use custom callback for detailed tracking and logging

# Train the model with the custom callback
model.learn(total_timesteps=1024*100, callback=callback)

# Save the model
model.save("ppo_task_allocation_model")


# Load new task dataset for testing
new_tasks_df = pd.read_csv('RandomTasks400Test.csv')
new_tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)
test_env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, new_tasks_df), n_envs=1)

# Evaluate the model on the new test environment
mean_reward, std_reward = evaluate_policy(model, test_env, n_eval_episodes=10)
# Extract the successful assignments history from the test environment
successful_assignments = test_env.envs[0].env.get_average_success()

print("---- Testing Summary ----")
print(f"Mean Reward: {mean_reward}")
print(f"Standard Deviation of Reward: {std_reward}")
print(f"Average Successful Assignments in Testing: {successful_assignments}")




Using cpu device
-------- Rollout Summary --------
Total mean reward: -384.0
Standard deviation of reward: 0.0
Average successful assignments: 8.916666666666666
All assignments history: [17, 10, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 400      |
|    ep_rew_mean     | -373     |
| time/              |          |
|    fps             | 337      |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 1024     |
---------------------------------


  logger.warn(
  logger.warn(


-------- Rollout Summary --------
Total mean reward: -270.0
Standard deviation of reward: 0.0
Average successful assignments: 32.708333333333336
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -372         |
| time/                   |              |
|    fps                  | 314          |
|    iterations           | 2            |
|    time_elapsed         | 6            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0071853753 |
|    clip_fraction        | 0.0665       |
|    clip_range           | 0.15         |
|    entropy_loss         | -5.84        |
|    explained_variance   | -0.0996      |
|    learning_rate        | 0.00018      |
|    loss                 | 3.32         |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0406      |
|    value

-------- Rollout Summary --------
Total mean reward: -38.0
Standard deviation of reward: 0.0
Average successful assignments: 104.14166666666667
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -367        |
| time/                   |             |
|    fps                  | 289         |
|    iterations           | 10          |
|    time_elapsed         | 35          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.008838493 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.78       |
|    explained_variance   | 0.00166     |
|    learning_rate        | 0.00018     |
|    loss                 | 0.587       |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0371     |
|    value_loss           | 4.

-------- Rollout Summary --------
Total mean reward: -34.0
Standard deviation of reward: 0.0
Average successful assignments: 126.79166666666667
All assignments history: []
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 400        |
|    ep_rew_mean          | -361       |
| time/                   |            |
|    fps                  | 283        |
|    iterations           | 18         |
|    time_elapsed         | 65         |
|    total_timesteps      | 18432      |
| train/                  |            |
|    approx_kl            | 0.00886194 |
|    clip_fraction        | 0.142      |
|    clip_range           | 0.15       |
|    entropy_loss         | -5.7       |
|    explained_variance   | 0.144      |
|    learning_rate        | 0.00018    |
|    loss                 | 1.14       |
|    n_updates            | 170        |
|    policy_gradient_loss | -0.0387    |
|    value_loss           | 2.64       |
--------

-------- Rollout Summary --------
Total mean reward: -2.0
Standard deviation of reward: 0.0
Average successful assignments: 139.16346153846155
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -348        |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 26          |
|    time_elapsed         | 96          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.008624364 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.42       |
|    explained_variance   | 0.238       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.11        |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0437     |
|    value_loss           | 3.2

-------- Rollout Summary --------
Total mean reward: 64.0
Standard deviation of reward: 0.0
Average successful assignments: 151.32843137254903
All assignments history: []
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 400        |
|    ep_rew_mean          | -328       |
| time/                   |            |
|    fps                  | 270        |
|    iterations           | 34         |
|    time_elapsed         | 128        |
|    total_timesteps      | 34816      |
| train/                  |            |
|    approx_kl            | 0.00800571 |
|    clip_fraction        | 0.148      |
|    clip_range           | 0.15       |
|    entropy_loss         | -4.93      |
|    explained_variance   | 0.159      |
|    learning_rate        | 0.00018    |
|    loss                 | 0.63       |
|    n_updates            | 330        |
|    policy_gradient_loss | -0.0453    |
|    value_loss           | 3.39       |
---------

-------- Rollout Summary --------
Total mean reward: 78.0
Standard deviation of reward: 0.0
Average successful assignments: 163.2281746031746
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -300        |
| time/                   |             |
|    fps                  | 262         |
|    iterations           | 42          |
|    time_elapsed         | 164         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.008522388 |
|    clip_fraction        | 0.164       |
|    clip_range           | 0.15        |
|    entropy_loss         | -4.25       |
|    explained_variance   | 0.301       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.3         |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0475     |
|    value_loss           | 3.29

-------- Rollout Summary --------
Total mean reward: 88.0
Standard deviation of reward: 0.0
Average successful assignments: 172.90166666666667
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -273        |
| time/                   |             |
|    fps                  | 250         |
|    iterations           | 50          |
|    time_elapsed         | 204         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.007990483 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.87       |
|    explained_variance   | 0.282       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.57        |
|    n_updates            | 490         |
|    policy_gradient_loss | -0.0429     |
|    value_loss           | 3.4

-------- Rollout Summary --------
Total mean reward: 90.0
Standard deviation of reward: 0.0
Average successful assignments: 180.6005747126437
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -230         |
| time/                   |              |
|    fps                  | 242          |
|    iterations           | 58           |
|    time_elapsed         | 244          |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0071137017 |
|    clip_fraction        | 0.132        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.64        |
|    explained_variance   | 0.341        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.44         |
|    n_updates            | 570          |
|    policy_gradient_loss | -0.0405      |
|    value_lo

-------- Rollout Summary --------
Total mean reward: 94.0
Standard deviation of reward: 0.0
Average successful assignments: 186.8169191919192
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -186         |
| time/                   |              |
|    fps                  | 237          |
|    iterations           | 66           |
|    time_elapsed         | 284          |
|    total_timesteps      | 67584        |
| train/                  |              |
|    approx_kl            | 0.0063754097 |
|    clip_fraction        | 0.119        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.52        |
|    explained_variance   | 0.41         |
|    learning_rate        | 0.00018      |
|    loss                 | 1.41         |
|    n_updates            | 650          |
|    policy_gradient_loss | -0.0414      |
|    value_lo

-------- Rollout Summary --------
Total mean reward: 120.0
Standard deviation of reward: 0.0
Average successful assignments: 192.32995495495496
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -144         |
| time/                   |              |
|    fps                  | 231          |
|    iterations           | 74           |
|    time_elapsed         | 327          |
|    total_timesteps      | 75776        |
| train/                  |              |
|    approx_kl            | 0.0070673553 |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.47        |
|    explained_variance   | 0.499        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.27         |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.033       |
|    value_

-------- Rollout Summary --------
Total mean reward: 136.0
Standard deviation of reward: 0.0
Average successful assignments: 198.02032520325204
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -108         |
| time/                   |              |
|    fps                  | 229          |
|    iterations           | 82           |
|    time_elapsed         | 365          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0067658294 |
|    clip_fraction        | 0.129        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.39        |
|    explained_variance   | 0.485        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.32         |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.0395      |
|    value_

-------- Rollout Summary --------
Total mean reward: 134.0
Standard deviation of reward: 0.0
Average successful assignments: 202.925
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -83         |
| time/                   |             |
|    fps                  | 229         |
|    iterations           | 90          |
|    time_elapsed         | 402         |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.005815825 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.3        |
|    explained_variance   | 0.476       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.15        |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.0351     |
|    value_loss           | 3.41        |

-------- Rollout Summary --------
Total mean reward: 138.0
Standard deviation of reward: 0.0
Average successful assignments: 207.13945578231292
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -65.5        |
| time/                   |              |
|    fps                  | 230          |
|    iterations           | 98           |
|    time_elapsed         | 436          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0064613046 |
|    clip_fraction        | 0.129        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.16        |
|    explained_variance   | 0.563        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.64         |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.0371      |
|    value_



---- Testing Summary ----
Mean Reward: 144.0
Standard Deviation of Reward: 0.0
Average Successful Assignments in Testing: 272.0


In [5]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback

# Load datasets
tasks_df = pd.read_csv('RandomTasks400.csv')


# Rename columns for consistency
tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)

# Define the Gym environment for task allocation
class TaskAllocationEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, vehicles, tasks):
        super(TaskAllocationEnv, self).__init__()
        self.vehicles = vehicles
        self.tasks = tasks
        self.current_task = 0
        self.successful_assignments = 0
        self.successful_history = []  # Added to track successful assignments
        self.seed()

        num_features = tasks.shape[1]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_features,), dtype=np.float32)
        self.eligible_vehicle_indices = []
        self.update_action_space()

    def seed(self, seed=42):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]        
        
    def update_action_space(self):
        task_eligibility = self.tasks.iloc[self.current_task]['MinEligibility']
        eligible_vehicles = self.vehicles[self.vehicles['Eligible'] >= task_eligibility]
        self.eligible_vehicle_indices = eligible_vehicles.index.tolist()
        if len(self.eligible_vehicle_indices) == 0:
            self.action_space = spaces.Discrete(1)  # Prevents invalid action space of size 0
        else:
            self.action_space = spaces.Discrete(len(self.eligible_vehicle_indices))

    def reset(self):
        self.current_task = 0
        self.successful_assignments = 0
        self.update_action_space()
        #print("Resetting environment. Starting new episode.")
        return self.tasks.iloc[self.current_task].values.astype(np.float32)

    def step(self, action):
        task = self.tasks.iloc[self.current_task]
        vehicle = self.vehicles.iloc[action]
        reward = 0

        # Check if the vehicle meets all the task requirements
        meets_requirements = (
            vehicle['RAM'] >= task['RAM'] and
            vehicle['storage'] >= task['storage'] and
            vehicle['Trustfactor'] >= task['Trustfactor'] and
            vehicle['Distance'] <= task['Distance'] and
            vehicle['TransmissionRate'] >= task['TransmissionRate']
        )
        reward = 1 if meets_requirements else -1

        self.successful_assignments += reward > 0
        self.current_task += 1
        done = self.current_task >= len(self.tasks)

        if not done:
            self.update_action_space()
            next_state = self.tasks.iloc[self.current_task].values.astype(np.float32)
        else:
            next_state = np.zeros(self.observation_space.shape[0])
            self.successful_history.append(self.successful_assignments)
            #print(f"Episode completed. Successful assignments: {self.successful_assignments}.")
            self.successful_assignments = 0  # Reset for next episode

        
        
        #three lines below can be uncommented for more detailed output
        #print(f"Task Details: {task.to_dict()}")
        #print(f"Vehicle Details: {vehicle.to_dict()}")
        #print(f"Step: Task {self.current_task}, Action {action}, Reward {reward}")

        return next_state, reward, done, {}

    def get_average_success(self):
        return np.mean(self.successful_history) if self.successful_history else 0


    def render(self, mode='human'):
        pass

    def close(self):
        pass



# Custom callback for logging
class CustomCallback(BaseCallback):
    def __init__(self, env, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.env = env
        self.total_rewards = 0
        self.total_assignments = 0
        self.num_episodes = 0

    def _on_step(self):
        return True

    def _on_rollout_end(self):
        mean_reward, std_reward = evaluate_policy(self.model, self.model.get_env(), n_eval_episodes=10)
        average_assignments = self.env.get_attr('get_average_success')[0]()
        self.total_rewards += mean_reward
        self.total_assignments += average_assignments
        self.num_episodes += 1

        print("-------- Rollout Summary --------")
        print(f"Total mean reward: {mean_reward}")
        print(f"Standard deviation of reward: {std_reward}")
        print(f"Average successful assignments: {average_assignments}")
        print("All assignments history:", self.env.envs[0].successful_history)
        self.env.envs[0].successful_history = []  # Reset history after each iteration

    def _on_training_end(self):
        average_total_reward = self.total_rewards / self.num_episodes
        average_total_assignments = self.total_assignments / self.num_episodes
        print("-------- Training Summary --------")
        print(f"Overall Average Mean Reward: {average_total_reward}")
        print(f"Overall Average Successful Assignments: {average_total_assignments}")


# Prepare the environment
env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, tasks_df), n_envs=1)

# Initialize and train the PPO model
model = PPO("MlpPolicy", env, verbose=1,
            n_steps=1024, batch_size=128, n_epochs=10, learning_rate=0.00018,
            gamma=0.96, gae_lambda=0.87, clip_range=0.15, ent_coef=0.07)

callback = CustomCallback(env)  # Use custom callback for detailed tracking and logging

# Train the model with the custom callback
model.learn(total_timesteps=1024*100, callback=callback)

# Save the model
model.save("ppo_task_allocation_model")


# Load new task dataset for testing
new_tasks_df = pd.read_csv('RandomTasks400Test.csv')
new_tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)
test_env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, new_tasks_df), n_envs=1)

# Evaluate the model on the new test environment
mean_reward, std_reward = evaluate_policy(model, test_env, n_eval_episodes=10)
# Extract the successful assignments history from the test environment
successful_assignments = test_env.envs[0].env.get_average_success()

print("---- Testing Summary ----")
print(f"Mean Reward: {mean_reward}")
print(f"Standard Deviation of Reward: {std_reward}")
print(f"Average Successful Assignments in Testing: {successful_assignments}")


Using cpu device




-------- Rollout Summary --------
Total mean reward: -334.0
Standard deviation of reward: 0.0
Average successful assignments: 30.083333333333332
All assignments history: [21, 10, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33]
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 400      |
|    ep_rew_mean     | -369     |
| time/              |          |
|    fps             | 303      |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 1024     |
---------------------------------


  logger.warn(
  logger.warn(


-------- Rollout Summary --------
Total mean reward: -330.0
Standard deviation of reward: 0.0
Average successful assignments: 30.958333333333332
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -368         |
| time/                   |              |
|    fps                  | 283          |
|    iterations           | 2            |
|    time_elapsed         | 7            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0074489396 |
|    clip_fraction        | 0.0751       |
|    clip_range           | 0.15         |
|    entropy_loss         | -5.84        |
|    explained_variance   | -0.137       |
|    learning_rate        | 0.00018      |
|    loss                 | 2.07         |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.0382      |
|    value

-------- Rollout Summary --------
Total mean reward: -272.0
Standard deviation of reward: 0.0
Average successful assignments: 43.983333333333334
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -366        |
| time/                   |             |
|    fps                  | 275         |
|    iterations           | 10          |
|    time_elapsed         | 37          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.008893935 |
|    clip_fraction        | 0.147       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.78       |
|    explained_variance   | 0.000602    |
|    learning_rate        | 0.00018     |
|    loss                 | 0.608       |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0376     |
|    value_loss           | 4

-------- Rollout Summary --------
Total mean reward: -44.0
Standard deviation of reward: 0.0
Average successful assignments: 85.4675925925926
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -360         |
| time/                   |              |
|    fps                  | 275          |
|    iterations           | 18           |
|    time_elapsed         | 66           |
|    total_timesteps      | 18432        |
| train/                  |              |
|    approx_kl            | 0.0111927865 |
|    clip_fraction        | 0.237        |
|    clip_range           | 0.15         |
|    entropy_loss         | -5.71        |
|    explained_variance   | 0.103        |
|    learning_rate        | 0.00018      |
|    loss                 | 0.138        |
|    n_updates            | 170          |
|    policy_gradient_loss | -0.0503      |
|    value_lo

-------- Rollout Summary --------
Total mean reward: 30.0
Standard deviation of reward: 0.0
Average successful assignments: 115.74038461538461
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -348        |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 26          |
|    time_elapsed         | 96          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.007961285 |
|    clip_fraction        | 0.147       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.48       |
|    explained_variance   | 0.33        |
|    learning_rate        | 0.00018     |
|    loss                 | 0.326       |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0412     |
|    value_loss           | 2.9

-------- Rollout Summary --------
Total mean reward: 52.0
Standard deviation of reward: 0.0
Average successful assignments: 134.85294117647058
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -330        |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 34          |
|    time_elapsed         | 125         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.007686979 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.02       |
|    explained_variance   | 0.408       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.868       |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0444     |
|    value_loss           | 2.8

-------- Rollout Summary --------
Total mean reward: 82.0
Standard deviation of reward: 0.0
Average successful assignments: 149.4047619047619
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -306        |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 42          |
|    time_elapsed         | 155         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.006306173 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.15        |
|    entropy_loss         | -4.41       |
|    explained_variance   | 0.43        |
|    learning_rate        | 0.00018     |
|    loss                 | 0.903       |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0388     |
|    value_loss           | 3   

-------- Rollout Summary --------
Total mean reward: 90.0
Standard deviation of reward: 0.0
Average successful assignments: 161.32333333333332
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -280         |
| time/                   |              |
|    fps                  | 275          |
|    iterations           | 50           |
|    time_elapsed         | 185          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0066524073 |
|    clip_fraction        | 0.133        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.95        |
|    explained_variance   | 0.558        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.44         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.0448      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 92.0
Standard deviation of reward: 0.0
Average successful assignments: 170.7528735632184
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -236        |
| time/                   |             |
|    fps                  | 275         |
|    iterations           | 58          |
|    time_elapsed         | 215         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.007640248 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.56       |
|    explained_variance   | 0.466       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.16        |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0439     |
|    value_loss           | 2.88

-------- Rollout Summary --------
Total mean reward: 92.0
Standard deviation of reward: 0.0
Average successful assignments: 178.3320707070707
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 400         |
|    ep_rew_mean          | -191        |
| time/                   |             |
|    fps                  | 276         |
|    iterations           | 66          |
|    time_elapsed         | 244         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.006591717 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.46       |
|    explained_variance   | 0.436       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.18        |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.0369     |
|    value_loss           | 3.19

-------- Rollout Summary --------
Total mean reward: 96.0
Standard deviation of reward: 0.0
Average successful assignments: 184.34121621621622
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -147         |
| time/                   |              |
|    fps                  | 276          |
|    iterations           | 74           |
|    time_elapsed         | 273          |
|    total_timesteps      | 75776        |
| train/                  |              |
|    approx_kl            | 0.0063931155 |
|    clip_fraction        | 0.0955       |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.41        |
|    explained_variance   | 0.398        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.4          |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.0336      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 104.0
Standard deviation of reward: 0.0
Average successful assignments: 189.4878048780488
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -109         |
| time/                   |              |
|    fps                  | 276          |
|    iterations           | 82           |
|    time_elapsed         | 303          |
|    total_timesteps      | 83968        |
| train/                  |              |
|    approx_kl            | 0.0053908825 |
|    clip_fraction        | 0.0787       |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.31        |
|    explained_variance   | 0.443        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.23         |
|    n_updates            | 810          |
|    policy_gradient_loss | -0.0322      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 114.0
Standard deviation of reward: 0.0
Average successful assignments: 194.1648148148148
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -80.3        |
| time/                   |              |
|    fps                  | 277          |
|    iterations           | 90           |
|    time_elapsed         | 331          |
|    total_timesteps      | 92160        |
| train/                  |              |
|    approx_kl            | 0.0072977347 |
|    clip_fraction        | 0.146        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.2         |
|    explained_variance   | 0.493        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.3          |
|    n_updates            | 890          |
|    policy_gradient_loss | -0.0376      |
|    value_l

-------- Rollout Summary --------
Total mean reward: 116.0
Standard deviation of reward: 0.0
Average successful assignments: 198.36309523809524
All assignments history: []
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 400          |
|    ep_rew_mean          | -61.3        |
| time/                   |              |
|    fps                  | 278          |
|    iterations           | 98           |
|    time_elapsed         | 359          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0067391316 |
|    clip_fraction        | 0.132        |
|    clip_range           | 0.15         |
|    entropy_loss         | -3.12        |
|    explained_variance   | 0.539        |
|    learning_rate        | 0.00018      |
|    loss                 | 1.12         |
|    n_updates            | 970          |
|    policy_gradient_loss | -0.0379      |
|    value_



---- Testing Summary ----
Mean Reward: 122.0
Standard Deviation of Reward: 0.0
Average Successful Assignments in Testing: 261.0
