In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    X = data.drop(columns=['Eligible'])
    y = data['Eligible']
    return X, y

def optimize_knn_model(X_train, y_train):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    knn_model = KNeighborsRegressor()
    param_grid = {
        'n_neighbors': [3, 5, 10, 20, 30, 40],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan'],
        'p': [1, 2]  # 1: Manhattan distance, 2: Euclidean distance
    }
    grid_search = GridSearchCV(knn_model, param_grid, cv=5, verbose=1, scoring='neg_mean_squared_error')
    grid_search.fit(X_train_scaled, y_train)
    print("Best parameters:", grid_search.best_params_)
    best_knn = grid_search.best_estimator_
    return best_knn, scaler

# Load and train on VehicleTrainingDataset.csv
X_train, y_train = load_and_preprocess_data('VehicleTrainingDataset_Noisy_0.01.csv')
knn_model, scaler = optimize_knn_model(X_train, y_train)

# Predict eligibility scores on 1000VehicleDataset.csv
vehicles_df = pd.read_csv('1000VehicleDataset_Noisy_0.01.csv')
X_test = vehicles_df.drop(columns=['Eligible'])
X_test_scaled = scaler.transform(X_test)
predicted_scores = knn_model.predict(X_test_scaled)

# Assuming you have access to actual scores, replace this line with the actual score loading logic if available
y_actual = vehicles_df['Eligible']

# Replace actual scores with predicted ones
vehicles_df['Eligible'] = predicted_scores  

# Calculate metrics
mae = mean_absolute_error(y_actual, predicted_scores)
rmse = np.sqrt(mean_squared_error(y_actual, predicted_scores))
r_squared = r2_score(y_actual, predicted_scores)
rae = np.sum(np.abs(y_actual - predicted_scores)) / np.sum(np.abs(y_actual - np.mean(y_actual)))

# Output the results
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")
print(f"RAE: {rae}")


Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best parameters: {'metric': 'manhattan', 'n_neighbors': 10, 'p': 1, 'weights': 'distance'}
MAE: 1.7258836094021666
RMSE: 2.159748576425794
R-squared: 0.9684512036561819
RAE: 0.18025169439477964


In [2]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback

# Load datasets
tasks_df = pd.read_csv('RandomTasks200.csv')


# Rename columns for consistency
tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)

# Define the Gym environment for task allocation
class TaskAllocationEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, vehicles, tasks):
        super(TaskAllocationEnv, self).__init__()
        self.vehicles = vehicles
        self.tasks = tasks
        self.current_task = 0
        self.successful_assignments = 0
        self.successful_history = []  # Added to track successful assignments
        self.seed()

        num_features = tasks.shape[1]
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(num_features,), dtype=np.float32)
        self.eligible_vehicle_indices = []
        self.update_action_space()

    def seed(self, seed=42):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]        
        
    def update_action_space(self):
        task_eligibility = self.tasks.iloc[self.current_task]['MinEligibility']
        eligible_vehicles = self.vehicles[self.vehicles['Eligible'] >= task_eligibility]
        self.eligible_vehicle_indices = eligible_vehicles.index.tolist()
        if len(self.eligible_vehicle_indices) == 0:
            self.action_space = spaces.Discrete(1)  # Prevents invalid action space of size 0
        else:
            self.action_space = spaces.Discrete(len(self.eligible_vehicle_indices))

    def reset(self):
        self.current_task = 0
        self.successful_assignments = 0
        self.update_action_space()
        #print("Resetting environment. Starting new episode.")
        return self.tasks.iloc[self.current_task].values.astype(np.float32)

    def step(self, action):
        task = self.tasks.iloc[self.current_task]
        vehicle = self.vehicles.iloc[action]
        reward = 0

        # Check if the vehicle meets all the task requirements
        meets_requirements = (
            vehicle['RAM'] >= task['RAM'] and
            vehicle['storage'] >= task['storage'] and
            vehicle['Trustfactor'] >= task['Trustfactor'] and
            vehicle['Distance'] <= task['Distance'] and
            vehicle['TransmissionRate'] >= task['TransmissionRate']
        )
        reward = 1 if meets_requirements else -1

        self.successful_assignments += reward > 0
        self.current_task += 1
        done = self.current_task >= len(self.tasks)

        if not done:
            self.update_action_space()
            next_state = self.tasks.iloc[self.current_task].values.astype(np.float32)
        else:
            next_state = np.zeros(self.observation_space.shape[0])
            self.successful_history.append(self.successful_assignments)
            #print(f"Episode completed. Successful assignments: {self.successful_assignments}.")
            self.successful_assignments = 0  # Reset for next episode

        
        
        #three lines below can be uncommented for more detailed output
        #print(f"Task Details: {task.to_dict()}")
        #print(f"Vehicle Details: {vehicle.to_dict()}")
        #print(f"Step: Task {self.current_task}, Action {action}, Reward {reward}")

        return next_state, reward, done, {}

    def get_average_success(self):
        return np.mean(self.successful_history) if self.successful_history else 0


    def render(self, mode='human'):
        pass

    def close(self):
        pass



# Custom callback for logging
class CustomCallback(BaseCallback):
    def __init__(self, env, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.env = env
        self.total_rewards = 0
        self.total_assignments = 0
        self.num_episodes = 0

    def _on_step(self):
        return True

    def _on_rollout_end(self):
        mean_reward, std_reward = evaluate_policy(self.model, self.model.get_env(), n_eval_episodes=10)
        average_assignments = self.env.get_attr('get_average_success')[0]()
        self.total_rewards += mean_reward
        self.total_assignments += average_assignments
        self.num_episodes += 1

        print("-------- Rollout Summary --------")
        print(f"Total mean reward: {mean_reward}")
        print(f"Standard deviation of reward: {std_reward}")
        print(f"Average successful assignments: {average_assignments}")
        print("All assignments history:", self.env.envs[0].successful_history)
        self.env.envs[0].successful_history = []  # Reset history after each iteration

    def _on_training_end(self):
        average_total_reward = self.total_rewards / self.num_episodes
        average_total_assignments = self.total_assignments / self.num_episodes
        print("-------- Training Summary --------")
        print(f"Overall Average Mean Reward: {average_total_reward}")
        print(f"Overall Average Successful Assignments: {average_total_assignments}")


# Prepare the environment
env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, tasks_df), n_envs=1)

# Initialize and train the PPO model
model = PPO("MlpPolicy", env, verbose=1,
            n_steps=1024, batch_size=128, n_epochs=10, learning_rate=0.00018,
            gamma=0.96, gae_lambda=0.87, clip_range=0.15, ent_coef=0.07)

callback = CustomCallback(env)  # Use custom callback for detailed tracking and logging

# Train the model with the custom callback
model.learn(total_timesteps=1024*100, callback=callback)

# Save the model
model.save("ppo_task_allocation_model")




Using cpu device
-------- Rollout Summary --------
Total mean reward: -194.0
Standard deviation of reward: 0.0
Average successful assignments: 3.3333333333333335
All assignments history: [2, 7, 6, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 200      |
|    ep_rew_mean     | -192     |
| time/              |          |
|    fps             | 177      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 1024     |
---------------------------------


  logger.warn(
  logger.warn(


-------- Rollout Summary --------
Total mean reward: -174.0
Standard deviation of reward: 0.0
Average successful assignments: 7.266666666666667
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -188        |
| time/                   |             |
|    fps                  | 171         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.008548127 |
|    clip_fraction        | 0.0742      |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.57       |
|    explained_variance   | -0.249      |
|    learning_rate        | 0.00018     |
|    loss                 | 2.62        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0447     |
|    value_loss           | 17

-------- Rollout Summary --------
Total mean reward: -60.0
Standard deviation of reward: 0.0
Average successful assignments: 23.773333333333333
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -186        |
| time/                   |             |
|    fps                  | 130         |
|    iterations           | 10          |
|    time_elapsed         | 78          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.011910824 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.53       |
|    explained_variance   | 0.0363      |
|    learning_rate        | 0.00018     |
|    loss                 | 1.03        |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0448     |
|    value_loss           | 5.

-------- Rollout Summary --------
Total mean reward: -12.0
Standard deviation of reward: 0.0
Average successful assignments: 40.7
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -185        |
| time/                   |             |
|    fps                  | 140         |
|    iterations           | 18          |
|    time_elapsed         | 131         |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.012299187 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.51       |
|    explained_variance   | 0.427       |
|    learning_rate        | 0.00018     |
|    loss                 | 2.13        |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.0501     |
|    value_loss           | 4.3         |
--

-------- Rollout Summary --------
Total mean reward: 14.0
Standard deviation of reward: 0.0
Average successful assignments: 50.55641025641026
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -182        |
| time/                   |             |
|    fps                  | 149         |
|    iterations           | 26          |
|    time_elapsed         | 177         |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.009372862 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.47       |
|    explained_variance   | 0.611       |
|    learning_rate        | 0.00018     |
|    loss                 | 1.8         |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0518     |
|    value_loss           | 4.11

-------- Rollout Summary --------
Total mean reward: 34.0
Standard deviation of reward: 0.0
Average successful assignments: 57.1764705882353
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -177        |
| time/                   |             |
|    fps                  | 154         |
|    iterations           | 34          |
|    time_elapsed         | 225         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.010823073 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.36       |
|    explained_variance   | 0.697       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.926       |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0553     |
|    value_loss           | 3.13 

-------- Rollout Summary --------
Total mean reward: 48.0
Standard deviation of reward: 0.0
Average successful assignments: 62.65396825396825
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -169        |
| time/                   |             |
|    fps                  | 155         |
|    iterations           | 42          |
|    time_elapsed         | 276         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.010297618 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.15        |
|    entropy_loss         | -6.11       |
|    explained_variance   | 0.723       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.659       |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0573     |
|    value_loss           | 2.87

-------- Rollout Summary --------
Total mean reward: 46.0
Standard deviation of reward: 0.0
Average successful assignments: 67.38933333333334
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -154        |
| time/                   |             |
|    fps                  | 156         |
|    iterations           | 50          |
|    time_elapsed         | 327         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.009895926 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.72       |
|    explained_variance   | 0.636       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.816       |
|    n_updates            | 490         |
|    policy_gradient_loss | -0.0572     |
|    value_loss           | 3.03

-------- Rollout Summary --------
Total mean reward: 52.0
Standard deviation of reward: 0.0
Average successful assignments: 71.40114942528736
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -135        |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 58          |
|    time_elapsed         | 374         |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.010092893 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.42       |
|    explained_variance   | 0.596       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.977       |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0581     |
|    value_loss           | 2.85

-------- Rollout Summary --------
Total mean reward: 60.0
Standard deviation of reward: 0.0
Average successful assignments: 75.21313131313131
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -114        |
| time/                   |             |
|    fps                  | 166         |
|    iterations           | 66          |
|    time_elapsed         | 406         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.010872055 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.15        |
|    entropy_loss         | -5.12       |
|    explained_variance   | 0.594       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.543       |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.0587     |
|    value_loss           | 2.35

-------- Rollout Summary --------
Total mean reward: 64.0
Standard deviation of reward: 0.0
Average successful assignments: 78.58018018018018
All assignments history: []
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 200        |
|    ep_rew_mean          | -94.7      |
| time/                   |            |
|    fps                  | 174        |
|    iterations           | 74         |
|    time_elapsed         | 435        |
|    total_timesteps      | 75776      |
| train/                  |            |
|    approx_kl            | 0.00814953 |
|    clip_fraction        | 0.154      |
|    clip_range           | 0.15       |
|    entropy_loss         | -4.76      |
|    explained_variance   | 0.607      |
|    learning_rate        | 0.00018    |
|    loss                 | 0.92       |
|    n_updates            | 730        |
|    policy_gradient_loss | -0.0497    |
|    value_loss           | 2.38       |
----------

-------- Rollout Summary --------
Total mean reward: 68.0
Standard deviation of reward: 0.0
Average successful assignments: 81.80569105691058
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -75.2       |
| time/                   |             |
|    fps                  | 181         |
|    iterations           | 82          |
|    time_elapsed         | 461         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.008307787 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.15        |
|    entropy_loss         | -4.37       |
|    explained_variance   | 0.665       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.47        |
|    n_updates            | 810         |
|    policy_gradient_loss | -0.0488     |
|    value_loss           | 1.83

-------- Rollout Summary --------
Total mean reward: 66.0
Standard deviation of reward: 0.0
Average successful assignments: 84.83185185185185
All assignments history: []
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 200        |
|    ep_rew_mean          | -55.6      |
| time/                   |            |
|    fps                  | 188        |
|    iterations           | 90         |
|    time_elapsed         | 487        |
|    total_timesteps      | 92160      |
| train/                  |            |
|    approx_kl            | 0.00801372 |
|    clip_fraction        | 0.162      |
|    clip_range           | 0.15       |
|    entropy_loss         | -4.01      |
|    explained_variance   | 0.538      |
|    learning_rate        | 0.00018    |
|    loss                 | 0.873      |
|    n_updates            | 890        |
|    policy_gradient_loss | -0.0493    |
|    value_loss           | 2.51       |
----------

-------- Rollout Summary --------
Total mean reward: 76.0
Standard deviation of reward: 0.0
Average successful assignments: 87.68299319727892
All assignments history: []
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 200         |
|    ep_rew_mean          | -39.1       |
| time/                   |             |
|    fps                  | 195         |
|    iterations           | 98          |
|    time_elapsed         | 514         |
|    total_timesteps      | 100352      |
| train/                  |             |
|    approx_kl            | 0.006628211 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.15        |
|    entropy_loss         | -3.86       |
|    explained_variance   | 0.554       |
|    learning_rate        | 0.00018     |
|    loss                 | 0.789       |
|    n_updates            | 970         |
|    policy_gradient_loss | -0.0411     |
|    value_loss           | 2.72

In [3]:
# Load new task dataset for testing
new_tasks_df = pd.read_csv('RandomTasks200Test.csv')
new_tasks_df.rename(columns={
    'Required_RAM': 'RAM',
    'Required_Storage': 'storage',
    'Minimum_Trust_Factor': 'Trustfactor',
    'Max_Distance': 'Distance',
    'Min_Transmission_Rate': 'TransmissionRate',
    'Min_Eligibility': 'MinEligibility'
}, inplace=True)
test_env = make_vec_env(lambda: TaskAllocationEnv(vehicles_df, new_tasks_df), n_envs=1)

# Evaluate the model on the new test environment
mean_reward, std_reward = evaluate_policy(model, test_env, n_eval_episodes=10)
# Extract the successful assignments history from the test environment
successful_assignments = test_env.envs[0].env.get_average_success()

print("---- Testing Summary ----")
print(f"Mean Reward: {mean_reward}")
print(f"Standard Deviation of Reward: {std_reward}")
print(f"Average Successful Assignments in Testing: {successful_assignments}")



---- Testing Summary ----
Mean Reward: 66.0
Standard Deviation of Reward: 0.0
Average Successful Assignments in Testing: 133.0
