In [41]:
import sys
import os
kernel_name = os.path.basename(sys.executable.replace("/bin/python",""))
print(kernel_name)

ShodhAI_1


## CPU Scheduling Objectives

After analyzing the problem statement and data, several potential objectives were identified:

1. **Minimize Total Burst Time**: The goal is to minimize the total time required to complete all processes. This objective is critical for maximizing overall system throughput.

2. **Minimize Average Waiting Time**: Aimed at reducing the average waiting time for processes in the queue, thereby enhancing system responsiveness and user experience.

3. **Minimize Average Turnaround Time**: Focuses on minimizing the total time from process arrival to its completion, optimizing process efficiency.

4. **Maximize Resource Utilization**: Ensures efficient use of available resources to prevent idle time while maintaining other performance metrics.

5. **Minimize Context Switching**: Reduces the number of context switches between processes, which can lower overhead and improve system performance.

Given the constraints and time limitations, the chosen objective for this project is to **minimize total burst time**.


# Install necessary libraries

In [None]:
pip install pandas
pip install gymnasium 
conda install conda-forge::stable-baselines3

# Import Libraries

In [42]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import make_vec_env

# Environment Initialization

In [44]:

class CPUSchedulingEnv(gym.Env):
    def __init__(self):
        super(CPUSchedulingEnv, self).__init__()
        self.action_space = spaces.Discrete(10)
        self.observation_space = spaces.Box(low=0, high=1, shape=(7,), dtype=np.float32)
        self.current_job_index = 0
        self.current_time = 0
        self.completed_jobs = []
        self.job_queue = []
        self.resources_used = 0
        self.data = None
        self.max_burst_time = None
        self.max_resources = None
        self.np_random = None
        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    def _load_data(self):
        data = pd.DataFrame({
            'Job Id': range(500),
            'Burst time': self.np_random.integers(1, 500, size=500),
            'Arrival Time': self.np_random.uniform(0, 1000, size=500),
            'Preemptive': self.np_random.integers(0, 2, size=500),
            'Resources': self.np_random.integers(1, 10, size=500)
        })
        data = data.sort_values(by='Arrival Time').reset_index(drop=True)
        return data

    def _get_observation(self):
        if len(self.job_queue) > 0:
            next_job = self.job_queue[0]
            obs = np.array([
                next_job['Job Id'] / len(self.data),
                next_job['Burst time'] / self.max_burst_time,
                next_job['Arrival Time'] / self.data['Arrival Time'].max(),
                next_job['Preemptive'],
                next_job['Resources'] / self.max_resources,
                self.resources_used / 20,
                self.current_time / self.data['Arrival Time'].max()
            ])
        else:
            obs = np.zeros(7)
        return np.clip(obs, self.observation_space.low, self.observation_space.high)

    def _update_job_queue(self):
        while self.current_job_index < len(self.data) and self.data.iloc[self.current_job_index]["Arrival Time"] <= self.current_time:
            self.job_queue.append(self.data.iloc[self.current_job_index])
            self.current_job_index += 1

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_job_index = 0
        self.current_time = 0
        self.completed_jobs = []
        self.job_queue = []
        self.resources_used = 0
        self.data = self._load_data()
        self.max_burst_time = self.data['Burst time'].max()
        self.max_resources = self.data['Resources'].max()
        self._update_job_queue()
        observation = self._get_observation()
        info = {}  # You can add any relevant reset information here
        return observation, info

    def step(self, action):
        self._update_job_queue()
        if action < len(self.job_queue):
            selected_job = self.job_queue[action]
        else:
            selected_job = None

        reward = 0
        done = False

        if selected_job:
            job_resources = selected_job['Resources']
            if job_resources + self.resources_used <= 20:
                self.resources_used += job_resources
                self.current_time += selected_job['Burst time']
                waiting_time = self.current_time - selected_job['Arrival Time']
                reward = -waiting_time / 1000  # Normalize reward
                self.completed_jobs.append(selected_job)
                self.job_queue.remove(selected_job)
                self.resources_used -= job_resources
            else:
                reward = -0.1  # Small penalty for selecting a job that exceeds resource limits
        else:
            reward = -0.1  # Small penalty for invalid action

        if self.current_job_index >= len(self.data) and len(self.job_queue) == 0:
            done = True
            reward += 10  # Reward for completing all jobs

        observation = self._get_observation()
        info = {}  # You can add any relevant step information here
        return observation, reward, done, False, info  # Added False for truncated



# Training the model using PPO

## Why PPO is a Better Method for CPU Scheduling

PPO (Proximal Policy Optimization) is preferred for CPU scheduling due to its ability to handle continuous action spaces and adapt to dynamic environments through reinforcement learning. Unlike traditional methods relying on static rules, PPO learns optimal scheduling policies directly from environment interactions. This adaptive approach allows PPO to potentially outperform fixed algorithms by continuously refining policies based on received rewards and penalties. As a result, PPO can efficiently manage CPU resource utilization and improve job completion times in dynamic and unpredictable scheduling scenarios.


In [45]:
# Create and wrap the environment
env = make_vec_env(CPUSchedulingEnv, n_envs=4)
env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)

# Create and train the PPO model
model = PPO("MlpPolicy", env, verbose=1, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01)
model.learn(total_timesteps=500000)

# Save the model
model.save("ppo_cpu_scheduling")

# Test the model
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, infos = env.step(action)
    if dones.any():
        obs = env.reset()

Using cpu device
------------------------------
| time/              |       |
|    fps             | 10939 |
|    iterations      | 1     |
|    time_elapsed    | 0     |
|    total_timesteps | 8192  |
------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 5301        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.013445817 |
|    clip_fraction        | 0.0369      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.3        |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.16        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00304    |
|    value_loss           | 0.287       |
-----------------------------------------
----------

# Resources
- [Gymnasium](https://gymnasium.farama.org/)
- [YouTube - Nicholas Renotte's Reinforcement Learning Playlist](https://www.youtube.com/watch?v=Mut_u40Sqz4&list=PLgNJO2hghbmjlE6cuKMws2ejC54BTAaWV&index=8&ab_channel=NicholasRenotte)
- [YouTube - Nicholas Renotte's Custom Environments Tutorial](https://www.youtube.com/watch?v=bD6V3rcr_54&list=PLgNJO2hghbmjlE6cuKMws2ejC54BTAaWV&index=3&ab_channel=NicholasRenotte)
- [Creating Custom Environments in OpenAI Gym](https://saashanair.com/blog/blog-posts/custom-environments-in-openai-gym)
- [ChatGPT](https://chatgpt.com/)
- [Claude AI](https://claude.ai/chat)


# Possible additions
- Add evaluation and testing
- Create a comparison with normal optimisation methods like Pulp or with current algorithms for CPU scheduling
- Create a hybrid model