# DEPARTMENT OF INFORMATION TECHNOLOGY
# Operating Systems - IT253 Mini Project

Gaurang Jitendra Velingkar<br>
Jason Krithik Kumar<br>
Rakshita Varadarajan<br>
Sidharth Lanka



In [12]:
!pip install tensorflow==2.4.1
!pip install gym
!pip install keras
!pip install keras-rl2

Collecting keras-rl2
[?25l  Downloading https://files.pythonhosted.org/packages/dd/34/94ffeab44eef43e22a01d82aa0ca062a97392c2c2415ba8b210e72053285/keras_rl2-1.0.4-py3-none-any.whl (53kB)
[K     |████████████████████████████████| 61kB 2.7MB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.4


Environment

In [13]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

In [14]:
class TaskEnv(Env):
    #   	Init function to initialize environment state, it takes number of resources and no of 
    #  	Tasks in a batch and creates a state representation of the resources.
    def __init__(self, resources=[90, 90], num_tasks=5):
        # Actions we can take, down, stay, up
        self.action_space = Discrete(num_tasks)
        # Possible values for memory and cpu utilisation array
        self.observation_space = Box(low=0, high=90, shape=(num_tasks, len(resources)))
        ### initially, no task are chosen
        self.state_ = np.zeros((num_tasks, 1), dtype=int)
        self.limit_ = 0
        for lim in resources:
            self.state_ = np.append(self.state_, np.random.randint(lim, size=(num_tasks, 1)), axis=1)
            
        self.reward_ = 0
        for lim in resources:
            self.limit_+=lim   #Set the total resource limit as sum of resources
       
        self.num_tasks_ = num_tasks
        self.resources_ = resources

    #     Return the sum of a one dimensional vector  
    def getSum(self, x ):
        if x[0] == 1:
            return sum(x)-1
        else:
            return 0    
    
    #This function defines the logic behind changing the states and deciding rewards based on the input provided by agent in terms of action.
    #Input: index of the selected task in state table
    #Output: 
    #New state (np.array)
    #Reward (INT) 
    #Done:  Job processing done or not flag.
    def step(self, actionIdx):
        ### update reward
        self.reward_ = 0
        reward = 0
        done = False
        #update the state based on action
        #case 1, if same task selected then penalize the agent
        if self.state_[actionIdx][0] == 1:
          # get the sum for selected task
            sum_res = np.sum(self.state_[actionIdx], axis=0)
            reward = -sum_res/10;
            return self.state_,reward,done,{}
        else:
            self.state_[actionIdx][0] = 1
       
        #collect all the resources for this batch until now.
        totReward = sum(np.apply_along_axis( self.getSum, axis=1, arr=self.state_ ))
        # get the sum for selected task
        sum_res = np.sum(self.state_[actionIdx], axis=0) 
        ones = np.sum(self.state_, axis=0)
        if totReward <= self.limit_:
            if ones[0] == 5:
              done = True
            reward = sum_res
        else:
            reward = -sum_res/10;
            done = True 
            
        return self.state_,reward,done,{}

    # render: Print current state
    def render(self):
        # Implement viz
        print(self.state_)
    
    # Set state to one given by user
    def set_state(self, state):
        self.reset()
        self.state_ = state

    # reset: Reset the environment state for the new batch of incoming jobs
    def reset(self):
        state = np.zeros((self.num_tasks_, 1), dtype=int)
        for lim in self.resources_: 
            state = np.append(state, np.random.randint(lim, size=(self.num_tasks_,1)), axis=1)
        self.state_ = state
        self.reward_ = 0
        return self.state_

In [15]:
env = TaskEnv()

In [16]:
env.observation_space.sample()

array([[41.87363  , 34.20834  ],
       [24.629042 , 52.142124 ],
       [68.53885  ,  3.3223493],
       [38.631977 , 61.177902 ],
       [67.61218  , 10.353863 ]], dtype=float32)

## Testing
Test if environment works as intended

In [17]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        # print(n_state)
        # print(action, " ", dqn.forward(n_state))
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:144.4
Episode:2 Score:128.2
Episode:3 Score:78.9
Episode:4 Score:34.0
Episode:5 Score:88.7
Episode:6 Score:94.7
Episode:7 Score:98.0
Episode:8 Score:123.39999999999999
Episode:9 Score:144.8
Episode:10 Score:136.3


## Model
Create model using Keras with Tensorflow backend

In [18]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten 
from tensorflow.keras.optimizers import Adam
from tensorflow import reshape

In [19]:
states = (1, 5, 3)
actions = env.action_space.n

print(states)
print(actions)

(1, 5, 3)
5


In [20]:
def build_model(states, actions, l1=32, l2=32, l3=32):
    model = Sequential()
    model.add(Flatten(input_shape=states))
    model.add(Dense(l1, activation='elu'))
    model.add(Dense(l2, activation='elu'))
    model.add(Dense(l3, activation='elu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [26]:
model = build_model(states, actions, 128, 128, 128)
#model.save('/drive/bestmodel')

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 15)                0         
_________________________________________________________________
dense (Dense)                (None, 64)                1024      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 325       
Total params: 9,669
Trainable params: 9,669
Non-trainable params: 0
_________________________________________________________________


## Agent
Create DQN agent using model made above and using Epsilon-Q policy

In [23]:
from rl.agents import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

In [24]:
def build_agent(model, actions):
    policy = EpsGreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=200, target_model_update=1e-2)
    return dqn

In [278]:
dqn = build_agent(model, actions)

## Fitting And Testing Model

In [279]:
dqn.compile(Adam(lr=1e-4), metrics=['mae'])
dqn.fit(env, nb_steps=1000, visualize=False, verbose=1)

Training for 1000 steps ...
Interval 1 (0 steps performed)




 1000/10000 [==>...........................] - ETA: 7:33 - reward: -0.0958done, took 55.081 seconds


<tensorflow.python.keras.callbacks.History at 0x7f9d47f9ab50>

In [280]:
dqn.compile(Adam(lr=1e-2), metrics=['mae'])
dqn.fit(env, nb_steps=15000, visualize=False, verbose=1)

Training for 15000 steps ...
Interval 1 (0 steps performed)




544 episodes - episode_reward: -4.391 [-1051.300, 163.900] - loss: 400.482 - mae: 10.104 - mean_q: 10.762

Interval 2 (10000 steps performed)


<tensorflow.python.keras.callbacks.History at 0x7f9d47df2d90>

In [281]:
dqn.compile(Adam(lr=0.5*1e-3), metrics=['mse'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

Training for 5000 steps ...
Interval 1 (0 steps performed)






<tensorflow.python.keras.callbacks.History at 0x7f9d4787add0>

In [247]:
dqn.compile(Adam(lr=1e-2), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

Training for 5000 steps ...
Interval 1 (0 steps performed)






<tensorflow.python.keras.callbacks.History at 0x7f9d48f57990>

In [206]:
dqn.compile(Adam(lr=0.5*1e-3), metrics=['mse'])
dqn.fit(env, nb_steps=15000, visualize=False, verbose=1)

Training for 15000 steps ...
Interval 1 (0 steps performed)






<tensorflow.python.keras.callbacks.History at 0x7f9d4b115210>

In [235]:
model.save('/content/drive/MyDrive/bestmodelyet')
del model

## Different Scheduling Techniques

### Random Scheduling
We schedule a random task by sampling from the action space

In [46]:
episodes = 1000
scores = 0
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        # print(n_state)
        # print(action, " ", dqn.forward(n_state))
        score+=reward
    scores += score
print('Episodes:{} Score:{}'.format(episodes, scores/episodes))

Episodes:1000 Score:114.91079999999998


### Scheduling using RL agent
We use the best performing model stored and use it for scheduling tasks

In [48]:
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)  

Mounted at /content/drive/


In [45]:
from tensorflow import keras
model1 = keras.models.load_model('/content/drive/MyDrive/bestmodelyet')
dqn = build_agent(model1, actions)

def calc_score(dqn, episodes = 1000):
  scores = 0
  for episode in range(1, episodes+1):
      state = env.reset()
      done = False
      score = 0
      i = 0
      
      while not done:
          #env.render()
          action = dqn.forward(state)
          state, reward, done, info = env.step(action)
          # print(n_state)
          # print(action, " ", dqn.forward(n_state))
          score+=reward
          i += 1
          if i > 100:
            break
            # print(state)
      scores += score

  print('Episodes:{} Score:{}'.format(episodes, scores/episodes))
  return scores/episodes

calc_score(dqn)



Episodes:1000 Score:135.43469999999985


135.43469999999985

### FCFS
Scheduling the first task and the moving to the next

In [47]:
  episodes = 10000
scores = 0
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    i = 0
    
    while not done:
        #env.render()
        action = i
        i = i+1
        if i > 5: break
        state, reward, done, info = env.step(action)
        # print(n_state)
        # print(action, " ", dqn.forward(n_state))
        score+=reward
    scores += score
print('Episodes:{} Score:{}'.format(episodes, scores/episodes))

Episodes:10000 Score:120.36871999999983
