In [None]:
#Import the Scheduler library to set up the environment
!git clone https://github.com/TimeTraveller-San/JobSchedulingRLenv.git

In [1]:
import gym
import pandas as pd
import numpy as np
from collections import deque
import os
import random
# Plotting
import matplotlib.pyplot as plt 
import seaborn as sns 
%matplotlib inline

In [2]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Lambda
from keras.optimizers import Adam
from keras.utils import plot_model

Using TensorFlow backend.


In [3]:
#Keep the componenets of the scheduler class in the same folder.
from scheduler import Scheduler


In [60]:
#Testing the environment, its state, reward and action values. 
# env = Scheduler(n=3, mt=3, mr=3)
# some_random_action = env.sample() #sample() returns a randomly sampled action
# print("Action: ", some_random_action)
# print("Action shape: ", some_random_action.shape)
# print("State shape: ",state.shape)
# state, reward, done, observation = env.step(some_random_action)
# print(f"Reward is: {reward}")
# if done:
#   print(f"Episode done, call reset().")
# env.render(False)

Action:  1
Action shape:  ()
Reward is: 0
(3, 2)
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
State shape:  (93,)


In [None]:
# print(env.action_space_n)
# print(env.observation_space)

In [4]:
class Agent():
    def __init__(self, state_size, action_size):
        self.weight_backup      = "trained_model_weight.h5"
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=2000)
        self.learning_rate      = 0.001
        self.gamma              = 0.95
        self.exploration_rate   = 1.0
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995
        self.brain              = self._build_model()

    def get_action(self, x)->int:
        return (x - 1)//(self.action_size - 1)

    def _build_model(self):
            # Neural Net for Deep-Q learning Model
            # model = Sequential()
            # model.add(Dense(24, input_dim=self.state_size, activation='relu'))
            # model.add(Dense(24, activation='relu'))
            # model.add(Dense(self.action_size, activation='linear'))
            # model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
            model = Sequential()
            model.add(Dense(128, input_dim = self.state_size, activation = 'relu'))
            model.add(Dense(64, activation = 'relu'))
            model.add(Dense(32, activation = 'relu'))
            model.add(Dense(self.action_size, activation = 'linear'))
            model.compile(optimizer=Adam(lr=self.learning_rate), loss = 'mse')
            print(model.summary())
            plot_model(model, "./Deep_Q_Network.png")
            #Load the models if available
            if os.path.isfile(self.weight_backup):
                model.load_weights(self.weight_backup)
                self.exploration_rate = self.exploration_min
            print("Model build complete.")
            return model

    def save_model(self):
                self.brain.save(self.weight_backup)

    def act(self, state):
            if np.random.rand() <= self.exploration_rate:
                return random.randrange(self.action_size)
            act_values = self.brain.predict(state)
            #return self.get_action(np.argmax(act_values))
            return np.argmax(act_values[0])+1

    def remember(self, state, action, reward, next_state, done):
            self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
            if len(self.memory) < sample_batch_size:
                return
            sample_batch = random.sample(self.memory, sample_batch_size)
            for state, action, reward, next_state, done in sample_batch:
                target = reward
                if not done:
                    target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
                target_f = self.brain.predict(state)
                target_f[action-1] = target
                #print(target_f.shape)
                self.brain.fit(state, target_f, epochs=1, verbose=0)
            if self.exploration_rate > self.exploration_min:
                self.exploration_rate *= self.exploration_decay

In [7]:
class Scheduling_Agent:
    def __init__(self):
        self.cumulative_reward = []
        self.sample_batch_size = 32
        self.episodes          = 100
        self.env               = Scheduler(n=3, mt=3, mr=2)
        self.state_size        = self.env.observation_space
        self.action_size       = self.env.action_space_n
        self.agent             = Agent(self.state_size, self.action_size)
        

    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                state = np.reshape(state, [1, self.state_size])
                done = False
                index = 0
                total_reward = 0
                while not done:
                    self.env.render(False)
                    action = self.agent.act(state)
                    #action = self.env.get_action(action)
                    print("Action: ",action)
                    next_state, reward, done, _ = self.env.step(action)
                    total_reward += reward 
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    index += 1
                print("Episode {}# Score: {}  Total Reward: {}".format(index_episode, index + 1, total_reward))
                self.cumulative_reward.append(total_reward)
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()

In [8]:
if __name__ == "__main__":
    agent = Scheduling_Agent()
    agent.run()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 128)               8192      
_________________________________________________________________
dense_6 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 99        
Total params: 18,627
Trainable params: 18,627
Non-trainable params: 0
_________________________________________________________________
None
Model build complete.
(3, 2)
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
Action:  2
(3, 2)
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
{1: 0, 2: 0, 3: 0}
Action:  2
Episode 0# Score: 3  Total Reward: -100
(3, 2)
{1: 0, 2: 1, 3: 0}
{1: 0

IndexError: index 1 is out of bounds for axis 0 with size 1