In [None]:
# Every job has its tasks that have to be done on a specific machine 
# We have 2 machines and 3 jobs

from keras.utils import np_utils
from collections import deque
import random
from collections import deque
import keras
import numpy as np
from keras.layers import Dense, Input
from keras.models import Model, Sequential
from keras.optimizers import Adam

# Toy instance: each job is an ordered list of tasks and every task is a
# (machine_id, processing_time) pair.
jobs_data = [
    [(0, 3), (1, 2)],  # Job_0
    [(0, 2), (1, 4)],  # Job_1
    [(0, 4), (1, 3)],  # Job_2
]

machines_count = 2
all_machines = range(machines_count)
NUMBER_OF_CLASSES = 2

# Flatten the jobs into two parallel lists, one entry per task in job order:
# the machine ids act as labels and the processing times as features.
machine_ids, processing_time = (
    list(column)
    for column in zip(*(task for job in jobs_data for task in job))
)

# Turn the integer labels into class vectors with NUMBER_OF_CLASSES columns.
machine_ids = np_utils.to_categorical(machine_ids, NUMBER_OF_CLASSES)



In [None]:
class JobShop:
    """Environment for a job-shop scheduling problem.

    The instance is described by two class-level matrices with one row per
    job and one column per position (operation) inside the job:

    * ``M_processing_time[i, j]`` - processing time of job ``i``'s position ``j``.
    * ``M_processing_order[i, j]`` - id of the machine that position must run on.

    A schedule is the list ``schedule_line`` of ``[job_id, job_position]``
    pairs in the order in which they were dispatched.  ``Step`` appends to
    that list and returns per-job feature vectors, the current makespan and
    a ``done`` flag.
    """

    number_job = None
    number_machine = None
    number_features = None

    # Names of the candidate features (see GetFeature) that make up the
    # feature vector of one job, in order.  Other available candidates:
    # "frac_currentendplusthisposition_othermachineave",
    # "schedule_finish_station", "frac_jobposition_jobtime",
    # "frac_jobposition_totaltime".
    FEATURE_NAMES = (
        "frac_currentend_othermachineave",
        "frac_currentend_otherjobave",
    )

    # M_processing_time[i, j] is the processing time of job i's position j.
    M_processing_time = np.array([[18, 20, 21, 17], [18, 26, 15, 16], [17, 18, 27, 23], [18, 21, 25, 15], [22, 29, 28, 21]])

    # M_processing_order[i, j] is the machine required by job i's position j.
    # (0, 1, 2, 3) are the machine ids.
    M_processing_order = np.array([[1, 3, 0, 2], [0, 2, 1, 3], [3, 1, 2, 0], [1, 3, 0, 2], [0, 1, 2, 3]])

    M_start_time = None
    M_end_time = None
    X_schedule_plan = None
    schedule_line = None

    def __init__(self):
        """Create the environment in its initial, empty-schedule state."""
        self.reset()

    def reset(self):
        """Clear the schedule and re-derive the problem size.

        The number of jobs is the row count of ``M_processing_time``.  The
        number of machines is taken from its column count, because the
        scheduling logic treats ``number_machine`` as the number of
        positions per job.
        """
        self.number_job, self.number_machine = self.M_processing_time.shape
        self.number_features = len(self.FEATURE_NAMES)
        self.schedule_line = []
        # Evaluating the empty schedule fills the time matrices with zeros so
        # that feature extraction works before the first action.
        self.MeasurementAction(self.schedule_line)

    def Get_Possible_Job_Position(self):
        """Return ``[job_id, next_position]`` for every job.

        ``next_position`` is 0 for a job that has not been dispatched yet,
        the position after its latest entry in ``schedule_line`` otherwise,
        and -1 once the job's last position has been scheduled.
        """
        next_position = [0] * self.number_job
        last_position = self.number_machine - 1
        for job_id, job_position in self.schedule_line:
            if job_position < last_position:
                next_position[job_id] = job_position + 1
            else:
                next_position[job_id] = -1

        return [[job_id, position]
                for job_id, position in enumerate(next_position)]

    def GetFeature(self, job_id, job_position):
        """Return the feature vector of one position of one job.

        Args:
            job_id: row index of the job.
            job_position: position inside the job, or -1 for a finished job.

        Returns:
            A list with one value per entry of ``FEATURE_NAMES``; every
            value is -1 when ``job_position`` is -1.

        Side effects:
            Updates ``self.number_features``.
        """
        self.number_features = len(self.FEATURE_NAMES)

        # A finished job has nothing left to schedule: mark it with -1.
        if job_position == -1:
            return [-1] * self.number_features

        if job_position == 0:
            # Fixed values for a job that has not started yet.
            candidates = {
                "frac_currentend_othermachineave": 0.5,
                "frac_currentend_otherjobave": 0.5,
                "frac_currentendplusthisposition_othermachineave": 1,
                "schedule_finish_station": 0,
                "frac_jobposition_jobtime": 1,
                "frac_jobposition_totaltime": 1,
            }
        else:
            # Raw quantities.
            machine_id = self.M_processing_order[job_id, job_position]
            job_time_need = np.sum(self.M_processing_time, axis=1)
            current_time_use = self.M_processing_time[job_id, job_position]

            machine_endtime = np.max(self.M_end_time, axis=1)
            job_endtime = np.sum(self.M_processing_time[job_id, :job_position])
            job_alltime = np.sum(self.M_processing_time[job_id, :])
            mean_machine_endtime = np.average(machine_endtime)

            candidates = {
                # The 0.1 offsets keep the ratios finite while a machine or
                # job is still at time zero.
                "frac_currentend_othermachineave":
                    (0.1 + machine_endtime[machine_id])
                    / (0.1 + mean_machine_endtime),
                "frac_currentend_otherjobave":
                    (0.1 + job_endtime) / (0.1 + job_alltime),
                "frac_currentendplusthisposition_othermachineave":
                    (machine_endtime[machine_id] + current_time_use)
                    / mean_machine_endtime,
                # Share of (machine, slot) cells that already hold an end time.
                "schedule_finish_station":
                    np.count_nonzero(self.M_end_time)
                    / self.number_machine / self.number_job,
                "frac_jobposition_jobtime":
                    current_time_use / job_time_need[job_id],
                "frac_jobposition_totaltime":
                    current_time_use / np.sum(job_time_need),
            }

        return [candidates[name] for name in self.FEATURE_NAMES]

    def Get_Features(self, possible_job_position):
        """Return one feature vector per ``[job_id, job_position]`` pair."""
        return [self.GetFeature(job_id, job_position)
                for job_id, job_position in possible_job_position]

    def MeasurementAction(self, action_history):
        """Simulate a dispatch sequence and return its makespan.

        Args:
            action_history: iterable of ``[job_id, job_position]`` pairs in
                dispatch order (the same layout as ``schedule_line``).

        Returns:
            The largest end time over all scheduled operations (0 for an
            empty history).

        Side effects:
            Stores ``M_start_time`` and ``M_end_time`` (rows are machines,
            columns are the order in which operations run on that machine)
            and ``X_schedule_plan`` (same layout, each cell holding
            ``[job_id, job_position]``).
        """
        shape = (self.number_machine, self.number_job)
        start_time = np.zeros(shape)
        end_time = np.zeros(shape)
        schedule_plan = np.zeros(shape + (2,), dtype=int)

        machine_free_at = np.zeros(self.number_machine, dtype=int)
        job_free_at = np.zeros(self.number_job, dtype=int)
        # Next free slot (column) on each machine.
        machine_slot = np.zeros(self.number_machine, dtype=int)

        for job_id, job_position in action_history:
            # Machine on which this operation of the job has to run.
            machine_id = self.M_processing_order[job_id, job_position]

            # The operation starts once both the machine and the job are free.
            begin = max(machine_free_at[machine_id], job_free_at[job_id])
            finish = begin + self.M_processing_time[job_id, job_position]

            # Both the machine and the job are busy until the operation ends.
            machine_free_at[machine_id] = finish
            job_free_at[job_id] = finish

            slot = machine_slot[machine_id]
            start_time[machine_id, slot] = begin
            end_time[machine_id, slot] = finish
            schedule_plan[machine_id, slot, :] = [job_id, job_position]

            machine_slot[machine_id] += 1

        self.M_start_time = start_time
        self.M_end_time = end_time
        self.X_schedule_plan = schedule_plan
        return np.max(end_time)

    def Step(self, action=None):
        """Advance the environment by one dispatching decision.

        Args:
            action: index of the job whose next operation should be
                scheduled, or ``None`` to only observe the current state.

        Returns:
            ``(state, score, done)`` where ``state`` is a list with one
            ``(1, number_features)`` array per job, ``score`` is 0 when
            ``action`` is ``None`` and the current makespan otherwise, and
            ``done`` is True when the requested job had no operation left.
            In that case the first job that still has one is scheduled
            instead; if no job has one, the schedule is left unchanged.
        """
        done = False
        if action is None:
            score = 0
        else:
            next_position = [position for _, position
                             in self.Get_Possible_Job_Position()]

            if next_position[action] != -1:
                chosen = [action, next_position[action]]
            else:
                done = True
                remaining = [[job_id, position]
                             for job_id, position in enumerate(next_position)
                             if position != -1]
                # Fall back to the first job that can still be scheduled;
                # None means the whole schedule is already complete.
                chosen = remaining[0] if remaining else None

            if chosen is not None:
                self.schedule_line.append(chosen)
            self.MeasurementAction(self.schedule_line)
            score = np.max(self.M_end_time)

        features = np.array(
            self.Get_Features(self.Get_Possible_Job_Position()))
        state = [np.reshape(row, (1, -1)) for row in features]

        return state, score, done

In [38]:
import numpy as np
# Scratch cell: a zero-filled integer array of shape 4 x 5 x 2.
X_schedule_plan = np.zeros(shape=(4, 5, 2), dtype=int)

In [37]:
# Bare expression: evaluates the variable so a notebook cell shows its repr.
X_schedule_plan

array([[[0],
        [0],
        [0],
        [0],
        [0]],

       [[0],
        [0],
        [0],
        [0],
        [0]],

       [[0],
        [0],
        [0],
        [0],
        [0]],

       [[0],
        [0],
        [0],
        [0],
        [0]]])

In [None]:
class DQAgent:


  def __init__(self, state_size=None, action_size=None, number_job=None,
               number_feature=None):
      """Set up hyper-parameters, the replay memory and both Q-networks.

      Args:
          state_size: stored on the agent as ``self.state_size``.
          action_size: number of discrete actions; ``act`` samples a random
              action from ``range(action_size)``.
          number_job: stored on the agent as ``self.number_job``.
          number_feature: input width of the network created by
              ``_build_model``.

      Raises:
          ValueError: if ``action_size`` or ``number_feature`` is not given,
              since ``act`` and ``_build_model`` cannot work without them.
      """
      if action_size is None or number_feature is None:
          raise ValueError(
              "DQAgent requires action_size and number_feature")

      # Replay / training constants are kept on the instance so that other
      # methods can reach them (locals of __init__ would not be visible).
      # NOTE(review): `train` still refers to MIN_MEMORY_SIZE and BATCH_SIZE
      # as bare names; point it at these attributes.
      self.memory_size = 2000  # number of most recent steps kept for sampling
      self.min_memory_size = 1000  # minimum stored steps before training starts
      self.batch_size = 64  # how many steps (samples) to use for training
      # presumably counted in terminal states (finished episodes) - confirm
      self.update_target_every = 5

      self.state_size = state_size
      self.action_size = action_size
      self.number_job = number_job
      self.number_feature = number_feature
      self.gamma = 0.95  # discount rate
      self.epsilon = 0.9  # exploration rate

      # Main model, the one that is fitted.
      self.model = self._build_model()

      # Target network: a second copy that starts with the main model's
      # weights and is meant to be re-synchronised only every few episodes
      # so that the training targets stay stable.
      self.target_model = self._build_model()
      self.target_model.set_weights(self.model.get_weights())

      # The last `memory_size` transitions; older ones are dropped.
      self.memory = deque(maxlen=self.memory_size)

      # Counts towards the next target-network update.
      self.target_update_counter = 0

  def _build_model(self):
      """Build and compile the network used for deep Q-learning.

      Four ReLU layers of 24 units feed a single linear output unit; the
      model is compiled with mean-squared-error loss and Adam at 0.0005.

      Returns:
          The compiled ``Sequential`` model named ``'basic_model'``.
      """
      # NOTE(review): input_dim is repeated on the 2nd and 3rd layers exactly
      # as before; presumably only the first layer's value matters - confirm.
      stack = [
          Dense(24, input_dim=self.number_feature, activation='relu'),
          Dense(24, input_dim=self.number_feature, activation='relu'),
          Dense(24, input_dim=self.number_feature, activation='relu'),
          Dense(24, activation='relu'),
          Dense(1, activation='linear'),
      ]
      network = Sequential(stack, name='basic_model')
      network.compile(optimizer=Adam(lr=0.0005), loss='mse')
      return network

  def update_memory(self, state, action, reward, next_state, done):
      """Append one transition to the replay memory.

      Args:
          state: state in which the action was taken.
          action: action that was taken.
          reward: reward received for it.
          next_state: state reached afterwards.
          done: whether the step was reported as done.
      """
      # Store the `state` argument; the name `current_state` used here
      # before is not defined in this method.
      self.memory.append((state, action, reward, next_state, done))

  def act(self, state):
      """Choose an action for ``state`` with an epsilon-greedy policy.

      With probability ``self.epsilon`` a uniformly random action from
      ``range(self.action_size)`` is returned.  Otherwise the model scores
      every row of the state and the index of the best-scoring row is
      returned.

      Args:
          state: sequence of per-candidate feature rows, e.g. a list of
              ``(1, number_feature)`` arrays or a 2-D array.
              NOTE(review): assumes one row per action - confirm with caller.

      Returns:
          The chosen action index as an ``int``.
      """
      # Explore.
      if np.random.rand() <= self.epsilon:
          return random.randrange(self.action_size)

      # Exploit: the network has a single output unit, so it yields one
      # value per input row.  Stack the rows into one batch and take the
      # argmax across rows; an argmax over a single row's one-element
      # output would always be 0.
      batch = np.vstack(state)
      q_values = self.model.predict(batch)
      return int(np.argmax(np.ravel(q_values)))

  def train(self, terminal_state, step):

    # checking if there are any samples in memory, so we can start to train
    if len(self.memory) < MIN_MEMORY_SIZE:
        return # if not we aint do nothin

    # Setting the minibatch if there is enough samples in a memory
    minibatch =  random.sample(self.memory, BATCH_SIZE)

    for index, (current_state, action, reward, next_state, done) in enumerate(minibatch):

      

  def get_q_values()