# Background

Here, we develop an Intelligent Whale Optimization (IWO) algorithm to solve the dual-resource (machine and worker) flexible job-shop scheduling problem. To do this, we train a reinforcement-learning agent that adjusts the key parameters of the Whale Optimization (WO) algorithm at every iteration. This improves the global search ability of WO.

# Installing the libraries

In [None]:
!pip install 'tensorflow==1.15.2'

In [None]:
!pip install "gym==0.19.0"

In [None]:
!apt-get update
!apt-get install -y cmake libopenmpi-dev python3-dev zlib1g-dev
!pip install "stable-baselines[mpi]==2.2.1"

# Problem instances

In [None]:
import gym
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box, MultiDiscrete
import random
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv,VecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines import ACKTR,A2C

def problem_data(Seed):
  """Generate one random scheduling instance, reproducibly from ``Seed``.

  The global NumPy random generator is re-seeded with ``Seed`` and then
  consumed, so calling this function also resets the global random stream.

  Returns a tuple ``(Machine, Configuration, Job, R, RecChange, W)``:
    Machine        list of machine ids ``1..m``
    Configuration  ``{machine: number of configurations}``
    Job            ``{job: number of operations}``
    R              ``{(job, op): {(machine, config, worker): processing time}}``
    RecChange      ``{machine: {(from_config, to_config): change-over time}}``
    W              list of worker ids ``1..w``
  """
  np.random.seed(Seed)

  def pick(lowest, highest):
    # One uniform draw from the integers lowest..highest (both inclusive).
    return np.random.choice(np.arange(lowest, highest + 1))

  # The order of the draws below fixes the instance; do not reorder.
  Machine = list(range(1, pick(5, 10)))

  Configuration = {}
  for machine in Machine:
    Configuration[machine] = pick(2, 5)

  job_ids = range(1, pick(3, 20))
  Job = {}
  for job in job_ids:
    Job[job] = pick(2, 5)

  W = list(range(1, pick(3, 11)))

  # Processing time of every operation on every (machine, config, worker).
  R = {}
  for job, n_operations in Job.items():
    for operation in range(1, n_operations + 1):
      times = {}
      for worker in W:
        for machine, n_configs in Configuration.items():
          for config in range(1, n_configs + 1):
            times[(machine, config, worker)] = pick(2, 9)
      R[(job, operation)] = times

  # Change-over time between every ordered pair of configurations.
  RecChange = {}
  for machine, n_configs in Configuration.items():
    change_over = {}
    for source in range(1, 1 + n_configs):
      for target in range(1, 1 + n_configs):
        change_over[(source, target)] = pick(2, 6)
    RecChange[machine] = change_over

  return Machine, Configuration, Job, R, RecChange, W

# Environment

In [5]:
class WOAEnv(Env):
  """Gym environment that lets an agent tune Whale Optimization parameters.

  Every call to ``step`` runs one WOA iteration over a population of
  ``SearchAgents_no`` candidate solutions, using the coefficient ``A`` and the
  branch probability ``prob`` decoded from the action. Candidates are scored
  with ``CMAX`` on the instance returned by ``problem_data``; ``reset`` moves
  on to the next instance seed.

  Observation (9 float32 values, in this order): ``tt``, ``mean``, ``AV5``,
  ``AV10``, ``AV25``, ``AV50``, ``AVF``, ``Div``, ``bestt``. The declared
  ``Box`` bounds are [0, 1]; the values are not clipped to them.
  """

  def __init__(self):
    super().__init__()
    self.Seed=0
    self.Machine,self.Configuration,self.Job,self.R,self.RecChange,self.W=problem_data(self.Seed)
    # Action = (index for A, index for prob); both are divided by 5.01 in step().
    self.action_space = MultiDiscrete([10,5])
    self.observation_space = Box(low=np.zeros(9,dtype=np.float32), high=np.ones(9,dtype=np.float32), dtype=np.float32)
    self.dim=200              # length of one candidate solution vector
    self.SearchAgents_no=30   # population size
    self.MaxTime=200          # steps per episode
    self.Max_iter=self.MaxTime
    self._reset_episode_state()
    # Initial observation; this attribute is not refreshed by step().
    self.state=self._observation()

  def _reset_episode_state(self):
    """Set every per-episode attribute to its initial value and draw a new population."""
    self.A=1.0
    self.prob=0.5
    self.bestWOA=1
    self.bestpop=[]      # best score after each step of the episode
    self.costo=[]        # fitness values collected during the current step
    self.AVF1=0          # population fitness sum at the first iteration
    self.AVF=1
    self.Div1=0          # population spread at the first iteration
    self.Div=1
    self.bestt1=0        # best score at the first iteration
    self.bestt=1
    self.rew=0
    self.fitisum=0
    self.AV5=0
    self.AV10=0
    self.AV25=0
    self.AV50=0
    self.mean=0
    self.std=0
    self.time=0
    self.tt=0
    self.Leader_pos=np.zeros([1,self.dim]).tolist()
    self.Leader_score=float('inf')
    self.Positions=np.random.random([self.SearchAgents_no,self.dim]).tolist()
    self.Convergence_curve=np.zeros([1,self.Max_iter+2]).tolist()[0]

  def _observation(self):
    """Pack the current feature attributes into the float32 observation vector."""
    return np.array((self.tt,self.mean,self.AV5,self.AV10,self.AV25,self.AV50,self.AVF,self.Div,self.bestt),dtype=np.float32)

  def _gap_to_recent_mean(self, window):
    """Relative gap between the mean of the last ``window`` best scores and the current best."""
    recent=np.mean(self.bestpop[-window:])
    return -(self.bestWOA-recent)/recent

  def step(self, action):
    """Run one WOA iteration with the parameters chosen by ``action``.

    ``action`` is indexable with two integers: ``action[0]`` sets ``A`` and
    ``action[1]`` sets ``prob`` (each divided by 5.01).

    Returns ``(observation, reward, done, info)``. The reward is 0 on the
    first step of an episode; afterwards it is the relative drop of the
    population fitness sum plus three times the relative drop of the best
    score. ``done`` becomes True after ``MaxTime`` steps.
    """
    self.prob=action[1]/5.01
    self.A=action[0]/5.01
    lb=0.001
    ub=0.999
    Best_score,_,_=self.WOA(self.SearchAgents_no,self.MaxTime,lb,ub,self.dim,self.CMAX)
    if self.time==0:
      reward=0
    else:
      reward=self.rew+3*((self.bestWOA-Best_score)/self.bestWOA)
    self.bestWOA=Best_score
    self.bestpop.append(self.bestWOA)
    self.mean=self.bestWOA/np.mean(self.costo)
    self.AV5=self._gap_to_recent_mean(5)
    self.AV10=self._gap_to_recent_mean(10)
    self.AV25=self._gap_to_recent_mean(25)
    self.AV50=self._gap_to_recent_mean(50)
    self.time+=1
    self.tt=self.time/self.MaxTime
    done=self.time>=self.MaxTime
    info={}
    self.costo=[]
    return self._observation(), reward, done, info

  def render(self, mode='human'):
    """No visualisation is implemented; ``mode`` is accepted for Gym API compatibility."""
    pass

  def reset(self):
    """Advance to the next instance seed, restart the episode and return the first observation."""
    self.Seed+=1
    self.Machine,self.Configuration,self.Job,self.R,self.RecChange,self.W=problem_data(self.Seed)
    self.MaxTime=200
    self._reset_episode_state()
    print('Reset')
    return self._observation()

  @staticmethod
  def _clip_position(position, lb, ub):
    """Return ``position`` as a list with entries above ``ub`` set to 0.9 and below ``lb`` set to 0.1."""
    values=np.asarray(position,dtype=float)
    return np.where(values>ub,0.9,np.where(values<lb,0.1,values)).tolist()

  def WOA(self,SearchAgents_no,Max_iter,lb,ub,dim,fobj):
    """Perform one Whale Optimization iteration on ``self.Positions``.

    Parameters
      SearchAgents_no  population size used when drawing a random agent
      Max_iter         iteration budget; scales the spiral parameter range
      lb, ub           bounds outside which a coordinate is re-set (see
                       ``_clip_position``)
      dim              kept for interface compatibility; not used
      fobj             fitness function; it is called once per agent per
                       iteration and is assumed to be deterministic

    Updates the leader, the reward term ``rew`` and the features ``AVF``,
    ``Div`` and ``bestt``; appends this iteration's fitness values to
    ``self.costo``.

    Returns ``(Leader_score, Leader_pos, Convergence_curve)``.
    """
    t=self.time
    if t==0:
      # First iteration of an episode: score the initial population to seed the leader.
      for i in range(len(self.Positions)):
        self.Positions[i][:]=self._clip_position(self.Positions[i],lb,ub)
        fitness=fobj(self.Positions[i])
        if fitness<self.Leader_score:
          self.Leader_score=fitness
          self.Leader_pos=self.Positions[i][:]

    a2=-1+t*((-1)/Max_iter)
    for i in range(len(self.Positions)):
      np.random.rand()  # draw kept (value unused) so the random stream is unchanged
      r2=np.random.rand()
      A=self.A          # chosen by the agent instead of the usual 2*a*r1-a schedule
      C=2*r2
      b=1
      l=(a2-1)*np.random.rand()+1
      p=np.random.rand()
      for j in range(len(self.Positions[i])):
        if p<self.prob:
          if abs(A)>=1:
            # Move relative to a randomly drawn agent (drawn anew for every coordinate).
            rand_leader_index=int(np.floor(SearchAgents_no*np.random.rand()))
            X_rand=self.Positions[rand_leader_index]
            D_X_rand=abs(C*X_rand[j]-self.Positions[i][j])
            self.Positions[i][j]=X_rand[j]-A*D_X_rand
          else:
            # Move relative to the current leader.
            D_Leader=abs(C*self.Leader_pos[j]-self.Positions[i][j])
            self.Positions[i][j]=self.Leader_pos[j]-A*D_Leader
        else:
          # Spiral move around the current leader.
          distance2Leader=abs(self.Leader_pos[j]-self.Positions[i][j])
          self.Positions[i][j]=distance2Leader*np.exp(b*l)*np.cos(l*2*np.pi)+self.Leader_pos[j]

    self.Convergence_curve[self.time]=self.Leader_score

    # Repair and score the moved population, updating the leader on improvement.
    fitnesses=[]
    for i in range(len(self.Positions)):
      self.Positions[i][:]=self._clip_position(self.Positions[i],lb,ub)
      fitness=fobj(self.Positions[i])
      fitnesses.append(fitness)
      if fitness<self.Leader_score:
        self.Leader_score=fitness
        self.Leader_pos=self.Positions[i][:]
    self.costo.extend(fitnesses)
    fitisum=sum(fitnesses)

    if t>0:
      self.rew=(self.fitisum-fitisum)/self.fitisum
    self.fitisum=fitisum

    # Spread = summed absolute deviation from the population mean. The scores
    # computed above are reused instead of evaluating fobj a second time.
    Avsum=fitisum/self.SearchAgents_no
    spread=sum(abs(value-Avsum) for value in fitnesses)
    if t==0:
      self.AVF1=fitisum
      self.bestt1=self.Leader_score
      self.Div1=spread
    else:
      self.AVF=fitisum/self.AVF1
      self.bestt=self.Leader_score/self.bestt1
      # If every initial score was identical there is no baseline spread to
      # normalise by; report 0.0 rather than nan/inf.
      self.Div=spread/self.Div1 if self.Div1 else 0.0
    return self.Leader_score,self.Leader_pos,self.Convergence_curve

  def CMAX(self, solution):
    """Decode ``solution`` into a schedule and return its makespan.

    With ``n`` the total number of operations, ``solution[:n]`` holds one
    priority key per operation and ``solution[n:2*n]`` one resource-selection
    key per operation; later entries are ignored. ``solution`` itself is not
    modified. Job ids are assumed to be ``1..len(Job)``.

    Decoding: the job whose next operation has the smallest priority key is
    scheduled next. Its (machine, configuration, worker) triple is the first
    one whose cumulative selection probability (proportional to the inverse
    processing time) reaches the operation's selection key.

    Returns the largest job completion time.
    """
    Job=self.Job
    R=self.R
    RecChange=self.RecChange
    n_ops=sum(Job.values())
    priorities=list(solution[:n_ops])      # private copy; entries are overwritten below
    selection_keys=solution[n_ops:2*n_ops]
    first_slot={i:sum(Job[j] for j in range(1,i)) for i in Job}
    remaining=dict(Job)                    # operations left per job

    def slot(job):
      # Slot of the job's next operation, or of its first one once the job is finished.
      if remaining[job]>0:
        return first_slot[job]+Job[job]-remaining[job]
      return first_slot[job]

    sequence=[]
    for _ in range(n_ops):
      keys={job:priorities[slot(job)] for job in Job}
      selected=min(keys,key=keys.get)      # first job with the smallest key
      operation=Job[selected]-remaining[selected]+1
      current=slot(selected)
      options=R[selected,operation]
      inverse=[1/duration for duration in options.values()]
      total=sum(inverse)
      cumulative=np.cumsum([value/total for value in inverse])
      pick=np.where(cumulative>=selection_keys[current])[0][0]
      sequence.append((selected,operation,list(options)[pick]))
      # A scheduled slot gets key 1 so that a finished job, which is looked up
      # through its first slot, sorts after every unfinished job.
      priorities[current]=1
      remaining[selected]-=1

    CJob={i:0 for i in Job}                # completion time of each job's last scheduled operation
    CMachine={i:0 for i in self.Machine}   # time at which each machine becomes free
    CWorker={i:0 for i in self.W}          # time at which each worker becomes free
    MachineConf={i:1 for i in self.Machine}  # current configuration; every machine starts in 1
    for job,operation,(machine,config,worker) in sequence:
      # The change-over is charged for every operation, as in the instance
      # data, which also defines a time for (c, c).
      ready=CMachine[machine]+RecChange[machine][(MachineConf[machine],config)]
      finish=max(ready,CJob[job],CWorker[worker])+R[(job,operation)][(machine,config,worker)]
      CMachine[machine]=finish
      CJob[job]=finish
      # The worker stays busy until the operation ends and the machine keeps
      # its new configuration, so later operations see both.
      CWorker[worker]=finish
      MachineConf[machine]=config
    return max(CJob.values())

# Training an agent using an actor-critic algorithm (A2C)

In [7]:
def make_envv(rank, seed=0):
    """Return a zero-argument factory that builds one seeded ``WOAEnv``.

    The environment is created only when the returned callable is invoked;
    it is then seeded with ``seed + rank``.
    """
    def build_environment():
        environment = WOAEnv()
        environment.seed(seed + rank)
        return environment

    return build_environment

if __name__ == '__main__':
    # One environment factory per worker process, ranked 0..num_cpu-1.
    num_cpu = 1
    env_factories = [make_envv(rank) for rank in range(num_cpu)]
    env = SubprocVecEnv(env_factories)

    # A2C with an MLP policy; a very short run of 50 timesteps.
    model = A2C(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=50)

Reset
---------------------------------
| explained_variance | 0.678    |
| fps                | 0        |
| nupdates           | 1        |
| policy_entropy     | 3.91     |
| total_timesteps    | 5        |
| value_loss         | 0.0252   |
---------------------------------
