# simpy 실습

In [1]:
import simpy  # pip install 후 import

----------

### 1. Environment

simpy의 environment 클래스를 인스턴스화하여 environment를 정의한다

In [14]:
env = simpy.Environment()

### 2.Process Interaction

In [15]:
class Car(object) : #Car의 인자로 들어오는것은 car라는 object. 이때 car의 속성인 환경(env)를 init에서 초기화 하기 때문에 car를 선언할 때 car=Car(env(parameter))로 선언한것
    def __init__(self,env):
        self.env = env
        self.action = env.process(self.run()) #이건 잘..
    
    def run(self): #Car의 메소드 run함수 선언 : 5초동안 charge를 하는것
        charge_duration=5 #전기차의 충전시간
        yield self.env.timeout(self.charge(charge_duration))
        
    def charge(self,duration): #duration이 지나면 timeout을 반환
        yield self.env.timeout(duration)
        

class 상식 : 파이썬 메소드의 첫 번째 인자로 항상 인스턴스(객체)가 전달된다. 즉, Car(env)로 객체를 생성했을 때 car라는 객체 정보가 항상 클래스 안 메소드(함수)의 첫 번째 인자에 들어감. 따라서 모든 함수에는 self라는 인자를 받아줘야함, 하지만 함수를 사용할때는 첫번째 인자인 객체명을 쓰지 않고, 두번째 인자부터만 쓴다. ex, self.charge(charge_duration)처럼..

__init__은 파이썬에서 사용되는 생성자로 이것이 아닌 다른값으로 인스턴스를 초기화 할 수 없음 

In [16]:
# interrupting another process

def driver(env,car): #이 함수는 클래스 내부에서 선언하지 않음. 
    yield env.timeout(3) #3초동안 휴면상태
    car.action.interrupt() #3초 휴면상태 이후 action을 interrupt

In [20]:
car= Car(env) #생성된 env에서 동작시킬 Car 클래스를 만들고, car라는 인스턴스 객체화
env.process(driver(env,car))
env.run(until=15) #15초동안 run함수에서 동작

TypeError: '<' not supported between instances of 'generator' and 'int'

### 3. Resource

In [2]:
def car(env,name,station,drivint_time,charge_duration):
    yield env.timeout(driving_time) #차가 들어올 시간
    
    print(f'{name} 차가 충전소에 {env.now}에 도착했습니다.')
    with station.request()as req:
        yield req
          
        print(f'{name} 차가 충전을 시작한 시간 {env.now}')
        yield env.timeout(charge_duration)
        print(f'{name} 차의 충전이 완료된 시간 {env.now}')
        
import simpy
env = simpy.Environment()
station = simpy.Resource(env,capacity=2)

for i in range(4):
    env.process(car(env,f'Car{i}',station,i*2,5))
    
env.run()
            

NameError: name 'driving_time' is not defined

-------------------

In [10]:
#2개의 시계가 다른 time interval로 움직이는 예제
def clock(env,name,tick):
    while True:
        print(name, env.now)
        yield env.timeout(tick)
        
env=simpy.Environment()
env.process(clock(env,'fast',0.5))
env.process(clock(env,'slow',1))

env.run(until=2)

fast 0
slow 0
fast 0.5
slow 1
fast 1.0
fast 1.5


In [11]:
#자동차 프로세스 - 주차하고 여행 떠남 (parking, driving)
def car(env):
    while True:
        print('start parking at %d' % env.now) # t:0, 
        parking_duration=5
        #환경에서 timeout이벤트 발생 (parking_duration동안 휴면)
        yield env.timeout(parking_duration)
        
        print('start driving at %d' % env.now)
        trip_duration = 2
        yield env.timeout(trip_duration)
        
env = simpy.Environment()
env.process(car(env))
env.run(until=15)

start parking at 0
start driving at 5
start parking at 7
start driving at 12
start parking at 14


자동차의 parking 시간은 5초, driving은 2초.

즉 5는 주차 -> 2는 여행(7까지) -> 5만큼 다시 주차(12까지) -> 반복 (15동안)

car라는 함수(프로세스,generator)에서 while loop를 도는동안

timeout() : 주어진 시간동안 휴면 상태 유지

즉, parking_duration =5만큼 휴면 상태. -> yield로 현재 상태 반환하고, 그 이후부터 다시 실행 -> trip_duration만큼 또 휴면 상태. 다음 while roop로

함수로 만들어진 mygenerator 객체가 for루프를 통해 처음으로 실행될 때 yield키워드를 만나기 전까지만 코드가 실행되고, 첫번째 루프의 값을 반환, 다음에는 yield키워드 뒤 코드를 실행하고 다시 반환값이 없어질때까지 같은 과정 반복!

### Q-learning NN model

In [None]:
import simpy
import pandas as pd
import numpy as np 
from collections import namedtuple
from recordtype import recordtype
import random 
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from prettytable import PrettyTable
#get_ipython().magic('matplotlib inline')
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam 
from collections import deque 


SIM_TIME = 10000000 #simulation time
# OBS_TIME = 100 # observation time cycle
# BREAK_TIME = random.expovariate(.5) # time after which we send in a new set of cassettes 

UPDATE_TARGET_NETWORK = 10

WAIT_TIME_IF_FULL = 6


# Create an environment and start the setup process
env = simpy.Environment()

n=0

Machine = recordtype('Machine', 'duration, name, station_name, queue_length, max_q_length, output, t_start')

m1 = Machine(20, 'M1','S1', 0, 3, 0, 0)
m2 = Machine(20, 'M2','S1', 0, 3, 0, 0)
m3 = Machine(20, 'M3','S1', 0, 3, 0, 0)
m4 = Machine(10, 'M4','S2', 0, 3, 0, 0)
m5 = Machine(10, 'M5','S2', 0, 3, 0, 0)
m6 = Machine(10, 'M6','S2', 0, 3, 0, 0)


station1 = [m1, m2, m3]
station2 = [m4, m5, m6]

# Total list of all the stations in the factory 
ls = [station1, station2]

#List to store the state space everytime an action takes place 
state_list = []

# List to store all the possible actions
action_list = []

# List to store the actions taken (choosing a machine) 
acted = []

# List to store the output of all the machines 
output_list = []

# The time at which a wafer enters the factory or makes a transition to a new station
ip_time = []

# The time at which a wafer enters the factory or makes a transition to a new station
op_time = []
wafer_list = []

# List to store the wait time of each wafer
wt = []

# Dictionary for q function values
#q_dict = {}

STATE_SPACE = [[m.queue_length for m in s] for s in ls]

ACTION_SPACE = [[m for m in s] for s in ls]


class DQN:
    def __init__(self):
        self.gamma = 0.85
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.tau = 0.125
        self.learning_rate = 0.005
        self.memory = deque(maxlen= 2000)
        self.model = self.create_model()
        self.target_model = self.create_model()



    # create the neural network to train the q function 
    def create_model(self):
        model = Sequential()
        st = np.array(STATE_SPACE).reshape(1,6)
        act = np.array(ACTION_SPACE).reshape(1,6)
        state_shape = st.shape
        action_shape = act.shape
        model.add(Dense(24, input_dim= (state_shape[1] +1), activation= 'relu'))
        model.add(Dense(48, activation= 'relu'))
        model.add(Dense(24, activation= 'relu'))
        model.add(Dense(action_shape[1]))
        model.compile(loss= 'mean_squared_error', optimizer= Adam(lr= self.learning_rate))
        return model 



    # Action function to choose the best action given the q-function if not exploring based on epsilon
    def choose_action(self, state, allowed_actions):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        r = np.random.random()
        if r < self.epsilon:
            print("**** Choosing a random action ****")
            return random.choice(allowed_actions)
        state = np.array(state).reshape(1,7)
        list_of_allowed_machines = []
        
        for i in range(len(allowed_actions)):
            list_of_allowed_machines.append(int(allowed_actions[i].name[-1]) - 1)

        pred = self.model.predict(state)
        pred = sum(pred.tolist(), [])
        semi_list = [pred[i] for i in list_of_allowed_machines]
        print("**** Choosing a predicted action **********************************")
        return allowed_actions[np.argmax(semi_list)]



    # create replay buffer memory to sample randomly
    def remember(self, state, action, reward, next_state, next_action, done):
        self.memory.append([state, action, reward, next_state, next_action, done])



    # build the replay buffer
    def replay(self):
        batch_size = 32
        list_of_next_allowed_machines = []
        if len(self.memory) < batch_size:
            return 
        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, new_action, done = sample
            state = np.array(state).reshape(1,7)
            new_state = np.array(new_state).reshape(1,7)
            target = self.target_model.predict(state)
            action_id = int(action.name[-1])-1
            if done:
                target[0][action_id] = reward
            else:
                # take max only from next_allowed_actions
                for i in range(len(new_action)):
                    list_of_next_allowed_machines.append(int(new_action[i].name[-1]) - 1)

                next_pred = self.target_model.predict(new_state)[0]
                next_pred = next_pred.tolist()
                semi_next_list = [next_pred[i] for i in list_of_next_allowed_machines]
                Q_future = max(semi_next_list)
                target[0][action_id] = reward + Q_future * self.gamma

            self.model.fit(state, target, epochs= 1, verbose= 0)



    # update our target network 
    def train_target(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
        self.target_model.set_weights(target_weights)



    # save our model 
    def save_model(self, fn):
        self.model.save(fn)


    
# Start sending the wafers into the factory 
def run_incoming_cassettes(env, ls, m, n, breaktime):
    l = 0 
    dqn_agent = DQN()
    # sending in the cassettes
    while l < m:
        print("training on Cassette set C%s"%(l))
        l += 1
        k = 0
        # sending in the wafers
        while k < n:
            #env.process(wafer(i, env, ls))
            lenst = len(ls)
            i = 0
            total_run_t = 0
            t_arrival = env.now

            while(i!=lenst):
                state = [[m.queue_length for m in s] for s in ls]
                allowed_actions = [m for m in ls[i] if m.queue_length < m.max_q_length]

                # appending the current station number to the state
                state = np.append(state, i)
                print("state space", state)
                dispatch_machine = dqn_agent.choose_action(state, allowed_actions)
                print("taken action", dispatch_machine.name)
                #print("selected action", dispatch_machine.name)
                dispatch_machine.queue_length = dispatch_machine.queue_length + 1
                
                q_enter_time = env.now

                if dispatch_machine.queue_length <= 1:
                    dispatch_machine.t_start = env.now
            
                running_time = env.now - dispatch_machine.t_start            
                yield env.timeout(dispatch_machine.queue_length*dispatch_machine.duration-running_time)
                
                total_run_t += dispatch_machine.duration
                #print("total_run_t", total_run_t)
                dispatch_machine.queue_length = dispatch_machine.queue_length - 1
                
                next_state = [[m.queue_length for m in s] for s in ls]
                # appending the current station number to the state
                next_state = np.append(next_state, i+1)
                print("next state", next_state)
                reward = q_enter_time - env.now

                if i < lenst-1:
                    done = False
                    next_allowed_actions = [m for m in ls[i+1] if m.queue_length < m.max_q_length]
                else:
                    done = True

                cur_state = state 
                action = dispatch_machine
                new_state = next_state 
                new_action = next_allowed_actions
                reward = reward
                done = done

                dqn_agent.remember(cur_state, action, reward, new_state, new_action, done)
                dqn_agent.replay()

                # updating the target network less frequently
                if(l%UPDATE_TARGET_NETWORK == 0):
                    dqn_agent.train_target()


                if dispatch_machine.queue_length >=1:
                    dispatch_machine.t_start = env.now
                
                dispatch_machine.output = dispatch_machine.output + 1
               
                i = i + 1

            t_del = env.now - t_arrival
            w_del = t_del - total_run_t
            #print("env now", env.now)
            #print("t_arrival", t_arrival)
            #print("t_del", t_del)
            #print("total_run", total_run_t)
            #print("w_del", w_del)
            print("***************************")
            wt.append(w_del)

            k += 1
        yield(env.timeout(breaktime))
    print("Completed in {} cassettes".format(m))
    dqn_agent.save_model("success.model")
    


# Start Processing a set of n casset
env.process(run_incoming_cassettes(env, ls, 1*10**2, 10, 24))

# Start the simulation 
env.run(SIM_TIME)

print("Wait times of each wafer:")
print(len(wt))
print(wt)

plt.plot(wt)
plt.show()

training on Cassette set C0
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space 

state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
n

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
nex

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M1
n

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next sta

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
*********

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **************************

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C21
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **************************

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **************************

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C32
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action *********************

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **************************

***************************
training on Cassette set C40
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C43
state space [0 0 0 0 0 0 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action *********************

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C51
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action *********************

***************************
training on Cassette set C59
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C62
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C70
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

***************************
training on Cassette set C78
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C81
state space [0 0 0 0 0 0 

state space [0 0 0 0 0 0 1]
**** Choosing a random action ****
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a random action ****
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
*********

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M2
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M4
next state [0 0 0 0 0 0 2]
***************************
training on Cassette set C89
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted

state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action *********************

***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M1
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M6
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted action **********************************
taken action M3
next state [0 0 0 0 0 0 1]
state space [0 0 0 0 0 0 1]
**** Choosing a predicted action **********************************
taken action M5
next state [0 0 0 0 0 0 2]
***************************
state space [0 0 0 0 0 0 0]
**** Choosing a predicted 

### Q-learning factory (DQN을 이용한 공장시뮬레이션인듯)

In [None]:

import simpy
import pandas as pd
import numpy as np 
from collections import namedtuple
from recordtype import recordtype
import random 
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from prettytable import PrettyTable
#get_ipython().magic('matplotlib inline')


SIM_TIME = 10000000 #simulation time
# OBS_TIME = 100 # observation time cycle
# BREAK_TIME = random.expovariate(.5) # time after which we send in a new set of cassettes 


WAIT_TIME_IF_FULL = 6


# Create an environment and start the setup process
env = simpy.Environment()

n=0

Machine = recordtype('Machine', 'duration, name, station_name, queue_length, max_q_length, output, t_start')

m1 = Machine(20, 'M1','S1', 0, 3, 0, 0)
m2 = Machine(20, 'M2','S1', 0, 3, 0, 0)
m3 = Machine(20, 'M3','S1', 0, 3, 0, 0)
m4 = Machine(10, 'M4','S2', 0, 3, 0, 0)
m5 = Machine(10, 'M5','S2', 0, 3, 0, 0)
m6 = Machine(10, 'M6','S2', 0, 3, 0, 0)


station1 = [m1, m2, m3]
station2 = [m4, m5, m6]

# Total list of all the stations in the factory 
ls = [station1, station2]

#List to store the state space everytime an action takes place 
state_list = []

# List to store all the possible actions
action_list = []

# List to store the actions taken (choosing a machine) 
acted = []

# List to store the output of all the machines 
output_list = []

# The time at which a wafer enters the factory or makes a transition to a new station
ip_time = []

# The time at which a wafer enters the factory or makes a transition to a new station
op_time = []
wafer_list = []

# List to store the wait time of each wafer
wt = []

# Dictionary for q function values
q_dict = {}

epsilon = 0.0

N_EPISODES = 10**4*3*2  # 10^24

MIN_ALPHA = 0.01

# learning rate 
alphas = np.linspace(1, MIN_ALPHA, N_EPISODES)


# Q function to maintain the Q-value table
def q(state, all_actions, action=None):
    if state not in q_dict:
        q_dict[state] = np.zeros(len(all_actions))

    if action is None:
        return q_dict[state]
    
    return q_dict[state][action]



# Action function to choose the best action given the q-table if not exploring based on epsilon
def choose_action(state, allowed_actions):
    if random.uniform(0, 1) < epsilon:
        return random.choice(allowed_actions) 
    else:
        return allowed_actions[np.argmax(q(state, allowed_actions))]



def wafer(name, env, st):
    global n
    global state_list
    global action_list
    global acted 
    global output_list
    global ip_time
    global op_time
    global wafer_list 
    global wt
    #allowed_actions = []
    l = len(st)
    i = 0
#     print("W%s entered the factory at t = %s" %(name, env.now))
#     print("-----------------------------------")
    total_run_t = 0
    t_arrival = env.now
    while(i!=l):
        state = [[m.queue_length for m in s] for s in st]
        
        state_str = str([state, i])
#        print(state_str)
#         print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
#         print('The Current queue state is:')
#         print(state_str)
#        state_list.append(state)
#         print('The allowable machines for dispatch are:')
        allowed_actions = [m for m in st[i] if m.queue_length < m.max_q_length]
        # for m in st[i]:
        #     if m.queue_length < m.max_q_length:
        #         allowed_actions.append(m)
        #     else:
        #         raise Exception('All the queues are full, cannot take in more wafers!')
        #     yield env.timeout(WAIT_TIME_IF_FULL)

        # print(type(allowed_actions))
#        action_list.append([m.name for m in allowed_actions])
#         print([m.name for m in allowed_actions])
        
        dispatch_machine = choose_action(state_str, allowed_actions)
        dispatch_machine.queue_length = dispatch_machine.queue_length + 1
        
        q_enter_time = env.now
        
#         print("W%s got %s from %s at t = %s"%(name, dispatch_machine.name, dispatch_machine.station_name, env.now))
#         print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
        
#        ip_time.append(env.now)
#        acted.append(dispatch_machine.name)
#        wafer_list.append('W'+str(name))

        if dispatch_machine.queue_length <= 1:
            dispatch_machine.t_start = env.now
            
        running_time = env.now - dispatch_machine.t_start            
        
        yield env.timeout(dispatch_machine.queue_length*dispatch_machine.duration-running_time)
        
        total_run_t += dispatch_machine.duration
#        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        dispatch_machine.queue_length = dispatch_machine.queue_length - 1
        
        next_state = [[m.queue_length for m in s] for s in st]
        next_state_str = str([next_state, i+1])
        
        print("state",state)
        print("action taken",dispatch_machine.name)
        print("next_state", next_state)


        if i < l-1:
            next_allowed_actions = [m for m in st[i+1] if m.queue_length < m.max_q_length]
            reward = q_enter_time - env.now
        else:
            reward = q_enter_time - env.now
        
        m_index = allowed_actions.index(dispatch_machine)
        # Update your q-value for that corresponding action taken 
        q(state_str, allowed_actions)[m_index] = q(state_str, allowed_actions, m_index) + alphas[n] * (reward + np.max(q(next_state_str, next_allowed_actions)) - q(state_str, allowed_actions, m_index))
        n+=1
        
        if dispatch_machine.queue_length >=1:
            dispatch_machine.t_start = env.now
        
        dispatch_machine.output = dispatch_machine.output + 1
       
#         print("W%s released %s from %s at t = %s"%(name, dispatch_machine.name, dispatch_machine.station_name, env.now))
#        op_time.append(env.now)
#         print('the total output of each machine is:')
#         print([[m.output for m in station] for station in st])
#        output_list.append([[m.output for m in station] for station in st])
#         print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$') 
        i = i + 1
    t_del = env.now - t_arrival
    w_del = t_del - total_run_t
    print("************")
    wt.append(w_del)
#     print("W%s left the factory at t = %s" %(name, env.now))
#     print("|||||||||||||||||||||||||||||||")

    
    
# Start sending the wafers into the factory 
def run_incoming_cassettes(env, ls, m, n, breaktime):
    l = 0
    # sending in the cassettes 
    while l < m:
        l += 1
        i = 0
        # sending in the wafers
        while i < n:
            env.process(wafer(i, env, ls))
            i += 1
        yield(env.timeout(breaktime))
    
# Start Processing a set of n casset
env.process(run_incoming_cassettes(env, ls, 2*10**3, 3, 24))

# Start the simulation 
env.run(SIM_TIME)


#t = PrettyTable(['Time Step', 'Wafer Name', 'Input Time', 'Queue State Space', 'Action Space', 'Action taken', '                                ', 'Output Time', 'Output State Space'])

# df = pd.DataFrame([wafer_list[i], ip_time[i], state_list[i], action_list[i], acted[i]])
# print(df)

# for i in range(len(state_list)):
#     t.add_row([i, wafer_list[i], ip_time[i], state_list[i], action_list[i], acted[i], '                             ', op_time[i], output_list[i]])
#     t.add_row([" ", " ", " ", " ", " ", " ", " ", " ", " "])


# print(t)

#np.mean(wt[1500:])

print(len(wt))

plt.plot(wt)
plt.show()


print(len(q_dict))
#print(q_dict.values())

### testing Q

In [1]:
import simpy
import pandas as pd
import numpy as np 
from collections import namedtuple
from recordtype import recordtype
import random 
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from prettytable import PrettyTable
#get_ipython().magic('matplotlib inline')
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam 
from collections import deque 

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
SIM_TIME = 10**3 #simulation time
# OBS_TIME = 100 # observation time cycle
# BREAK_TIME = random.expovariate(.5) # time after which we send in a new set of cassettes 

UPDATE_TARGET_NETWORK = 10

WAIT_TIME_IF_FULL = 6


# Create an environment and start the setup process
env = simpy.Environment()

n=0

Machine = recordtype('Machine', 'duration, name, station_name, queue_length, max_q_length, output, t_start')

m1 = Machine(10, 'M0','S1', 0, 1000, 0, 0)
m2 = Machine(10, 'M1','S1', 0, 1000, 0, 0)
m3 = Machine(10, 'M2','S1', 0, 1000, 0, 0)
m4 = Machine(20, 'M3','S2', 0, 1000, 0, 0)
m5 = Machine(20, 'M4','S2', 0, 1000, 0, 0)
m6 = Machine(20, 'M5','S2', 0, 1000, 0, 0)


station1 = [m1, m2, m3]
station2 = [m4, m5, m6]

# Total list of all the stations in the factory 
ls = [station1, station2]

# List to store the wait time of each wafer
wt = []

# A fixed state space of our simulation
STATE_SPACE = [[m.queue_length for m in s] for s in ls]#state의 공간을 만드는것. queue를 가지고있는 machine(내 simul에서는 source)이 6개면 
#여기서는 m1,m2,m3가 s1, m4,m5,m6이 s2 두 종류의 state를 갖는다. State_space는 각 queue length

# A fixed action space of our simulation
ACTION_SPACE = [[m for m in s] for s in ls]


In [None]:
STATE_SPACE # m1,m2,m3 m4,m5,m6 의 queue 길이만 저장하는 공간. 근데 왜 이름이 state_space이지? queue길이를 state로 하는건가?

In [None]:
ACTION_SPACE #state에 대한 action을 저장하는 배열이 아닐까

In [None]:
st

In [3]:
import simpy
import pandas as pd
import numpy as np 
from collections import namedtuple
from recordtype import recordtype
import random 
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from prettytable import PrettyTable
#get_ipython().magic('matplotlib inline')
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam 
from collections import deque 


SIM_TIME = 10**3 #simulation time
# OBS_TIME = 100 # observation time cycle
# BREAK_TIME = random.expovariate(.5) # time after which we send in a new set of cassettes 

UPDATE_TARGET_NETWORK = 10

WAIT_TIME_IF_FULL = 6


# Create an environment and start the setup process
env = simpy.Environment()

n=0

Machine = recordtype('Machine', 'duration, name, station_name, queue_length, max_q_length, output, t_start')

m1 = Machine(10, 'M0','S1', 0, 1000, 0, 0)
m2 = Machine(10, 'M1','S1', 0, 1000, 0, 0)
m3 = Machine(10, 'M2','S1', 0, 1000, 0, 0)
m4 = Machine(20, 'M3','S2', 0, 1000, 0, 0)
m5 = Machine(20, 'M4','S2', 0, 1000, 0, 0)
m6 = Machine(20, 'M5','S2', 0, 1000, 0, 0)


station1 = [m1, m2, m3]
station2 = [m4, m5, m6]

# Total list of all the stations in the factory 
ls = [station1, station2]

# List to store the wait time of each wafer
wt = []

# A fixed state space of our simulation
STATE_SPACE = [[m.queue_length for m in s] for s in ls]#state의 공간을 만드는것. queue를 가지고있는 machine(내 simul에서는 source)이 6개면 
#여기서는 m1,m2,m3가 s1, m4,m5,m6이 s2 두 종류의 state를 갖는다. State_space는 각 queue length

# A fixed action space of our simulation
ACTION_SPACE = [[m for m in s] for s in ls]


class DQN:
    def __init__(self): #parameter들의 초기값
        self.gamma = 0.85
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.tau = 0.125
        self.learning_rate = 0.005
        self.memory = deque(maxlen= 2000)
        self.model = self.create_model()
        self.target_model = self.create_model() #target_model이란 create_model의 return값..?



    # create the neural network to train the q function 
    def create_model(self): #Q값예측모델
        model = Sequential()
        st = np.array(STATE_SPACE).reshape(1,len(sum(STATE_SPACE, [])))#1차원 리스트로 reshape한 state_space
        act = np.array(ACTION_SPACE).reshape(1,len(sum(ACTION_SPACE, [])))
        state_shape = st.shape
        action_shape = act.shape
        model.add(Dense(24, input_dim= (state_shape[1] +1), activation= 'relu')) # input은 state
        model.add(Dense(48, activation= 'relu'))
        model.add(Dense(24, activation= 'relu'))
        model.add(Dense(action_shape[1])) 
        model.compile(loss= 'mean_squared_error', optimizer= Adam(lr= self.learning_rate))
        return model 



    # Action function to choose the best action given the q-function if not exploring based on epsilon p값에 의한 예측이 아닐때
    def choose_action(self, state, allowed_actions): #action을 선택 (parameter로 선택가능한 action이 들어옴)
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        r = np.random.random()
        if r < self.epsilon: #p값보다 작은 경우 랜덤한 액션을 취함
            #print("**** Choosing a random action ****")
            return random.choice(allowed_actions) #가능한 action중 랜덤하게 선택
        state = np.array(state).reshape(1,len(state)) #p값보다 큰경우, state 배열 생성
        list_of_allowed_machines = [] 
        
        for i in range(len(allowed_actions)): # 허용가능한 action에 대해
            list_of_allowed_machines.append(int(allowed_actions[i].name[-1])) 

        pred = self.model.predict(state) #모델의 예측값(Q값?)
        pred = sum(pred.tolist(), [])
        semi_list = [pred[i] for i in list_of_allowed_machines] #허용가능범위에서 모델이 예측한 리스트
        #print("************************* Choosing a predicted action **********************************")
        return allowed_actions[np.argmax(semi_list)] #예측값중 가장 큰 action을 선택



    # create replay buffer memory to sample randomly #메모리에서 꺼내서 학습할 수 있게 저장
    def remember(self, state, action, reward, next_state, next_action, done):
        self.memory.append([state, action, reward, next_state, next_action, done])



    # build the replay buffer 저장한 것을 버퍼에서 꺼내오는.? 학습단계?
    def replay(self):
        batch_size = 32
        list_of_next_allowed_machines = []
        if len(self.memory) < batch_size: #buffer에 저장된 memory가 buffer의 총 batch_size보다 작다면 return
            return 
        samples = random.sample(self.memory, batch_size) #메모리에서 배치사이즈만큼 랜덤으로 선택
        for sample in samples:
            state, action, reward, new_state, new_action, done = sample # sample 데이터 하나를 꺼내서
            state = np.array(state).reshape(1,state.shape[0]) 
            new_state = np.array(new_state).reshape(1,new_state.shape[0])
            target = self.target_model.predict(state) #predicted state에 대해 target은 model의 output(실행결과)값. 아마 예측Q값?
            action_id = int(action.name[-1])
            if done: #전체 queue를 모두 끝냈다면
                target[0][action_id] = reward #done이면 reward를 줌
            else:
                # take max only from next_allowed_actions
                for i in range(len(new_action)):
                    list_of_next_allowed_machines.append(int(new_action[i].name[-1]))

                next_pred = self.target_model.predict(new_state)[0] #new state에서(다음)의 target값
                next_pred = next_pred.tolist()
                semi_next_list = [next_pred[i] for i in list_of_next_allowed_machines] 
                Q_future = max(semi_next_list) #다음 state에서 target 최대 값
                target[0][action_id] = reward + Q_future * self.gamma # target의 action_id번째 위치에 다음 Q값이 들어감. 맞춰야 하는 값!!!

            self.model.fit(state, target, epochs= 1, verbose= 0) #1에폭으로 학습



    # update our target network 
    def train_target(self): #학습에서 가중치를 업데이트
        weights = self.model.get_weights() 
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
        self.target_model.set_weights(target_weights)



    # save our model 
    def save_model(self, fn):
        self.model.save(fn)


# Initialize the policy and the target network
dqn_agent = DQN()


    
# Start sending the wafers into the factory 
def run_incoming_cassettes(env, ls, n, breaktime): #main함수
    global loop_num
    loop_num = 0 #simulation 전체 횟수
    # sending in the cassettes
    while True: 
        print("Working on Cassette set C%s"%(loop_num)) #현재 시뮬레이션 횟수
        loop_num += 1
        k = 0
        # sending in the wafers
        while k < n: # n번의 action을 취하는 횟수가 주어짐
            env.process(wafer(env, ls))
            k += 1
            
        yield(env.timeout(breaktime)) # wafer가 모두 돌아가면 break time동안 쉬고 함수 끝냄. breaktime 이후 다시 loop_num 증가, wafer돌림
    print("Completed in {} cassettes".format(m)) 
    dqn_agent.save_model("policy_network.h5")#모델 저장
    



def wafer(env, st):
    lenst = len(st)
    i = 0
    total_run_t = 0
    t_arrival = env.now

    while(i!=lenst): #station의 개수만큼 실행(2*5=10)
        state = [[m.queue_length for m in s] for s in st]
        allowed_actions = [m for m in st[i] if m.queue_length < m.max_q_length] #allowed action은 최대 queue length 미만인 machine

        # appending the current station number to the state
        state = np.append(state, i)
        print("current state", state)
        print(" ")
        print(" ")
        dispatch_machine = dqn_agent.choose_action(state, allowed_actions) #action 선택
        #print("selected action", dispatch_machine.name)
        dispatch_machine.queue_length = dispatch_machine.queue_length + 1 #선택된 action(machine)의 queue길이 증가
        
        q_enter_time = env.now

        if dispatch_machine.queue_length <= 1: #선택된 action의 대기중인 queue가 1이라면 지금 시작 (runningtime은 0)
            dispatch_machine.t_start = env.now
    
        running_time = env.now - dispatch_machine.t_start            
        yield env.timeout(dispatch_machine.queue_length*dispatch_machine.duration-running_time) #queue길이*하나당 소요시간 - 현재 실행한시간
        
        total_run_t += dispatch_machine.duration
        #print("total_run_t", total_run_t)
        dispatch_machine.queue_length = dispatch_machine.queue_length - 1
        
        next_state = [[m.queue_length for m in s] for s in st]

        # appending the current station number to the state
        next_state = np.append(next_state, i+1)

        # build your reward function 리워드 방정식 입력
        reward = (q_enter_time - env.now) - np.sum(state)

        if i < lenst-1: #전체 queue를 모두 처리하면 done=True가 됨
            done = False
            next_allowed_actions = [m for m in st[i+1] if m.queue_length < m.max_q_length]
        else:
            done = True

        cur_state = state 
        action = dispatch_machine
        new_state = next_state 
        new_action = next_allowed_actions
        reward = reward
        done = done

        dqn_agent.remember(cur_state, action, reward, new_state, new_action, done) #새로운 state로 설정해주고 기존state저장
        dqn_agent.replay()#학습

        # updating the target network less frequently
        if(loop_num%UPDATE_TARGET_NETWORK == 0): #가중치 업데이트하는 빈도 정하기
            dqn_agent.train_target()


        if dispatch_machine.queue_length >=1:
            dispatch_machine.t_start = env.now
        
        dispatch_machine.output = dispatch_machine.output + 1
       
        i = i + 1

    t_del = env.now - t_arrival
    w_del = t_del - total_run_t
    
    wt.append(w_del)




# Start Processing a set of n casset
env.process(run_incoming_cassettes(env, ls, 5, 25)) # 5카세트(총 시뮬레이션 횟수?) 25 브레이크

# Start the simulation 
env.run(SIM_TIME)

print("Total wafers made: {}".format(len(wt)))
print("Wait times of each wafer:")
print(wt)

plt.plot(wt)
plt.show()


Working on Cassette set C0
current state [0 0 0 0 0 0 0]
 
 
current state [0 1 0 0 0 0 0]
 
 
current state [0 1 1 0 0 0 0]
 
 
current state [1 1 1 0 0 0 0]
 
 
current state [1 2 1 0 0 0 0]
 
 
current state [1 1 2 0 0 0 1]
 
 
current state [1 1 1 0 1 0 1]
 
 
current state [0 1 1 0 1 1 1]
 
 
current state [0 0 1 1 1 1 1]
 
 

current state [0 0 0 2 1 1 1]
 
 
Working on Cassette set C1
current state [0 0 0 2 2 1 0]
 
 
current state [0 1 0 2 2 1 0]
 
 
current state [0 1 1 2 2 1 0]
 
 
current state [0 2 1 2 2 1 0]
 
 
current state [1 2 1 2 2 1 0]
 
 
current state [2 1 1 1 1 0 1]
 
 
current state [2 1 0 2 1 0 1]
 
 
current state [1 1 0 2 1 1 1]
 
 
current state [1 0 0 2 2 1 1]
 
 
current state [0 0 0 2 3 1 1]
 
 
Working on Cassette set C2
current state [0 0 0 2 2 1 0]
 
 
current state [1 0 0 2 2 1 0]
 
 
current state [2 0 0 2 2 1 0]
 
 
current state [3 0 0 2 2 1 0]
 
 
current state [3 0 1 2 2 1 0]
 
 
current state [2 1 1 2 2 0 1]
 
 
current state [2 1 0 2 2 1 1]
 
 


current state [ 6  3  0 10 20  3  1]
 
 
current state [ 5  3  0 10 21  3  1]
 
 
current state [ 5  2  0  9 21  2  1]
 
 
current state [ 4  2  0  9 22  2  1]
 
 
current state [ 4  1  0  9 23  2  1]
 
 
current state [ 3  1  0  9 23  2  1]
 
 
Working on Cassette set C21
current state [ 3  1  0  9 23  3  0]
 
 
current state [ 4  1  0  9 23  3  0]
 
 
current state [ 4  2  0  9 23  3  0]
 
 
current state [ 4  3  0  9 23  3  0]
 
 
current state [ 5  3  0  9 23  3  0]
 
 
current state [ 6  2  0  8 22  3  1]
 
 
current state [ 5  2  0  8 23  3  1]
 
 
current state [ 4  2  0  8 24  2  1]
 
 
current state [ 4  1  0  8 25  2  1]
 
 
current state [ 3  1  0  8 24  2  1]
 
 
Working on Cassette set C22
current state [ 3  1  0  8 25  2  0]
 
 
current state [ 4  1  0  8 25  2  0]
 
 
current state [ 5  1  0  8 25  2  0]
 
 
current state [ 6  1  0  8 25  2  0]
 
 
current state [ 7  1  0  8 25  2  0]
 
 
current state [ 8  0  0  8 25  2  1]
 
 
current state [ 7  0  0  8 25  2  1]
 
 
c

current state [ 8  1  0  0 73  0  1]
 
 
current state [ 8  0  0  0 74  0  1]
 
 
current state [ 7  0  0  0 74  0  1]
 
 
Total wafers made: 118
Wait times of each wafer:
[0, 0, 0, 20, 20, 0, 15, 15, 0, 35, 35, 20, 30, 30, 15, 50, 25, 35, 45, 45, 5, 40, 40, 25, 60, 60, 25, 80, 55, 20, 75, 50, 15, 95, 45, 90, 90, 110, 85, 10, 105, 80, 10, 125, 100, 120, 95, 140, 115, 135, 135, 10, 155, 105, 150, 125, 170, 145, 190, 140, 210, 160, 45, 205, 155, 40, 200, 175, 85, 220, 195, 55, 240, 215, 50, 235, 210, 45, 230, 230, 65, 250, 225, 85, 245, 245, 55, 240, 240, 50, 285, 235, 45, 280, 155, 325, 150, 320, 120, 340, 140, 335, 135, 355, 155, 350, 125, 370, 25, 390, 55, 410, 405, 425, 445, 25, 440, 485]
