# Imports and Constants

In [1]:
# Import

import simpy
from simpy.events import AnyOf
import numpy as np
from numpy import random
import pandas as pd

import statistics
import matplotlib.pyplot as plt

from scipy.stats import t

In [2]:
# Constants

# Number of products initially in the system; also the starting value of the
# 'j' index that create_environment() stores for picking the next product.
AMOUNT_OF_PRODUCTS = 6

# Half-width of the uniform processing-time windows used by the samplers below.
a = 0.05

# Nominal machine times: centre of the uniform window, or scale (mean) of the
# exponential distribution, depending on the sampler that uses the constant.
MACHINE_TIME_AGENT = 0.001
MACHINE_TIME_UP_1 = 2.05
MACHINE_TIME_CENTRE_1 = 8.05
MACHINE_TIME_RIGHT_1 = 6.05
MACHINE_TIME_DOWN_1 = 6.05
MACHINE_TIME_DOWN_2 = 4.05
MACHINE_TIME_QUALITY = 1.05
MACHINE_TIME_IO = 1.05


# Processing-time samplers. They are plain named functions (instead of lambdas
# bound to names) so they carry a real __name__ and a docstring. Each one reads
# the module-level constants at call time, so changing a constant in a later
# cell still affects subsequent draws.
def machine_time_up_1():
    """Draw a processing time for machine up_1, uniform within +/- a of MACHINE_TIME_UP_1."""
    return random.uniform(MACHINE_TIME_UP_1-a, MACHINE_TIME_UP_1+a)


def machine_time_centre_1():
    """Draw a processing time for machine centre_1, exponential with scale MACHINE_TIME_CENTRE_1."""
    return random.exponential(MACHINE_TIME_CENTRE_1)


def machine_time_right_1():
    """Draw a processing time for machine right_1, exponential with scale MACHINE_TIME_RIGHT_1."""
    return random.exponential(MACHINE_TIME_RIGHT_1)


def machine_time_down_1():
    """Draw a processing time for machine down_1, exponential with scale MACHINE_TIME_DOWN_1."""
    return random.exponential(MACHINE_TIME_DOWN_1)


def machine_time_down_2():
    """Draw a processing time for machine down_2, uniform within +/- a of MACHINE_TIME_DOWN_2."""
    return random.uniform(MACHINE_TIME_DOWN_2-a, MACHINE_TIME_DOWN_2+a)


def machine_time_quality():
    """Draw a processing time for the quality station, uniform within +/- a of MACHINE_TIME_QUALITY."""
    return random.uniform(MACHINE_TIME_QUALITY-a, MACHINE_TIME_QUALITY+a)


def machine_time_io():
    """Draw a processing time for the I/O station, uniform within +/- a of MACHINE_TIME_IO."""
    return random.uniform(MACHINE_TIME_IO-a, MACHINE_TIME_IO+a)


# Nominal transport times between stations.
# NOTE(review): the transport samplers below hard-code the window (1, 1.1) and
# do not read these constants; changing a constant here has no effect on them.
TRANSPORT_TIME_AGENT_UP_1 = 1
TRANSPORT_TIME_AGENT_CENTRE_1 = 1
TRANSPORT_TIME_AGENT_DOWN_1 = 1
TRANSPORT_TIME_UP_1_RIGHT_1 = 1
TRANSPORT_TIME_CENTRE_1_RIGHT_1 = 1
TRANSPORT_TIME_RIGHT_1_QUALITY = 1
TRANSPORT_TIME_DOWN_1_DOWN_2 = 1
TRANSPORT_TIME_DOWN_2_QUALITY = 1
TRANSPORT_TIME_QUALITY_IO = 1
TRANSPORT_TIME_IO_AGENT = 1


# Transport-time samplers: one per link, each uniform on (1, 1.1).
def transport_time_agent_up_1():
    """Draw a transport time for the agent -> up_1 link."""
    return random.uniform(1, 1.1)


def transport_time_agent_centre_1():
    """Draw a transport time for the agent -> centre_1 link."""
    return random.uniform(1, 1.1)


def transport_time_agent_down_1():
    """Draw a transport time for the agent -> down_1 link."""
    return random.uniform(1, 1.1)


def transport_time_up_1_right_1():
    """Draw a transport time for the up_1 -> right_1 link."""
    return random.uniform(1, 1.1)


def transport_time_centre_1_right_1():
    """Draw a transport time for the centre_1 -> right_1 link."""
    return random.uniform(1, 1.1)


def transport_time_down_1_down_2():
    """Draw a transport time for the down_1 -> down_2 link."""
    return random.uniform(1, 1.1)


def transport_time_right_1_quality():
    """Draw a transport time for the right_1 -> quality link."""
    return random.uniform(1, 1.1)


def transport_time_down_2_quality():
    """Draw a transport time for the down_2 -> quality link."""
    return random.uniform(1, 1.1)


def transport_time_quality_io():
    """Draw a transport time for the quality -> io link."""
    return random.uniform(1, 1.1)


def transport_time_io_agent():
    """Draw a transport time for the io -> agent link."""
    return random.uniform(1, 1.1)


SOURCE_TIME = 1

# RNG seeds; SEED_PALLET seeds NumPy's global RNG before the pallet pool is built.
SEED_ID = 5
SEED_PALLET = 42

# Capacity of the FilterStore buffer in front of each station.
CAP_BUF_AGENT = 2
CAP_BUF_UP_1 = 2
CAP_BUF_CENTRE_1 = 2
CAP_BUF_RIGHT_1 = 2
CAP_BUF_DOWN_1 = 2
CAP_BUF_DOWN_2 = 2
CAP_BUF_QUALITY = 2
CAP_BUF_IO = 2

SIMULATION_LENGTH = 100000
# Size of the pre-built product pool (one Pallet object per entry).
NUM_PALLET = 20000000

T_STEP = 0.999764/2
T_START = AMOUNT_OF_PRODUCTS + 0.5
# Short delays during which the throughput flags stay raised
# (I/O exit and junction crossing of the tracked pallet, respectively).
IO_LAG = 0.0001
JUNCTION_LAG = 0.0001
T_WAIT = MACHINE_TIME_AGENT*10

# NOTE(review): M, OCC, TH_P and STEP are not used in the part of the notebook
# visible here; document them at their point of use.
M = 1000
OCC = 10
TH_P = 1000
STEP = 10

# Id of the pallet whose queue position, entry time and flow time are tracked.
PALLET_STAR = 1
flow_times = []

seed_list = [1, 2, 3, 4, 5]

# Simulation Functions

In [3]:
# Pallets

# Operation sequences a product can follow; each character is one operation code
# (the stage functions compare pallet.sequence[pallet.next_ope] against a machine's op list).
# NOTE(review): in the visible code this list is only referenced from a commented-out expression.
sequences = ['1234']

class Pallet:
    """A pallet carrying one product through the line.

    Attributes
    ----------
    sequence : str
        Operation codes the product must go through, one character per operation.
    next_ope : int
        Index into ``sequence`` of the next operation still to be performed.
    pallet : int
        Pallet identifier.
    product : int
        Product identifier.
    """

    def __init__(self, sequence, next_ope, pallet, product):
        self.sequence = sequence
        self.next_ope = next_ope
        self.pallet = pallet
        self.product = product

    def __repr__(self):
        # A repr must never raise: once every operation is done, next_ope equals
        # len(sequence) and indexing would fail, so fall back to a placeholder.
        try:
            upcoming = self.sequence[self.next_ope]
        except IndexError:
            upcoming = 'none'
        return (f"Pallet {self.pallet} carrying Product {self.product} with sequence {self.sequence}, next operation: {upcoming}")

# Seed NumPy's global RNG before building the pallet pool.
# NOTE(review): while the sequence choice below stays commented out, the loop draws no random numbers.
random.seed(SEED_PALLET)

# Pre-build the product pool: NUM_PALLET Pallet objects with sequence '1234', next_ope 0,
# pallet id 0 and product ids 1..NUM_PALLET. The I/O stage functions index this list with res['j'].
# NOTE(review): the list is materialized eagerly; with NUM_PALLET = 20000000 this allocates
# twenty million objects up front — consider creating pallets on demand.
Pallets_List = []
for i in range(NUM_PALLET):
    p = Pallet('1234', 0, 0, i+1) #sequences[random.randint(0,len(sequences)-1)]
    Pallets_List.append(p)

In [32]:
# Environments

def create_environment():
    """Create a fresh SimPy environment and the dictionary of shared simulation state.

    Returns
    -------
    tuple
        ``(env, resources)`` where ``resources`` maps string keys to:

        * ``'env'`` - the environment itself;
        * ``'machine_<name>'`` / ``'machine_<name>_s'`` - capacity-1 ``simpy.Resource``
          objects (the ``_s`` variant is requested by the stage functions when a
          pallet passes a station without being processed there);
        * ``'buffer_<name>'`` - ``simpy.FilterStore`` buffers sized by the ``CAP_BUF_*`` constants;
        * ``'check_*'`` - untriggered events used for signalling between processes;
        * ``'container_right_1'`` / ``'container_quality'`` - ``simpy.Container`` objects
          created full, with one more unit than the matching buffer capacity;
        * bookkeeping lists and counters.
    """
    env = simpy.Environment()

    station_names = ['up_1', 'centre_1', 'down_1', 'right_1', 'down_2', 'quality', 'io']
    buffer_capacities = [
        ('agent_1', CAP_BUF_AGENT),
        ('up_1', CAP_BUF_UP_1),
        ('centre_1', CAP_BUF_CENTRE_1),
        ('down_1', CAP_BUF_DOWN_1),
        ('right_1', CAP_BUF_RIGHT_1),
        ('down_2', CAP_BUF_DOWN_2),
        ('quality', CAP_BUF_QUALITY),
        ('io', CAP_BUF_IO),
    ]
    signal_keys = [
        'check_agent_in',
        'check_agent_out',
        'check_agent_blocking',
        'check_junction_1',
        'check_buf_up_1',
        'check_buf_centre_1',
        'check_buf_down_1',
        'check_buf_right_1',
        'check_buf_down_2',
        'check_buf_quality',
        'check_buf_io',
        'check_production_event',
    ]

    # Entries are inserted group by group in a fixed order: env, machines,
    # bypass machines, buffers, events, containers, bookkeeping.
    resources = {'env': env}

    resources['machine_agent_1'] = simpy.Resource(env, capacity=1)
    for station in station_names:
        resources[f'machine_{station}'] = simpy.Resource(env, capacity=1)

    for station in station_names:
        resources[f'machine_{station}_s'] = simpy.Resource(env, capacity=1)

    for holder, slots in buffer_capacities:
        resources[f'buffer_{holder}'] = simpy.FilterStore(env, capacity=slots)

    for key in signal_keys:
        resources[key] = env.event()

    # Containers start full: capacity and initial level are both buffer capacity + 1.
    right_1_units = CAP_BUF_RIGHT_1+1
    quality_units = CAP_BUF_QUALITY+1
    resources['container_right_1'] = simpy.Container(env, capacity=right_1_units, init=right_1_units)
    resources['container_quality'] = simpy.Container(env, capacity=quality_units, init=quality_units)
    resources['containers_list'] = ['right_1', 'quality']
    resources['container_right_1_list'] = []
    resources['container_quality_list'] = []

    # Scalar bookkeeping read and written by the stage processes.
    resources['q'] = 0
    resources['t_enter'] = 0
    resources['t_enter_agent1'] = 0
    resources['flow'] = 0
    resources['flow_time'] = []
    resources['j'] = AMOUNT_OF_PRODUCTS
    resources['th'] = 0
    resources['th_1'] = 0
    resources['pr_count'] = 0

    return env, resources

# Station identifiers; every entry has matching 'machine_<name>' / 'buffer_<name>'
# keys in the resources dict and a 'machine_<name>_op' list below.
machines_list = ['up_1', 'centre_1', 'down_1', 'right_1', 'down_2', 'quality', 'io']
agents_list = ['agent_1']

# Operation codes each machine can perform.
machine_up_1_op = ['1']
machine_centre_1_op = ['1']
machine_down_1_op = ['1']

machine_right_1_op = ['2']
machine_down_2_op = ['2']

machine_quality_op = ['3']

machine_io_op = ['4']

# Union of all operation codes over every machine, looked up by naming convention.
all_op = set()
for machine in machines_list:
    ops = globals()[f'machine_{machine}_op']
    all_op.update(ops)
# Sort instead of calling list(): the iteration order of a set of strings depends
# on per-process hash randomization, so list(all_op) could differ between kernel
# sessions and break reproducibility of anything indexed by position.
all_op = sorted(all_op)

In [33]:
# Production Functions (without Prints)

def normal_production_stage(env, res, pallet, config, f, train=False):
    """SimPy process: move `pallet` through a production station with plain buffers on both sides.

    The pallet waits until it is first in ``config['buffer_in']``, seizes either the
    machine (when its next operation is in ``config['machine_ops']``) or the bypass
    resource ``config['machine_s']``, leaves the input buffer, is processed if
    required, and is put into ``config['buffer_out']``. After the sampled transport
    time the next stage process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q' and 't_enter' are updated for the pallet whose id is PALLET_STAR.
    pallet : Pallet
    config : dict
        Stage configuration (buffers, resources, samplers, follow-up stage).
    f, train
        Passed through unchanged to the next stage process.
    """
    # Read every entry up front, in a fixed order; the two delays are sampled once
    # per visit (machine time first). The first lookup is kept only so a missing
    # 'check_event' key fails at the same point as before.
    _ = config['check_event']
    inbox = config['buffer_in']
    outbox = config['buffer_out']
    work_station = config['machine']
    bypass_station = config['machine_s']
    handled_ops = config['machine_ops']
    processing_delay = config['machine_time']()
    travel_delay = config['transport_time']()
    spawn_next = config['next_process_func']
    downstream_config = config['next_config']

    # Wait for the head of the input buffer. The event is re-read from config on
    # every pass because it is replaced each time it fires.
    while inbox.items[0] is not pallet:
        yield config['check_event']

    must_process = pallet.sequence[pallet.next_ope] in handled_ops
    station = work_station if must_process else bypass_station

    with station.request() as claim:
        claim.pallet = pallet
        yield claim
        yield inbox.get(lambda candidate: candidate is pallet)

        # Record the tracked pallet's new 1-based position among those still queued.
        for position, queued in enumerate(inbox.items, start=1):
            if queued.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        # Wake the processes waiting on the buffer head, then arm a fresh event.
        config['check_event'].succeed()
        config['check_event'] = env.event()

        if must_process:
            # Perform every consecutive operation this machine handles, reusing the sampled delay.
            while pallet.sequence[pallet.next_ope] in handled_ops:
                yield env.timeout(processing_delay)
                pallet.next_ope += 1

        yield outbox.put(pallet)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(travel_delay)
    env.process(spawn_next(env, res, pallet, downstream_config, f, train))

def normal_io(env, res, pallet, config, f, train=False):
    """SimPy process for the I/O station with plain buffers on both sides.

    The pallet waits until it is first in ``config['buffer_in']``. If its next
    operation is in ``config['machine_ops']`` it seizes ``config['machine']``,
    is processed, the exit is logged to `f` (unless `train` is true) and the pallet
    is reloaded with the next product from ``Pallets_List``. Otherwise it only
    passes through ``config['machine_s']``. In both cases the pallet is put into
    ``config['buffer_out']`` and, after the sampled transport time, the next stage
    process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; reads/writes 'q', 't_enter', 'flow', 'flow_time', 'th',
        'pr_count' and 'j'.
    pallet : Pallet
    config : dict
        Stage configuration (buffers, resources, samplers, 'io_lag',
        'check_production_event', follow-up stage).
    f : object with a ``write`` method
        Only written to when `train` is false.
    train : bool
        When true, the EXIT_IO log line is skipped.
    """
    # NOTE(review): this local is never used below; the code always re-reads config['check_event'].
    check_event = config['check_event']
    buffer_in = config['buffer_in']
    buffer_out = config['buffer_out']
    machine = config['machine']
    machine_s = config['machine_s']
    machine_ops = config['machine_ops']
    # Both delays are sampled once per visit.
    machine_time = config['machine_time']()
    transport_time = config['transport_time']()
    io_lag = config['io_lag']
    next_process_func = config['next_process_func']
    next_config = config['next_config']

    # Wait until this pallet is at the head of the input buffer; the event is
    # re-read from config each time because it is replaced after every firing.
    while True:
        if buffer_in.items[0] is pallet:
            break
        yield config['check_event']

    if pallet.sequence[pallet.next_ope] in machine_ops:
        # I/O path: the pallet's next operation is performed here.
        with machine.request() as req:
            req.pallet = pallet
            yield req
            yield buffer_in.get(lambda obj: obj is pallet)

            # Update the tracked pallet's 1-based position among those still queued.
            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now
                # Replace res['flow'] by env.now minus its previous value and record it.
                # NOTE(review): presumably res['flow'] holds the tracked pallet's start
                # time at this point (set outside this function) — confirm.
                res['flow'] = env.now - res['flow']
                res['flow_time'].append(res['flow'])

            # Wake processes waiting on the buffer head, then arm a fresh event.
            config['check_event'].succeed(); config['check_event'] = env.event()
            yield env.timeout(machine_time)

            if pallet.pallet == PALLET_STAR:
                # Raise the throughput flag for io_lag time units and signal the production event.
                res['th'] = 1
                config['check_production_event'].succeed(); config['check_production_event'] = env.event()
                yield env.timeout(io_lag)
                res['th'] = 0

            if not train:
                f.write(f'{pallet.pallet} {pallet.product} {env.now} EXIT_IO\n')

            # Swap in the next product: the new Pallet object inherits the pallet id.
            # From here on `pallet` refers to the new object.
            res['pr_count'] += 1
            old_pallet = pallet
            pallet = Pallets_List[res['j']]
            pallet.pallet = old_pallet.pallet
            pallet.next_ope = 0
            res['j'] += 1
            yield buffer_out.put(pallet)
    else:
        # Bypass path: no operation is performed at this station.
        with machine_s.request() as req_s:
            req_s.pallet = pallet
            yield req_s
            yield buffer_in.get(lambda obj: obj is pallet)

            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            config['check_event'].succeed(); config['check_event'] = env.event()
            yield buffer_out.put(pallet)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(transport_time)
    env.process(next_process_func(env, res, pallet, next_config, f, train))

def production_stage_before_junction(env, res, pallet, config, f, train=False):
    """SimPy process: production station whose output feeds a junction, with junction signalling.

    Same flow as a plain production stage on the input side. On the output side the
    pallet first takes one unit from ``config['container_out']``, is listed in
    ``config['container_out_list']`` while travelling, and is put into
    ``config['buffer_out']`` after the transport time. When the travelling pallet is
    the tracked one (PALLET_STAR), every flag named in ``config['th']`` is raised for
    JUNCTION_LAG time units and the matching 'check_junction' events are fired.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q', 't_enter' and the flags named in config['th'] are updated.
    pallet : Pallet
    config : dict
        Stage configuration.
    f, train
        Passed through unchanged to the next stage process.
    """
    # Fixed read order; delays sampled once (machine time first). The first lookup
    # is kept only so a missing 'check_event' key fails at the same point as before.
    _ = config['check_event']
    inbox = config['buffer_in']
    outbox = config['buffer_out']
    work_station = config['machine']
    bypass_station = config['machine_s']
    handled_ops = config['machine_ops']
    processing_delay = config['machine_time']()
    travel_delay = config['transport_time']()
    out_slots = config['container_out']
    outbound_transit = config['container_out_list']
    spawn_next = config['next_process_func']
    downstream_config = config['next_config']

    # Wait for the head of the input buffer (event re-read because it is replaced on firing).
    while inbox.items[0] is not pallet:
        yield config['check_event']

    must_process = pallet.sequence[pallet.next_ope] in handled_ops
    station = work_station if must_process else bypass_station

    with station.request() as claim:
        claim.pallet = pallet
        yield claim
        yield inbox.get(lambda candidate: candidate is pallet)

        for position, queued in enumerate(inbox.items, start=1):
            if queued.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        config['check_event'].succeed()
        config['check_event'] = env.event()

        if must_process:
            while pallet.sequence[pallet.next_ope] in handled_ops:
                yield env.timeout(processing_delay)
                pallet.next_ope += 1

        # Reserve one unit downstream while still holding the station.
        yield out_slots.get(1)

    outbound_transit.append(pallet)
    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(travel_delay)

    if pallet.pallet == PALLET_STAR:
        # Raise each junction flag and fire its event, hold for JUNCTION_LAG, then lower the flags.
        for flag_key, flag_cfg in config['th'].items():
            res[flag_key] = 1
            agent_cfg = flag_cfg['config_agent']
            agent_cfg['check_junction'].succeed()
            flag_cfg['config_agent']['check_junction'] = env.event()

        yield env.timeout(JUNCTION_LAG)

        for flag_key in config['th']:
            res[flag_key] = 0

    outbound_transit.remove(pallet)
    yield outbox.put(pallet)
    env.process(spawn_next(env, res, pallet, downstream_config, f, train))

def normal_stage_before_junction(env, res, pallet, config, f, train=False):
    """SimPy process: production station whose output feeds a junction (no junction signalling).

    The pallet waits for the head of ``config['buffer_in']``, seizes the machine or
    the bypass resource, is processed when required, then takes one unit from
    ``config['container_out']``. It is listed in ``config['container_out_list']``
    during the transport time and finally put into ``config['buffer_out']`` before
    the next stage process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q' and 't_enter' are updated for the pallet whose id is PALLET_STAR.
    pallet : Pallet
    config : dict
        Stage configuration.
    f, train
        Passed through unchanged to the next stage process.
    """
    # Fixed read order; delays sampled once (machine time first). The first lookup
    # is kept only so a missing 'check_event' key fails at the same point as before.
    _ = config['check_event']
    inbox = config['buffer_in']
    outbox = config['buffer_out']
    work_station = config['machine']
    bypass_station = config['machine_s']
    handled_ops = config['machine_ops']
    processing_delay = config['machine_time']()
    travel_delay = config['transport_time']()
    out_slots = config['container_out']
    outbound_transit = config['container_out_list']
    spawn_next = config['next_process_func']
    downstream_config = config['next_config']

    # Wait for the head of the input buffer (event re-read because it is replaced on firing).
    while inbox.items[0] is not pallet:
        yield config['check_event']

    must_process = pallet.sequence[pallet.next_ope] in handled_ops
    station = work_station if must_process else bypass_station

    with station.request() as claim:
        claim.pallet = pallet
        yield claim
        yield inbox.get(lambda candidate: candidate is pallet)

        for position, queued in enumerate(inbox.items, start=1):
            if queued.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        config['check_event'].succeed()
        config['check_event'] = env.event()

        if must_process:
            while pallet.sequence[pallet.next_ope] in handled_ops:
                yield env.timeout(processing_delay)
                pallet.next_ope += 1

        # Reserve one unit downstream while still holding the station.
        yield out_slots.get(1)

    outbound_transit.append(pallet)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(travel_delay)
    outbound_transit.remove(pallet)
    yield outbox.put(pallet)

    env.process(spawn_next(env, res, pallet, downstream_config, f, train))

def io_before_junction(env, res, pallet, config, f, train=False):
    """SimPy process for the I/O station when its output feeds a junction.

    Input side and I/O handling match the plain I/O stage: wait for the head of
    ``config['buffer_in']``, seize ``config['machine']`` (operation due here) or
    ``config['machine_s']`` (bypass), and on the I/O path log the exit and reload the
    pallet with the next product from ``Pallets_List``. On the output side the pallet
    takes one unit from ``config['container_out']``, is listed in
    ``config['container_out_list']`` during transport, and is then put into
    ``config['buffer_out']`` before the next stage process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; reads/writes 'q', 't_enter', 'flow', 'flow_time', 'th',
        'pr_count' and 'j'.
    pallet : Pallet
    config : dict
        Stage configuration.
    f : object with a ``write`` method
        Only written to when `train` is false.
    train : bool
        When true, the EXIT_IO log line is skipped.
    """
    # NOTE(review): this local is never used below; the code always re-reads config['check_event'].
    check_event = config['check_event']
    buffer_in = config['buffer_in']
    buffer_out = config['buffer_out']
    machine = config['machine']
    machine_s = config['machine_s']
    machine_ops = config['machine_ops']
    # Both delays are sampled once per visit.
    machine_time = config['machine_time']()
    transport_time = config['transport_time']()
    container_out = config['container_out']
    container_out_list = config['container_out_list']
    io_lag = config['io_lag']
    next_process_func = config['next_process_func']
    next_config = config['next_config']

    # Wait until this pallet is at the head of the input buffer; the event is
    # re-read from config each time because it is replaced after every firing.
    while True:
        if buffer_in.items[0] is pallet:
            break
        yield config['check_event']

    if pallet.sequence[pallet.next_ope] in machine_ops:
        # I/O path: the pallet's next operation is performed here.
        with machine.request() as req:
            req.pallet = pallet
            yield req
            yield buffer_in.get(lambda obj: obj is pallet)

            # Update the tracked pallet's 1-based position among those still queued.
            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now
                # Replace res['flow'] by env.now minus its previous value and record it.
                # NOTE(review): presumably res['flow'] holds the tracked pallet's start
                # time at this point (set outside this function) — confirm.
                res['flow'] = env.now - res['flow']
                res['flow_time'].append(res['flow'])

            # Wake processes waiting on the buffer head, then arm a fresh event.
            config['check_event'].succeed(); config['check_event'] = env.event()
            yield env.timeout(machine_time)

            if pallet.pallet == PALLET_STAR:
                # Raise the throughput flag for io_lag time units and signal the production event.
                res['th'] = 1
                config['check_production_event'].succeed(); config['check_production_event'] = env.event()
                yield env.timeout(io_lag)
                res['th'] = 0

            if not train:
                f.write(f'{pallet.pallet} {pallet.product} {env.now} EXIT_IO\n')

            # Swap in the next product: the new Pallet object inherits the pallet id.
            # From here on `pallet` refers to the new object.
            res['pr_count'] += 1
            old_pallet = pallet
            pallet = Pallets_List[res['j']]
            pallet.pallet = old_pallet.pallet
            pallet.next_ope = 0
            res['j'] += 1
            # Reserve one unit downstream while still holding the machine.
            yield container_out.get(1)
    else:
        # Bypass path: no operation is performed at this station.
        with machine_s.request() as req_s:
            req_s.pallet = pallet
            yield req_s
            yield buffer_in.get(lambda obj: obj is pallet)

            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            config['check_event'].succeed(); config['check_event'] = env.event()
            yield container_out.get(1)

    # The pallet is listed as in transit towards the junction until it enters buffer_out.
    container_out_list.append(pallet)
    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(transport_time)
    container_out_list.remove(pallet)
    yield buffer_out.put(pallet)
    env.process(next_process_func(env, res, pallet, next_config, f, train))

def production_stage_after_junction(env, res, pallet, config, f, train=False):
    """SimPy process: production station whose input comes from a junction.

    The pallet waits for the head of ``config['buffer_in']``, seizes the machine or
    the bypass resource, is processed when required and is put into
    ``config['buffer_out']``. Afterwards one unit is returned to
    ``config['container_in']`` and, after the transport time, the next stage process
    is started. The tracked pallet's position is looked up both in the input buffer
    and in ``config['container_in_list']`` (pallets still travelling to this station).

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q' and 't_enter' are updated for the pallet whose id is PALLET_STAR.
    pallet : Pallet
    config : dict
        Stage configuration.
    f, train
        Passed through unchanged to the next stage process.
    """
    # Fixed read order; delays sampled once (machine time first). The first lookup
    # is kept only so a missing 'check_event' key fails at the same point as before.
    _ = config['check_event']
    inbox = config['buffer_in']
    outbox = config['buffer_out']
    work_station = config['machine']
    bypass_station = config['machine_s']
    handled_ops = config['machine_ops']
    processing_delay = config['machine_time']()
    travel_delay = config['transport_time']()
    in_slots = config['container_in']
    inbound_transit = config['container_in_list']
    spawn_next = config['next_process_func']
    downstream_config = config['next_config']

    # Wait for the head of the input buffer (event re-read because it is replaced on firing).
    while inbox.items[0] is not pallet:
        yield config['check_event']

    must_process = pallet.sequence[pallet.next_ope] in handled_ops
    station = work_station if must_process else bypass_station

    with station.request() as claim:
        claim.pallet = pallet
        yield claim
        yield inbox.get(lambda candidate: candidate is pallet)

        # Tracked pallet still queued in the buffer: 1-based position in the buffer.
        for position, queued in enumerate(inbox.items, start=1):
            if queued.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position

        # Tracked pallet still travelling here: it sits behind everything in the buffer.
        buffered = len(inbox.items)
        for offset, travelling in enumerate(inbound_transit, start=1):
            if travelling.pallet == PALLET_STAR and offset + buffered != res['q']:
                res['t_enter'] = env.now
                res['q'] = offset + buffered

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        config['check_event'].succeed()
        config['check_event'] = env.event()

        if must_process:
            while pallet.sequence[pallet.next_ope] in handled_ops:
                yield env.timeout(processing_delay)
                pallet.next_ope += 1

        yield outbox.put(pallet)

    # Give the unit back to the inbound container now that the pallet has left the station.
    yield in_slots.put(1)
    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(travel_delay)
    env.process(spawn_next(env, res, pallet, downstream_config, f, train))

def io_after_junction(env, res, pallet, config, f, train=False):
    """SimPy process for the I/O station when its input comes from a junction.

    The pallet waits for the head of ``config['buffer_in']`` and seizes
    ``config['machine']`` (operation due here) or ``config['machine_s']`` (bypass).
    On the I/O path the exit is logged to `f` (unless `train` is true) and the pallet
    is reloaded with the next product from ``Pallets_List``. The pallet is put into
    ``config['buffer_out']``, one unit is returned to ``config['container_in']`` and,
    after the transport time, the next stage process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; reads/writes 'q', 't_enter', 'flow', 'flow_time', 'th',
        'pr_count' and 'j'.
    pallet : Pallet
    config : dict
        Stage configuration.
    f : object with a ``write`` method
        Only written to when `train` is false.
    train : bool
        When true, the EXIT_IO log line is skipped.
    """
    # NOTE(review): this local is never used below; the code always re-reads config['check_event'].
    check_event = config['check_event']
    buffer_in = config['buffer_in']
    buffer_out = config['buffer_out']
    machine = config['machine']
    machine_s = config['machine_s']
    machine_ops = config['machine_ops']
    # Both delays are sampled once per visit.
    machine_time = config['machine_time']()
    transport_time = config['transport_time']()
    container_in = config['container_in']
    container_in_list = config['container_in_list']
    io_lag = config['io_lag']
    next_process_func = config['next_process_func']
    next_config = config['next_config']

    # Wait until this pallet is at the head of the input buffer; the event is
    # re-read from config each time because it is replaced after every firing.
    while True:
        if buffer_in.items[0] is pallet:
            break
        yield config['check_event']

    if pallet.sequence[pallet.next_ope] in machine_ops:
        # I/O path: the pallet's next operation is performed here.
        with machine.request() as req:
            req.pallet = pallet
            yield req
            yield buffer_in.get(lambda obj: obj is pallet)

            # Tracked pallet still queued in the buffer: 1-based position in the buffer.
            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1
            # Tracked pallet listed in container_in_list: counted behind everything in the buffer.
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now
                # Replace res['flow'] by env.now minus its previous value and record it.
                # NOTE(review): presumably res['flow'] holds the tracked pallet's start
                # time at this point (set outside this function) — confirm.
                res['flow'] = env.now - res['flow']
                res['flow_time'].append(res['flow'])

            # Wake processes waiting on the buffer head, then arm a fresh event.
            config['check_event'].succeed(); config['check_event'] = env.event()
            yield env.timeout(machine_time)

            if pallet.pallet == PALLET_STAR:
                # Raise the throughput flag for io_lag time units and signal the production event.
                res['th'] = 1
                config['check_production_event'].succeed(); config['check_production_event'] = env.event()
                yield env.timeout(io_lag)
                res['th'] = 0

            if not train:
                f.write(f'{pallet.pallet} {pallet.product} {env.now} EXIT_IO\n')

            # Swap in the next product: the new Pallet object inherits the pallet id.
            # From here on `pallet` refers to the new object.
            res['pr_count'] += 1
            old_pallet = pallet
            pallet = Pallets_List[res['j']]
            pallet.pallet = old_pallet.pallet
            pallet.next_ope = 0
            res['j'] += 1
            yield buffer_out.put(pallet)
    else:
        # Bypass path: no operation is performed at this station.
        with machine_s.request() as req_s:
            req_s.pallet = pallet
            yield req_s
            yield buffer_in.get(lambda obj: obj is pallet)

            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            config['check_event'].succeed(); config['check_event'] = env.event()
            yield buffer_out.put(pallet)

    # Give one unit back to the inbound container now that the pallet has left the station.
    yield container_in.put(1)
    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(transport_time)
    env.process(next_process_func(env, res, pallet, next_config, f, train))

def production_stage_between_junctions(env, res, pallet, config, f, train=False):
    """SimPy process: production station with a junction on both sides, with junction signalling.

    The pallet waits for the head of ``config['buffer_in']``, seizes
    ``config['machine']`` (operation due here) or ``config['machine_s']`` (bypass),
    is processed when required and takes one unit from ``config['container_out']``.
    It then returns one unit to ``config['container_in']``, is listed in
    ``config['container_out_list']`` during the transport time and is put into
    ``config['buffer_out']``. When the travelling pallet is the tracked one
    (PALLET_STAR), every flag named in ``config['th']`` is raised for JUNCTION_LAG
    time units and the matching 'check_junction' events are fired.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q', 't_enter' and the flags named in config['th'] are updated.
    pallet : Pallet
    config : dict
        Stage configuration.
    f, train
        Passed through unchanged to the next stage process.
    """
    buffer_in = config['buffer_in']
    buffer_out = config['buffer_out']
    machine = config['machine']
    machine_s = config['machine_s']
    machine_ops = config['machine_ops']
    # Both delays are sampled once per visit.
    machine_time = config['machine_time']()
    transport_time = config['transport_time']()
    container_in = config['container_in']
    container_in_list = config['container_in_list']
    container_out = config['container_out']
    container_out_list = config['container_out_list']
    next_process_func = config['next_process_func']
    next_config = config['next_config']

    # Wait until this pallet is at the head of the input buffer; the event is
    # re-read from config each time because it is replaced after every firing.
    while True:
        if buffer_in.items[0] is pallet:
            break
        yield config['check_event']

    if pallet.sequence[pallet.next_ope] in machine_ops:
        with machine.request() as req:
            req.pallet = pallet
            yield req
            yield buffer_in.get(lambda obj: obj is pallet)

            # Tracked pallet still queued in the buffer: 1-based position in the buffer.
            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1

            # Tracked pallet listed in container_in_list: counted behind everything in
            # the buffer. The comparison uses the same expression as the assignment
            # (it previously used len(machine.users) in place of 1, which only agrees
            # with the assigned value while the machine has capacity 1).
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            # Wake processes waiting on the buffer head, then arm a fresh event.
            config['check_event'].succeed(); config['check_event'] = env.event()

            # Perform every consecutive operation this machine handles, reusing the sampled delay.
            while pallet.sequence[pallet.next_ope] in machine_ops:
                yield env.timeout(machine_time)
                pallet.next_ope += 1
            # Reserve one unit downstream while still holding the machine.
            yield container_out.get(1)
    else:
        # Bypass path: no operation is performed at this station.
        with machine_s.request() as req_s:
            req_s.pallet = pallet
            yield req_s
            yield buffer_in.get(lambda obj: obj is pallet)

            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            config['check_event'].succeed(); config['check_event'] = env.event()
            yield container_out.get(1)

    # Listed as in transit towards the next junction; one inbound unit is handed back.
    container_out_list.append(pallet)
    yield container_in.put(1)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(transport_time)

    if pallet.pallet == PALLET_STAR:
        # Raise each junction flag and fire its event, hold for JUNCTION_LAG, then lower the flags.
        for th_key, th_cfg in config['th'].items():
            res[th_key] = 1
            th_cfg['config_agent']['check_junction'].succeed(); th_cfg['config_agent']['check_junction'] = env.event()

        yield env.timeout(JUNCTION_LAG)

        for th_key in config['th']:
            res[th_key] = 0

    container_out_list.remove(pallet)
    yield buffer_out.put(pallet)

    env.process(next_process_func(env, res, pallet, next_config, f, train))

def normal_stage_between_junctions(env, res, pallet, config, f, train=False):
    """SimPy process: production station with a junction on both sides (no junction signalling).

    The pallet waits for the head of ``config['buffer_in']``, seizes the machine or
    the bypass resource, is processed when required and takes one unit from
    ``config['container_out']``. It then returns one unit to
    ``config['container_in']``, is listed in ``config['container_out_list']`` during
    the transport time, is put into ``config['buffer_out']`` and the next stage
    process is started.

    Parameters
    ----------
    env : simpy.Environment
    res : dict
        Shared state; 'q' and 't_enter' are updated for the pallet whose id is PALLET_STAR.
    pallet : Pallet
    config : dict
        Stage configuration.
    f, train
        Passed through unchanged to the next stage process.
    """
    # Fixed read order; delays sampled once (machine time first). The first lookup
    # is kept only so a missing 'check_event' key fails at the same point as before.
    _ = config['check_event']
    inbox = config['buffer_in']
    outbox = config['buffer_out']
    work_station = config['machine']
    bypass_station = config['machine_s']
    handled_ops = config['machine_ops']
    processing_delay = config['machine_time']()
    travel_delay = config['transport_time']()
    in_slots = config['container_in']
    inbound_transit = config['container_in_list']
    out_slots = config['container_out']
    outbound_transit = config['container_out_list']
    spawn_next = config['next_process_func']
    downstream_config = config['next_config']

    # Wait for the head of the input buffer (event re-read because it is replaced on firing).
    while inbox.items[0] is not pallet:
        yield config['check_event']

    must_process = pallet.sequence[pallet.next_ope] in handled_ops
    station = work_station if must_process else bypass_station

    with station.request() as claim:
        claim.pallet = pallet
        yield claim
        yield inbox.get(lambda candidate: candidate is pallet)

        # Tracked pallet still queued in the buffer: 1-based position in the buffer.
        for position, queued in enumerate(inbox.items, start=1):
            if queued.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position

        # Tracked pallet still travelling here: it sits behind everything in the buffer.
        buffered = len(inbox.items)
        for offset, travelling in enumerate(inbound_transit, start=1):
            if travelling.pallet == PALLET_STAR and offset + buffered != res['q']:
                res['t_enter'] = env.now
                res['q'] = offset + buffered

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        config['check_event'].succeed()
        config['check_event'] = env.event()

        if must_process:
            while pallet.sequence[pallet.next_ope] in handled_ops:
                yield env.timeout(processing_delay)
                pallet.next_ope += 1

        # Reserve one unit downstream while still holding the station.
        yield out_slots.get(1)

    outbound_transit.append(pallet)
    yield in_slots.put(1)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(travel_delay)

    outbound_transit.remove(pallet)
    yield outbox.put(pallet)

    env.process(spawn_next(env, res, pallet, downstream_config, f, train))

def io_between_junctions(env, res, pallet, config, f, train=False):
    """SimPy process for an I/O (load/unload) stage located between two junctions.

    The pallet waits until it is at the head of ``config['buffer_in']``.  If its
    next operation is in ``config['machine_ops']`` it is processed on
    ``config['machine']``: the finished product is counted in ``res['pr_count']``
    and the pallet object is swapped for ``Pallets_List[res['j']]`` carrying the
    same pallet id.  Otherwise it only passes through ``config['machine_s']``.
    In both cases the pallet then takes one unit from ``container_out``, returns
    one unit to ``container_in``, travels for the sampled transport time, is put
    into ``buffer_out`` and the next stage process is spawned.

    Parameters:
        env: SimPy environment.
        res: shared dict of resources, counters and tracking fields for the
            tracked pallet (``'q'``, ``'t_enter'``, ``'flow'``, ``'flow_time'``,
            ``'th'``, ``'pr_count'``, ``'j'`` are read/written here).
        pallet: pallet object (``.pallet``, ``.product``, ``.sequence``,
            ``.next_ope`` are used).
        config: stage configuration dict (keys read below).
        f: writable log handle; only written to when ``train`` is false.
        train: when true, the ``EXIT_IO`` log line is skipped.
    """
    # NOTE: this local is not used below; the loop re-reads config['check_event']
    # because that entry is replaced with a fresh event every time it fires.
    check_event = config['check_event']
    buffer_in = config['buffer_in']
    buffer_out = config['buffer_out']
    machine = config['machine']
    machine_s = config['machine_s']
    machine_ops = config['machine_ops']
    # Both durations are sampled once, when the process starts.
    machine_time = config['machine_time']()
    transport_time = config['transport_time']()
    container_in = config['container_in']
    container_in_list = config['container_in_list']
    container_out = config['container_out']
    container_out_list = config['container_out_list']
    io_lag = config['io_lag']
    next_process_func = config['next_process_func']
    next_config = config['next_config']

    # Wait until this pallet is first in the input buffer; re-check on every
    # 'check_event' notification.
    while True:
        if buffer_in.items[0] is pallet:
            break
        yield config['check_event']

    if pallet.sequence[pallet.next_ope] in machine_ops:
        with machine.request() as req:
            # Attach the pallet to the request so it can be found via machine.users.
            req.pallet = pallet
            yield req
            yield buffer_in.get(lambda obj: obj is pallet)

            # If the tracked pallet (PALLET_STAR) is still waiting upstream and its
            # queue position changed, refresh its position and timestamp.
            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1
            # Pallets in transit towards this stage queue behind the buffer content.
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now
                # res['flow'] held a start timestamp; turn it into an elapsed time.
                res['flow'] = env.now - res['flow']
                res['flow_time'].append(res['flow'])

            # Wake every process waiting on the buffer head, then re-arm the event.
            config['check_event'].succeed(); config['check_event'] = env.event()
            yield env.timeout(machine_time)

            if pallet.pallet == PALLET_STAR:
                # Raise the throughput flag for io_lag time units around the
                # production notification, then clear it.
                res['th'] = 1
                config['check_production_event'].succeed(); config['check_production_event'] = env.event()
                yield env.timeout(io_lag)
                res['th'] = 0

            if not train:
                f.write(f'{pallet.pallet} {pallet.product} {env.now} EXIT_IO\n')

            res['pr_count'] += 1
            # Swap in the next product from Pallets_List, keeping the same pallet id.
            old_pallet = pallet
            pallet = Pallets_List[res['j']]
            pallet.pallet = old_pallet.pallet
            pallet.next_ope = 0
            res['j'] += 1
            # Take one unit from the downstream container before leaving the machine.
            yield container_out.get(1)
    else:
        # Operation not handled here: pass through the secondary resource.
        with machine_s.request() as req_s:
            req_s.pallet = pallet
            yield req_s
            yield buffer_in.get(lambda obj: obj is pallet)

            for idx, items in enumerate(buffer_in.items):
                if items.pallet == PALLET_STAR:
                    if idx + 1 != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_in.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_in.items)

            if pallet.pallet == PALLET_STAR:
                res['t_enter'] = env.now

            config['check_event'].succeed(); config['check_event'] = env.event()
            yield container_out.get(1)

    # The pallet is now in transit: list it for the downstream stage and give one
    # unit back to this stage's inbound container.
    container_out_list.append(pallet)
    yield container_in.put(1)
    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    yield env.timeout(transport_time)
    container_out_list.remove(pallet)
    yield buffer_out.put(pallet)
    
    env.process(next_process_func(env, res, pallet, next_config, f, train))

def production_agent_RL(env, res, pallet, config, f, train=False):
    """SimPy process for a routing junction driven by a learning agent.

    The pallet seizes ``config['machine_agent']``, leaves ``config['buffer_agent']``,
    the agent picks one entry of ``config['actions']`` and the pallet is sent
    along that route.  When ``config['mode'] == 'train'`` and the pallet is the
    tracked one (``PALLET_STAR``), the decision comes from
    ``agent.choose_action_train`` and ``agent.update_agent`` is called once for
    it; every other pallet uses ``agent.choose_action_test``.

    Parameters:
        env: SimPy environment.
        res: shared dict of resources and tracking fields.
        pallet: pallet object being routed.
        config: junction configuration dict (keys read below).
        f: writable log handle; only written to when ``train`` is false.
        train: when true, the ``ENTERS_AGENT`` log line is skipped.
    """
    machine_agent = config['machine_agent']
    buffer_agent = config['buffer_agent']
    machine_time_agent = config['machine_time_agent']
    agent = config['agent']
    mode = config['mode']
    actions = config['actions']
    container_in = config['container_in']
    if container_in:
        container_in_list = config['container_in_list']
    # NOTE: the three event locals below are not used in this function body;
    # config['check_agent_out'] is re-read where it is fired.
    event_in = config['check_agent_in']
    event_out = config['check_agent_out']
    event_blocking = config['check_agent_blocking']
    variables = config['variables']
    aux_variables = config['aux_variables']

    with machine_agent.request() as req_agent:
        # Attach the pallet to the request so get_state can find it via .users.
        req_agent.pallet = pallet
        yield req_agent
        yield buffer_agent.get(lambda p: p == pallet)

        # Refresh the tracked pallet's queue position if it is still waiting here.
        for idx, items in enumerate(buffer_agent.items):
            if items.pallet == PALLET_STAR:
                if idx + 1 != res['q']:
                    res['t_enter'] = env.now
                    res['q'] = idx + 1
        if container_in:
            # Pallets in transit towards this junction queue behind the buffer.
            for idx, items in enumerate(container_in_list):
                if items.pallet == PALLET_STAR:
                    if idx + 1 + len(buffer_agent.items) != res['q']:
                        res['t_enter'] = env.now
                        res['q'] = idx + 1 + len(buffer_agent.items)

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now
            # config['t_enter_agent'] is used as a key into res here.
            res[config['t_enter_agent']] = env.now

        if not train:
            f.write(f'{pallet.pallet} {pallet.product} {env.now} ENTERS_AGENT\n')

        # The state is observed before the decision delay elapses.
        state = get_state(env, res, PALLET_STAR)

        if mode == 'train' and pallet.pallet == PALLET_STAR:
            reduced_state = reduce_state_train(state, variables, aux_variables)
        else:
            reduced_state = reduce_state_test(state, variables, aux_variables)

        yield env.timeout(machine_time_agent)

        # NOTE: agent_decision stays unbound if mode is neither 'train' nor 'test'.
        if mode == 'train' and pallet.pallet == PALLET_STAR:
            agent_decision = agent.choose_action_train(reduced_state)
        if mode == 'train' and pallet.pallet != PALLET_STAR:
            agent_decision = agent.choose_action_test(reduced_state) 
        if mode == 'test':
            agent_decision = agent.choose_action_test(reduced_state)

        action_config = actions[agent_decision]
        # The reward passed to update_agent from this function is always 0.
        reward = 0
        blocking = 0

        if mode == 'train' and pallet.pallet == PALLET_STAR:
            # If the chosen route cannot accept the pallet right now, do the
            # agent update immediately (before blocking on the resource below)
            # and remember that it was done.
            if 'requires_container_out' in action_config:
                if action_config['container_out'].level == 0:
                    blocking = 1
                    new_state = get_state(env, res, PALLET_STAR)
                    new_reduced_state = reduce_state_train(new_state, variables, aux_variables)
                    agent.update_agent(reduced_state, agent_decision, reward, new_reduced_state)
                    config['check_agent_out'].succeed(); config['check_agent_out'] = env.event()
            else:
                if len(action_config['buffer_out'].items) == action_config['buffer_out'].capacity:
                    blocking = 1                        
                    new_state = get_state(env, res, PALLET_STAR)
                    new_reduced_state = reduce_state_train(new_state, variables, aux_variables)
                    agent.update_agent(reduced_state, agent_decision, reward, new_reduced_state)
                    config['check_agent_out'].succeed(); config['check_agent_out'] = env.event()

        # Routes with a container take one unit now and deliver to buffer_out
        # after the transport delay; the others put into buffer_out right away.
        if 'requires_container_out' in action_config:
            yield action_config['container_out'].get(1)
            action_config['container_out_list'].append(pallet)
        else:
            yield action_config['buffer_out'].put(pallet)        
    
    if mode == 'train' and pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        # Start timestamp for the flow-time measurement.
        res['flow'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

        # Non-blocked case: the agent update happens once the pallet has left.
        if blocking == 0:
            new_state = get_state(env, res, PALLET_STAR)
            new_reduced_state = reduce_state_train(new_state, variables, aux_variables)
            agent.update_agent(reduced_state, agent_decision, reward, new_reduced_state)
            config['check_agent_out'].succeed(); config['check_agent_out'] = env.event()

    if container_in:
        yield container_in.put(1)
    
    yield env.timeout(action_config['transport_time']())
    if 'requires_container_out' in action_config:
        action_config['container_out_list'].remove(pallet)
        yield action_config['buffer_out'].put(pallet)

    env.process(action_config['next_process_func'](env, res, pallet, action_config['next_config'], f, train))

def production_agent_random(env, res, pallet, config, f, train=False):
    """SimPy process for a routing junction that picks a route uniformly at random.

    The pallet seizes ``config['machine_agent']``, leaves ``config['buffer_agent']``,
    waits ``config['machine_time_agent']`` and is then sent along a randomly
    chosen entry of ``config['actions']``.

    Parameters:
        env: SimPy environment.
        res: shared dict; ``res['flow']`` is stamped for the tracked pallet.
        pallet: pallet object being routed.
        config: junction configuration dict.
        f: writable log handle; only written to when ``train`` is false.
        train: when true, the ``ENTERS_AGENT`` log line is skipped, matching
            the other stage processes in this file.
    """
    machine_agent = config['machine_agent']
    buffer_agent = config['buffer_agent']
    machine_time_agent = config['machine_time_agent']
    actions = config['actions']
    container_in = config['container_in']

    with machine_agent.request() as req_agent:
        # Attach the pallet to the request so it can be found via machine.users.
        req_agent.pallet = pallet
        yield req_agent
        yield buffer_agent.get(lambda p: p == pallet)

        # Honour the train flag: previously the line was written unconditionally.
        if not train:
            f.write(f'{pallet.pallet} {pallet.product} {env.now} ENTERS_AGENT\n')
        yield env.timeout(machine_time_agent)
        agent_decision = random.choice(list(actions.keys()))
        action_config = actions[agent_decision]

        # Routes with a container take one unit now and deliver to buffer_out
        # after the transport delay; the others put into buffer_out right away.
        if 'requires_container_out' in action_config:
            yield action_config['container_out'].get(1)
            action_config['container_out_list'].append(pallet)
        else:
            yield action_config['buffer_out'].put(pallet)

    if pallet.pallet == PALLET_STAR:
        # Start timestamp for the flow-time measurement.
        res['flow'] = env.now

    if container_in:
        yield container_in.put(1)

    yield env.timeout(action_config['transport_time']())
    if 'requires_container_out' in action_config:
        action_config['container_out_list'].remove(pallet)
        yield action_config['buffer_out'].put(pallet)

    env.process(action_config['next_process_func'](env, res, pallet, action_config['next_config'], f, train))

def production_agent_heuristic_th(env, res, pallet, config, f, train=False):
    """SimPy process for a routing junction using a least-loaded-buffer heuristic.

    The route whose ``buffer_out`` currently holds the fewest items is chosen;
    ties are broken by the order of ``config['priority']``.  If none of the tied
    routes appears in ``config['priority']`` the first tied route (in
    ``config['actions']`` order) is used instead of failing with an unbound
    variable.

    Parameters:
        env: SimPy environment.
        res: shared dict; ``res['flow']`` is stamped for the tracked pallet.
        pallet: pallet object being routed.
        config: junction configuration dict.
        f: writable log handle; only written to when ``train`` is false.
        train: when true, the ``ENTERS_AGENT`` log line is skipped, matching
            the other stage processes in this file.
    """
    machine_agent = config['machine_agent']
    buffer_agent = config['buffer_agent']
    machine_time_agent = config['machine_time_agent']
    actions = config['actions']
    container_in = config['container_in']

    with machine_agent.request() as req_agent:
        # Attach the pallet to the request so it can be found via machine.users.
        req_agent.pallet = pallet
        yield req_agent
        yield buffer_agent.get(lambda p: p == pallet)
        # Honour the train flag: previously the line was written unconditionally.
        if not train:
            f.write(f'{pallet.pallet} {pallet.product} {env.now} ENTERS_AGENT\n')
        yield env.timeout(machine_time_agent)

        # Occupancy of each route's destination buffer at decision time.
        buffer_counts = {
            action: len(action_cfg['buffer_out'].items)
            for action, action_cfg in actions.items()
        }
        min_count = min(buffer_counts.values())
        candidates = [action for action, count in buffer_counts.items() if count == min_count]

        # First preferred route among the least-loaded ones; fall back to the
        # first candidate so agent_decision is always bound.
        agent_decision = next(
            (preferred for preferred in config['priority'] if preferred in candidates),
            candidates[0],
        )

        action_config = actions[agent_decision]

        if 'requires_container_out' in action_config:
            yield action_config['container_out'].get(1)
            action_config['container_out_list'].append(pallet)
        else:
            yield action_config['buffer_out'].put(pallet)

    if pallet.pallet == PALLET_STAR:
        # Start timestamp for the flow-time measurement.
        res['flow'] = env.now

    if container_in:
        yield container_in.put(1)

    yield env.timeout(action_config['transport_time']())
    if 'requires_container_out' in action_config:
        action_config['container_out_list'].remove(pallet)
        yield action_config['buffer_out'].put(pallet)

    env.process(action_config['next_process_func'](env, res, pallet, action_config['next_config'], f, train))

def production_agent_heuristic_flow(env, res, pallet, config, f, train=False):
    """SimPy process for a routing junction using an estimated-workload heuristic.

    For every route the estimate
    ``(1 + len(buffer_bottleneck.items) + len(machine_bottleneck.users)) * machine_time_bottleneck``
    is computed and the route with the smallest estimate is chosen (the first
    one in ``config['actions']`` order on ties).

    Parameters:
        env: SimPy environment.
        res: shared dict; ``res['flow']`` is stamped for the tracked pallet.
        pallet: pallet object being routed.
        config: junction configuration dict.
        f: writable log handle; only written to when ``train`` is false.
        train: when true, the ``ENTERS_AGENT`` log line is skipped, matching
            the other stage processes in this file.
    """
    machine_agent = config['machine_agent']
    buffer_agent = config['buffer_agent']
    machine_time_agent = config['machine_time_agent']
    actions = config['actions']
    container_in = config['container_in']

    def estimated_load(direction):
        # Pallets ahead (queued + in service) plus this one, times the
        # configured processing time of the route's bottleneck machine.
        route = actions[direction]
        waiting = len(route['buffer_bottleneck'].items)
        in_service = len(route['machine_bottleneck'].users)
        return (1 + waiting + in_service) * route['machine_time_bottleneck']

    with machine_agent.request() as req_agent:
        # Attach the pallet to the request so it can be found via machine.users.
        req_agent.pallet = pallet
        yield req_agent
        yield buffer_agent.get(lambda p: p == pallet)
        # Honour the train flag: previously the line was written unconditionally.
        if not train:
            f.write(f'{pallet.pallet} {pallet.product} {env.now} ENTERS_AGENT\n')
        yield env.timeout(machine_time_agent)

        # min() keeps the first minimal element, like the strict '<' scan it
        # replaces, and never yields None when a route exists.
        agent_decision = min(actions, key=estimated_load)

        action_config = actions[agent_decision]

        if 'requires_container_out' in action_config:
            yield action_config['container_out'].get(1)
            action_config['container_out_list'].append(pallet)
        else:
            yield action_config['buffer_out'].put(pallet)

    if pallet.pallet == PALLET_STAR:
        # Start timestamp for the flow-time measurement.
        res['flow'] = env.now

    if container_in:
        yield container_in.put(1)

    yield env.timeout(action_config['transport_time']())
    if 'requires_container_out' in action_config:
        action_config['container_out_list'].remove(pallet)
        yield action_config['buffer_out'].put(pallet)

    env.process(action_config['next_process_func'](env, res, pallet, action_config['next_config'], f, train))

def production_agent_simple(env, res, pallet, config, f, train=False):
    """SimPy process for a junction with a fixed routing rule.

    A pallet whose next operation is ``'6'`` is sent along the ``'up'`` route,
    every other pallet along ``'left'``.  While the tracked pallet
    (``PALLET_STAR``) crosses the junction, the ``res`` flags named in the
    route's ``'th'`` mapping are raised for ``JUNCTION_LAG`` time units.
    """
    agent_machine = config['machine_agent']
    agent_buffer = config['buffer_agent']
    decision_delay = config['machine_time_agent']
    routes = config['actions']
    container_in = config['container_in']
    if container_in:
        inbound_pallets = config['container_in_list']

    with agent_machine.request() as seat:
        seat.pallet = pallet
        yield seat
        yield agent_buffer.get(lambda candidate: candidate == pallet)

        # Keep the tracked pallet's queue position up to date while it waits.
        for position, waiting in enumerate(agent_buffer.items, start=1):
            if waiting.pallet == PALLET_STAR and position != res['q']:
                res['t_enter'] = env.now
                res['q'] = position
        if container_in:
            # In-transit pallets queue behind whatever is in the buffer.
            first_slot = len(agent_buffer.items) + 1
            for position, inbound in enumerate(inbound_pallets, start=first_slot):
                if inbound.pallet == PALLET_STAR and position != res['q']:
                    res['t_enter'] = env.now
                    res['q'] = position

        if pallet.pallet == PALLET_STAR:
            res['t_enter'] = env.now

        yield env.timeout(decision_delay)

        agent_decision = 'up' if pallet.sequence[pallet.next_ope] == '6' else 'left'
        route = routes[agent_decision]

        if 'requires_container_out' in route:
            yield route['container_out'].get(1)
            route['container_out_list'].append(pallet)
        else:
            yield route['buffer_out'].put(pallet)

    if pallet.pallet == PALLET_STAR:
        res['t_enter'] = env.now
        res['q'] = get_state(env, res, PALLET_STAR)['queue']

    if container_in:
        yield container_in.put(1)

    yield env.timeout(route['transport_time']())

    if pallet.pallet == PALLET_STAR:
        # Raise each junction flag, wake its listeners and re-arm the event.
        for th_key, th_cfg in route['th'].items():
            res[th_key] = 1
            owner = th_cfg['config_agent']
            owner['check_junction'].succeed()
            owner['check_junction'] = env.event()

        yield env.timeout(JUNCTION_LAG)

        for th_key in route['th']:
            res[th_key] = 0

    if 'requires_container_out' in route:
        route['container_out_list'].remove(pallet)
        yield route['buffer_out'].put(pallet)

    env.process(route['next_process_func'](env, res, pallet, route['next_config'], f, train))

In [34]:
# Get State Function

def get_state(env, res, pallet):
    """Build a dict describing the system state relative to one pallet id.

    Parameters:
        env: SimPy environment (only ``env.now`` is read).
        res: shared dict holding the machines, buffers, container lists and the
            tracking fields ``'t_enter'``, ``'th'``, ``'th_<idx>'`` and
            ``'t_enter_agent<idx>'``.
        pallet: pallet id; compared against the ``.pallet`` attribute of the
            pallet objects found in buffers, container lists and machine users.

    Returns:
        dict with, per agent, ``agent_<idx>``, ``buf_ag_<idx>``, ``th_<idx>``
        and ``t_enter_agent<idx>``; then ``pos``, ``slot``, ``queue``,
        ``next_ope``, one counter per machine (plus ``<machine>_s<op>``
        counters for operations that machine does not handle) and ``th_pr``.

    Relies on the module-level names ``agents_list``, ``machines_list`` and
    ``machine_<name>_op`` (looked up through ``globals()``).
    """

    state = {}
    # Defaults used when the pallet is not found anywhere.
    pos = '0'
    queue = 0
    slot = 0
    # Time elapsed since res['t_enter'] was last stamped.
    time = env.now - res['t_enter']
    th_pr = res['th']
    next_ope = 0

    counters = {}

    for agent_name in agents_list:
        # Second '_'-separated token of the agent name is used as its index.
        idx = agent_name.split('_')[1]

        ag_key = f'agent_{idx}'
        buf_key = f'buf_ag_{idx}'
        th_key = f'th_{idx}'
        t_enter_key = f't_enter_agent{idx}'

        ag_val = 0
        buf_val = 0

        machine_obj = res[f'machine_{agent_name}']
        buffer_obj = res[f'buffer_{agent_name}']

        if machine_obj.users:
            # Agent is busy; only the first user is inspected.
            ag_val = 1
            if machine_obj.users[0].pallet.pallet == pallet:
                pos = f'machine_{agent_name}'
                queue = 0
                next_ope = machine_obj.users[0].pallet.sequence[machine_obj.users[0].pallet.next_ope]

        if buffer_obj.items:
            for idx_buf, item in enumerate(buffer_obj.items):
                buf_val += 1
                if item.pallet == pallet:
                    pos = f'buf_ag_{idx}'
                    queue = idx_buf+1
                    next_ope = item.sequence[item.next_ope]

        if agent_name in res['containers_list']:
            # Pallets in transit towards the agent count as queued behind the buffer.
            for idx_cont, item in enumerate(res[f'container_{agent_name}_list']):
                buf_val += 1
                if item.pallet == pallet:
                    pos = f'buf_ag_{idx}'
                    queue = idx_cont + len(buffer_obj.items) + 1
                    next_ope = item.sequence[item.next_ope]

        state[ag_key] = ag_val
        state[buf_key] = buf_val
        state[th_key] = res[th_key]
        state[t_enter_key] = res[t_enter_key]

    # Union of the operations handled by any machine.
    all_op = set()
    for machine in machines_list:
        name = str(machine)
        op_list = globals()[f'machine_{name}_op']
        all_op.update(op_list)
    all_op = list(all_op)

    for machine in machines_list:
        name = str(machine)
        op_list = globals()[f'machine_{name}_op']
        # Operations some other machine handles but this one does not.
        not_handled_ops = set(all_op) - set(op_list)

        counters[name] = 0
        for op in not_handled_ops:
            counters[f'{name}_s{op}'] = 0

        buffer = res[f'buffer_{name}']
        machine_obj = res[f'machine_{name}']

        for idx, item in enumerate(buffer.items):
            next_op = item.sequence[item.next_ope]
            if next_op in op_list:
                counters[name] += 1
                if item.pallet == pallet:
                    pos = f'machine_{name}'
                    queue = idx+1
                    next_ope = item.sequence[item.next_ope]
            elif next_op in not_handled_ops:
                counters[f'{name}_s{next_op}'] += 1
                if item.pallet == pallet:
                    pos = f'machine_{name}_s{next_op}'
                    queue = idx+1
                    next_ope = item.sequence[item.next_ope]

        if machine_obj.users:
            if machine_obj.users[0].pallet.pallet == pallet:
                pos = f'machine_{name}'
                queue = 0
                next_ope = machine_obj.users[0].pallet.sequence[machine_obj.users[0].pallet.next_ope]
            # A busy machine adds one to its counter.
            counters[name] += 1

        # The pass-through resource is optional (res.get returns None if absent).
        machine_s = res.get(f'machine_{name}_s')
        if machine_s and machine_s.users:
            next_op_s = machine_s.users[0].pallet.sequence[machine_s.users[0].pallet.next_ope] # FIXME: original author's marker; what needs fixing is not stated
            if machine_s.users[0].pallet.pallet == pallet:
                pos = f'machine_{name}_s{next_op_s}'
                queue = 0
                next_ope = machine_s.users[0].pallet.sequence[machine_s.users[0].pallet.next_ope]
            # NOTE(review): raises KeyError if next_op_s is not in not_handled_ops.
            counters[f'{name}_s{next_op_s}'] += 1

        if name in res['containers_list']:
            container_pals = res.get(f'container_{name}_list', [])
            buffer = res[f'buffer_{name}']
            for idx, container_pal in enumerate(container_pals):
                if container_pal.sequence[container_pal.next_ope] in op_list:
                    counters[name] += 1
                    if container_pal.pallet == pallet:
                        pos =  f'machine_{name}'
                        queue = idx+len(buffer.items)+1
                        next_ope = container_pal.sequence[container_pal.next_ope]
                else:
                    # NOTE(review): unlike the buffer scan above, this branch does
                    # not check membership in not_handled_ops before indexing.
                    counters[f'{name}_s{container_pal.sequence[container_pal.next_ope]}'] += 1
                    if container_pal.pallet == pallet:
                        pos = f'machine_{name}_s{container_pal.sequence[container_pal.next_ope]}'
                        queue = idx+len(buffer.items)+1
                        next_ope = container_pal.sequence[container_pal.next_ope]

    # Discretise the elapsed time into slot 1 / slot 2 with a position-specific
    # threshold; slot stays 0 for every other position.
    if pos == 'machine_centre_1':
        if time <= 8:
            slot = 1
        else:
            slot = 2

    if pos == 'machine_up_1' or pos == 'machine_down_2':
        if time <= 4:
            slot = 1
        else:
            slot = 2

    if pos == 'machine_agent_1':
        if time <= 4:
            slot = 1
        else:
            slot = 2

    if pos == 'machine_down_1' or pos == 'machine_right_1':
        if time <= 6:
            slot = 1
        else:
            slot = 2
    
    state.update({
        'pos': pos,
        'slot': slot,
        'queue': queue,
        'next_ope': next_ope,
        **counters,
        'th_pr': th_pr
    })

    return state

def state_to_tuple(state_dict):
    """Return the state as a hashable tuple of ``(key, value)`` pairs, in dict order."""
    pairs = ((key, value) for key, value in state_dict.items())
    return tuple(pairs)

In [35]:
# Configs

def create_configs(res, agent_function):
    """Create the per-stage configuration dicts and publish them as module globals.

    Each dict bundles the SimPy objects from ``res`` (buffers, machines,
    containers, events), the duration samplers and the process function of the
    next stage.  The dicts reference each other in a cycle
    (agent -> up/centre/down -> ... -> quality -> io -> agent), so every
    ``'next_config'`` is first set to ``None`` and wired at the end.

    Parameters:
        res: shared resources dict (its entries are looked up by name below).
        agent_function: process function started after the I/O stage, i.e. the
            routing policy to use at the agent junction.
    """
    global config_agent, config_up_1, config_centre_1, config_down_1, config_right_1, config_down_2, config_quality, config_io

    config_agent = {
        'machine_agent': res['machine_agent_1'],
        'buffer_agent': res['buffer_agent_1'],
        'machine_time_agent': MACHINE_TIME_AGENT,
        # One entry per routing decision; the *_bottleneck fields are the ones
        # read by production_agent_heuristic_flow.
        'actions': {
            'up': {
                'transport_time': transport_time_agent_up_1,
                'buffer_out': res['buffer_up_1'],
                'next_process_func': normal_stage_before_junction,
                'next_config': None,
                'machine_bottleneck': res['machine_right_1'],
                'machine_time_bottleneck': MACHINE_TIME_RIGHT_1,
                'buffer_bottleneck': res['buffer_right_1']
            },
            'right': {
                'transport_time': transport_time_agent_centre_1,
                'buffer_out': res['buffer_centre_1'],
                'next_process_func': normal_stage_before_junction,
                'next_config': None,
                'machine_bottleneck': res['machine_centre_1'],
                'machine_time_bottleneck': MACHINE_TIME_CENTRE_1,
                'buffer_bottleneck': res['buffer_centre_1']
            },
            'down': {
                'transport_time': transport_time_agent_down_1,
                'buffer_out': res['buffer_down_1'],
                'next_process_func': normal_production_stage,
                'next_config': None,
                'machine_bottleneck': res['machine_down_1'],
                'machine_time_bottleneck': MACHINE_TIME_DOWN_1,
                'buffer_bottleneck': res['buffer_down_1']
            },
        },
        'container_in': None,
        # Tie-break order read by production_agent_heuristic_th.
        'priority': ['up', 'down', 'right'],
        'check_junction': res['check_junction_1'],
        # NOTE(review): this stores the current *value* of res['t_enter_agent1'],
        # while production_agent_RL uses config['t_enter_agent'] as a *key* into
        # res — confirm which is intended before using this config with it.
        't_enter_agent': res['t_enter_agent1']
    }

    config_up_1 = {
        'check_event': res['check_buf_up_1'],
        'buffer_in': res['buffer_up_1'],
        'buffer_out': res['buffer_right_1'],
        'machine': res['machine_up_1'],
        'machine_s': res['machine_up_1_s'],
        'machine_ops': machine_up_1_op,
        'machine_time': machine_time_up_1,
        'transport_time': transport_time_up_1_right_1,
        'container_out': res['container_right_1'],
        'name': 'UP1',
        'next_process_func': production_stage_between_junctions,
        'next_config': None,
        'container_out_list': res['container_right_1_list']
    }

    config_centre_1 = {
        'check_event': res['check_buf_centre_1'],
        'buffer_in': res['buffer_centre_1'],
        'buffer_out': res['buffer_right_1'],
        'machine': res['machine_centre_1'],
        'machine_s': res['machine_centre_1_s'],
        'machine_ops': machine_centre_1_op,
        'machine_time': machine_time_centre_1,
        'transport_time': transport_time_centre_1_right_1,
        'container_out': res['container_right_1'],
        # NOTE(review): 'CENRTE1' looks like a typo for 'CENTRE1'; left unchanged
        # because it is a runtime string whose consumers are not visible here.
        'name': 'CENRTE1',
        'next_process_func': production_stage_between_junctions,
        'next_config': None,
        'container_out_list': res['container_right_1_list']
    }

    # No container keys here: down_1 feeds down_2 directly.
    config_down_1 = {
        'check_event': res['check_buf_down_1'],
        'buffer_in': res['buffer_down_1'],
        'buffer_out': res['buffer_down_2'],
        'machine': res['machine_down_1'],
        'machine_s': res['machine_down_1_s'],
        'machine_ops': machine_down_1_op,
        'machine_time': machine_time_down_1,
        'transport_time': transport_time_down_1_down_2,
        'name': 'DOWN1',
        'next_process_func': production_stage_before_junction,
        'next_config': None,
    }
    
    # right_1 receives from both up_1 and centre_1 (shared container_right_1).
    config_right_1 = {
        'check_event': res['check_buf_right_1'],
        'buffer_in': res['buffer_right_1'],
        'buffer_out': res['buffer_quality'],
        'machine': res['machine_right_1'],
        'machine_s': res['machine_right_1_s'],
        'machine_ops': machine_right_1_op,
        'machine_time': machine_time_right_1,
        'transport_time': transport_time_right_1_quality,
        'container_in': res['container_right_1'],
        'container_in_list': res['container_right_1_list'],
        'container_out': res['container_quality'],
        'container_out_list': res['container_quality_list'],
        'name': 'RIGHT1',
        'next_process_func': production_stage_after_junction,
        'next_config': None,
        # 'config_agent' is filled in at the end of this function.
        'th': {'th_1': {'config_agent': None}},
    }

    config_down_2 = {
        'check_event': res['check_buf_down_2'],
        'buffer_in': res['buffer_down_2'],
        'buffer_out': res['buffer_quality'],
        'machine': res['machine_down_2'],
        'machine_s': res['machine_down_2_s'],
        'machine_ops': machine_down_2_op,
        'machine_time': machine_time_down_2,
        'transport_time': transport_time_down_2_quality,
        'container_out': res['container_quality'],
        'container_out_list': res['container_quality_list'],
        'name': 'DOWN2',
        'next_process_func': production_stage_after_junction,
        'next_config': None,
        # 'config_agent' is filled in at the end of this function.
        'th': {'th_1': {'config_agent': None}},
    }

    # quality receives from both right_1 and down_2 (shared container_quality).
    config_quality = {
        'check_event': res['check_buf_quality'],
        'buffer_in': res['buffer_quality'],
        'buffer_out': res['buffer_io'],
        'machine': res['machine_quality'],
        'machine_s': res['machine_quality_s'],
        'machine_ops': machine_quality_op,
        'machine_time': machine_time_quality,
        'transport_time': transport_time_quality_io,
        'container_in': res['container_quality'],
        'container_in_list': res['container_quality_list'],
        'name': 'QUALITY',
        'next_process_func': normal_io,
        'next_config': None
    }

    # The I/O stage closes the loop: its output buffer is the agent's buffer and
    # the next process is the routing policy passed in as agent_function.
    config_io = {
        'check_event': res['check_buf_io'],
        'buffer_in': res['buffer_io'],
        'buffer_out': res['buffer_agent_1'],
        'machine': res['machine_io'],
        'machine_s': res['machine_io_s'],
        'machine_ops': machine_io_op,
        'machine_time': machine_time_io,
        'transport_time': transport_time_io_agent,
        'io_lag': IO_LAG,
        'name': 'I/O',
        'next_process_func': agent_function,
        'next_config': None,
        'check_production_event': res['check_production_event']
    }

    # Wire the 'next_config' links now that every dict exists.
    config_agent['actions']['up']['next_config'] = config_up_1
    config_agent['actions']['right']['next_config'] = config_centre_1
    config_agent['actions']['down']['next_config'] = config_down_1
    
    config_up_1['next_config'] = config_right_1
    config_centre_1['next_config'] = config_right_1
    config_down_1['next_config'] = config_down_2
    config_right_1['next_config'] = config_quality
    config_down_2['next_config'] = config_quality
    config_quality['next_config'] = config_io
    config_io['next_config'] = config_agent

    # Both stages that feed the quality junction point their 'th_1' entry at the
    # agent config.
    config_right_1['th']['th_1']['config_agent'] = config_agent
    config_down_2['th']['th_1']['config_agent'] = config_agent

In [36]:
# Source

def source(env, res, agent_function, file):
    """SimPy process that releases the first ``AMOUNT_OF_PRODUCTS`` pallets.

    Every ``SOURCE_TIME`` time units the next entry of ``Pallets_List`` gets a
    1-based pallet id, has its operation pointer reset, is put into
    ``res['buffer_agent_1']`` and is handed to ``agent_function`` together with
    the module-level ``config_agent``.
    """
    for index in range(AMOUNT_OF_PRODUCTS):
        yield env.timeout(SOURCE_TIME)

        new_pallet = Pallets_List[index]
        new_pallet.pallet = index + 1
        new_pallet.next_ope = 0

        yield res['buffer_agent_1'].put(new_pallet)
        env.process(agent_function(env, res, new_pallet, config_agent, file))

# Random Policy

In [28]:
# Process and Run

# Total production per seed under the random routing policy.
counts_random = []

for seed in seed_list:
    # Re-seed NumPy's global generator so each run is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs(resources, production_agent_random)
    log_filename = f"RandomPolicy_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source(env, resources, production_agent_random, f))
        env.run(until=SIMULATION_LENGTH)
    counts_random.append(resources['pr_count'])
    print('Total Production of Random Policy: ', resources['pr_count'])

Total Production of Random Policy:  20425
Total Production of Random Policy:  20761
Total Production of Random Policy:  20561
Total Production of Random Policy:  20426
Total Production of Random Policy:  20799


# Heuristic Policy Th

In [29]:
# Process and Run

# Total production per seed under the least-loaded-buffer heuristic.
counts_heuristic_th = []

for seed in seed_list:
    # Re-seed NumPy's global generator so each run is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs(resources, production_agent_heuristic_th)
    log_filename = f"HeuristicPolicyTh_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source(env, resources, production_agent_heuristic_th, f))
        env.run(until=SIMULATION_LENGTH)
    counts_heuristic_th.append(resources['pr_count'])
    print('Total Production of Heuristic (for Throughput) Policy: ', resources['pr_count'])

Total Production of Heuristic (for Throughput) Policy:  21703
Total Production of Heuristic (for Throughput) Policy:  22000
Total Production of Heuristic (for Throughput) Policy:  21639
Total Production of Heuristic (for Throughput) Policy:  21874
Total Production of Heuristic (for Throughput) Policy:  21721


# Heuristic Policy Flow

In [37]:
# Process and Run

# Total production per seed under the estimated-workload heuristic.
counts_heuristic_flow = []

for seed in seed_list:
    # Re-seed NumPy's global generator so each run is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs(resources, production_agent_heuristic_flow)
    log_filename = f"HeuristicPolicyFlow_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source(env, resources, production_agent_heuristic_flow, f))
        env.run(until=SIMULATION_LENGTH)
    counts_heuristic_flow.append(resources['pr_count'])
    print('Total Production of Heuristic (for Flow) Policy: ', resources['pr_count'])

Total Production of Heuristic (for Flow) Policy:  25418
Total Production of Heuristic (for Flow) Policy:  25379
Total Production of Heuristic (for Flow) Policy:  25552
Total Production of Heuristic (for Flow) Policy:  25253
Total Production of Heuristic (for Flow) Policy:  25473


# RL Agent

In [9]:
# Q-Table Print Functions

def print_q_table(agent):
    """Print every Q-table entry of ``agent`` on its own line.

    ``agent.q_table`` maps ``(state_key, action)`` to a Q-value, where
    ``state_key`` is a tuple of ``(name, value)`` pairs.  The ``next_ope``
    value, when the state has one, is shown in single quotes; states without
    a ``next_ope`` entry are printed as they are instead of raising
    ``KeyError``.
    """
    print("Q-table:")
    for (state_key, action), q_value in agent.q_table.items():
        state_dict = dict(state_key)
        state_str = ', '.join([
            # !s mirrors the explicit str() conversion applied to next_ope.
            f"{k}='{v!s}'" if k == 'next_ope' else f"{k}={v}"
            for k, v in state_dict.items()
        ])
        print(f"State: ({state_str}), Action: {action} -> Q-value: {q_value:.4f}")

def print_sorted_q_table(agent):
    """Print ``agent.q_table`` grouped by state, with states in a stable order.

    States are ordered by their ``(key, str(value))`` pairs after sorting the
    pairs of each state key; within a state the actions are listed
    alphabetically. A ``next_ope`` value, when present, is shown quoted.

    Args:
        agent: Object exposing ``q_table``, a dict keyed by
            ``(state_key, action)``.
    """
    # Bucket the (action, q_value) pairs under their state key.
    actions_by_state = {}
    for (state_key, action), q_value in agent.q_table.items():
        actions_by_state.setdefault(state_key, []).append((action, q_value))

    def ordering(state_key):
        # Values are stringified so heterogeneous value types stay comparable.
        return tuple((k, str(v)) for k, v in sorted(state_key))

    ordered_states = sorted(actions_by_state, key=ordering)

    print("Sorted Q-table:")
    for state_key in ordered_states:
        parts = []
        for k, v in dict(state_key).items():
            if k == 'next_ope':
                parts.append(f"{k}='{v}'")
            else:
                parts.append(f"{k}={v}")
        state_str = ', '.join(parts)
        print(f"\nState: ({state_str})")

        ranked = sorted(actions_by_state[state_key], key=lambda pair: pair[0])
        for action, q_value in ranked:
            print(f"  Azione: {action} -> Q-value: {q_value:.4f}")

In [10]:
# Agent Function

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy training policy.

    Q-values live in ``q_table``, a dict keyed by ``(state_key, action)`` where
    ``state_key = state_to_tuple(state)``. Entries that were never written are
    read as 0.
    """

    def __init__(self, actions, name, alpha=0.4, gamma=0.99, epsilon=0.1):
        """Create an agent with an empty Q-table.

        Args:
            actions: Actions the agent can choose between.
            name: Agent identifier; ``pos`` is derived as ``'machine_' + name``.
            alpha: Learning rate used by ``update_agent``.
            gamma: Discount factor used by ``update_agent``.
            epsilon: Probability of picking a random action while training.
        """
        self.q_table = {}
        self.actions = actions
        self.actions_dense = ['wait']
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.name = name
        # A replace('ag_', 'agent_') used to be needed here; the name is now used as-is.
        self.pos = 'machine_' + self.name

    def _best_actions(self, state_key):
        """Return every action in ``self.actions`` tied for the highest Q-value."""
        q_values = [(a, self.q_table.get((state_key, a), 0)) for a in self.actions]
        max_value = max(q_values, key=lambda x: x[1])[1]
        return [a for a, v in q_values if v == max_value]

    def step_dense(self, new_state, perc):
        """Return the reward ``TH_P * new_state['th_pr'] - perc * STEP``.

        ``TH_P`` and ``STEP`` are module-level names defined outside this class.
        """
        return TH_P*new_state['th_pr'] - perc*STEP

    def choose_action_train(self, state):
        """Pick an action epsilon-greedily for ``state``.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise one of the best-valued actions, with ties broken at random.
        """
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(self.actions)
        return random.choice(self._best_actions(state_to_tuple(state)))

    def choose_action_test(self, state, p=False):
        """Pick a greedy action for ``state`` without exploring.

        The lookup is done on a copy of ``state`` whose ``pos``, ``queue``,
        ``slot`` and ``next_ope`` entries are overwritten with ``self.pos``,
        0, 1 and ``'1'``; the caller's dict is left untouched.

        Args:
            state: Reduced state dict.
            p: When truthy, print the adjusted state and the tied best actions.
        """
        adjusted_state = dict(state)
        adjusted_state['pos'] = self.pos
        adjusted_state['queue'] = 0
        adjusted_state['slot'] = 1
        adjusted_state['next_ope'] = '1'

        if p:
            print('Stato Agente Ridotto:', adjusted_state)

        best_actions = self._best_actions(state_to_tuple(adjusted_state))

        if p:
            print('Azione', self.name, ':', best_actions)

        return random.choice(best_actions)

    def update_agent(self, state, action, reward, new_state, pr=False):
        """Apply one temporal-difference update to ``Q(state, action)``.

        The target bootstraps from ``Q(new_state, 'wait')`` rather than from
        the maximum over all actions in ``new_state``.

        Args:
            state: State in which ``action`` was taken.
            action: Action whose Q-value is updated.
            reward: Reward observed for the transition.
            new_state: State reached after the transition.
            pr: When truthy, print the states, reward and Q-values involved.
        """
        state_key = state_to_tuple(state)
        new_state_key = state_to_tuple(new_state)

        if pr:
            print('')
            print('Stato Iniziale Agente:', state)
            print('Azione:', action)
            print('Q-Value Stato Iniziale Agente:', self.q_table.get((state_key, 'wait'), 0))

        q_predict = self.q_table.get((state_key, action), 0)
        q_target = reward + self.gamma * self.q_table.get((new_state_key, 'wait'), 0)
        self.q_table[(state_key, action)] = q_predict + self.alpha * (q_target - q_predict)

        if pr:
            print('Stato Finale Agente:', new_state)
            print('Reward:', reward)
            print('Q-Value Stato Finale Agente:', self.q_table.get((new_state_key, 'wait'), 0))
            print('Q-Value Stato Iniziale Agente Aggiornato:', self.q_table[(state_key, action)])

In [11]:
# Reduced State Function

def reduce_state_train(state, variables, aux_variables):
    """Project a full state dict onto the variables the learner observes (training).

    Args:
        state: Full state dict.
        variables: Keys to keep; ``'pos'``, ``'queue'`` and ``'slot'`` are
            always added. Keys missing from ``state`` are skipped.
        aux_variables: Mapping ``new_key -> list of source keys``. Each
            ``new_key`` is set to the sum of the source keys found in ``state``.

    Returns:
        The reduced state dict. When ``state['pos']`` equals a source key (or
        ``'machine_' + source key``) of an aux variable, ``pos`` is replaced by
        that aux name, ``queue`` is set to 0 and ``slot`` is recomputed from
        the time elapsed since ``state['t_enter_agent<idx>']``.

    Note:
        The slot computation reads ``env.now`` from the module-level ``env``,
        which is not a parameter of this function.
    """
    mandatory_vars = ['pos', 'queue', 'slot']
    variables = list(set(variables) | set(mandatory_vars))

    state_reduced = {}
    for key in variables:
        if key in state:
            state_reduced[key] = state[key]

    # First pass: aggregate the source counters into each aux variable.
    for aggregate, members in aux_variables.items():
        total = 0
        for member in members:
            if member in state:
                total = total + state[member]
        state_reduced[aggregate] = total

    # Second pass: relabel the position when the pallet sits inside an aggregate.
    for aggregate, members in aux_variables.items():
        for member in members:
            current_pos = state['pos']
            if current_pos == f'machine_{member}' or current_pos == member:
                state_reduced['pos'] = aggregate
                state_reduced['queue'] = 0
                # Second '_'-separated token of the aux name, e.g. 'bb_3' -> '3'.
                suffix = aggregate.split('_')[1]
                entered_at = state[f't_enter_agent{suffix}']
                state_reduced['slot'] = 100 - round((env.now - entered_at) / 2)
                break  # only leaves the inner loop; later aux entries are still scanned

    return state_reduced

def reduce_state_test(state, variables, aux_variables):
    """Project a full state dict onto the variables the learner observes (testing).

    Args:
        state: Full state dict.
        variables: Keys to keep; ``'pos'``, ``'queue'`` and ``'slot'`` are
            always added. Keys missing from ``state`` are skipped.
        aux_variables: Mapping ``new_key -> list of source keys``. Each
            ``new_key`` is set to the sum of the source keys found in ``state``.

    Returns:
        The reduced state dict.
    """
    mandatory_vars = ['pos', 'queue', 'slot']
    variables = list(set(variables) | set(mandatory_vars))

    state_reduced = {}
    for key in variables:
        if key in state:
            state_reduced[key] = state[key]

    for aggregate, members in aux_variables.items():
        total = 0
        for member in members:
            if member in state:
                total = total + state[member]
        state_reduced[aggregate] = total
        # NOTE(review): the queue reset runs once per aux entry, so it is
        # skipped entirely when aux_variables is empty - confirm this is intended.
        state_reduced['queue'] = 0

    return state_reduced

In [12]:
# Configs

# Routing choices offered to the learner; they match the keys of
# config_agent['actions'] built in create_configs_RL.
agent_actions = ['up', 'right', 'down']
# A single learner instance is created here and handed to create_configs_RL by
# both the training and the test cells, so its Q-table carries over.
agent = QLearningAgent(agent_actions, 'agent_1')
# List form used by the Q-table printing cells.
agents_RL = [agent]

def create_configs_RL(res, agent, mode):
    """Build the per-stage configuration dicts for the RL-controlled line.

    The eight dicts are published as module-level globals (``config_agent``,
    ``config_up_1``, ``config_centre_1``, ``config_down_1``, ``config_right_1``,
    ``config_down_2``, ``config_quality``, ``config_io``) and then linked to
    each other through their ``'next_config'`` entries, closing the loop
    agent -> (up_1 | centre_1 | down_1) -> ... -> quality -> io -> agent.

    Args:
        res: Mapping of resource names (e.g. ``'machine_agent_1'``,
            ``'buffer_up_1'``) to the objects created for this run.
        agent: Learner stored under ``config_agent['agent']``.
        mode: Stored as-is under ``config_agent['mode']`` (the notebook passes
            ``'train'`` or ``'test'``).

    Returns:
        None. The function works purely through the globals listed above.

    The ``machine_*_op`` values, ``IO_LAG`` and the stage process functions are
    resolved from module scope.
    """
    global config_agent, config_up_1, config_centre_1, config_down_1, config_right_1, config_down_2, config_quality, config_io

    # Junction where the learner routes a pallet 'up', 'right' or 'down'.
    config_agent = {
    'machine_agent': res['machine_agent_1'],
    'buffer_agent': res['buffer_agent_1'],
    'machine_time_agent': MACHINE_TIME_AGENT,
    'agent': agent,
    'mode': mode,
    'actions': {
            'up': {
                'transport_time': transport_time_agent_up_1,
                'buffer_out': res['buffer_up_1'],
                'next_process_func': normal_stage_before_junction,
                'next_config': None,
                'machine_bottleneck': res['machine_right_1'],
                'machine_time_bottleneck': MACHINE_TIME_RIGHT_1,
                'buffer_bottleneck': res['buffer_right_1']
            },
            'right': {
                'transport_time': transport_time_agent_centre_1,
                'buffer_out': res['buffer_centre_1'],
                'next_process_func': normal_stage_before_junction,
                'next_config': None,
                'machine_bottleneck': res['machine_centre_1'],
                'machine_time_bottleneck': MACHINE_TIME_CENTRE_1,
                'buffer_bottleneck': res['buffer_centre_1']
            },
            'down': {
                'transport_time': transport_time_agent_down_1,
                'buffer_out': res['buffer_down_1'],
                'next_process_func': normal_production_stage,
                'next_config': None,
                'machine_bottleneck': res['machine_down_2'],
                'machine_time_bottleneck': MACHINE_TIME_DOWN_2,
                'buffer_bottleneck': res['buffer_down_2']
            },
        },
    'container_in': None,
    # State keys kept when the full state is reduced for the learner.
    'variables': ['buf_ag_1', 'agent_1', 'next_ope', 'up_1', 'centre_1', 'down_1', 'right_1', 'down_2', 'quality', 'th_1'],
    'aux_variables': {},
    'check_agent_in': res['check_agent_in'],
    'check_agent_out': res['check_agent_out'],
    'check_agent_blocking': res['check_agent_blocking'],
    'check_junction': res['check_junction_1'],
    # Value of res['t_enter_agent1'] captured at configuration time.
    't_enter_agent': res['t_enter_agent1']
}

    # Upper branch, first machine; feeds RIGHT1.
    config_up_1 = {
        'check_event': res['check_buf_up_1'],
        'buffer_in': res['buffer_up_1'],
        'buffer_out': res['buffer_right_1'],
        'machine': res['machine_up_1'],
        'machine_s': res['machine_up_1_s'],
        'machine_ops': machine_up_1_op,
        'machine_time': machine_time_up_1,
        'transport_time': transport_time_up_1_right_1,
        'container_out': res['container_right_1'],
        'name': 'UP1',
        'next_process_func': production_stage_between_junctions,
        'next_config': None,
        'container_out_list': res['container_right_1_list']
    }

    # Centre branch, first machine; also feeds RIGHT1.
    config_centre_1 = {
        'check_event': res['check_buf_centre_1'],
        'buffer_in': res['buffer_centre_1'],
        'buffer_out': res['buffer_right_1'],
        'machine': res['machine_centre_1'],
        'machine_s': res['machine_centre_1_s'],
        'machine_ops': machine_centre_1_op,
        'machine_time': machine_time_centre_1,
        'transport_time': transport_time_centre_1_right_1,
        'container_out': res['container_right_1'],
        # NOTE(review): 'CENRTE1' looks like a typo of 'CENTRE1'; left as-is because
        # it is a runtime value - confirm nothing matches on this exact string.
        'name': 'CENRTE1',
        'next_process_func': production_stage_between_junctions,
        'next_config': None,
        'container_out_list': res['container_right_1_list']
    }

    # Lower branch, first machine; feeds DOWN2.
    config_down_1 = {
        'check_event': res['check_buf_down_1'],
        'buffer_in': res['buffer_down_1'],
        'buffer_out': res['buffer_down_2'],
        'machine': res['machine_down_1'],
        'machine_s': res['machine_down_1_s'],
        'machine_ops': machine_down_1_op,
        'machine_time': machine_time_down_1,
        'transport_time': transport_time_down_1_down_2,
        'name': 'DOWN1',
        'next_process_func': production_stage_before_junction,
        'next_config': None,
    }
    
    # Machine shared by the upper and centre branches; feeds QUALITY.
    config_right_1 = {
        'check_event': res['check_buf_right_1'],
        'buffer_in': res['buffer_right_1'],
        'buffer_out': res['buffer_quality'],
        'machine': res['machine_right_1'],
        'machine_s': res['machine_right_1_s'],
        'machine_ops': machine_right_1_op,
        'machine_time': machine_time_right_1,
        'transport_time': transport_time_right_1_quality,
        'container_in': res['container_right_1'],
        'container_in_list': res['container_right_1_list'],
        'container_out': res['container_quality'],
        'container_out_list': res['container_quality_list'],
        'name': 'RIGHT1',
        'next_process_func': production_stage_after_junction,
        'next_config': None,
        # 'config_agent' is filled in at the end of this function.
        'th': {'th_1': {'config_agent': None}}
    }

    # Lower branch, second machine; feeds QUALITY.
    config_down_2 = {
        'check_event': res['check_buf_down_2'],
        'buffer_in': res['buffer_down_2'],
        'buffer_out': res['buffer_quality'],
        'machine': res['machine_down_2'],
        'machine_s': res['machine_down_2_s'],
        'machine_ops': machine_down_2_op,
        'machine_time': machine_time_down_2,
        'transport_time': transport_time_down_2_quality,
        'container_out': res['container_quality'],
        'container_out_list': res['container_quality_list'],
        'name': 'DOWN2',
        'next_process_func': production_stage_after_junction,
        'next_config': None,
        # 'config_agent' is filled in at the end of this function.
        'th': {'th_1': {'config_agent': None}},
    }

    # Quality station, where the branches merge; feeds I/O.
    config_quality = {
        'check_event': res['check_buf_quality'],
        'buffer_in': res['buffer_quality'],
        'buffer_out': res['buffer_io'],
        'machine': res['machine_quality'],
        'machine_s': res['machine_quality_s'],
        'machine_ops': machine_quality_op,
        'machine_time': machine_time_quality,
        'transport_time': transport_time_quality_io,
        'container_in': res['container_quality'],
        'container_in_list': res['container_quality_list'],
        'name': 'QUALITY',
        'next_process_func': normal_io,
        'next_config': None
    }

    # I/O station; its output buffer is the agent's input buffer.
    config_io = {
        'check_event': res['check_buf_io'],
        'buffer_in': res['buffer_io'],
        'buffer_out': res['buffer_agent_1'],
        'machine': res['machine_io'],
        'machine_s': res['machine_io_s'],
        'machine_ops': machine_io_op,
        'machine_time': machine_time_io,
        'transport_time': transport_time_io_agent,
        'io_lag': IO_LAG,
        'name': 'I/O',
        'next_process_func': production_agent_RL,
        'next_config': None,
        'check_production_event': res['check_production_event']
    }

    # The dicts reference each other cyclically, so the links are set only
    # after all of them exist.
    config_agent['actions']['up']['next_config'] = config_up_1
    config_agent['actions']['right']['next_config'] = config_centre_1
    config_agent['actions']['down']['next_config'] = config_down_1
    
    config_up_1['next_config'] = config_right_1
    config_centre_1['next_config'] = config_right_1
    config_down_1['next_config'] = config_down_2
    config_right_1['next_config'] = config_quality
    config_down_2['next_config'] = config_quality
    config_quality['next_config'] = config_io
    config_io['next_config'] = config_agent

    # Give the two stages carrying a 'th_1' entry a handle on the agent config.
    config_right_1['th']['th_1']['config_agent'] = config_agent
    config_down_2['th']['th_1']['config_agent'] = config_agent

## Training

In [13]:
# Source Training

def source_train_RL(env, res, file):
    """Release the pallets into the line for a training run, then start the dense learner.

    One pallet is released every ``SOURCE_TIME`` time units, for the first
    ``AMOUNT_OF_PRODUCTS`` entries of ``Pallets_List``. Each is numbered from 1,
    has ``next_ope`` reset to 0, is put into ``res['buffer_agent_1']`` and
    handed to ``production_agent_RL`` with ``train=True``. After the last
    release the ``rl_dense`` process is started for ``'th_1'``.

    Args:
        env: Simulation environment.
        res: Resource mapping for this run.
        file: Open log file forwarded to the agent process.

    Note:
        ``config_agent`` is read from module scope, not passed in.
    """
    for index in range(AMOUNT_OF_PRODUCTS):
        yield env.timeout(SOURCE_TIME)

        pallet = Pallets_List[index]
        pallet.pallet = index + 1
        pallet.next_ope = 0
        yield res['buffer_agent_1'].put(pallet)

        env.process(production_agent_RL(env, res, pallet, config_agent, file, train=True))

    env.process(rl_dense(env, res, T_STEP, T_START, config_agent, 'th_1'))

def rl_dense(env, res, t_sampling, t_start, config_agent, th_code):
    """Feed the learner a 'wait' transition at every sampling step.

    After waiting until ``t_start`` and for ``config_agent['check_agent_out']``,
    the process loops forever: it snapshots the reduced state, waits for either
    ``t_sampling`` time units or ``config_agent['check_junction']`` (whichever
    comes first), snapshots the state again and updates the agent for the
    action ``'wait'``.

    The reward is ``TH_P * new_state[th_code] - elapsed_fraction * STEP`` where
    ``elapsed_fraction`` is the time actually waited divided by ``t_sampling``.
    When ``new_state[th_code] == 1`` the loop pauses on ``check_agent_out``
    before sampling again.

    Args:
        env: Simulation environment.
        res: Resource mapping for this run.
        t_sampling: Maximum length of one sampling step.
        t_start: Simulation time before which no update is made.
        config_agent: Agent configuration dict.
        th_code: Key of the reduced state used in the reward.
    """
    if env.now < t_start:
        yield env.timeout(t_start - env.now)

    yield config_agent['check_agent_out']
    while True:
        state_before = reduce_state_train(
            get_state(env, res, PALLET_STAR),
            config_agent['variables'],
            config_agent['aux_variables'],
        )

        step_started = env.now
        yield AnyOf(env, [env.timeout(t_sampling), config_agent['check_junction']])
        elapsed_fraction = (env.now - step_started)/t_sampling

        state_after = reduce_state_train(
            get_state(env, res, PALLET_STAR),
            config_agent['variables'],
            config_agent['aux_variables'],
        )

        reward = TH_P*state_after[th_code] - elapsed_fraction*STEP
        config_agent['agent'].update_agent(state_before, 'wait', reward, state_after)

        if state_after[th_code] == 1:
            yield config_agent['check_agent_out']

In [14]:
# Process and Run

seed_list_training = [1]
# Production count reached during training, one entry per seed.
counts_training = []

for seed in seed_list_training:
    # Re-seed the numpy global RNG so the training run is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs_RL(resources, agent, 'train')
    log_filename = f"RLTraining_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source_train_RL(env, resources, f))
        # Training runs 50 times longer than an evaluation run.
        env.run(until=SIMULATION_LENGTH*50)
    counts_training.append(resources['pr_count'])
    print('Total Production of RL (Training): ', resources['pr_count'])

Total Production of RL (Training):  1360778


In [15]:
# Print Q-Table

# Dump each agent's Q-table, followed by two blank lines as a separator.
for idx, ag in enumerate(agents_RL):
    print('Agent', idx + 1)
    print_q_table(ag)
    print('\n')

Agent 1
Q-table:
State: (quality=0, agent_1=1, pos=machine_agent_1, up_1=0, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=0, centre_1=0, queue=0, down_2=0), Action: up -> Q-value: 0.0000
State: (quality=0, agent_1=1, pos=machine_agent_1, up_1=0, down_1=2, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=0, queue=0, down_2=0), Action: right -> Q-value: 291.5620
State: (quality=0, agent_1=0, pos=machine_centre_1, up_1=0, down_1=2, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=1, queue=1, down_2=0), Action: wait -> Q-value: 282.1808
State: (quality=0, agent_1=0, pos=machine_centre_1, up_1=0, down_1=2, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=1, queue=0, down_2=0), Action: wait -> Q-value: 300.8611
State: (quality=0, agent_1=0, pos=machine_centre_1, up_1=0, down_1=1, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=1, queue=0, down_2=1), Action: wait -> Q-value: 326.3636
State: (quality=0, agent_1=0, pos=machine_c

In [16]:
# Print Sorted Q-Table

# Dump each agent's Q-table grouped by state, followed by two blank lines.
for idx, ag in enumerate(agents_RL):
    print('Agent', idx + 1)
    print_sorted_q_table(ag)
    print('\n')

Agent 1
Sorted Q-table:

State: (quality=0, agent_1=0, pos=machine_up_1, up_1=3, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=1, centre_1=0, queue=0, down_2=0)
  Azione: wait -> Q-value: 19.4450

State: (quality=0, agent_1=0, pos=machine_up_1, up_1=2, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=2, centre_1=0, queue=0, down_2=0)
  Azione: wait -> Q-value: 178.5015

State: (quality=0, agent_1=0, pos=machine_up_1, up_1=3, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=2, centre_1=0, queue=0, down_2=0)
  Azione: wait -> Q-value: -23.5913

State: (quality=0, agent_1=0, pos=machine_up_1, up_1=1, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=0, queue=0, down_2=0)
  Azione: wait -> Q-value: 338.0655

State: (quality=0, agent_1=0, pos=machine_up_1, up_1=2, down_1=0, next_ope='1', buf_ag_1=0, th_1=0, slot=1, right_1=3, centre_1=0, queue=0, down_2=0)
  Azione: wait -> Q-value: -12.7590

State: (quality=0, agent_1=0, pos=machine

## Test

In [17]:
# Process and Run

# Production count reached by the trained agent, one entry per seed.
counts_test = []

for seed in seed_list:
    # Re-seed the numpy global RNG so each replication is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs_RL(resources, agent, 'test')
    log_filename = f"RLTest_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source(env, resources, production_agent_RL, f))
        env.run(until=SIMULATION_LENGTH)
    counts_test.append(resources['pr_count'])
    print('Total Production of RL (Test): ', resources['pr_count'])

Total Production of RL (Test):  27502
Total Production of RL (Test):  27435
Total Production of RL (Test):  27743
Total Production of RL (Test):  27501
Total Production of RL (Test):  27533


# Comparison

In [19]:
# Comparison Agent (Random Policy)

# Decision logs appended to by production_agent_comparison on every agent
# visit: what the throughput heuristic, the flow-time heuristic and the RL
# agent would each have chosen. Entries at the same index refer to the same visit.
decisions_th = []
decisions_ft = []
decisions_rl = []

def _throughput_choice(actions, priority):
    """Return the action with the emptiest output buffer, ties broken by ``priority`` order."""
    buffer_counts = {
        action: len(actions[action]['buffer_out'].items)
        for action in actions
    }
    min_count = min(buffer_counts.values())
    candidates = [action for action, count in buffer_counts.items() if count == min_count]
    for preferred_action in priority:
        if preferred_action in candidates:
            return preferred_action
    # No priority entry matches a candidate: fall back to the first candidate
    # instead of leaving the decision undefined.
    return candidates[0]


def _flow_time_choice(actions):
    """Return the action with the smallest estimated bottleneck workload (first one on ties)."""
    best_action = None
    min_value = float('inf')
    for direction, values in actions.items():
        buffer_len = len(values['buffer_bottleneck'].items)
        machine_users = len(values['machine_bottleneck'].users)
        machine_time = values['machine_time_bottleneck']
        # Jobs ahead (queued + in service) plus this one, times the processing time.
        value = (1 + buffer_len + machine_users) * machine_time
        if value < min_value:
            min_value = value
            best_action = direction
    return best_action


def production_agent_comparison(env, res, pallet, config, f, train=False):
    """Route a pallet at random while logging what each policy would have chosen.

    On every visit the throughput heuristic, the flow-time heuristic and the
    module-level RL ``agent`` are queried on the same situation and their
    answers are appended to ``decisions_th``, ``decisions_ft`` and
    ``decisions_rl``. The pallet itself follows a uniformly random action, and
    roughly one visit in ten is printed as a spot check.

    Args:
        env: Simulation environment.
        res: Resource mapping for this run.
        pallet: Pallet being routed.
        config: Agent configuration dict; ``'priority'`` is optional and only
            used to break ties in the throughput heuristic.
        f: Open log file; an ``ENTERS_AGENT`` line is written per visit.
        train: Forwarded unchanged to the next stage's process function.
    """
    machine_agent = config['machine_agent']
    buffer_agent = config['buffer_agent']
    machine_time_agent = config['machine_time_agent']
    actions = config['actions']
    container_in = config['container_in']

    variables = ['buf_ag_1', 'agent_1', 'next_ope', 'up_1', 'centre_1', 'down_1', 'right_1', 'down_2', 'quality', 'th_1']
    aux_variables = {}

    with machine_agent.request() as req_agent:
        req_agent.pallet = pallet
        yield req_agent
        yield buffer_agent.get(lambda p: p == pallet)

        # The state is sampled on entry, before the agent's processing time elapses.
        state = get_state(env, res, PALLET_STAR)
        reduced_state = reduce_state_test(state, variables, aux_variables)

        f.write(f'{pallet.pallet} {pallet.product} {env.now} ENTERS_AGENT\n')
        yield env.timeout(machine_time_agent)

        # Heuristic Throughput
        agent_th_decision = _throughput_choice(actions, config.get('priority', ()))
        decisions_th.append(agent_th_decision)

        # Heuristic Flow Time
        agent_ft_decision = _flow_time_choice(actions)
        decisions_ft.append(agent_ft_decision)

        # Reinforcement Learning
        agent_rl_decision = agent.choose_action_test(reduced_state)
        decisions_rl.append(agent_rl_decision)

        # Spot check: print about 10% of the visits.
        if random.random() < 0.1:
            print('')
            print('Stato:', reduced_state)
            print('Scelta Agente TH:', agent_th_decision)
            print('Scelta Agente FT:', agent_ft_decision)
            print('Scelta Agente RL:', agent_rl_decision)

        # Random: this is the decision the pallet actually follows.
        agent_decision = random.choice(list(config['actions'].keys()))
        action_config = actions[agent_decision]

        if 'requires_container_out' in action_config:
            yield action_config['container_out'].get(1)
            action_config['container_out_list'].append(pallet)
        else:
            yield action_config['buffer_out'].put(pallet)

    if pallet.pallet == PALLET_STAR:
        res['flow'] = env.now

    if container_in:
        yield container_in.put(1)

    yield env.timeout(action_config['transport_time']())
    if 'requires_container_out' in action_config:
        action_config['container_out_list'].remove(pallet)
        yield action_config['buffer_out'].put(pallet)

    env.process(action_config['next_process_func'](env, res, pallet, action_config['next_config'], f, train))

In [20]:
# Process and Run

# Production count reached by the random policy, one entry per seed.
counts_random = []
seed_list_comparison = [6]

for seed in seed_list_comparison:
    # Re-seed the numpy global RNG so the comparison run is reproducible.
    random.seed(seed)
    env, resources = create_environment()
    create_configs(resources, production_agent_comparison)
    log_filename = f"RandomPolicy_{seed}.txt"
    # The context manager closes (and flushes) the log even if the run raises.
    with open(log_filename, "w") as f:
        env.process(source(env, resources, production_agent_comparison, f))
        env.run(until=SIMULATION_LENGTH)
    counts_random.append(resources['pr_count'])
    print('Total Production of Random Policy: ', resources['pr_count'])


Stato: {'quality': 0, 'up_1': 0, 'queue': 0, 'next_ope': '1', 'slot': 1, 'buf_ag_1': 0, 'right_1': 3, 'centre_1': 1, 'pos': 'machine_agent_1', 'agent_1': 1, 'down_1': 1, 'th_1': 0, 'down_2': 0}
Scelta Agente TH: up
Scelta Agente FT: down
Scelta Agente RL: down

Stato: {'quality': 0, 'up_1': 0, 'queue': 0, 'next_ope': '2', 'slot': 1, 'buf_ag_1': 1, 'right_1': 1, 'centre_1': 1, 'pos': 'machine_down_2', 'agent_1': 1, 'down_1': 0, 'th_1': 0, 'down_2': 1}
Scelta Agente TH: up
Scelta Agente FT: down
Scelta Agente RL: up

Stato: {'quality': 1, 'up_1': 0, 'queue': 0, 'next_ope': '3', 'slot': 0, 'buf_ag_1': 0, 'right_1': 3, 'centre_1': 0, 'pos': 'machine_quality', 'agent_1': 1, 'down_1': 0, 'th_1': 0, 'down_2': 0}
Scelta Agente TH: up
Scelta Agente FT: down
Scelta Agente RL: down

Stato: {'quality': 1, 'up_1': 0, 'queue': 0, 'next_ope': '1', 'slot': 1, 'buf_ag_1': 0, 'right_1': 3, 'centre_1': 0, 'pos': 'machine_agent_1', 'agent_1': 1, 'down_1': 1, 'th_1': 0, 'down_2': 0}
Scelta Agente TH: up
S

In [29]:
# Results Comparison

def compare(decisions1, decisions2):
    """Measure how often two decision sequences agree, position by position.

    Args:
        decisions1: Reference sequence; its length is the denominator.
        decisions2: Sequence compared against it. Entries beyond
            ``len(decisions1)`` are ignored.

    Returns:
        ``(agree, count)``: the agreement as a percentage of
        ``len(decisions1)`` and the number of matching positions. An empty
        ``decisions1`` yields ``(0.0, 0)`` instead of dividing by zero.

    Raises:
        IndexError: If ``decisions2`` is shorter than ``decisions1``.
    """
    total = len(decisions1)
    if total == 0:
        return 0.0, 0
    if len(decisions2) < total:
        raise IndexError('decisions2 is shorter than decisions1')

    count = sum(1 for first, second in zip(decisions1, decisions2) if first == second)
    agree = count/total*100

    return agree, count

In [31]:
# Comparison 

# Pairwise agreement between the three decision logs, as percentages.
print(
    f'Accordi TH - FT: {compare(decisions_th, decisions_ft)[0]:.2f}%',
    f'Accordi TH - RL: {compare(decisions_th, decisions_rl)[0]:.2f}%',
    f'Accordi FT - RL: {compare(decisions_ft, decisions_rl)[0]:.2f}%',
    sep='\n',
)

Accordi TH - FT: 14.28%
Accordi TH - RL: 33.51%
Accordi FT - RL: 60.85%
