## Single-arm cluster tool with MLIF flow wafers & purge operation

* Process times are sampled between 10 and 300 seconds
* Purge times are sampled between 0 and 200 seconds
* The baseline robot move sequence follows the backward(z) rule
* Trained model checkpoint file loaded from ./saved_models/checkpoint_s4sp6.pt

In [1]:
import re
import os
import sys
import copy
import time
import random
import itertools
import numpy as np
import pandas as pd
import torch
import argparse
from envs.clustertool import NoncyclicClusterToolEnv as Env
from envs.algorithms.backward_z import get_policy
from model.CONCAT.model_concat import CONCATNet as CONCATModel


import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Tool layouts keyed by the --stage_type option. Each value holds one integer
# per stage (its length is used as the number of stages when building the
# environment parameters).
# NOTE(review): the integers presumably give the number of parallel chambers
# in each stage -- confirm against envs.clustertool.
STAGE_LIST = {
    's3s3': [1, 1, 1],
    's4s4': [1, 1, 1, 1],
    's2p5': [2, 3],
    's3sp4': [1, 2, 1],
    's3sp6': [1, 3, 2],
    's4sp6': [1, 2, 2, 1],
}


def settings(args):
    """Build everything needed for one baseline-vs-DRL evaluation run.

    Seeds the Python, NumPy and torch RNGs with a fixed value, assembles the
    environment / model / test parameter dictionaries from ``args``, creates
    the DRL model and loads its weights from
    ``./saved_models/checkpoint_s4sp6.pt``, resets a fresh environment with
    ``args.num_test_problem`` instances, and selects the conventional robot
    sequencing rule used as the baseline.

    Args:
        args: Namespace providing ``arm_type``, ``stage_type``, ``z``, ``a``,
            ``min_process_time``, ``max_process_time``, ``min_purge_time``,
            ``max_purge_time``, ``purge_type``, ``num_foup``, ``foup_size``,
            ``foup_type``, ``consider_lot_type``, ``use_trained_model``,
            ``use_latest_model``, ``epoch``, ``num_test_problem`` and
            ``input_sequencing_rule``.

    Returns:
        Tuple ``(env, state, base_model, drl_model, env_params,
        model_params, test_params)``.
    """
    use_cuda = False
    cuda_device_num = 0

    # Fix every RNG in play so repeated calls sample from the same seed.
    seed = 1000
    torch.backends.cudnn.deterministic = True
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # env param setting
    stage = STAGE_LIST[args.stage_type]
    env_params = {
        'arm_type': args.arm_type,
        'stage': stage,
        'init_partial_loading': [0] * len(stage),
        'stage_z': args.z,
        'strategy': args.a,
        'min_process_time': args.min_process_time,
        'max_process_time': args.max_process_time,
        'min_purge_time': args.min_purge_time,
        'max_purge_time': args.max_purge_time,
        'purge_type': args.purge_type,
        'loadport_capacity': 2,
        'num_foup': args.num_foup,
        'foup_size': args.foup_size,
        'num_lot_type': 5,
        'lot_dist': args.foup_type,
        'lot_variance': False,
        'consider_lot_type': args.consider_lot_type,
    }

    # model param setting
    model_params = {
        # Purge handling is switched on whenever a non-zero purge time is possible.
        "purge": env_params['max_purge_time'] != 0,
        'input_action': 'wafer',
        'embedding_dim': 256,
        'sqrt_embedding_dim': 256**(1/2),
        'encoder_layer_num': 3,
        'qkv_dim': 16,
        'sqrt_qkv_dim': 16**(1/2),
        'head_num': 16,
        'logit_clipping': 10,
        'ff_hidden_dim': 512,
        'ms_hidden_dim': 16,
        'ms_layer1_init': (1/2)**(1/2),
        'ms_layer2_init': (1/16)**(1/2),
        'eval_type': 'argmax',
        'normalize': 'instance' if env_params['num_lot_type'] > 1 else 'batch',
    }

    # test param setting
    test_params = {
        'use_cuda': use_cuda,
        'cuda_device_num': cuda_device_num,
        'model_load': {
            'enable': args.use_trained_model,
            'use_latest_model': args.use_latest_model,
            'epoch': args.epoch,
        },
        'multi_run_size': 1,
        'problem_count': args.num_test_problem,
        'test_batch_size': args.num_test_problem,
    }

    # CUDA setting
    if test_params['use_cuda']:
        torch.cuda.set_device(test_params['cuda_device_num'])
        device = torch.device('cuda', test_params['cuda_device_num'])
    else:
        device = torch.device('cpu')
    model_params['device'] = device

    # call env and model
    env = Env(**env_params)
    drl_model = CONCATModel(env, **env_params, **model_params)
    drl_model.params['eval_type'] = 'argmax'
    drl_model.eval()

    # NOTE(review): checkpoint loading is forced on here, so
    # args.use_trained_model has no effect -- confirm this is intended.
    test_params['model_load']['enable'] = True
    if test_params['model_load']['enable']:
        # The path is relative to the current working directory.
        # NOTE(review): the s4sp6 checkpoint is loaded regardless of
        # args.stage_type -- confirm before evaluating other layouts.
        checkpoint_fullname = './saved_models/checkpoint_s4sp6.pt'
        checkpoint = torch.load(checkpoint_fullname, map_location=device)
        drl_model.load_state_dict(checkpoint['model_state_dict'])
        print(f'>>> trained Model {checkpoint_fullname} Loaded....')

    # A second environment is built for the rollouts; the first instance was
    # handed to the model constructor above.
    env = Env(**env_params)
    state = env.reset(test_params['test_batch_size'], device=device)

    # baseline robot move sequence rule settings
    if env.arm_type == 'single':
        baseline_policy = 'backward_z' if env.purge_constraint else 'backward'
    else:
        baseline_policy = 'swap_a_z' if env.purge_constraint else 'swap'

    print(f'>>> use conventional robot sequence = {baseline_policy}')
    env.lot_release_rule = args.input_sequencing_rule
    base_model = get_policy(baseline_policy, env)

    return env, state, base_model, drl_model, env_params, model_params, test_params


def compare_performance(args):
    """Roll out the baseline rule and the DRL model on the same instances.

    Both rollouts start from deep copies of the environment and initial state
    returned by ``settings(args)``, so neither run affects the other.

    Args:
        args: Namespace forwarded to ``settings``; additionally ``show``,
            ``min_process_time`` and ``max_process_time`` are read here.

    Returns:
        Tuple ``(base_makespan, drl_makespan, env)`` where the makespans are
        the final clocks of the baseline and DRL environments after
        rescaling, and ``env`` is the untouched environment from ``settings``.
    """
    env, state, rule, model, *_ = settings(args)

    # run baseline
    ###################################################
    benv = copy.deepcopy(env)
    bstate = copy.deepcopy(state)

    # At least one step is always taken before completion is checked.
    while True:
        action = rule(benv, bstate)
        bstate = benv.step(action, rule=True, show=args.show)
        if benv.done.all():
            break

    # run DRL
    ###################################################
    renv = copy.deepcopy(env)
    rstate = copy.deepcopy(state)

    # Pure inference: disable autograd so no graph is accumulated over the
    # whole rollout.
    with torch.no_grad():
        model.encoding(renv, rstate)

        # rollout
        while not rstate.done.all():
            action, _ = model(renv, rstate)
            rstate = renv.step(action, show=args.show)

    # calculate makespan
    ###################################################
    # NOTE(review): this rescaling presumes the environment clock is kept in
    # normalized process-time units -- confirm against the Env implementation.
    time_span = args.max_process_time - args.min_process_time
    base_makespan = args.min_process_time + time_span * (benv.clock)
    drl_makespan = args.min_process_time + time_span * (renv.clock)

    return base_makespan, drl_makespan, env

def main():
    """Evaluate baseline and DRL makespans for every z configuration.

    Builds the argument namespace from the parser defaults, enumerates every
    combination of per-stage z values for the selected stage layout, and runs
    ``compare_performance`` once per combination.

    Returns:
        List with one ``[z, base_score, drl_score, env]`` entry per z
        configuration, in ``itertools.product`` order.
    """
    def _str2bool(value):
        # argparse's ``type=bool`` turns every non-empty string (including
        # "False") into True, so boolean flags are parsed explicitly.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

    parser = argparse.ArgumentParser(description='Train a model with a specific number of lot types')
    # ENV SETTINGS
    parser.add_argument('--arm_type', type=str, default='single', help='arm type')
    parser.add_argument('--stage_type', type=str, default='s4sp6', help='stage type s3sp4, s3sp6, s4sp6')
    parser.add_argument('--min_process_time', type=int, default=10, help='process time')
    parser.add_argument('--max_process_time', type=int, default=300, help='process time')
    parser.add_argument('--min_purge_time', type=int, default=0, help='purge time')
    parser.add_argument('--max_purge_time', type=int, default=200, help='purge time')
    parser.add_argument('--num_foup', type=int, default=5, help='num_foup')
    parser.add_argument('--foup_size', type=int, default=25, help='foup size')
    parser.add_argument('--foup_type', type=str, default='multi_lot_imbalanced', help='multi_lot_imbalanced, multi_lot_balanced')
    parser.add_argument('--consider_lot_type', type=int, default=5, help='foup type')
    parser.add_argument('--purge_type', type=str, default='long', help='short, long')

    # MODEL SETTINGS
    parser.add_argument('--use_trained_model', type=_str2bool, default=True, help='use trained model')
    parser.add_argument('--use_latest_model', type=_str2bool, default=True, help='use latest trained model')
    parser.add_argument('--epoch', type=int, default=280, help='load epoch model')
    parser.add_argument('--num_test_problem', type=int, default=100, help='#test instance')

    # ETC SETTINGS
    parser.add_argument('--input_sequencing_rule', type=str, default='random', help='use rule for input sequencing')
    parser.add_argument('--time_limit', type=int, default=600, help='time_limit')

    parser.add_argument('--rolling_foup_cnt', type=int, default=4, help='rolling horizon range')
    parser.add_argument('--show', type=_str2bool, default=False, help='show schedule monitor')

    # An explicit empty argv is parsed, so only the defaults above are used
    # and whatever is in sys.argv is ignored.
    args = parser.parse_args([])

    # Each stage with value x contributes the z candidates 0..x-1.
    z_ranges = [tuple(range(x)) for x in STAGE_LIST[args.stage_type]]

    results = []
    for z in itertools.product(*z_ranges):
        # RUN
        args.z = z
        args.a = z  # the strategy vector mirrors z
        base_score, drl_score, env = compare_performance(args)
        results.append([z, base_score, drl_score, env])

    return results

In [2]:
# Run the full comparison; main() returns one [z, base_score, drl_score, env]
# entry per z configuration.
result = main()

>>> trained Model ./saved_models/checkpoint_s4sp6.pt Loaded....
>>> use conventional robot sequence = backward_z
>>> trained Model ./saved_models/checkpoint_s4sp6.pt Loaded....
>>> use conventional robot sequence = backward_z
>>> trained Model ./saved_models/checkpoint_s4sp6.pt Loaded....
>>> use conventional robot sequence = backward_z
>>> trained Model ./saved_models/checkpoint_s4sp6.pt Loaded....
>>> use conventional robot sequence = backward_z


In [3]:
# Stack the baseline makespans of every z configuration and keep, per
# instance, the smallest one (the z* baseline).
baseline_score = torch.stack([entry[1] for entry in result]).min(dim=0).values

# The DRL makespan is read from the first z configuration only.
# NOTE(review): confirm the DRL result does not depend on z.
drl_score = result[0][2]

In [4]:
# print average results, framed by a 50-character rule
banner = "=" * 50
mean_base = baseline_score.mean()
mean_drl = drl_score.mean()

print(banner)
print("Single-arm cluster tool with MLIF flow wafers")
print(f'Average makespan of Base: {mean_base:.2f}, DRL: {mean_drl:.2f}')
print(banner)

Single-arm cluster tool with MLIF flow wafers
Average makespan of Base: 18811.01, DRL: 17403.19


In [11]:
# Environment of the first z configuration; its sampled process and cleaning
# times are reported alongside the makespans.
env = result[0][-1]

# One row per test instance: id, per-lot-type process times (5 lot types),
# stage cleaning times, then both makespans truncated to int.
results = []
for instance_id, base_value in enumerate(baseline_score):
    process_times_by_type = [
        env._process_time[instance_id, lot_type, 1:env.num_stage+1].tolist()
        for lot_type in range(5)
    ]
    cleaning_times = env.loc.cleaning_time[instance_id, 1:-1].tolist()

    results.append([
        instance_id,
        *process_times_by_type,
        cleaning_times,
        int(base_value),
        int(drl_score[instance_id]),
    ])

column_names = (
    ["InstanceID"]
    + [f"Type {k} Process Time" for k in range(1, 6)]
    + ["Stage cleaning Time", "Backward(z*) Makespan", "RL Makespan"]
)
df = pd.DataFrame(results, columns=column_names)

# Lift every pandas display limit so the whole table is rendered.
for display_option in ('display.max_rows', 'display.max_columns',
                       'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

df


Unnamed: 0,InstanceID,Type 1 Process Time,Type 2 Process Time,Type 3 Process Time,Type 4 Process Time,Type 5 Process Time,Stage cleaning Time,Backward(z*) Makespan,RL Makespan
0,0,"[121, 195, 174, 61]","[116, 262, 220, 88]","[130, 192, 248, 124]","[102, 129, 86, 77]","[36, 245, 194, 18]","[11.0, 84.0, 147.0, 25.0]",18391,17242
1,1,"[100, 170, 218, 15]","[64, 255, 214, 141]","[25, 209, 265, 76]","[88, 271, 230, 18]","[138, 57, 101, 128]","[87.0, 156.0, 4.0, 7.0]",20406,18777
2,2,"[93, 293, 262, 70]","[41, 220, 151, 70]","[134, 61, 66, 83]","[43, 20, 280, 78]","[51, 253, 265, 111]","[17.0, 165.0, 163.0, 94.0]",22969,20819
3,3,"[86, 258, 213, 43]","[35, 166, 64, 77]","[143, 96, 107, 62]","[69, 236, 196, 65]","[42, 196, 149, 137]","[47.0, 159.0, 156.0, 69.0]",22071,18476
4,4,"[97, 184, 193, 90]","[144, 109, 228, 50]","[148, 129, 149, 117]","[33, 278, 37, 149]","[125, 225, 261, 96]","[53.0, 55.0, 104.0, 18.0]",18317,16322
5,5,"[122, 125, 174, 146]","[26, 158, 223, 71]","[38, 271, 107, 42]","[11, 63, 116, 136]","[67, 240, 204, 106]","[1.0, 42.0, 198.0, 18.0]",17500,18402
6,6,"[62, 90, 32, 105]","[55, 87, 85, 29]","[95, 71, 121, 93]","[126, 13, 238, 86]","[76, 227, 279, 96]","[39.0, 124.0, 79.0, 76.0]",20314,16354
7,7,"[105, 53, 89, 75]","[48, 44, 193, 25]","[93, 77, 284, 11]","[104, 151, 102, 143]","[74, 195, 180, 78]","[87.0, 98.0, 179.0, 0.0]",16883,18489
8,8,"[63, 224, 142, 125]","[120, 54, 118, 97]","[103, 276, 74, 91]","[67, 208, 183, 105]","[18, 292, 49, 63]","[67.0, 175.0, 96.0, 40.0]",18099,20227
9,9,"[12, 149, 229, 73]","[130, 90, 297, 20]","[105, 213, 213, 51]","[114, 271, 179, 128]","[64, 250, 286, 96]","[37.0, 188.0, 84.0, 79.0]",22736,20109
