## Dual-armed cluster tool with skip flow wafers 



* The stage configuration is [1,1,1,1,1]
    * Type 1 wafers are processed in the order PM1 -> PM2 -> PM3 -> PM4 
    * Type 2 wafers are processed in the order PM3 -> PM4 -> PM5. 
    * Both wafer types share PM3 and PM4, where they undergo the same process (identical process times).
* Process times are sampled from the range 10–300 s.
* The baseline robot move sequence is the alternating swap sequence (ASS).
* The trained model checkpoint is loaded from ./saved_models/checkpoint_v2.pt
    * A second trained model, for the flows Type 1: PM1 -> PM3 -> PM5 and Type 2: PM2 -> PM3 -> PM4, is available at ./saved_models/checkpoint_v1.pt

In [1]:
import copy
import logging
import argparse
import torch
import random
import numpy as np
import pandas as pd

from envs.dscfEnv import dscfEnv as Env, State
from model.model_concat import CONCATNet as CONCATModel
from envs.algorithms.ass import AlternatingSwapSequence

# Global configurations
DEBUG_MODE = True  # debug switch; USE_CUDA below is derived from it
USE_CUDA = not DEBUG_MODE  # True only when DEBUG_MODE is off
CUDA_DEVICE_NUM = 0  # CUDA device index, forwarded by setup_tester_params()
SEED = 1000  # default seed used by set_seed()

def set_seed(seed=SEED):
    """Seed every random number generator used here so runs are repeatable.

    Args:
        seed: Integer seed applied, in order, to Python's ``random``, NumPy,
            PyTorch (CPU) and PyTorch (all CUDA devices). Defaults to the
            module-level ``SEED``.
    """
    # Request deterministic cuDNN kernels before any seeding happens.
    torch.backends.cudnn.deterministic = True
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def parse_arguments():
    """Build the experiment option namespace from the declared defaults.

    The parser is always fed an empty argument list, so the real command line
    (e.g. the Jupyter kernel's own argv) is ignored and every option takes
    its default value.

    Returns:
        argparse.Namespace: one attribute per option declared below.
    """
    parser = argparse.ArgumentParser(description='Train a model with specific lot types')

    # (flag, add_argument keyword arguments), kept in declaration order.
    option_table = [
        ('--foup_size', dict(type=int, default=500)),
        ('--stage', dict(type=int, nargs='+', default=[1,1,1,1,1])),
        ('--group1_stage', dict(type=int, default=2)),
        ('--group1_min_prs_time', dict(type=int, default=10)),
        ('--group1_max_prs_time', dict(type=int, default=300)),
        ('--group2_stage', dict(type=int, default=3)),
        ('--group2_min_prs_time', dict(type=int, default=10)),
        ('--group2_max_prs_time', dict(type=int, default=300)),
        ('--prod_quantity', dict(type=int, default=10)),
        ('--done_quantity', dict(type=int, default=100)),
        ('--num_lot_type', dict(type=int, default=2)),
        ('--shared_min_prs_time', dict(type=int, default=10)),
        ('--shared_max_prs_time', dict(type=int, default=300)),
        ('--model_type', dict(type=str, default='concat')),
        ('--input_action', dict(type=str, default='wafer')),
        ('--model_epoch', dict(type=int, default=15)),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    # Empty argv on purpose: only the defaults above are ever used.
    return parser.parse_args([])

def get_stage_list():
    """Return the catalogue of per-wafer-type stage masks.

    Each mask has one 0/1 entry per stage (PM1..PM5); a 1 marks a stage the
    wafer type visits. ``--group1_stage`` / ``--group2_stage`` are indices
    into this list (defaults 2 and 3).

    Returns:
        list[list[int]]: the four masks, in index order.
    """
    pm1_pm3_pm5 = [1,0,1,0,1]
    pm2_pm3_pm4 = [0,1,1,1,0]
    pm1_to_pm4 = [1,1,1,1,0]
    pm3_to_pm5 = [0,0,1,1,1]
    return [pm1_pm3_pm5, pm2_pm3_pm4, pm1_to_pm4, pm3_to_pm5]

def setup_tester_params(args):
    """Initialize tester parameters.

    Args:
        args: Parsed option namespace. Currently unused; kept so the setup_*
            helpers share a uniform signature.

    Returns:
        dict: tester configuration with the keys ``use_cuda``,
        ``cuda_device_num``, ``model_load`` (``enable`` / ``path``),
        ``multi_run_size``, ``problem_count`` and ``test_batch_size``.
    """
    return {
        # Follow the notebook-level USE_CUDA switch instead of a hard-coded
        # False, but never request a GPU that is not actually available.
        'use_cuda': USE_CUDA and torch.cuda.is_available(),
        'cuda_device_num': CUDA_DEVICE_NUM,
        'model_load': {
            'enable': True,
            # Directory that holds the checkpoint file.
            'path': './saved_models/',
        },
        'multi_run_size': 1,
        'problem_count': 100,
        # Number of environment instances evaluated by Tester.run().
        'test_batch_size': 100,
    }

def setup_env_params(args, stage_list):
    """Assemble the keyword arguments used to construct the environment.

    Args:
        args: Option namespace providing one attribute per key listed below.
        stage_list: Sequence of stage masks; ``args.group1_stage`` and
            ``args.group2_stage`` are indices into it.

    Returns:
        dict: every option copied through unchanged, except the two
        ``group*_stage`` entries, which hold the mask looked up in
        ``stage_list``.
    """
    ordered_keys = (
        'foup_size',
        'stage',
        'group1_stage',
        'group1_min_prs_time',
        'group1_max_prs_time',
        'group2_stage',
        'group2_min_prs_time',
        'group2_max_prs_time',
        'prod_quantity',
        'done_quantity',
        'num_lot_type',
        'shared_min_prs_time',
        'shared_max_prs_time',
    )
    # These options are indices that must be resolved to a stage mask.
    stage_index_keys = ('group1_stage', 'group2_stage')

    env_params = {}
    for key in ordered_keys:
        value = getattr(args, key)
        if key in stage_index_keys:
            value = stage_list[value]
        env_params[key] = value
    return env_params

def setup_model_params(args, env_params):
    """Assemble the keyword arguments used to construct the model.

    Args:
        args: Option namespace; ``model_type`` and ``input_action`` are read.
        env_params: Environment configuration; only ``'num_lot_type'`` is read.

    Returns:
        dict: model hyper-parameters. ``'normalize'`` is ``'instance'`` when
        there is more than one lot type and ``'batch'`` otherwise.
    """
    embedding_dim = 256
    qkv_dim = 16

    model_params = {
        'type': args.model_type,
        'input_action': args.input_action,
        'purge': False,
        'embedding_dim': embedding_dim,
        'sqrt_embedding_dim': embedding_dim**(1/2),
        'encoder_layer_num': 3,
        'qkv_dim': qkv_dim,
        'sqrt_qkv_dim': qkv_dim**(1/2),
        'head_num': 16,
        'logit_clipping': 10,
        'ff_hidden_dim': 512,
        'ms_hidden_dim': 16,
        'ms_layer1_init': (1/2)**(1/2),
        'ms_layer2_init': (1/16)**(1/2),
        'eval_type': 'argmax',
    }

    # Normalization mode depends on how many lot types the environment has.
    if env_params['num_lot_type'] > 1:
        model_params['normalize'] = 'instance'
    else:
        model_params['normalize'] = 'batch'
    return model_params

def main():
    """Run the whole evaluation pipeline.

    Seeds the random number generators, builds the environment, model and
    tester configurations, then runs the tester.

    Returns:
        The value returned by ``Tester.run()`` (the per-instance results
        DataFrame).
    """
    set_seed()
    options = parse_arguments()
    stage_masks = get_stage_list()

    env_config = setup_env_params(options, stage_masks)
    model_config = setup_model_params(options, env_config)
    tester_config = setup_tester_params(options)

    return Tester(env_config, model_config, tester_config).run()

class Tester:
    """Compare the trained RL policy with the alternating swap sequence (ASS).

    A batch of environment instances is created once; independent deep copies
    are then simulated to completion under each policy and the final clock of
    every instance is recorded as its makespan.
    """

    def __init__(self, env_params, model_params, tester_params):
        """Store the configuration and load the model.

        Args:
            env_params: Keyword arguments for ``Env`` (also passed to the model).
            model_params: Keyword arguments for the model. The dict is copied,
                so the caller's object is not modified.
            tester_params: Tester settings; ``use_cuda``, ``model_load`` and
                ``test_batch_size`` are read.
        """
        self.env_params = env_params
        self.tester_params = tester_params
        self.device = torch.device('cuda' if tester_params['use_cuda'] else 'cpu')
        # Copy before adding 'device' so the caller's dict is left untouched.
        self.model_params = {**model_params, 'device': self.device}
        self.result_folder = './experiments/test'
        self.model = self._load_model()

    def _load_model(self):
        """Build the model and, if enabled, restore weights from the checkpoint.

        Returns:
            The constructed ``CONCATModel`` instance.
        """
        model = CONCATModel(**self.env_params, **self.model_params)
        model_load = self.tester_params['model_load']
        if model_load['enable']:
            # Strip a trailing slash so the joined path has no '//' in it.
            checkpoint_path = f"{model_load['path'].rstrip('/')}/checkpoint_v2.pt"
            # NOTE(review): torch.load unpickles the file, so only trusted
            # checkpoints should be loaded. Consider weights_only=True once the
            # checkpoint contents are confirmed to be compatible with it.
            checkpoint = torch.load(checkpoint_path, map_location=self.device)
            model.load_state_dict(checkpoint['model_state_dict'])
            print(f'[Saved Model Loaded...] -> {checkpoint_path}')
        return model

    def _batch_states(self, states):
        """Stack per-environment states into one batched ``State``.

        Every dataclass field of ``State`` is stacked with ``torch.stack``,
        ``batch_idx`` is set to ``arange(batch_size)`` and ``to(device)`` is
        called on the result.
        """
        batched = State(**{field: torch.stack([getattr(single, field) for single in states])
                           for field in State.__dataclass_fields__})
        batched.batch_idx = torch.arange(batched.batch_size())
        # NOTE(review): the return value is ignored, as in the original code;
        # this assumes State.to() moves tensors in place -- confirm.
        batched.to(self.device)
        return batched

    def _run_ass_baseline(self, envs_ass):
        """Simulate every environment to completion under the ASS policy.

        Returns:
            list: final ``clock`` of each environment, in input order.
        """
        makespans = []
        for ass_env in envs_ass:
            # Build the policy from the environment it actually controls.
            # Previously the leaked loop variable `env` (the last environment
            # created) was passed for every instance.
            policy = AlternatingSwapSequence(ass_env)
            while not ass_env.done:
                ass_env.step(policy(ass_env))
            makespans.append(ass_env.clock)
        return makespans

    def _run_rl_policy(self, envs_rl, initial_states):
        """Simulate every environment to completion under the trained model.

        Returns:
            list: ``clock`` of each environment at the first step where it
            reported ``done``, in input order.
        """
        state = self._batch_states(initial_states)

        self.model.eval()
        self.model.to(self.device)

        not_finished = 1e10  # sentinel: makespan not recorded yet
        rl_makespans = [not_finished for _ in range(state.batch_size())]

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            self.model.encoding(state)
            while not state.done.all():
                action, _ = self.model(state)
                next_states = []
                for b, a in enumerate(action):
                    next_states.append(envs_rl[b].step(a.item()))
                    # Record the clock only the first time an instance finishes.
                    if envs_rl[b].done and rl_makespans[b] == not_finished:
                        rl_makespans[b] = copy.deepcopy(envs_rl[b].clock)
                state = self._batch_states(next_states)
        return rl_makespans

    def run(self):
        """Execute the testing process.

        Returns:
            pandas.DataFrame: one row per instance with the columns
            "InstanceID", "Type 1 Process Time", "Type 2 Process Time",
            "ASS Makespan" and "RL Makespan".
        """
        print("Running tester...")
        batch_size = self.tester_params['test_batch_size']

        # Create the problem instances once; each policy gets its own copy.
        envs = []
        initial_states = []
        for _ in range(batch_size):
            env = Env(**self.env_params)
            initial_states.append(env.reset())
            envs.append(env)

        envs_ass = copy.deepcopy(envs)
        envs_rl = copy.deepcopy(envs)

        ass_makespans = self._run_ass_baseline(envs_ass)
        rl_makespans = self._run_rl_policy(envs_rl, initial_states)

        # Collect results
        results = []
        for instance_id in range(batch_size):
            recipes = envs[instance_id].recipes
            # The first and last entries of each recipe's time vector are
            # dropped (presumably non-process steps -- confirm against Env).
            type1_time = [int(t) for t in recipes[0].time[1:-1]]
            type2_time = [int(t) for t in recipes[1].time[1:-1]]
            results.append([instance_id, type1_time, type2_time,
                            int(ass_makespans[instance_id]), int(rl_makespans[instance_id])])

        return pd.DataFrame(results, columns=["InstanceID", "Type 1 Process Time", "Type 2 Process Time", "ASS Makespan", "RL Makespan"])

In [2]:
# show results
# Remove every pandas display limit (row count, column count, total width,
# per-cell width) so the results table is rendered in full, then run the test.
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
)
df = main()

  checkpoint = torch.load(checkpoint_path, map_location=self.device)


[Saved Model Loaded...] -> ./experiments//checkpoint.pt
Running tester...
--------------------------------------------------
Average ASS: 21984.50, RL: 21580.59


In [3]:
# Display the per-instance results table returned by main().
df

Unnamed: 0,InstanceID,Type 1 Process Time,Type 2 Process Time,ASS Makespan,RL Makespan
0,0,"[60, 211, 133, 196, 0]","[0, 0, 133, 196, 76]",20506,20289
1,1,"[244, 103, 249, 121, 0]","[0, 0, 249, 121, 21]",25821,25411
2,2,"[139, 92, 290, 70, 0]","[0, 0, 290, 70, 159]",29845,29302
3,3,"[192, 258, 178, 16, 0]","[0, 0, 178, 16, 153]",18677,18577
4,4,"[159, 22, 159, 254, 0]","[0, 0, 159, 254, 187]",26296,26021
5,5,"[164, 163, 249, 202, 0]","[0, 0, 249, 202, 258]",25826,25536
6,6,"[32, 113, 291, 147, 0]","[0, 0, 291, 147, 48]",29923,29455
7,7,"[91, 89, 18, 20, 0]","[0, 0, 18, 20, 120]",6454,6584
8,8,"[254, 95, 45, 257, 0]","[0, 0, 45, 257, 41]",26478,26118
9,9,"[159, 119, 289, 265, 0]","[0, 0, 289, 265, 81]",29696,29338


In [6]:
# print average results
# One print call; sep="\n" puts each item on its own line.
print(
    "="*50,
    "Dual-armed cluster tool with skip flow wafers",
    f'Average makespan of ASS: {df["ASS Makespan"].mean():.2f}, RL: {df["RL Makespan"].mean():.2f}',
    "="*50,
    sep="\n",
)

Dual-armed cluster tool with skip flow wafers
Average makespan of ASS: 21984.50, RL: 21580.59
