In [None]:
import numpy as np
import pandas as pd
import copy
import time
from util.util_load         import read_txt, read_scenario
from env_action.metaheu     import GeneticAlgorithm, random_population
from env_action.environment import FJSP_under_uncertainties_Env
from env_action.data_indentifier import InstanceData, ScenarioData

# --- Experiment configuration -------------------------------------------------
# Name of the data set; matches the sub-folder used under DATA/ below.
directory = 'SMALL'
# NOTE(review): 480 * 60 looks like 480 minutes expressed in seconds - confirm units.
planning_horizon = 480 * 60
# Machine ids forwarded to read_scenario() and to the environments.
critical_machines = {
    5, 6, 7, 8, 9, 10, 11, 12, 13,
    21, 22, 26, 27,
}
ReworkProbability = 0.03
# Scenario ids: the fixed scenario followed by the letters A..H.
ScenarioList = ['fixed_scenario', *'ABCDEFGH']
# Case ids: the fixed instance followed by the integers 1..60.
CaseList = ['fixed_instance', *range(1, 61)]
# maxtime and PopSize are handed to the environments; PopSize also sizes the
# initial GA population.
maxtime = 2
PopSize = 500
# Filled by the loading loops: CaseID -> InstanceData, ScenarioID -> ScenarioData.
instances = dict()
scenarios = dict()

# Build one InstanceData per case: read the case file, run the genetic
# algorithm once on zero-initialised S_k / S_j arrays, and store the result
# in `instances` under its CaseID.
for CaseID in CaseList:
    # Use the configured data set folder instead of a second hard-coded copy.
    data_path = f"DATA/{directory}/Case{CaseID}.txt"

    J, I, K, p_ijk, h_ijk,   \
    d_j, n_j, MC_ji, n_MC_ji,\
    OperationPool          = read_txt(data_path)

    S_k                    = np.zeros((K))
    S_j                    = np.zeros((J))
    # Deep copy so the GA receives its own object, separate from n_j.
    n_ops_left_j           = copy.deepcopy(n_j)

    t                      = 0
    JSet                   = list(range(J))
    # OJSet[j] holds the operation indices 0 .. n_j[j]-1 of job j.
    OJSet                  = [list(range(int(n_j[j]))) for j in JSet]

    StartTime                     = time.time()
    population, chromosome_len    = random_population(OperationPool, PopSize)
    # NOTE(review): the GA is called with a literal maxtime=100, not the
    # notebook-level `maxtime` (2) that the environments receive - confirm
    # this difference is intended.
    GBest, X_ijk, S_ij, C_ij, C_j = GeneticAlgorithm(S_k, S_j, JSet, OJSet, J, I, K,
                                                    p_ijk, h_ijk, d_j, n_j, n_ops_left_j,
                                                    MC_ji, n_MC_ji, OperationPool,
                                                    PopSize, population, chromosome_len,
                                                    StartTime, maxtime=100)
    instances[CaseID]             = InstanceData(J, I, X_ijk, S_ij, C_ij, C_j, p_ijk, h_ijk,
                                                 d_j, n_j, MC_ji, n_MC_ji, OperationPool)
    
# Read every scenario file and store it in `scenarios` under its ScenarioID.
# NOTE(review): `K` is not set in this loop; it is whatever the last
# read_txt() call in the instance loop returned - confirm that is the
# machine count these scenario files expect.
for ScenarioID in ScenarioList:
    # Use the configured data set folder instead of a second hard-coded copy.
    scenario_path = f"DATA/{directory}/Scenario_fixed_instance_{ScenarioID}.txt"
    JA_event, MB_event     = read_scenario(scenario_path, K, critical_machines)
    scenarios[ScenarioID]  = ScenarioData(JA_event, MB_event)


In [None]:
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env     import DummyVecEnv
from stable_baselines3.common.callbacks   import BaseCallback, EvalCallback
from stable_baselines3.common.monitor     import Monitor
from stable_baselines3.common.env_checker import check_env 

# Distribution table read from the 'Distribution' sheet of DataMaster.xlsx.
WeibullDistribution = pd.read_excel('DataMaster.xlsx', sheet_name='Distribution')

# NOTE(review): this overrides the K left over from the data-loading cell -
# confirm that 30 agrees with the value read_txt() returned there.
K = 30

# Everything after the two leading flags is identical for all three environments.
_env_common_args = (
    instances, scenarios, K, WeibullDistribution, critical_machines,
    ReworkProbability, planning_horizon, PopSize, maxtime,
)

# The environments differ only in their first two positional flags; the
# meaning of those flags is defined by FJSP_under_uncertainties_Env (TODO: document here).
env1 = FJSP_under_uncertainties_Env(True, True, *_env_common_args)
env2 = FJSP_under_uncertainties_Env(True, False, *_env_common_args)
env3 = FJSP_under_uncertainties_Env(False, False, *_env_common_args)

# env = FJSP_under_uncertainties_Env(J, I, K, X_ijk, S_ij, C_ij, C_j, p_ijk, d_j, n_j, MC_ji, n_MC_ji, h_ijk, OperationPool, JA_event, MB_event, PopSize, maxtime)

# check_env(env)
# obs = env.reset(seed=42)
# print("Observation:", obs)

# episodes = 10
# for episode in range(episodes):
# 	done = False
# 	obs = env.reset()
# 	while done == False:#not done:
# 		random_action = env.action_space.sample()
# 		obs, reward, done, truncated, info = env.step(random_action)
# 		print('reward', reward)


In [None]:
from stable_baselines3 import DQN
import os
import datetime
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback

# Names of the discrete actions, in index order: the callback maps the
# integer action chosen by the agent to a name through this list.
action_list            = ["exact", "GA", "TS",
                          "LFOH", "LAPH", "LAP_LFO",
                          "LFOH_TS", "LAPH_TS", "LFOH_GA", "LAPH_GA",
                          "CDR1", "CDR2", "CDR3", "CDR4", "CDR5", "CDR6",
                          "RCRS"]

# Create directories for models and logs
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
models_dir = f"models/DQN-{current_time}"
logdir = f"logs/DQN-{current_time}"
log_training_txt_dir = "log_training_txt"
log_training_excel_dir = "log_training_excel"

# exist_ok=True makes creation idempotent and avoids the gap between an
# os.path.exists() check and the makedirs() call.
for _output_dir in (models_dir, logdir, log_training_txt_dir, log_training_excel_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Generate unique file names based on current time
log_file          = os.path.join(log_training_txt_dir,   f"training_{current_time}.txt")
excel_file        = os.path.join(log_training_excel_dir, f"training_{current_time}.xlsx")
action_count_file = os.path.join(log_training_txt_dir,   f"action_count_{current_time}.txt")
action_excel_file = os.path.join(log_training_excel_dir, f"action_count_{current_time}.xlsx")

# Define the custom callback -------------------------------------------------------------
class CustomCallback(BaseCallback):
    """Collect per-rollout reward/tardiness logs and per-action usage counts.

    During training the callback accumulates the reward of the first
    environment, counts how often each entry of the module-level
    ``action_list`` is chosen, and appends one log row at the end of every
    rollout. When training ends, both tables are written to Excel and text files.

    Args:
        log_dir: Stored on the instance; not used by the callback itself.
        excel_file: Path of the Excel file for the reward/tardiness log.
        txt_file: Path of the text file for the reward/tardiness log.
        action_count_file: Path of the text file for the action counts.
        action_excel_file: Path of the Excel file for the action counts.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, log_dir, excel_file, txt_file, action_count_file, action_excel_file, verbose=0):
        super().__init__(verbose)

        # Output locations.
        self.log_dir = log_dir
        self.excel_file = excel_file
        self.txt_file = txt_file
        self.action_count_file = action_count_file
        self.action_excel_file = action_excel_file

        # Accumulated state.
        self.logs = []               # one dict per finished rollout
        self.episode_rewards = []    # running reward sum, one entry per rollout
        self.action_counts = {}      # action name -> times chosen
        self.episode_start = True    # True until the first step of a rollout is seen

    def _on_training_start(self) -> None:
        """Reset the usage counter to zero for every name in ``action_list``."""
        self.action_counts = dict.fromkeys(action_list, 0)

    def _on_step(self) -> bool:
        """Accumulate the step reward and count the chosen action.

        Returns:
            Always ``True``, so training is never interrupted by this callback.
        """
        # Open a fresh accumulator on the first step after a rollout ended.
        if self.episode_start:
            self.episode_rewards.append(0)
            self.episode_start = False

        # Only the first environment of the vectorised env is tracked.
        self.episode_rewards[-1] += self.locals['rewards'][0]

        chosen = self.locals.get('actions', None)
        if chosen is not None:
            self.action_counts[action_list[chosen[0]]] += 1

        return True

    def _on_rollout_end(self) -> None:
        """Log the reward sum and tardiness of the rollout that just finished.

        The callback treats one rollout as one episode.
        """
        if self.episode_rewards:
            sum_reward = self.episode_rewards[-1]
        else:
            sum_reward = 0

        # all_Tard is read from the unwrapped first environment.
        base_env = self.training_env.get_attr('unwrapped')[0]
        tardiness = base_env.all_Tard

        self.logger.record('train/episode_reward',   sum_reward)
        self.logger.record('train/actual_tardiness', tardiness)

        row = {
            'episode': len(self.episode_rewards),
            'sum_reward': sum_reward,
            'tardiness': tardiness,
        }
        self.logs.append(row)

        # The next step starts a new reward accumulator.
        self.episode_start = True

    def _on_training_end(self) -> None:
        """Write the collected logs and action counts to Excel and text files."""
        # Excel outputs.
        log_table = pd.DataFrame(self.logs)
        log_table.to_excel(self.excel_file, index=False)

        count_rows = list(self.action_counts.items())
        count_table = pd.DataFrame(count_rows, columns=['Action', 'Count'])
        count_table.to_excel(self.action_excel_file, index=False)

        # Plain-text outputs.
        with open(self.txt_file, 'w') as handle:
            handle.write(log_table.to_string(index=False))
        with open(self.action_count_file, 'w') as handle:
            handle.write(count_table.to_string(index=False))

# Build the logging callback (arguments in signature order:
# log_dir, excel_file, txt_file, action_count_file, action_excel_file).
callback = CustomCallback(
    logdir,
    excel_file,
    log_file,
    action_count_file,
    action_excel_file,
    verbose=1,
)

# Phase 1: fresh DQN on env1, updating after every episode, with epsilon
# annealed from 1.00 to 0.20 over the first 80% of the run.
model_phase1 = DQN(
    "MlpPolicy",
    env1,
    train_freq=(1, "episode"),
    target_update_interval=2000,
    exploration_initial_eps=1.00,
    exploration_final_eps=0.20,
    exploration_fraction=0.80,
    tensorboard_log=logdir,
    verbose=1,
)

# Checkpoint location for this phase.
model_phase1_path = os.path.join(models_dir, "dqn_phase1")

# Train with the logging callback attached, then save the checkpoint.
model_phase1.learn(
    total_timesteps=12000,
    callback=callback,
    tb_log_name="DQN_phase1",
    reset_num_timesteps=False,
)
model_phase1.save(model_phase1_path)

# Phase 2 training: continue from the phase-1 checkpoint on env2.
# The exploration settings are passed to DQN.load() because the epsilon
# schedule is built while the model is loaded; assigning the attributes
# afterwards leaves that schedule untouched, so the phase-1 values would
# silently stay in effect.
model_phase2 = DQN.load(
    model_phase1_path,
    env=env2,
    exploration_initial_eps=0.50,
    exploration_final_eps=0.10,
    exploration_fraction=0.50,
)
model_phase2.learn(total_timesteps=15000, tb_log_name="DQN_phase2")
model_phase2_path = os.path.join(models_dir, "dqn_phase2")
model_phase2.save(model_phase2_path)

# Phase 3 training: continue from the phase-2 checkpoint on env3.
# The exploration settings are passed to DQN.load() because the epsilon
# schedule is built while the model is loaded; assigning the attributes
# afterwards leaves that schedule untouched, so the values stored in the
# checkpoint would silently stay in effect.
model_phase3 = DQN.load(
    model_phase2_path,
    env=env3,
    exploration_initial_eps=0.20,
    exploration_final_eps=0.02,
    exploration_fraction=0.3,
)
model_phase3.learn(total_timesteps=23000, tb_log_name="DQN_phase3")
model_phase3_path = os.path.join(models_dir, "dqn_phase3")
model_phase3.save(model_phase3_path)

# tensorboard --logdir=logs/
