In [1]:
import copy
import os
import random

import numpy as np
import pandas as pd

from util.util_load         import read_txt
from env_action.metaheu     import GeneticAlgorithm
from env_action.environment import FJSP_under_uncertainties_Env

# --- Experiment configuration -------------------------------------------------
# These three values are handed to FJSP_under_uncertainties_Env when the
# environment is built; their units are defined by that class (not visible here).
maxtime                = 3
maxtimeacceptnewjob    = 150
defectProb             = 0.03

# Value passed as `maxtime` to the GeneticAlgorithm preschedule run below.
preschedule_maxtime    = 1000

# Names of the selectable scheduling methods; the position in this list is the
# index used to look a method name up from an action.
action_list            = ["exact", "GA", "LFOH", "LAPH", "LAP_LFO", "CDR1", "CDR2", "CDR4"]

# --- Load the problem instance ------------------------------------------------
# Build the path with os.path.join instead of the literal "data\jobs.txt":
# "\j" is an invalid escape sequence (warns on recent Python versions) and a
# backslash separator only resolves on Windows.
(J, I, K, p_ijk, h_ijk,
 d_j, n_j, MC_ji, n_MC_ji,
 OperationPool)        = read_txt(os.path.join("data", "jobs.txt"))

# Alternative location of the same file when running from Google Drive / Colab:
# (J, I, K, p_ijk, h_ijk,
#  d_j, n_j, MC_ji, n_MC_ji,
#  OperationPool)        = read_txt("/content/drive/My Drive/Thesis/data/jobs.txt")

# --- Initial state for the preschedule ----------------------------------------
S_k                    = np.zeros((K))          # one zero entry per k in range(K)
S_j                    = np.zeros((J))          # one zero entry per j in range(J)
n_ops_left_j           = copy.deepcopy(n_j)     # independent copy so n_j itself is not mutated
MB_info                = np.zeros((0))          # empty array

t                      = 0
JSet                   = list(range(J))
# OJSet[j] lists the operation indices 0 .. n_j[j]-1 of job j.
OJSet                  = [list(range(int(n_j[j]))) for j in JSet]

# --- Preschedule --------------------------------------------------------------
# Run the genetic algorithm once on the initial instance; its outputs are the
# starting schedule handed to the environment.
pre_GBest, X_ijk, S_ij, C_ij, C_j = GeneticAlgorithm(
    S_k, S_j, JSet, OJSet, J, I, K,
    p_ijk, h_ijk, d_j, n_j, n_ops_left_j,
    MC_ji, n_MC_ji, OperationPool,
    maxtime=preschedule_maxtime,
)

In [2]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env     import DummyVecEnv
from stable_baselines3.common.callbacks   import BaseCallback, EvalCallback
from stable_baselines3.common.monitor     import Monitor
from stable_baselines3.common.env_checker import check_env 

# Build the scheduling environment from the loaded instance data, the
# preschedule results and the experiment settings. All arguments are
# positional, so their order must match the constructor's signature.
env = FJSP_under_uncertainties_Env(
    J, I, K,
    X_ijk, S_ij, C_ij, C_j,
    p_ijk, d_j, n_j,
    MC_ji, n_MC_ji, h_ijk,
    OperationPool,
    maxtimeacceptnewjob, maxtime, defectProb,
    pre_GBest,
)

# check_env(env)
# obs = env.reset(seed=42)
# print("Observation:", obs)

# episodes = 10
# for episode in range(episodes):
# 	done = False
# 	obs = env.reset()
# 	while done == False:#not done:
# 		random_action = env.action_space.sample()
# 		obs, reward, done, truncated, info = env.step(random_action)
# 		print('reward', reward)


In [3]:
from stable_baselines3 import DQN
import os
import datetime
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback

# Timestamp shared by every artefact of this run, so that models, logs and
# exported tables from the same training session can be matched up.
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Per-run directories for saved models and TensorBoard logs, plus two shared
# directories that collect the text / Excel training summaries of all runs.
models_dir = f"models/DQN-{current_time}"
logdir = f"logs/DQN-{current_time}"
log_training_txt_dir = "log_training_txt"
log_training_excel_dir = "log_training_excel"

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
for output_dir in (models_dir, logdir, log_training_txt_dir, log_training_excel_dir):
    os.makedirs(output_dir, exist_ok=True)

# Generate unique file names based on current time
log_file          = os.path.join(log_training_txt_dir,   f"training_{current_time}.txt")
excel_file        = os.path.join(log_training_excel_dir, f"training_{current_time}.xlsx")
action_count_file = os.path.join(log_training_txt_dir,   f"action_count_{current_time}.txt")
action_excel_file = os.path.join(log_training_excel_dir, f"action_count_{current_time}.xlsx")

# Define the custom callback -------------------------------------------------------------
class CustomCallback(BaseCallback):
    """Training callback that records rewards, tardiness and action usage.

    While training runs it accumulates the reward of the first (index 0)
    environment, counts how often each action name is selected, and at the end
    of every rollout logs the accumulated reward together with the
    environment's ``all_Tard`` attribute. When training ends, the collected
    tables are written to Excel and plain-text files.

    Parameters
    ----------
    log_dir : str
        Stored on the instance as ``self.log_dir``; not otherwise used by this class.
    excel_file : str
        Destination of the per-rollout log table (Excel).
    txt_file : str
        Destination of the per-rollout log table (plain text).
    action_count_file : str
        Destination of the action-count table (plain text).
    action_excel_file : str
        Destination of the action-count table (Excel).
    verbose : int
        Verbosity level forwarded to ``BaseCallback``.
    action_names : sequence of str, optional
        Names indexed by action id. When omitted, the notebook-level
        ``action_list`` is used, which is the original behaviour.
    """

    def __init__(self, log_dir, excel_file, txt_file, action_count_file, action_excel_file,
                 verbose=0, action_names=None):
        super().__init__(verbose)
        self.log_dir = log_dir
        self.excel_file = excel_file
        self.txt_file = txt_file
        self.action_count_file = action_count_file
        self.action_excel_file = action_excel_file
        self.action_names = action_names
        self.logs = []              # one dict per finished rollout
        self.episode_rewards = []   # accumulated reward, one entry per rollout
        self.action_counts = {}     # action name -> number of times selected
        self.episode_start = True   # True until the first step of a new rollout is seen

    def _get_action_names(self):
        """Return the explicit ``action_names`` or fall back to the global ``action_list``."""
        if self.action_names is not None:
            return self.action_names
        return action_list

    def _on_training_start(self) -> None:
        """Reset the per-action counters to zero."""
        self.action_counts = {action: 0 for action in self._get_action_names()}

    def _on_step(self) -> bool:
        """Accumulate the step reward and count the chosen action.

        Only index 0 of ``rewards`` / ``actions`` is read, so with a vectorized
        environment only the first sub-environment is tracked.

        Returns
        -------
        bool
            Always ``True``, so training is never interrupted by this callback.
        """
        if self.episode_start:
            # First step after a rollout boundary: open a new accumulator.
            self.episode_rewards.append(0)
            self.episode_start = False

        # Record reward for the current step
        reward = self.locals['rewards'][0]
        self.episode_rewards[-1] += reward

        # Increment action count
        action = self.locals.get('actions', None)
        if action is not None:
            action_name = self._get_action_names()[action[0]]
            self.action_counts[action_name] += 1

        return True

    def _on_rollout_end(self) -> None:
        """Log the accumulated reward and tardiness for the rollout that just ended.

        This hook fires once per rollout, not once per episode. The two
        coincide only when the model collects exactly one episode per rollout,
        as with ``train_freq=(1, "episode")``.
        """
        sum_reward   = self.episode_rewards[-1] if self.episode_rewards else 0
        # NOTE(review): vectorized envs auto-reset a finished episode before
        # this hook runs — confirm that `all_Tard` still holds the finished
        # episode's value after the environment's reset().
        tardiness    = self.training_env.get_attr('unwrapped')[0].all_Tard

        self.logger.record('train/episode_reward',   sum_reward)
        self.logger.record('train/actual_tardiness', tardiness)

        self.logs.append({
            'episode': len(self.episode_rewards),
            'sum_reward': sum_reward,
            'tardiness': tardiness
        })
        # The next step belongs to a new rollout and needs a fresh accumulator.
        self.episode_start = True

    def _on_training_end(self) -> None:
        """Write the rollout log and the action counts to Excel and text files."""
        # Save logs to Excel
        df = pd.DataFrame(self.logs)
        df.to_excel(self.excel_file, index=False)

        action_df = pd.DataFrame(list(self.action_counts.items()), columns=['Action', 'Count'])
        action_df.to_excel(self.action_excel_file, index=False)

        # Save logs to text file
        with open(self.txt_file, 'w') as f:
            f.write(df.to_string(index=False))
        with open(self.action_count_file, 'w') as f:
            f.write(action_df.to_string(index=False))


# Hyper-parameters of the DQN agent, gathered in one place so they can be
# reviewed and tuned together.
dqn_hyperparams = dict(
    verbose=1,
    tensorboard_log=logdir,
    train_freq=(1, "episode"),
    exploration_initial_eps=1.0,    # Initial value of the exploration rate
    exploration_final_eps=0.02,     # Final value of the exploration rate after annealing
    exploration_fraction=0.4,       # Fraction of the training period over which the exploration rate is annealed
    target_update_interval=2000,
    learning_rate=0.0002,
)

# Build the agent on the scheduling environment.
model = DQN("MlpPolicy", env, **dqn_hyperparams)

# Callback that collects rewards / tardiness / action counts and exports them
# to the text and Excel files named above.
callback = CustomCallback(
    log_dir=logdir,
    excel_file=excel_file,
    txt_file=log_file,
    action_count_file=action_count_file,
    action_excel_file=action_excel_file,
    verbose=1,
)

# Train for a fixed budget of environment steps.
TIMESTEPS = 40_000
model.learn(
    total_timesteps=TIMESTEPS,
    reset_num_timesteps=False,
    tb_log_name="DQN",
    callback=callback,
)

# Persist the trained agent inside this run's model directory.
model_path = os.path.join(models_dir, "DQN_.zip")
model.save(model_path)

# tensorboard --logdir=logs/


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs/DQN-2024-06-11_07-57-15\DQN_0
-------------------------------------------------
Method selection:                    exact
-------------------------------------------------
Method selection:                    GA
-------------------------------------------------
Method selection:                    LAPH
-------------------------------------------------
Method selection:                    LAP_LFO
-------------------------------------------------
Method selection:                    LAPH
-------------------------------------------------
Method selection:                    exact
-------------------------------------------------
Method selection:                    CDR1
-------------------------------------------------
Method selection:                    LAPH
-------------------------------------------------
Method selection:                    GA
-------------------------------