In [10]:
import numpy as np
import pandas as pd
import sys
import pickle
from env_action.environment import FJSP_under_uncertainties_Env

# --- Problem / environment configuration -------------------------------------
# Most of these values are handed positionally to FJSP_under_uncertainties_Env
# when the training and validation environments are built, so the names must
# not change.
directory = 'SMALL'
planning_horizon = 480 * 60  # presumably 480 minutes expressed in seconds -- TODO confirm
critical_machines = {
    5, 6, 7, 8, 9, 10, 11, 12, 13,
    21, 22, 26, 27,
}
ReworkProbability = 0.03
maxtime = 10
PopSize = 80
K = 30
maxJob = 1320
maxOpe = 5760

# 'Distribution' sheet of the master workbook, loaded as a DataFrame.
WeibullDistribution = pd.read_excel('DATA/DataMaster.xlsx', sheet_name='Distribution')

# Default setting
reward_ratio            = 0.99
learning_rate           = 1e-4
explore_fraction_phase1 = 0.8
explore_fraction_phase2 = 0.5
explore_fraction_phase3 = 0.3
num_timestep_phase1     = 2000
num_timestep_phase2     = 6000
num_timestep_phase3     = 12000


# Experiment label. It selects a sensitivity-analysis (SA) preset below and is
# also embedded in the names of the output folders/files created later on.
purpose = 'testing training freq'

# SA presets: experiment label -> the hyper-parameters that differ from the
# defaults above. Labels are stored WITHOUT surrounding whitespace. The two
# learning-rate labels used to be compared against literals ending in a space
# ("SA learning rate 1e-3 "), so they never matched a normally typed label.
SA_OVERRIDES = {
    #------- EXPLORATION FRACTION------------
    "SA low exploration fraction": {
        "explore_fraction_phase1": 0.7,
        "explore_fraction_phase2": 0.4,
        "explore_fraction_phase3": 0.2,
    },
    "SA high exploration fraction": {
        "explore_fraction_phase1": 0.9,
        "explore_fraction_phase2": 0.6,
        "explore_fraction_phase3": 0.4,
    },
    #---------- REWARD RATIO-------------
    "SA reward 0.90": {"reward_ratio": 0.90},
    "SA reward 0.80": {"reward_ratio": 0.80},
    #---------- CURRICULUM -------------
    "SA learning distribution smaller": {
        "num_timestep_phase1": 1000,
        "num_timestep_phase2": 2000,
        "num_timestep_phase3": 5000,
    },
    # The label spelling ("ealier") is kept as-is so existing runs still match.
    "SA learning distribution more in ealier": {
        "num_timestep_phase1": 10000,
        "num_timestep_phase2": 7000,
        "num_timestep_phase3": 6000,
    },
    #-----------LEARNING RATE ---------
    "SA learning rate 1e-3": {"learning_rate": 1e-3},
    "SA learning rate 1e-5": {"learning_rate": 1e-5},
}


def sa_overrides_for(label):
    """Return the hyper-parameter overrides registered for an experiment label.

    Args:
        label: Experiment label; leading/trailing whitespace is ignored, so
            both "SA learning rate 1e-3" and the historical spelling with a
            trailing space select the same preset.

    Returns:
        A new dict mapping hyper-parameter name to its override value. The dict
        is empty when the label is not an SA preset, in which case the defaults
        stay in force.
    """
    return dict(SA_OVERRIDES.get(label.strip(), {}))


# Apply the preset (if any) on top of the defaults.
_overrides = sa_overrides_for(purpose)
reward_ratio            = _overrides.get("reward_ratio",            reward_ratio)
learning_rate           = _overrides.get("learning_rate",           learning_rate)
explore_fraction_phase1 = _overrides.get("explore_fraction_phase1", explore_fraction_phase1)
explore_fraction_phase2 = _overrides.get("explore_fraction_phase2", explore_fraction_phase2)
explore_fraction_phase3 = _overrides.get("explore_fraction_phase3", explore_fraction_phase3)
num_timestep_phase1     = _overrides.get("num_timestep_phase1",     num_timestep_phase1)
num_timestep_phase2     = _overrides.get("num_timestep_phase2",     num_timestep_phase2)
num_timestep_phase3     = _overrides.get("num_timestep_phase3",     num_timestep_phase3)

In [11]:
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env     import DummyVecEnv
from stable_baselines3.common.callbacks   import BaseCallback, EvalCallback
from stable_baselines3.common.monitor     import Monitor
from stable_baselines3.common.env_checker import check_env 

# Load the pickled training instances and scenarios.
with open('DATA/SMALL/pickle_instances_480.pkl', 'rb') as instances_file:
    instances = pickle.load(instances_file)
with open('DATA/SMALL/pickle_scenarios_480.pkl', 'rb') as scenarios_file:
    scenarios = pickle.load(scenarios_file)

# Everything after the two leading booleans is identical for the three
# environments, so the shared positional arguments are spelled out once.
_shared_env_args = (
    instances, scenarios, K, WeibullDistribution, critical_machines,
    ReworkProbability, planning_horizon, PopSize, maxtime, maxJob, maxOpe, reward_ratio,
)

# One environment per training phase. The meaning of the two leading flags is
# defined by FJSP_under_uncertainties_Env and is not visible in this notebook.
env1 = FJSP_under_uncertainties_Env(True,  True,  *_shared_env_args)
env2 = FJSP_under_uncertainties_Env(True,  False, *_shared_env_args)
env3 = FJSP_under_uncertainties_Env(False, False, *_shared_env_args)

# check_env(env1)
# obs = env1.reset(seed=42)
# print("Observation:", obs)

# episodes = 2
# for episode in range(episodes):
# 	done = False
# 	obs = env1.reset()
# 	while done == False:#not done:
# 		random_action = env1.action_space.sample()
# 		obs, reward, done, truncated, info = env1.step(random_action)
# 		print('reward', reward)


In [12]:
from stable_baselines3 import DQN
import os
import datetime
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback

# Human-readable names of the discrete actions. The training callback maps an
# action index to its name with action_list[index], so the order matters.
action_list            = ["GA", "TS", 
                          "LFOH", "LAPH", "LAP_LFO", 
                          "LFOH_TS", "LAPH_TS", "LFOH_GA", "LAPH_GA",
                          "CDR1", "CDR2", "CDR3", "CDR5", "CDR6",
                          "RCRS"]

# Create directories for models and logs
current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")


models_dir = f"models/{purpose}_{current_time}"
logdir = f"logs/{purpose}_{current_time}"
log_training_txt_dir = "log_training_txt"
log_training_excel_dir = "log_training_excel"

# exist_ok=True replaces the "check, then create" pairs: it is idempotent on
# re-runs and has no window between the existence test and the creation.
for _output_dir in (models_dir, logdir, log_training_txt_dir, log_training_excel_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Generate unique file names based on current time
log_file          = os.path.join(log_training_txt_dir,   f"training_{purpose}_{current_time}.txt")
excel_file        = os.path.join(log_training_excel_dir, f"training_{purpose}_{current_time}.xlsx")
action_count_file = os.path.join(log_training_txt_dir,   f"action_count_{purpose}_{current_time}.txt")
action_excel_file = os.path.join(log_training_excel_dir, f"action_count_{purpose}_{current_time}.xlsx")

# Define the custom callback -------------------------------------------------------------
class CustomCallback(BaseCallback):
    """Training callback that logs per-episode reward/tardiness and action usage.

    While training runs it accumulates the reward of the first (index 0)
    environment step by step, counts how often each entry of the module-level
    ``action_list`` is selected, and appends one row to ``self.logs`` every time
    an episode finishes. When training ends, the episode log and the action
    counts are written to the Excel and text files given at construction time.

    Args:
        log_dir: Stored on the instance; not used by the callback itself.
        excel_file: Path of the Excel file receiving the episode log.
        txt_file: Path of the text file receiving the episode log.
        action_count_file: Path of the text file receiving the action counts.
        action_excel_file: Path of the Excel file receiving the action counts.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, log_dir, excel_file, txt_file, action_count_file, action_excel_file, verbose=0):
        super().__init__(verbose)
        self.log_dir = log_dir
        self.excel_file = excel_file
        self.txt_file = txt_file
        self.action_count_file = action_count_file
        self.action_excel_file = action_excel_file
        self.logs = []              # one dict per finished episode
        self.episode_rewards = []   # running reward sum, one entry per episode
        self.action_counts = {}     # action name -> number of times selected
        self.episode_start = True   # True until the first step of a new episode

    def _on_training_start(self) -> None:
        """Reset the per-action counters at the start of every ``learn()`` call."""
        # NOTE(review): the counters restart on each learn() call, whereas
        # self.logs keeps growing. If one callback instance is reused for
        # several training phases, the action-count files only cover the last
        # phase -- confirm this is intended.
        self.action_counts = {action: 0 for action in action_list}

    def _on_step(self) -> bool:
        """Accumulate reward and action usage; close the episode when it ends.

        Returns:
            Always ``True`` so that training continues.
        """
        if self.episode_start:
            self.episode_rewards.append(0)
            self.episode_start = False

        # Record reward for the current step
        reward = self.locals['rewards'][0]
        self.episode_rewards[-1] += reward

        # Increment action count
        action = self.locals.get('actions', None)
        if action is not None:
            action_name = action_list[action[0]]
            self.action_counts[action_name] += 1

        # Episode boundaries are taken from the `dones` flag of the step that
        # just happened. The rollout-end hook is not suitable for this: for
        # off-policy algorithms it fires once per rollout, so with
        # train_freq=(3, "episode") it would merge three episodes into one row.
        dones = self.locals.get('dones', None)
        if dones is not None and dones[0]:
            self._record_episode()

        return True

    def _on_rollout_end(self) -> None:
        """Intentionally a no-op: episodes are recorded in ``_on_step``."""

    def _record_episode(self) -> None:
        """Append the finished episode to the log and arm the next episode."""
        sum_reward   = self.episode_rewards[-1] if self.episode_rewards else 0
        # NOTE(review): a vectorised env resets a finished sub-environment
        # inside step(), so all_Tard is read after that reset -- the same
        # timing the rollout-end hook had. Confirm reset() does not clear it.
        tardiness    = self.training_env.get_attr('unwrapped')[0].all_Tard

        self.logger.record('train/episode_reward',   sum_reward)
        self.logger.record('train/actual_tardiness', tardiness)

        self.logs.append({
            'episode': len(self.episode_rewards),
            'sum_reward': sum_reward,
            'tardiness': tardiness
        })

        self.episode_start = True

    def _on_training_end(self) -> None:
        """Write the episode log and the action counts to Excel and text files."""
        # Save logs to Excel
        df = pd.DataFrame(self.logs)
        df.to_excel(self.excel_file, index=False)

        action_df = pd.DataFrame(list(self.action_counts.items()), columns=['Action', 'Count'])
        action_df.to_excel(self.action_excel_file, index=False)

        # Save logs to text file
        with open(self.txt_file, 'w') as f:
            f.write(df.to_string(index=False))
        with open(self.action_count_file, 'w') as f:
            f.write(action_df.to_string(index=False))

# Create the callback
callback = CustomCallback(log_dir=logdir, 
                          excel_file=excel_file,
                          txt_file=log_file,
                          action_count_file=action_count_file,
                          action_excel_file=action_excel_file,
                          verbose=1)

# Initialize the DQN model
model_path = os.path.join(models_dir, "DQN_.zip")
model_phase1 = DQN("MlpPolicy", 
                    env1, 
                    verbose=1, 
                    tensorboard_log=logdir, 
                    # Pass the configured learning rate explicitly. It was
                    # defined in the configuration cell but never handed to
                    # DQN, so the learning-rate presets had no effect. The
                    # default (1e-4) equals the library default, so default
                    # runs are unchanged.
                    learning_rate=learning_rate,
                    train_freq=(3,"episode"),
                    target_update_interval  = 2000,
                    exploration_initial_eps = 0.80, 
                    exploration_final_eps   = 0.30, 
                    exploration_fraction    = explore_fraction_phase1, 
                    )

# Phase 1 training
model_phase1.learn(total_timesteps=num_timestep_phase1, 
                    reset_num_timesteps=True, 
                    tb_log_name="DQN",
                    log_interval=3,
                    callback=callback
                    )
model_phase1.save(model_path)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
urgent
Logging to logs/testing training freq_2024-07-14_07-22-29/DQN_1


Method selection:                    CDR3
Method selection:                    CDR1
urgent
Method selection:                    LAPH-GA
Method selection:                    LFOH
urgent
Method selection:                    CDR5
urgent
Method selection:                    LFOH-GA
urgent
Method selection:                    GA
urgent
Method selection:                    GA
urgent
Method selection:                    RCRS
urgent
Method selection:                    LAPH-GA
urgent
Method selection:                    LAPH-GA
long
urgent
Method selection:                    CDR2
long
urgent
Method selection:                    CDR2
long
urgent
Method selection:                    LFOH-TS
long
urgent
Method selection:                    CDR3
urgent
Method selection:                    LFOH
urgent
Method selection:                    CDR5
urgent
Method selection:                    RCRS
urgent
Method selection:                    CDR1
urgent
Method selection:                    CDR6
urgent
Met

In [14]:
# Phase 2 training
model_phase2 = DQN.load(model_path, 
                        env=env2, 
                        verbose=1, 
                        tensorboard_log=logdir, 
                        train_freq=(1, "episode"),
                        target_update_interval=2000
                        )

# Epsilon schedule for this phase: anneal 0.5 -> 0.1 over the first
# `explore_fraction_phase2` share of the phase, then hold at 0.1.
#
# The model's `exploration_schedule` is a callable of `progress_remaining`.
# Assigning `.initial_p` / `.final_p` / `.schedule_timesteps` on it only sets
# attributes the callable never reads, so the schedule has to be replaced.
# Because learn() runs with reset_num_timesteps=False, `progress_remaining` is
# measured against (timesteps already trained + timesteps of this phase); the
# schedule below converts it back to "steps into this phase".
_phase2_eps_start    = 0.5
_phase2_eps_end      = 0.1
_phase2_first_step   = model_phase2.num_timesteps
_phase2_total_steps  = _phase2_first_step + num_timestep_phase2
_phase2_anneal_steps = explore_fraction_phase2 * num_timestep_phase2


def _phase2_exploration_schedule(progress_remaining):
    """Return epsilon for phase 2 given the fraction of training still remaining."""
    if _phase2_anneal_steps <= 0:
        return _phase2_eps_end
    steps_into_phase = (1.0 - progress_remaining) * _phase2_total_steps - _phase2_first_step
    fraction_done = min(max(steps_into_phase / _phase2_anneal_steps, 0.0), 1.0)
    return _phase2_eps_start + fraction_done * (_phase2_eps_end - _phase2_eps_start)


model_phase2.exploration_schedule = _phase2_exploration_schedule


model_phase2.learn(total_timesteps=num_timestep_phase2, 
                   tb_log_name="DQN_phase2",
                   log_interval=3,
                   reset_num_timesteps=False,
                   callback=callback)
model_phase2.save(model_path)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs/testing training freq_2024-07-14_07-22-29/DQN_phase2_0


Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAP_LFO
Method selection:                    LAPH-GA
long
Method selection:                    LAPH-GA
Method selection:                    GA
Method selection:                    GA
Method selection:                    GA
long
Method selection:                    CDR3
long
Method selection:                    CDR3
long
Method selection:                    RCRS
long
Method selection:                    CDR3
long
Method selection:                    CDR3
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
long
Method selection:                    GA
Method selection:                    CDR2
long
Method selection:                    CDR3
long
Method selection:                    CDR6
long
Method selection:                    CDR5
Method selection:                    CDR3
Method sel

In [17]:
# Phase 3 training
from stable_baselines3 import DQN
model_phase3 = DQN.load(model_path, 
                        env=env3, 
                        verbose=1, 
                        tensorboard_log=logdir, 
                        train_freq=(1, "episode"),
                        target_update_interval=2000
                        )

# Epsilon schedule for this phase: anneal 0.20 -> 0.05 over the first
# `explore_fraction_phase3` share of the phase, then hold at 0.05.
#
# The model's `exploration_schedule` is a callable of `progress_remaining`.
# Assigning `.initial_p` / `.final_p` / `.schedule_timesteps` on it only sets
# attributes the callable never reads, so the schedule has to be replaced.
# Because learn() runs with reset_num_timesteps=False, `progress_remaining` is
# measured against (timesteps already trained + timesteps of this phase); the
# schedule below converts it back to "steps into this phase".
#
# The phase length now comes from `num_timestep_phase3` instead of a hard-coded
# 10000, so the curriculum presets that change it take effect.
# NOTE(review): this makes a default run train 12000 steps here, not 10000.
_phase3_eps_start    = 0.20
_phase3_eps_end      = 0.05
_phase3_first_step   = model_phase3.num_timesteps
_phase3_total_steps  = _phase3_first_step + num_timestep_phase3
_phase3_anneal_steps = explore_fraction_phase3 * num_timestep_phase3


def _phase3_exploration_schedule(progress_remaining):
    """Return epsilon for phase 3 given the fraction of training still remaining."""
    if _phase3_anneal_steps <= 0:
        return _phase3_eps_end
    steps_into_phase = (1.0 - progress_remaining) * _phase3_total_steps - _phase3_first_step
    fraction_done = min(max(steps_into_phase / _phase3_anneal_steps, 0.0), 1.0)
    return _phase3_eps_start + fraction_done * (_phase3_eps_end - _phase3_eps_start)


model_phase3.exploration_schedule = _phase3_exploration_schedule

model_phase3.learn(total_timesteps=num_timestep_phase3, 
                   tb_log_name="DQN_phase3",
                   log_interval=3,
                   reset_num_timesteps=False,
                   callback=callback)
model_phase3.save(model_path)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs/testing training freq_2024-07-14_07-22-29/DQN_phase3_0


Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
long
Method selection:                    RCRS
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LFOH
Method selection:                    LAPH-GA
Method selection:                    CDR6
long
Method selection:                    CDR6
long
Method selection:                    CDR6
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
Method selection:                    LAPH-GA
long
Method selection:                    GA
Method selection:                    GA
Method selection:                    GA
urgent
Method selection:                    CDR6
long
urgent
Method selection:                    CDR6
urgent
Method selection:                    CDR6
urg

In [None]:
# Load the validation instances and scenarios.
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
with open('VALIDATION/SMALL/pickle_valid_instances_480.pkl', 'rb') as f:
    valid_instances = pickle.load(f)
with open('VALIDATION/SMALL/pickle_valid_scenarios_480.pkl', 'rb') as f:
    valid_scenarios = pickle.load(f)

results = []
method = 'predictive-reactive DQN'
InstanceList = [f'valid{i+1}' for i in range(12)]
ScenarioList = ['A', 'B', 'C']

valenv = FJSP_under_uncertainties_Env(False, False, valid_instances, valid_scenarios, K, WeibullDistribution, critical_machines, 
                                      ReworkProbability, planning_horizon, PopSize, maxtime, maxJob, maxOpe, reward_ratio)


model = DQN.load(model_path, env=valenv)

# Evaluate the trained policy once on every (instance, scenario) pair.
for instance_id in InstanceList:
    print("-----------", instance_id)
    for scenario_id in ScenarioList:
        print("-----", scenario_id)
        # Reset the environment with the new dataset and keep the observation
        # of THIS reset. A second, argument-less reset() used to follow and
        # supplied the observation instead.
        # NOTE(review): presumably that bare reset() re-initialised the env
        # without the test selection; confirm against the env's reset().
        obs, info = valenv.reset(test=True, 
                                 datatest=instance_id, 
                                 scenariotest=scenario_id)
        done = False
        
        while not done:
            # deterministic=True: evaluate the greedy policy. The default
            # (deterministic=False) still takes random exploratory actions.
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = valenv.step(action)
            # Stop on truncation as well as termination so the loop cannot
            # run past a truncated episode.
            done = terminated or truncated
        
        tardiness = valenv.calc_tardiness()
    
        results.append({
                        'Method'    : method,
                        'InstanceID': instance_id,
                        'ScenarioID': scenario_id,
                        'Tardiness' : tardiness
                        })

df = pd.DataFrame(results)
file_name = f"VALIDATION/{purpose}.xlsx"
df.to_excel(file_name, index=False)