In [1]:
import os
import torch
from rl.agent import DQNAgent
from gymjsp.jsspenv import HeuristicJsspEnv
from tianshou_ppo import tianshou_ppo_train
from ortools_scheduler import ORtools_scheduler
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd

In [2]:
instances = ["ft06", "la01", "la06", "la11", "la21", "la31", "la36", "orb01", "swv01", "swv06", "swv11", "yn1"]

num_episodes = 1000
memory_size = 100000
batch_size = 64
target_update = 100
noisy = False
plotting_inteval = 10

#### 随机环境

In [3]:
random_rate = 0.5
cv = 0.2
n = 10 

In [4]:
directory = f"figs_no_future_infomation/p{random_rate}cv{cv}num_episodes{num_episodes}_dqn"

if not os.path.exists(directory):
    os.makedirs(directory)

ret = {}

ortools_mean, ortools_std, policy_mean, policy_std = [], [], [], []
ortools_on_original, policy_on_original = [], []
optimal_mean, optimal_std = [], []
ortools_300s_optimal_rate = []
current_instances = []

for instance in instances:
    sols_directory = f"sols/{instance}/p{random_rate}cv{cv}"
    policy_file = f"policies/dqn_mlp/{instance}_num_episodes={num_episodes}_memory_size={memory_size}_target_update={target_update}_noisy={noisy}.pth"
    env = HeuristicJsspEnv(instance)
    agent = DQNAgent(env, memory_size, batch_size, target_update, noisy=noisy)
    agent.load_dqn(policy_file)
    makespan = agent.test()
    model = agent._get_dqn()
    #print(next(model.parameters()).device)

    #makespan, policy = tianshou_ppo_train(instance_name=instance, max_epoch=epochs)
    ret[instance] = makespan

    scheduler = ORtools_scheduler(instance)
    #scheduler.optimize()
    #obj_val = scheduler.obj_val
    scheduler.read_solution()           # 读取静态解
    obj_val = scheduler.compute_makespan()

    policy_vals, ortools_vals, optimal_vals, if_optimals = [], [], [], []

    for i in range(n):
        #times = scheduler.shifted_time_(random_rate=random_rate, cv=cv)
        #policy_val = scheduler.policy_makespan('ppo', policy, shifted_time=times)

        scheduler.load_time_mat(os.path.join(sols_directory, f"{i}.npy"))
        times = scheduler.times
        
        policy_val = scheduler.policy_makespan('dqn', model, shifted_time=times)
        ortools_val = scheduler.compute_makespan(shifted_time=times)        # 静态调度面对工时波动

        #if_optimal, optimal_val = scheduler.get_optimal_of_new_time_mat(times)

        policy_vals.append(policy_val)
        ortools_vals.append(ortools_val)
        #optimal_vals.append(optimal_val)
        #if_optimals.append(int(if_optimal))

    info_df = pd.read_csv(os.path.join(sols_directory, "info.csv")) ##############################
    optimal_vals, if_optimals = info_df['obj_val'].values.tolist(), info_df['optimal'].values.tolist()

    plt.plot(policy_vals, color='g', label='policy')
    plt.plot(ortools_vals, color='r', label='ortools_static')
    plt.plot(optimal_vals, color='blue', label='ortools_300s')
    policy_vals, ortools_vals, optimal_vals = np.array(policy_vals), np.array(ortools_vals), np.array(optimal_vals)

    plt.hlines(np.mean(ortools_vals), -2, n+2, linestyles='dotted', colors='r')
    plt.hlines(np.mean(policy_vals), -2, n+2, linestyles='dotted', colors='g')
    plt.hlines(np.mean(optimal_vals), -2, n+2, linestyles='dotted', colors='blue')
    scatter_x = np.where(if_optimals)
    scatter_y = np.array(optimal_vals)[scatter_x]
    plt.scatter(scatter_x, scatter_y, color='blue')



    ortools_mean.append(np.mean(ortools_vals))
    ortools_std.append(np.std(ortools_vals))
    policy_mean.append(np.mean(policy_vals))
    policy_std.append(np.std(policy_vals))
    optimal_mean.append(np.mean(optimal_vals))
    optimal_std.append(np.std(optimal_vals))
    ortools_on_original.append(obj_val)
    policy_on_original.append(makespan)
    ortools_300s_optimal_rate.append(np.mean(if_optimals))
    current_instances.append(instance)

    plt.xlabel('trial')
    plt.ylabel('makespan')
    plt.title(f"random_rate={random_rate},cv={cv},instance={instance}")
    plt.legend()
    plt.savefig(f"{directory}/policy_vs_ortools_{instance}.png")
    plt.clf()

    
df = pd.DataFrame()
# 将每个列添加到 DataFrame 中
df['instance'] = current_instances
df['ortools_mean'] = ortools_mean
df['policy_mean'] = policy_mean
df['optimal_mean'] = optimal_mean
df['ortools_std'] = ortools_std
df['policy_std'] = policy_std
df['optimal_std'] = optimal_std
df['ortools_on_original'] = ortools_on_original
df['policy_on_original'] = policy_on_original
df['ortools_300s_optimal_rate'] = ortools_300s_optimal_rate

if os.path.exists(f"{directory}/data.csv"):
    df2 = pd.read_csv(f"{directory}/data.csv")
    df2 = df2.append(df, ignore_index=True)
    df2.to_csv(f"{directory}/data.csv", index=False)
else:
    df.to_csv(f"{directory}/data.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'policies/dqn_mlp/swv11_num_episodes=1000_memory_size=100000_target_update=100_noisy=False.pth'

<Figure size 640x480 with 0 Axes>

In [5]:
ret

{'ft06': 65,
 'la01': 675,
 'la06': 967,
 'la11': 1259,
 'la21': 1234,
 'la31': 1819,
 'la36': 1453,
 'orb01': 1139,
 'swv01': 1635,
 'swv06': 2077}