In [1]:
from tianshou_ppo import tianshou_ppo_train
from ortools_scheduler import ORtools_scheduler
from matplotlib import pyplot as plt
import numpy as np
import os

In [2]:
# Benchmark instance names that the experiment iterates over, one per line
# so entries can be added or commented out individually.
instances = [
    "ft06",
    "la01",
    "la06",
    "la11",
    "la21",
    "la31",
    "la36",
    "orb01",
    "swv01",
    "swv06",
    "swv11",
    "yn1",
]
# Single-instance list, handy for a quick smoke run.
test_instances = ["ft06"]

In [3]:
# Experiment parameters; all three are also embedded in the figure directory name.
# random_rate and cv are forwarded to both training and the processing-time
# perturbation (presumably perturbation probability and coefficient of
# variation -- TODO confirm against ortools_scheduler).
random_rate, cv = 0.5, 0.2
# Passed to the trainer as max_epoch.
epochs = 10

In [4]:
%%capture hide_output
# For every benchmark instance: train a PPO policy, solve the nominal instance
# with OR-Tools, then compare both (plus a per-trial re-solved reference) over
# `n` perturbed processing-time matrices. One comparison figure per instance is
# written to `directory`; a text report is accumulated in `output` and the
# training makespans in `ret`.
directory = f"figs/p{random_rate}cv{cv}epochs{epochs}"
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

ret = {}      # instance name -> makespan returned by tianshou_ppo_train
output = ""   # human-readable report, printed in a later cell
n = 10        # number of perturbed trials per instance (loop-invariant)

for instance in instances:
    makespan, policy = tianshou_ppo_train(
        instance_name=instance, max_epoch=epochs, random_rate=random_rate, cv=cv
    )
    ret[instance] = makespan

    scheduler = ORtools_scheduler(instance)
    scheduler.optimize()
    obj_val = scheduler.obj_val

    policy_vals, ortools_vals, optimal_vals = [], [], []
    for _ in range(n):
        # Draw one perturbed processing-time matrix and score all three
        # approaches on the same draw so the curves are comparable per trial.
        times = scheduler.shifted_time_(random_rate=random_rate, cv=cv)
        policy_val = scheduler.policy_makespan('ppo', policy, shifted_time=times)
        # Static schedule evaluated under the processing-time fluctuation.
        ortools_val = scheduler.compute_makespan(shifted_time=times)
        # Reference value for the perturbed matrix (presumably a fresh solve
        # on `times` -- TODO confirm in ortools_scheduler).
        optimal_val = scheduler.get_optimal_of_new_time_mat(times)

        policy_vals.append(policy_val)
        ortools_vals.append(ortools_val)
        optimal_vals.append(optimal_val)

    plt.plot(policy_vals, color='g', label='policy')
    plt.plot(ortools_vals, color='r', label='ortools')
    plt.plot(optimal_vals, color='blue', label='optimal')
    policy_vals, ortools_vals, optimal_vals = np.array(policy_vals), np.array(ortools_vals), np.array(optimal_vals)

    # Compute each mean once; it is used for both the dotted line and the report.
    policy_mean = np.mean(policy_vals)
    ortools_mean = np.mean(ortools_vals)
    optimal_mean = np.mean(optimal_vals)

    # Dotted horizontal lines mark the per-series mean across the trials.
    plt.hlines(ortools_mean, -2, n+2, linestyles='dotted', colors='r')
    plt.hlines(policy_mean, -2, n+2, linestyles='dotted', colors='g')
    plt.hlines(optimal_mean, -2, n+2, linestyles='dotted', colors='blue')

    output += f"Instance : {instance}\n"
    # Fix: this line previously lacked its trailing newline, so the parameter
    # summary ran straight into the next sentence ("epochs=10ortools ...").
    output += f"random rate={random_rate}, cv={cv}, epochs={epochs}\n"
    output += f"ortools makespan on 原始算例 is {obj_val}, policy makespan on 原始算例 is {makespan}\n"
    output += f"mean of makespan when time mat varies, ortools:{ortools_mean}, policy:{policy_mean}\n"
    output += f"std of makespan when time mat varies, ortools:{np.std(ortools_vals)}, policy:{np.std(policy_vals)}\n"

    plt.xlabel('trial')
    plt.ylabel('makespan')
    plt.title(f"random_rate={random_rate},cv={cv},instance={instance}")
    plt.legend()
    plt.savefig(f"{directory}/policy_vs_ortools_{instance}.png")
    # Clear the current figure so the next instance starts from an empty canvas.
    plt.clf()

Model trivially infeasible, variable 26 has lower bound 18 and LowerOrEqual() was called with an upper bound of -1


In [5]:
# Print the per-instance text report accumulated in `output` by the evaluation cell.
print(output)

Instance : ft06
random rate=0.5, cv=0.2, epochs=10ortools makespan on 原始算例 is 55.0, policy makespan on 原始算例 is 68
mean of makespan when time mat varies, ortools:66.2, policy:67.5
std of makespan when time mat varies, ortools:6.257795138864806, policy:8.811923740024081
Instance : la01
random rate=0.5, cv=0.2, epochs=10ortools makespan on 原始算例 is 666.0, policy makespan on 原始算例 is 751
mean of makespan when time mat varies, ortools:891.8, policy:878.8
std of makespan when time mat varies, ortools:89.15357536296568, policy:117.12369529689542
Instance : la06
random rate=0.5, cv=0.2, epochs=10ortools makespan on 原始算例 is 926.0, policy makespan on 原始算例 is 1098
mean of makespan when time mat varies, ortools:1200.3, policy:1088.0
std of makespan when time mat varies, ortools:69.51985327947693, policy:155.13993683123633
Instance : la11
random rate=0.5, cv=0.2, epochs=10ortools makespan on 原始算例 is 1222.0, policy makespan on 原始算例 is 1222
mean of makespan when time mat varies, ortools:1540.4, policy:

In [6]:
# Display the makespan returned by tianshou_ppo_train for each instance, keyed by instance name.
ret

{'ft06': 68,
 'la01': 751,
 'la06': 1098,
 'la11': 1222,
 'la21': 1324,
 'la31': 1951,
 'la36': 1799,
 'orb01': 1458,
 'swv01': 1737,
 'swv06': 2140,
 'swv11': 3714,
 'yn1': 1196}