# 指派问题简易环境



In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from heapq import heappush, heappop

In [49]:
# Notebook display setting: show the value of every top-level expression in a
# cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = 'all'

In [73]:
# Environment parameters.
# Equipment indices 0 .. N_QC - 1 are quay cranes and
# N_QC .. N_QC + N_ARMG - 1 are yard cranes.
N_QC = 3       # number of quay cranes (QC)
N_ARMG = 2     # number of yard cranes (ARMG)
N_VEHICLE = 3  # number of transport vehicles

# Distance matrix of size N_QC + N_ARMG + 1; the last row/column is the
# vehicles' starting position.
DIST_MAT = np.array([
    [0, 1, 2, 4, 6, 3],
    [1, 0, 1, 4, 4, 3],
    [2, 1, 0, 6, 4, 3],
    [4, 4, 6, 0, 1, 3],
    [6, 4, 4, 1, 0, 3],
    [3, 3, 3, 3, 3, 0]
])
assert DIST_MAT.shape == (N_QC + N_ARMG + 1, N_QC + N_ARMG + 1)

QC_PREPARE_TIME = 30    # quay crane preparation time before loading/unloading a container
QC_LIFT_TIME = 10       # quay crane lifting time
ARMG_PREPARE_TIME = 20  # yard crane preparation time before loading/unloading a container
ARMG_LIFT_TIME = 6      # yard crane lifting time

MIN_IDLE_TIME = 5       # minimum idle time of a vehicle

In [122]:
class Simulation:
    """
    Event-driven simulation of vehicles carrying containers between equipment.

    The simulation computes the time at which each piece of equipment finishes
    its lifts; the environment relies on it to advance the task queue.

    Equipment indices ``0 .. N_QC - 1`` are quay cranes and the remaining
    indices up to ``N_QC + N_ARMG - 1`` are yard cranes. The last row/column
    of ``DIST_MAT`` is the vehicles' starting position.
    """
    def __init__(self):
        # Per equipment: finish times of the lifts it has performed so far.
        self.equip_state = {i: [] for i in range(N_QC + N_ARMG)}
        # Per vehicle: index of the DIST_MAT node it currently stands at
        # (every vehicle starts at the last node).
        self.vehicle_position = [N_QC + N_ARMG] * N_VEHICLE

        # Heap of (time, vehicle index): the time at which each vehicle becomes
        # available. The initial list is sorted, hence already a valid heap.
        self.future_events = [(0, i) for i in range(N_VEHICLE)]
        # Time of the most recently popped event.
        self.now = 0

    def current_state(self):
        """Return a dict view of the simulation state.

        The values are the live internal containers, not copies.
        """
        return {
            "equip_state": self.equip_state,
            "vehicle_position": self.vehicle_position,
            "future_events": self.future_events,
            "now": self.now
        }

    def peek_future(self):
        """Return the earliest ``(time, vehicle index)`` event without removing it."""
        return self.future_events[0]

    def predict_travel_time(self, i_vehicle, i_equip):
        """Return the travel time of vehicle ``i_vehicle`` to node ``i_equip``.

        ``i_vehicle`` is a vehicle index (not a node index); the vehicle's
        current node is looked up in ``vehicle_position``. Each distance unit
        of ``DIST_MAT`` costs 10 time units.
        """
        vehicle_pos = self.vehicle_position[i_vehicle]
        return DIST_MAT[vehicle_pos, i_equip] * 10

    def get_equip_state(self, i_equip):
        """Return the finish time of the last lift of ``i_equip`` (0 if none yet)."""
        state = self.equip_state[i_equip]
        if len(state) == 0:
            return 0
        return state[-1]

    def do_idle(self, i_vehicle):
        """Send vehicle ``i_vehicle`` back to the starting position.

        Returns the time at which the vehicle is available again: the current
        time plus the travel time to the origin, but at least
        ``MIN_IDLE_TIME`` later.
        """
        origin = len(DIST_MAT) - 1
        # predict_travel_time expects the vehicle index and resolves the
        # vehicle's node itself; passing the node here would index
        # vehicle_position with a node number.
        recycle_time = self.predict_travel_time(i_vehicle, origin)
        idle_time = max(recycle_time, MIN_IDLE_TIME)
        self.vehicle_position[i_vehicle] = origin
        return self.now + idle_time

    def _serve(self, i_equip, i_vehicle, start_time):
        """Drive the vehicle to ``i_equip`` from ``start_time`` and perform one lift.

        Records the lift's finish time on the equipment, moves the vehicle to
        the equipment and returns the finish time.
        """
        is_qc = i_equip < N_QC
        prepare_time = QC_PREPARE_TIME if is_qc else ARMG_PREPARE_TIME
        lift_time = QC_LIFT_TIME if is_qc else ARMG_LIFT_TIME

        arrival_time = start_time + self.predict_travel_time(i_vehicle, i_equip)
        # The lift starts once the vehicle has arrived and the equipment has
        # finished preparing after its previous lift.
        ready_time = self.get_equip_state(i_equip) + prepare_time
        finish_time = max(ready_time, arrival_time) + lift_time

        self.equip_state[i_equip].append(finish_time)
        self.vehicle_position[i_vehicle] = i_equip
        return finish_time

    def do_task(self, task, i_vehicle, phase=1):
        """Execute ``task`` (an ``(origin, destination)`` pair) with a vehicle.

        With ``phase == 0`` only the stop at ``task[0]`` is performed, starting
        at the current time. With any other value the vehicle first serves
        ``task[0]`` and then ``task[1]``. Returns the finish time of the last
        lift performed.
        """
        pickup_finish = self._serve(task[0], i_vehicle, self.now)
        if phase == 0:
            return pickup_finish
        return self._serve(task[1], i_vehicle, pickup_finish)

    def resume(self, task):
        """Advance to the next available vehicle and give it ``task``.

        Pops the earliest event, sets ``now`` to its time, lets that vehicle
        execute ``task`` (or idle back to the origin when ``task`` is None),
        and re-queues the vehicle at the time it becomes available again.
        Returns that time.
        """
        self.now, i_vehicle = heappop(self.future_events)
        if task is None:
            next_time = self.do_idle(i_vehicle)
        else:
            next_time = self.do_task(task, i_vehicle)
        heappush(self.future_events, (next_time, i_vehicle))
        return next_time

In [125]:
# Demo: feed four (origin, destination) tasks to a fresh simulation, one per
# resume() call, printing the state before each task is assigned.
tasks = [
    (0, 3),
    (4, 0),
    (3, 1),
    (3, 2)
]
simulation = Simulation()
for task in tasks:
    print(f"Simulation current state: {simulation.current_state()}.")
    # The head of the event heap is the vehicle that resume() will use next.
    future = simulation.peek_future()
    print(f"Vehicle {future[1]} is available at {future[0]}s.")
    _ = simulation.resume(task)
    print(f"Task {task} is done.")
print(f"Simulation current state: {simulation.current_state()}.")
# Finally let the next available vehicle idle (task=None). The time printed
# below is when that vehicle becomes available; the end of the idle move is
# the value returned by resume(None).
future = simulation.peek_future()
print(f"Vehicle {future[1]} back to origin at {future[0]}s.")
_ = simulation.resume(None)
print(f"Simulation current state: {simulation.current_state()}.")

Simulation current state: {'equip_state': {0: [], 1: [], 2: [], 3: [], 4: []}, 'vehicle_position': [5, 5, 5], 'future_events': [(0, 0), (0, 1), (0, 2)], 'now': 0}.
Vehicle 0 is available at 0s.
Task (0, 3) is done.
Simulation current state: {'equip_state': {0: [40], 1: [], 2: [], 3: [86], 4: []}, 'vehicle_position': [3, 5, 5], 'future_events': [(0, 1), (0, 2), (86, 0)], 'now': 0}.
Vehicle 1 is available at 0s.
Task (4, 0) is done.
Simulation current state: {'equip_state': {0: [40, 106], 1: [], 2: [], 3: [86], 4: [36]}, 'vehicle_position': [3, 0, 5], 'future_events': [(0, 2), (86, 0), (106, 1)], 'now': 0}.
Vehicle 2 is available at 0s.
Task (3, 1) is done.
Simulation current state: {'equip_state': {0: [40, 106], 1: [162], 2: [], 3: [86, 112], 4: [36]}, 'vehicle_position': [3, 0, 1], 'future_events': [(86, 0), (106, 1), (162, 2)], 'now': 0}.
Vehicle 0 is available at 86s.
Task (3, 2) is done.
Simulation current state: {'equip_state': {0: [40, 106], 1: [162], 2: [208], 3: [86, 112, 138], 

In [128]:
def check_tasks(tasks):
    """Validate a task list and return it unchanged.

    Every task must be a 2-tuple ``(origin, destination)`` of two different
    equipment indices, each in ``[0, N_QC + N_ARMG)``.

    Raises:
        AssertionError: if any task violates these rules. The exception is
            raised explicitly so the validation is not stripped under
            ``python -O``.
    """
    for task in tasks:
        if not isinstance(task, tuple) or len(task) != 2:
            raise AssertionError(f"task must be a 2-tuple, got {task!r}")
        origin, destination = task
        # Compare the two endpoints of the same task (not the task against
        # another entry of the task list).
        if origin == destination:
            raise AssertionError(f"task endpoints must differ, got {task!r}")
        n_equip = N_QC + N_ARMG
        if not (0 <= origin < n_equip and 0 <= destination < n_equip):
            raise AssertionError(f"task endpoints out of range, got {task!r}")
    return tasks

def generate_task_group(tasks):
    """Group task indices by the quay crane they involve.

    Returns a dict with one entry per quay crane (``0 .. N_QC - 1``) whose
    value lists, in task order, the indices of the tasks having that crane
    as origin or destination.
    """
    group = {i_qc: [] for i_qc in range(N_QC)}
    for i_task, task in enumerate(tasks):
        # Origin first, then destination, so a task touching two quay cranes
        # is registered under both.
        for i_equip in (task[0], task[1]):
            if i_equip < N_QC:
                group[i_equip].append(i_task)
    return group

def generate_equip_map(tasks):
    """Map each task index to the quay crane the task involves.

    Tasks with no quay-crane endpoint get no entry. When both endpoints are
    quay cranes, the destination wins because it is assigned last.
    """
    equip_map = {}
    for i_task, task in enumerate(tasks):
        for i_equip in (task[0], task[1]):
            if i_equip < N_QC:
                equip_map[i_task] = i_equip
    return equip_map
    
    
class Environment:
    """
    Gym-style environment that dispatches transport tasks to vehicles.

    The observation is a 2 x len(tasks) integer array:
      * row 0 is 1 while the task has not been executed yet;
      * row 1 is 1 once the task has been unlocked, i.e. it is the first task
        of its quay crane or the previous task of that quay crane was executed.
    A task is selectable when both rows are 1.

    NOTE: tasks with no quay-crane endpoint have no entry in ``equip_map`` or
    ``task_group``, so nothing in this class ever unlocks them.
    """
    def __init__(self, tasks):
        self.tasks = check_tasks(tasks)
        # Quay crane index -> indices of its tasks, in order.
        self.task_group = generate_task_group(tasks)
        # Task index -> quay crane index.
        self.equip_map = generate_equip_map(tasks)

        self.action_space = np.arange(len(tasks))
        self.observation_space = None

        self.reset()

    def get_next_task(self, i_task):
        """Return the index of the next task on the same quay crane as ``i_task``.

        Returns None when there is no later task for that quay crane, or when
        ``i_task`` itself is not attached to any quay crane.
        """
        i_equip = self.equip_map.get(i_task)
        if i_equip is None:
            return None
        for i in range(i_task + 1, len(self.tasks)):
            # .get(): tasks without a quay-crane endpoint have no entry and
            # must simply be skipped.
            if self.equip_map.get(i) == i_equip:
                return i
        return None

    def get_selectable_tasks(self):
        """Return the indices of tasks that are both pending and unlocked."""
        return np.argwhere(np.sum(self.observation, axis=0) == 2).T[0]

    def reset(self):
        """Start a fresh simulation and return the initial observation."""
        self.sim = Simulation()

        n_tasks = len(self.tasks)
        self.observation = np.array([
            [1] * n_tasks,
            [0] * n_tasks
        ])
        # Unlock the first task of every quay crane; quay cranes without any
        # task have an empty queue and are skipped.
        first_tasks = [queue[0] for queue in self.task_group.values() if queue]
        self.observation[1, first_tasks] = 1
        return self.observation

    def step(self, i_task):
        """Execute task ``i_task`` with the next available vehicle.

        A negative ``i_task`` makes the next available vehicle idle instead
        and yields a reward of 0.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            inverse of the time returned by the simulation for this task,
            ``done`` is true once no pending task is left, and ``info`` is
            always None.
        """
        if i_task < 0:
            self.sim.resume(None)
            return self.observation, 0, False, None

        self.observation[0, i_task] = 0
        i_next_task = self.get_next_task(i_task)
        if i_next_task is not None:
            self.observation[1, i_next_task] = 1

        task_finish_time = self.sim.resume(self.tasks[i_task])
        reward = 1 / task_finish_time

        done = np.sum(self.observation[0]) == 0
        return self.observation, reward, done, None

In [129]:
# Demo: greedy rollout that always executes the first selectable task.
env = Environment(tasks)
print(f"Task group: {env.task_group}")
print(f"Equip map: {env.equip_map}")
print(f"Current observation: \n{env.observation}")

while True:
    selectable_tasks = env.get_selectable_tasks()
    # The loop ends here: once every task is executed nothing is selectable.
    if len(selectable_tasks) == 0:
        break
    new_observation, reward, done, _ = env.step(selectable_tasks[0])
    print(f"Selectable tasks: {selectable_tasks}, selected {selectable_tasks[0]}, reward is {reward}.")
    print(f"Current observation: \n{new_observation}.")
    if done:
        print(f"Tasks finished.")

Task group: {0: [0, 1], 1: [2], 2: [3]}
Equip map: {0: 0, 1: 0, 2: 1, 3: 2}
Current observation: 
[[1 1 1 1]
 [1 0 1 1]]
Selectable tasks: [0 2 3], selected 0, reward is 0.011627906976744186.
Current observation: 
[[0 1 1 1]
 [1 1 1 1]].
Selectable tasks: [1 2 3], selected 1, reward is 0.009433962264150943.
Current observation: 
[[0 0 1 1]
 [1 1 1 1]].
Selectable tasks: [2 3], selected 2, reward is 0.006172839506172839.
Current observation: 
[[0 0 0 1]
 [1 1 1 1]].
Selectable tasks: [3], selected 3, reward is 0.004807692307692308.
Current observation: 
[[0 0 0 0]
 [1 1 1 1]].
Tasks finished.
