Reference notebook to check if the environment correctly implements the paper's rules.

If the assertion in the final cell passes, the reward of the last scripted step matches the expected value, which indicates that the state transitions and rewards are computed correctly.

In [1]:
from envs import BlendEnv, flatten_and_track_mappings, reconstruct_dict

In [2]:
# Scripted action 1. Nonzero entries: source_blend s1->j1 = 10, s2->j1 = 10,
# s2->j3 = 20. Each "tau" value equals the total of the matching "source_blend"
# row; every other flow, and "delta", is zero.
act_1 = {
    "source_blend": {
        "s1": {"j1": 10, "j2": 0, "j3": 0, "j4": 0},
        "s2": {"j1": 10, "j2": 0, "j3": 20, "j4": 0},
    },
    "blend_blend": {
        "j1": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 0, "p2": 0},
        "j6": {"p1": 0, "p2": 0},
        "j7": {"p1": 0, "p2": 0},
        "j8": {"p1": 0, "p2": 0},
    },
    "tau": {"s1": 10, "s2": 30},
    "delta": {"p1": 0, "p2": 0},
}

In [3]:
# Scripted action 2. Nonzero entries: source_blend s1->j2 = 10, s2->j2 = 10,
# s2->j4 = 20; blend_blend j1->j5 = 15, j3->j7 = 15. Each "tau" value equals the
# total of the matching "source_blend" row; "blend_demand" and "delta" are zero.
act_2 = {
    "source_blend": {
        "s1": {"j1": 0, "j2": 10, "j3": 0, "j4": 0},
        "s2": {"j1": 0, "j2": 10, "j3": 0, "j4": 20},
    },
    "blend_blend": {
        "j1": {"j5": 15, "j6": 0, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 15, "j8": 0},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 0, "p2": 0},
        "j6": {"p1": 0, "p2": 0},
        "j7": {"p1": 0, "p2": 0},
        "j8": {"p1": 0, "p2": 0},
    },
    "tau": {"s1": 10, "s2": 30},
    "delta": {"p1": 0, "p2": 0},
}

In [4]:
# Scripted action 3. Nonzero entries: source_blend s1->j1 = 10, s2->j1 = 10,
# s2->j3 = 20; blend_blend j2->j6 = 20, j4->j8 = 20; blend_demand j5->p1 = 15,
# j7->p2 = 15. "tau" equals the "source_blend" row totals and "delta" equals the
# per-product totals of "blend_demand".
act_3 = {
    "source_blend": {
        "s1": {"j1": 10, "j2": 0, "j3": 0, "j4": 0},
        "s2": {"j1": 10, "j2": 0, "j3": 20, "j4": 0},
    },
    "blend_blend": {
        "j1": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 20, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 20},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 15, "p2": 0},
        "j6": {"p1": 0, "p2": 0},
        "j7": {"p1": 0, "p2": 15},
        "j8": {"p1": 0, "p2": 0},
    },
    "tau": {"s1": 10, "s2": 30},
    "delta": {"p1": 15, "p2": 15},
}

In [5]:
# Scripted action 4. "source_blend" and "tau" are all zero. Nonzero entries:
# blend_blend j1->j5 = 15, j3->j7 = 15; blend_demand j6->p1 = 15, j8->p2 = 15.
# "delta" equals the per-product totals of "blend_demand".
act_4 = {
    "source_blend": {
        "s1": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
        "s2": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
    },
    "blend_blend": {
        "j1": {"j5": 15, "j6": 0, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 15, "j8": 0},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 0, "p2": 0},
        "j6": {"p1": 15, "p2": 0},
        "j7": {"p1": 0, "p2": 0},
        "j8": {"p1": 0, "p2": 15},
    },
    "tau": {"s1": 0, "s2": 0},
    "delta": {"p1": 15, "p2": 15},
}

In [6]:
# Scripted action 5. "source_blend" and "tau" are all zero. Nonzero entries:
# blend_blend j1->j6 = 10, j3->j8 = 10; blend_demand j5->p1 = 15, j7->p2 = 15.
# "delta" equals the per-product totals of "blend_demand".
act_5 = {
    "source_blend": {
        "s1": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
        "s2": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
    },
    "blend_blend": {
        "j1": {"j5": 0, "j6": 10, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 0, "j8": 10},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 15, "p2": 0},
        "j6": {"p1": 0, "p2": 0},
        "j7": {"p1": 0, "p2": 15},
        "j8": {"p1": 0, "p2": 0},
    },
    "tau": {"s1": 0, "s2": 0},
    "delta": {"p1": 15, "p2": 15},
}

In [7]:
# Scripted action 6. "source_blend", "blend_blend" and "tau" are all zero.
# Nonzero entries: blend_demand j6->p1 = 15, j8->p2 = 15. "delta" equals the
# per-product totals of "blend_demand".
act_6 = {
    "source_blend": {
        "s1": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
        "s2": {"j1": 0, "j2": 0, "j3": 0, "j4": 0},
    },
    "blend_blend": {
        "j1": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j2": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j3": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        "j4": {"j5": 0, "j6": 0, "j7": 0, "j8": 0},
        # j5..j8 have no outgoing blend->blend entries.
        "j5": {}, "j6": {}, "j7": {}, "j8": {},
    },
    "blend_demand": {
        # j1..j4 have no blend->demand entries.
        "j1": {}, "j2": {}, "j3": {}, "j4": {},
        "j5": {"p1": 0, "p2": 0},
        "j6": {"p1": 15, "p2": 0},
        "j7": {"p1": 0, "p2": 0},
        "j8": {"p1": 0, "p2": 15},
    },
    "tau": {"s1": 0, "s2": 0},
    "delta": {"p1": 15, "p2": 15},
}

In [8]:
# Scripted episode: the six actions, in the order they are replayed.
act_list = [
    act_1, act_2, act_3,
    act_4, act_5, act_6,
]

In [9]:
# Environment under test. NOTE(review): v=True presumably enables the verbose
# prints seen in this cell's output; the meaning of Z=1 is not visible here.
env = BlendEnv(v=True, Z=1)

ACTION: {'source_blend': {'s1': {'j1': 0, 'j2': 0, 'j3': 0, 'j4': 0}, 's2': {'j1': 0, 'j2': 0, 'j3': 0, 'j4': 0}}, 'blend_blend': {'j1': {'j5': 0, 'j6': 0, 'j7': 0, 'j8': 0}, 'j2': {'j5': 0, 'j6': 0, 'j7': 0, 'j8': 0}, 'j3': {'j5': 0, 'j6': 0, 'j7': 0, 'j8': 0}, 'j4': {'j5': 0, 'j6': 0, 'j7': 0, 'j8': 0}, 'j5': {}, 'j6': {}, 'j7': {}, 'j8': {}}, 'blend_demand': {'j1': {}, 'j2': {}, 'j3': {}, 'j4': {}, 'j5': {'p1': 0, 'p2': 0}, 'j6': {'p1': 0, 'p2': 0}, 'j7': {'p1': 0, 'p2': 0}, 'j8': {'p1': 0, 'p2': 0}}, 'tau': {'s1': 0, 's2': 0}, 'delta': {'p1': 0, 'p2': 0}}
action space shape: 36


In [10]:
# Replay the scripted actions and keep the reward of every step so the whole
# trajectory (not just the last value of `reward`) can be inspected afterwards.
episode_rewards = []
obs, obs_dict = env.reset()  # reset() returns a pair; the first item is the observation
for action in act_list:
    # env.step is called with the flattened form of the nested action dict.
    action_flatt, mapp = flatten_and_track_mappings(action)
    obs, reward, done, term, _ = env.step(action_flatt)
    episode_rewards.append(reward)
    print("After step:",env.t)
    # Rebuild the nested observation from the flat one; kept for manual inspection.
    obs_clean = reconstruct_dict(obs, env.mapping_obs)
    print("reward:",round(reward,3), '\n')

sanitizing action structure... {'source_blend': {'s1': {'j1': 10.0, 'j2': 0.0, 'j3': 0.0, 'j4': 0.0}, 's2': {'j1': 10.0, 'j2': 0.0, 'j3': 20.0, 'j4': 0.0}}, 'blend_blend': {'j1': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}, 'j2': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}, 'j3': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}, 'j4': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}}, 'blend_demand': {'j5': {'p1': 0.0, 'p2': 0.0}, 'j6': {'p1': 0.0, 'p2': 0.0}, 'j7': {'p1': 0.0, 'p2': 0.0}, 'j8': {'p1': 0.0, 'p2': 0.0}}, 'tau': {'s1': 10.0, 's2': 30.0}, 'delta': {'p1': 0.0, 'p2': 0.0}}
After step: 1
reward: -0.3 

sanitizing action structure... {'source_blend': {'s1': {'j1': 0.0, 'j2': 10.0, 'j3': 0.0, 'j4': 0.0}, 's2': {'j1': 0.0, 'j2': 10.0, 'j3': 0.0, 'j4': 20.0}}, 'blend_blend': {'j1': {'j5': 15.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}, 'j2': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}, 'j3': {'j5': 0.0, 'j6': 0.0, 'j7': 15.0, 'j8': 0.0}, 'j4': {'j5': 0.0, 'j6': 0.0, 'j7': 0.0, 'j8': 0.0}}, '

In [11]:
# The reward of the last scripted step must match the reference value.
# Compare with a small tolerance instead of exact float equality, and report
# the observed value when the check fails.
EXPECTED_FINAL_REWARD = 177.5
assert abs(reward - EXPECTED_FINAL_REWARD) < 1e-6, (
    f"final reward {reward!r} differs from expected {EXPECTED_FINAL_REWARD}"
)