From 4a72e81a6cca5d80ba41520f65447dbb92deca8b Mon Sep 17 00:00:00 2001
From: Ashkan Y
Date: Tue, 16 Jul 2019 17:27:12 -0700
Subject: [PATCH] Added reward and Apply RL actions

---
 flow/envs/green_wave_env.py               |  5 +-
 tutorials/tutorial11_traffic_lights.ipynb | 99 ++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/flow/envs/green_wave_env.py b/flow/envs/green_wave_env.py
index 5b8b02fa9..f36d907ba 100644
--- a/flow/envs/green_wave_env.py
+++ b/flow/envs/green_wave_env.py
@@ -200,8 +200,9 @@ def _apply_rl_actions(self, rl_actions):
             rl_mask = [int(x) for x in list('{0:0b}'.format(rl_actions))]
             rl_mask = [0] * (self.num_traffic_lights - len(rl_mask)) + rl_mask
         else:
-            # convert values less than 0.5 to zero and above to 1. 0's indicate
-            # that should not switch the direction
+            # convert values above 0 to 1 and values of 0 or less to 0. 0
+            # indicates that the direction should not switch, and 1 indicates
+            # that a switch should happen
             rl_mask = rl_actions > 0.0
 
         for i, action in enumerate(rl_mask):
diff --git a/tutorials/tutorial11_traffic_lights.ipynb b/tutorials/tutorial11_traffic_lights.ipynb
index 015cca1e7..393e5e5bf 100644
--- a/tutorials/tutorial11_traffic_lights.ipynb
+++ b/tutorials/tutorial11_traffic_lights.ipynb
@@ -442,7 +442,7 @@
     "        return Discrete(2 ** self.num_traffic_lights)\n",
     "    else:\n",
     "        return Box(\n",
-    "            low=-1,\n",
+    "            low=0,\n",
     "            high=1,\n",
     "            shape=(self.num_traffic_lights,),\n",
     "            dtype=np.float32)"
@@ -452,6 +452,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "In the case that the action space is discrete, we need 1 bit (that can be 0 or 1) for the action of each traffic light node. Hence, we need `self.num_traffic_lights` bits to represent the action space. To make a `self.num_traffic_lights`-bit number, we use Gym's `Discrete(range)`, and since we have `self.num_traffic_lights` bits, the `range` will be 2^`self.num_traffic_lights`.\n",
+    "\n",
+    "In the case that the action space is continuous, we use a value in a range (currently (0, 1)) for each traffic light node. Hence, we define a `Box` of shape `(self.num_traffic_lights,)`, where each entry lies in the range (0, 1).\n",
+    "\n",
     "Note that the variable `num_traffic_lights` is actually the number of intersections in the grid system, not the number of traffic lights. Number of traffic lights in our example is 4 times the number of intersections"
    ]
   },
@@ -562,7 +566,17 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "...."
+    "The agents in an RL scenario will learn to maximize a certain reward. This objective can be defined in terms of maximizing rewards or minimizing penalties. In this example, we penalize large delays and the boolean actions that indicate a switch (hence the negative signs)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_reward(self, rl_actions, **kwargs):\n",
+    "    return - rewards.min_delay_unscaled(self) - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=1.0)"
    ]
   },
   {
@@ -576,7 +590,86 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "..."
+    "In the `_apply_rl_actions` function, we specify what actions our agents should take in the environment. In this example, the agents (traffic light nodes) decide, based on the action values, whether to switch the traffic lights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _apply_rl_actions(self, rl_actions):\n",
+    "    \"\"\"See class definition.\"\"\"\n",
+    "    # check if the action space is discrete\n",
+    "    if self.discrete:\n",
+    "        # convert single value to list of 0's and 1's\n",
+    "        rl_mask = [int(x) for x in list('{0:0b}'.format(rl_actions))]\n",
+    "        rl_mask = [0] * (self.num_traffic_lights - len(rl_mask)) + rl_mask\n",
+    "    else:\n",
+    "        # convert values of 0.5 or less to 0 and values above 0.5 to 1.\n",
+    "        # 0 indicates that the direction should not switch, and 1\n",
+    "        # indicates that a switch should happen\n",
+    "        rl_mask = rl_actions > 0.5\n",
+    "\n",
+    "    # Loop through the traffic light nodes\n",
+    "    for i, action in enumerate(rl_mask):\n",
+    "        if self.currently_yellow[i] == 1:  # currently yellow\n",
+    "            # Code to change from yellow to red\n",
+    "            ...\n",
+    "        else:\n",
+    "            # Code to change to yellow\n",
+    "            ..."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These are the portions of the code that were omitted from the cell above to keep it short:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "            # Code to change from yellow to red\n",
+    "            self.last_change[i] += self.sim_step\n",
+    "            # Check if our timer has exceeded the yellow phase, meaning it\n",
+    "            # should switch to red\n",
+    "            if self.last_change[i] >= self.min_switch_time:\n",
+    "                if self.direction[i] == 0:\n",
+    "                    self.k.traffic_light.set_state(\n",
+    "                        node_id='center{}'.format(i),\n",
+    "                        state=\"GrGr\")\n",
+    "                else:\n",
+    "                    self.k.traffic_light.set_state(\n",
+    "                        node_id='center{}'.format(i),\n",
+    "                        state='rGrG')\n",
+    "                self.currently_yellow[i] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "            # Code to change to yellow\n",
+    "            if action:\n",
+    "                if self.direction[i] == 0:\n",
+    "                    self.k.traffic_light.set_state(\n",
+    "                        node_id='center{}'.format(i),\n",
+    "                        state='yryr')\n",
+    "                else:\n",
+    "                    self.k.traffic_light.set_state(\n",
+    "                        node_id='center{}'.format(i),\n",
+    "                        state='ryry')\n",
+    "                self.last_change[i] = 0.0\n",
+    "                self.direction[i] = not self.direction[i]\n",
+    "                self.currently_yellow[i] = 1"
    ]
   }
 ],
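For reference, here is a minimal standalone sketch (not part of the patch above) of how the two action representations map to a per-intersection switch mask. It assumes Gym and NumPy are installed; the grid size and variable names are illustrative and mirror the conversion logic in `_apply_rl_actions`.

```python
import numpy as np
from gym.spaces import Box, Discrete

num_traffic_lights = 9  # illustrative: a 3x3 grid has 9 intersections

# Discrete case: one integer whose binary digits give a 0/1 action per node.
discrete_space = Discrete(2 ** num_traffic_lights)
rl_actions = discrete_space.sample()
rl_mask = [int(x) for x in list('{0:0b}'.format(rl_actions))]
rl_mask = [0] * (num_traffic_lights - len(rl_mask)) + rl_mask  # left-pad to 9 bits

# Continuous case: one value in [0, 1] per node, thresholded at 0.5.
box_space = Box(low=0, high=1, shape=(num_traffic_lights,), dtype=np.float32)
rl_mask_continuous = box_space.sample() > 0.5

print(rl_mask)             # e.g. [0, 1, 0, 0, 1, 1, 0, 1, 0]
print(rl_mask_continuous)  # e.g. [False  True ...]; True means "switch"
```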