Switch PID implementations to seemingly more stable and one that is a…

… better match for our C++ implementation
deepdrive · Dec 21, 2018 · c2eeb70 · c2eeb70
1 parent 6344afd
commit c2eeb70
Show file tree

Hide file tree

Showing 3 changed files with 169 additions and 25 deletions.
diff --git a/agents/dagger/agent.py b/agents/dagger/agent.py
@@ -7,7 +7,7 @@
 
 import tensorflow as tf
 import numpy as np
-from simple_pid import PID
+from control.pid import PID
 
 import config as c
 import deepdrive
@@ -84,7 +84,7 @@ def __init__(self, tf_session, should_record_recovery_from_random_actions=True,
             self.net = None
             self.sess = None
 
-        self.throttle_pid = PID(Kp=0.2, Ki=0.05, Kd=0.05)
+        self.throttle_pid = PID(0.2, 0.05, 0.05)
 
 
     def act(self, obz, reward, done, episode_time=None):
@@ -187,14 +187,15 @@ def get_next_action(self, obz, net_out):
             # TODO: Support different driving styles
 
             # desired_throttle = get_throttle(actual_speed, desired_speed * 0.48)
-            desired_throttle = get_throttle(actual_speed, TARGET_MPS_TEST)
+            desired_throttle = self.get_target_throttle(obz)
 
-            desired_throttle = min(max(desired_throttle, 0.), 1.)
+            # desired_throttle = min(max(desired_throttle, 0.), 1.)
+            #
+            # if self.previous_net_out:
+            #     desired_throttle = 0.2 * self.previous_action.throttle + 0.5 * desired_throttle
+            # else:
+            #     desired_throttle = desired_throttle * 0.95
 
-            if self.previous_net_out:
-                desired_throttle = 0.2 * self.previous_action.throttle + 0.5 * desired_throttle
-            else:
-                desired_throttle = desired_throttle * 0.95
             # desired_throttle = 0.4
         else:
             # AlexNet
@@ -221,7 +222,16 @@ def get_next_action(self, obz, net_out):
             smoothed_steering = desired_steering * 0.7
 
         # desired_throttle = desired_throttle * 1.1
-        action = Action(smoothed_steering, desired_throttle)
+
+        if desired_steering < 0:
+            log.info('STEERING NEGATIVE %f', desired_throttle)
+        else:
+            log.info('STEERING POSITIVE %f', desired_steering)
+
+        if self.previous_action.steering == desired_steering:
+            log.info('STEERING NOT CHANGED')
+
+        action = Action(desired_steering, desired_throttle)
         return action
 
     def maybe_save(self):
@@ -261,12 +271,12 @@ def set_random_action_repeat_count(self):
         else:
             self.semirandom_sequence_step += 1
 
-    def toggle_random_action(self, episode_time):
+    def toggle_random_action(self, obz):
         """Reduce sampling error by randomly exploring space around non-random agent's trajectory"""
 
         if self.performing_random_actions:
             if self.sequence_action_count < self.sequence_random_action_count and self.previous_action is not None:
-                action = self.previous_action
+                action = Action(self.previous_action.steering, self.get_target_throttle(obz))
             else:
                 # switch to non-random
                 log.debug('Switching to non-random action. action_count %d random_action_count %d '
@@ -277,7 +287,8 @@ def toggle_random_action(self, episode_time):
                 self.performing_random_actions = False
         else:
             if self.sequence_action_count < self.sequence_non_random_action_count and self.previous_action is not None:
-                action = self.previous_action  # Where has_control can be False, meaning no change, i.e. Game AI is driving
+                action = Action(has_control=False)
+                world.set_ego_mph(25, 25)
             else:
                 # switch to random
                 log.debug('Switching to random action. action_count %d random_action_count %d '
@@ -287,7 +298,9 @@ def toggle_random_action(self, episode_time):
                 log.debug('random steering %f', steering)
 
                 # TODO: Make throttle random as well
-                throttle = 0.65
+                # throttle = 0.65
+                # TODO: Find out why we actually slow down when setting the cm/s to what should be the same rate as world.set_ego_speed(mpH)
+                throttle = self.get_target_throttle(obz) * 0.5  # Slow down a bit so we don't crash before recovering
                 action = Action(steering, throttle)
                 self.sequence_action_count = 0
                 self.performing_random_actions = True
@@ -301,9 +314,10 @@ def get_target_throttle(self, obz):
 
         pid = self.throttle_pid
         target_cmps = TARGET_MPS * 100
-        if pid.setpoint != target_cmps:
-            pid.setpoint = target_cmps
-        throttle = pid(actual_speed)
+        if pid.SetPoint != target_cmps:
+            pid.SetPoint = target_cmps
+        pid.update(actual_speed)
+        throttle = pid.output
         throttle = min(max(throttle, 0.), 1.)
         return throttle
 
@@ -472,14 +486,15 @@ def close():
 
 
 def domain_randomization(env, randomize_month, randomize_shadow_level, randomize_sun_speed, randomize_view_mode):
-    if randomize_view_mode:
-        env.unwrapped.set_view_mode(c.rng.choice(list(ViewMode.__members__.values())))
-    if randomize_sun_speed:
-        world.randomize_sun_speed()
-    if randomize_shadow_level:
-        graphics.randomize_shadow_level()
-    if randomize_month:
-        world.randomize_sun_month()
+    # if randomize_view_mode:
+    #     env.unwrapped.set_view_mode(c.rng.choice(list(ViewMode.__members__.values())))
+    # if randomize_sun_speed:
+    #     world.randomize_sun_speed()
+    # if randomize_shadow_level:
+    #     graphics.randomize_shadow_level()
+    # if randomize_month:
+    #     world.randomize_sun_month()
+    pass
 
 
 def setup(experiment, camera_rigs, driving_style, net_name, net_path, path_follower, recording_dir, run_baseline_agent,
@@ -519,7 +534,7 @@ def setup(experiment, camera_rigs, driving_style, net_name, net_path, path_follo
     use_sim_start_command_first_lap = c.SIM_START_COMMAND is not None
 
     def start_env():
-        return deepdrive.start(experiment_name=experiment, env_id=env_id, should_benchmark=should_benchmark,
+        return deepdrive.start(experiment=experiment, env_id=env_id, should_benchmark=should_benchmark,
                                cameras=cameras,
                                use_sim_start_command=use_sim_start_command_first_lap, render=render, fps=fps,
                                driving_style=driving_style, is_sync=is_sync, reset_returns_zero=False,

diff --git a/control/__init__.py b/control/__init__.py
diff --git a/control/pid.py b/control/pid.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+#
+# This file is part of IvPID.
+# Copyright (C) 2015 Ivmech Mechatronics Ltd. <bilgi@ivmech.com>
+#
+# IvPID is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# IvPID is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# title           :PID.py
+# description     :python pid controller
+# author          :Caner Durmusoglu
+# date            :20151218
+# version         :0.1
+# notes           :
+# ==============================================================================
+
+"""Ivmech PID Controller is a simple implementation of a Proportional-Integral-Derivative (PID) Controller in the Python Programming Language.
+More information about PID Controller: http://en.wikipedia.org/wiki/PID_controller
+"""
+import time
+
+
+class PID:
+    """PID Controller
+    """
+
+    def __init__(self, P=0.2, I=0.0, D=0.0):
+
+        self.Kp = P
+        self.Ki = I
+        self.Kd = D
+
+        self.sample_time = 0.00
+        self.current_time = time.time()
+        self.last_time = self.current_time
+
+        self.clear()
+
+    def clear(self):
+        """Resets the setpoint, the P/I/D terms, the last error, the windup guard and the output; the Kp/Ki/Kd gains are left unchanged"""
+        self.SetPoint = 0.0
+
+        self.PTerm = 0.0
+        self.ITerm = 0.0
+        self.DTerm = 0.0
+        self.last_error = 0.0
+
+        # Windup Guard
+        self.int_error = 0.0
+        self.windup_guard = 20.0
+
+        self.output = 0.0
+
+    def update(self, feedback_value):
+        """Calculates PID value for given reference feedback
+
+        .. math::
+            u(t) = K_p e(t) + K_i \int_{0}^{t} e(t)dt + K_d {de}/{dt}
+
+        .. figure:: images/pid_1.png
+           :align:   center
+
+           Test PID with Kp=1.2, Ki=1, Kd=0.001 (test_pid.py)
+
+        """
+        error = self.SetPoint - feedback_value
+
+        self.current_time = time.time()
+        delta_time = self.current_time - self.last_time
+        delta_error = error - self.last_error
+
+        if (delta_time >= self.sample_time):
+            self.PTerm = self.Kp * error
+            self.ITerm += error * delta_time
+
+            if (self.ITerm < -self.windup_guard):
+                self.ITerm = -self.windup_guard
+            elif (self.ITerm > self.windup_guard):
+                self.ITerm = self.windup_guard
+
+            self.DTerm = 0.0
+            if delta_time > 0:
+                self.DTerm = delta_error / delta_time
+
+            # Remember last time and last error for next calculation
+            self.last_time = self.current_time
+            self.last_error = error
+
+            self.output = self.PTerm + (self.Ki * self.ITerm) + (self.Kd * self.DTerm)
+
+    def setKp(self, proportional_gain):
+        """Sets the Proportional Gain, which determines how aggressively the PID reacts to the current error"""
+        self.Kp = proportional_gain
+
+    def setKi(self, integral_gain):
+        """Sets the Integral Gain, which determines how aggressively the PID reacts to the error accumulated over time"""
+        self.Ki = integral_gain
+
+    def setKd(self, derivative_gain):
+        """Sets the Derivative Gain, which determines how aggressively the PID reacts to the rate of change of the error"""
+        self.Kd = derivative_gain
+
+    def setWindup(self, windup):
+        """Integral windup, also known as integrator windup or reset windup,
+        refers to the situation in a PID feedback controller where
+        a large change in setpoint occurs (say a positive change)
+        and the integral terms accumulates a significant error
+        during the rise (windup), thus overshooting and continuing
+        to increase as this accumulated error is unwound
+        (offset by errors in the other direction).
+        The specific problem is the excess overshooting.
+        """
+        self.windup_guard = windup
+
+    def setSampleTime(self, sample_time):
+        """Sets the minimum interval between PID computations.
+        update() recomputes the output only when at least sample_time seconds have elapsed since the last computation; otherwise it returns immediately and leaves the output unchanged.
+        """
+        self.sample_time = sample_time