Some simple bug fixes (removing cv2 where unnecessary, commenting qt) (#87)

* Added plots for validation and test scores

* removed cv2 where unnecessary, commented out the qt back-end, fixed a typo in an import, and added some comments for things that worked for me

* removed the self. prefix from local variables
geoffreyvd committed Mar 3, 2021
1 parent eb18de7 commit bdb383c
Showing 9 changed files with 36 additions and 14 deletions.
1 change: 1 addition & 0 deletions deer/agent.py
@@ -68,6 +68,7 @@ def __init__(self, environment, learning_algo, replay_memory_size=1000000, repla
self._dataset = DataSet(environment, max_size=replay_memory_size, random_state=random_state, use_priority=self._exp_priority, only_full_history=self._only_full_history)
self._tmp_dataset = None # Will be created by startTesting() when necessary
self._mode = -1
self._totalModeNbrEpisode = 0
self._total_mode_reward = 0
self._training_loss_averages = []
self._Vs_on_last_episode = []
19 changes: 19 additions & 0 deletions deer/experiment/base_controllers.py
@@ -11,6 +11,7 @@
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt

class Controller(object):
"""A base controller that does nothing when receiving the various signals emitted by an agent. This class should
@@ -547,6 +548,24 @@ def onEpochEnd(self, agent):
self._testScores.append(score)
else:
self._trainingEpochCount += 1

#plot the validation/test score over epochs (saved to PDF; uncomment plt.show() for live plotting)
if mode == self._validationID:
plt.plot(range(1, len(self._validationScores)+1), self._validationScores, label="VS", color='b')
plt.legend()
plt.xlabel("Number of epochs")
plt.ylabel("Score")
plt.savefig("validation_scores.pdf")
plt.close()
# plt.show()
elif mode == self._testID:
plt.plot(range(1, len(self._testScores)+1), self._testScores, label="TS", color='b')
plt.legend()
plt.xlabel("Number of epochs")
plt.ylabel("Score")
plt.savefig("test_scores.pdf")
plt.close()
# plt.show()

def onEnd(self, agent):
if (self._active == False):
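
Note: the validation and test branches above repeat the same five matplotlib calls. Below is a minimal sketch of how the pattern could be factored into one helper; the _plotScores name and the headless 'Agg' backend are assumptions for illustration, not part of this commit.

import matplotlib
matplotlib.use("Agg")  # file-only backend: writes the PDF without opening a window
import matplotlib.pyplot as plt

def _plotScores(scores, label, filename):
    """Plot one score per epoch and save the curve to a PDF file."""
    plt.plot(range(1, len(scores) + 1), scores, label=label, color='b')
    plt.legend()
    plt.xlabel("Number of epochs")
    plt.ylabel("Score")
    plt.savefig(filename)
    plt.close()

# Hypothetical use inside onEpochEnd:
# if mode == self._validationID:
#     _plotScores(self._validationScores, "VS", "validation_scores.pdf")
# elif mode == self._testID:
#     _plotScores(self._testScores, "TS", "test_scores.pdf")
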
4 changes: 4 additions & 0 deletions deer/learning_algos/CRAR_keras.py
@@ -12,6 +12,10 @@
#config = tf.ConfigProto()
#config.gpu_options.allow_growth=True
#sess = tf.Session(config=config)

#this did work for me:
#import tensorflow as tf
#tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
import copy

def mean_squared_error_p(y_true, y_pred):
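
Note: a slightly more defensive version of the memory-growth setup suggested in the new comment, assuming TensorFlow 2.x; the loop over devices and the no-GPU guard are additions for illustration, not taken from the commit.

import tensorflow as tf

# Allocate GPU memory on demand instead of reserving it all up front.
# This must run before any operation initializes the GPU.
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the GPU has already been initialized at this point.
        print("Could not enable memory growth on", gpu.name, ":", err)
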
1 change: 1 addition & 0 deletions examples/ALE/ALE_env_gym.py
@@ -37,6 +37,7 @@ def __init__(self, rng, **kwargs):

self._screen=np.average(self.env.render(mode='rgb_array'),axis=-1)
self._reduced_screen = cv2.resize(self._screen, (84, 84), interpolation=cv2.INTER_LINEAR)
#decide whether to keep this in the repo; if so, add cv2 as a dependency
#plt.imshow(self._reduced_screen, cmap='gray')
#plt.show()

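
Note: the new comment questions keeping cv2 as a dependency just for this resize. Below is one possible cv2-free sketch of the same grayscale-and-resize step using Pillow; the to_84x84 helper is hypothetical and not part of this commit.

import numpy as np
from PIL import Image  # Pillow, as a lighter alternative to cv2

def to_84x84(rgb_frame):
    """Average the RGB channels and resize to 84x84 with bilinear filtering,
    mirroring cv2.resize(..., interpolation=cv2.INTER_LINEAR)."""
    gray = np.average(rgb_frame, axis=-1).astype(np.float32)
    resized = Image.fromarray(gray).resize((84, 84), Image.BILINEAR)
    return np.asarray(resized)
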
9 changes: 4 additions & 5 deletions examples/maze/maze_env.py
@@ -3,7 +3,6 @@
Author: Vincent Francois-Lavet
"""
import numpy as np
import cv2

from deer.base_classes import Environment

@@ -106,7 +105,7 @@ def act(self, action):
self._episode_steps += 1
action = self._actions[action]

self.reward = -0.1
reward = -0.1

if(action==0):
if([self._pos_agent[0]+1,self._pos_agent[1]] not in self._pos_walls):
@@ -122,11 +121,11 @@ def act(self, action):
self._pos_agent[1]=self._pos_agent[1]-1

if (self._pos_agent in self._pos_rewards):
self.reward = 1
reward = 1
self._pos_rewards.remove(self._pos_agent)

self._mode_score += self.reward
return self.reward
self._mode_score += reward
return reward


def summarizePerformance(self, test_data_set, learning_algo, *args, **kwargs):
3 changes: 1 addition & 2 deletions examples/test_CRAR/catcher_env.py
@@ -2,12 +2,11 @@
"""
import numpy as np
import cv2

from deer.base_classes import Environment

import matplotlib
matplotlib.use('qt5agg')
# matplotlib.use('qt5agg')
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
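
Note: hard-coding matplotlib.use('qt5agg') fails on machines without the Qt bindings, which is presumably why it is commented out here. A minimal fallback sketch, assuming PyQt5 is the binding you would want when it is available:

import matplotlib

# Prefer an interactive Qt window when PyQt5 is installed; otherwise fall
# back to the file-only 'agg' backend so headless runs still work.
try:
    import PyQt5  # noqa: F401  (imported only to check that Qt is available)
    matplotlib.use('qt5agg')
except ImportError:
    matplotlib.use('agg')
import matplotlib.pyplot as plt
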
2 changes: 1 addition & 1 deletion examples/test_CRAR/run_catcher.py
@@ -10,7 +10,7 @@

from deer.default_parser import process_args
from deer.agent import NeuralAgent
from deer.learning_algos.CRAR_modif_keras import CRAR
from deer.learning_algos.CRAR_keras import CRAR
from catcher_env import MyEnv as catcher_env
import deer.experiment.base_controllers as bc

2 changes: 1 addition & 1 deletion examples/test_CRAR/run_simple_maze.py
@@ -48,7 +48,7 @@ class Defaults:
EPSILON_MIN = 1.0
EPSILON_DECAY = 10000
UPDATE_FREQUENCY = 1
REPLAY_MEMORY_SIZE = 1000000
REPLAY_MEMORY_SIZE = 1000000 #replacing with 200000 works just fine (in case you don't have 18 GB of memory)
BATCH_SIZE = 32
FREEZE_INTERVAL = 1000
DETERMINISTIC = False
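
Note: a back-of-the-envelope estimate of why the replay memory size matters. The (48, 48) float32 observation shape is an assumption for illustration; the real footprint depends on the environment and on everything else the DataSet stores per transition.

import numpy as np

def replay_memory_bytes(size, obs_shape=(48, 48), dtype=np.float32):
    """Approximate bytes needed to store `size` observations of shape `obs_shape`."""
    return size * int(np.prod(obs_shape)) * np.dtype(dtype).itemsize

print(replay_memory_bytes(1000000) / 1e9)  # ~9.2 GB for the observations alone
print(replay_memory_bytes(200000) / 1e9)   # ~1.8 GB
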
9 changes: 4 additions & 5 deletions examples/test_CRAR/simple_maze_env.py
@@ -2,13 +2,12 @@
"""
import numpy as np
import cv2

from deer.base_classes import Environment

import matplotlib
#matplotlib.use('agg')
matplotlib.use('qt5agg')
# matplotlib.use('qt5agg')
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
@@ -95,10 +94,10 @@ def act(self, action):
self._pos_agent[1]=self._pos_agent[1]+1

# There is no reward in this simple environment
self.reward = 0
reward = 0

self._mode_score += self.reward
return self.reward
self._mode_score += reward
return reward

def summarizePerformance(self, test_data_set, learning_algo, *args, **kwargs):
""" Plot of the low-dimensional representation of the environment built by the model
