Some simple bug fixes (removing cv2 where unnecessary, commenting qt) (#87)

* Added plots for validation and test scores

* removed cv2 where unnecessary, commented out the qt back-end, fixed a typo in an import, and added some comments for things that worked for me

* removed the self. prefix from local variables
geoffreyvd committed Mar 3, 2021
1 parent eb18de7 commit bdb383c
Showing 9 changed files with 36 additions and 14 deletions.
1 change: 1 addition & 0 deletions deer/agent.py
@@ -68,6 +68,7 @@ def __init__(self, environment, learning_algo, replay_memory_size=1000000, repla
self._dataset = DataSet(environment, max_size=replay_memory_size, random_state=random_state, use_priority=self._exp_priority, only_full_history=self._only_full_history)
self._tmp_dataset = None # Will be created by startTesting() when necessary
self._mode = -1
self._totalModeNbrEpisode = 0
self._total_mode_reward = 0
self._training_loss_averages = []
self._Vs_on_last_episode = []
19 changes: 19 additions & 0 deletions deer/experiment/base_controllers.py
@@ -11,6 +11,7 @@
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt

class Controller(object):
"""A base controller that does nothing when receiving the various signals emitted by an agent. This class should
@@ -547,6 +548,24 @@ def onEpochEnd(self, agent):
self._testScores.append(score)
else:
self._trainingEpochCount += 1

#plot the validation/test score over epochs (saved to PDF; uncomment plt.show() for live plotting)
if mode == self._validationID:
plt.plot(range(1, len(self._validationScores)+1), self._validationScores, label="VS", color='b')
plt.legend()
plt.xlabel("Number of epochs")
plt.ylabel("Score")
plt.savefig("validation_scores.pdf")
plt.close()
# plt.show()
elif mode == self._testID:
plt.plot(range(1, len(self._testScores)+1), self._testScores, label="TS", color='b')
plt.legend()
plt.xlabel("Number of epochs")
plt.ylabel("Score")
plt.savefig("test_scores.pdf")
plt.close()
# plt.show()

def onEnd(self, agent):
if (self._active == False):
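
Note: the validation and test branches above repeat the same five matplotlib calls. Below is a minimal sketch of how the pattern could be factored into one helper; the _plotScores name and the headless 'Agg' backend are assumptions for illustration, not part of this commit.

import matplotlib
matplotlib.use("Agg")  # file-only backend: writes the PDF without opening a window
import matplotlib.pyplot as plt

def _plotScores(scores, label, filename):
    """Plot one score per epoch and save the curve to a PDF file."""
    plt.plot(range(1, len(scores) + 1), scores, label=label, color='b')
    plt.legend()
    plt.xlabel("Number of epochs")
    plt.ylabel("Score")
    plt.savefig(filename)
    plt.close()

# Hypothetical use inside onEpochEnd:
# if mode == self._validationID:
#     _plotScores(self._validationScores, "VS", "validation_scores.pdf")
# elif mode == self._testID:
#     _plotScores(self._testScores, "TS", "test_scores.pdf")
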
4 changes: 4 additions & 0 deletions deer/learning_algos/CRAR_keras.py
@@ -12,6 +12,10 @@
#config = tf.ConfigProto()
#config.gpu_options.allow_growth=True
#sess = tf.Session(config=config)

#this did work for me:
#import tensorflow as tf
#tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)
import copy

def mean_squared_error_p(y_true, y_pred):
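
Note: a slightly more defensive version of the memory-growth setup suggested in the new comment, assuming TensorFlow 2.x; the loop over devices and the no-GPU guard are additions for illustration, not taken from the commit.

import tensorflow as tf

# Allocate GPU memory on demand instead of reserving it all up front.
# This must run before any operation initializes the GPU.
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the GPU has already been initialized at this point.
        print("Could not enable memory growth on", gpu.name, ":", err)
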
1 change: 1 addition & 0 deletions examples/ALE/ALE_env_gym.py
@@ -37,6 +37,7 @@ def __init__(self, rng, **kwargs):

self._screen=np.average(self.env.render(mode='rgb_array'),axis=-1)
self._reduced_screen = cv2.resize(self._screen, (84, 84), interpolation=cv2.INTER_LINEAR)
#decide whether to keep this in the repo; if so, add cv2 as a dependency
#plt.imshow(self._reduced_screen, cmap='gray')
#plt.show()

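
Note: the new comment questions keeping cv2 as a dependency just for this resize. Below is one possible cv2-free sketch of the same grayscale-and-resize step using Pillow; the to_84x84 helper is hypothetical and not part of this commit.

import numpy as np
from PIL import Image  # Pillow, as a lighter alternative to cv2

def to_84x84(rgb_frame):
    """Average the RGB channels and resize to 84x84 with bilinear filtering,
    mirroring cv2.resize(..., interpolation=cv2.INTER_LINEAR)."""
    gray = np.average(rgb_frame, axis=-1).astype(np.float32)
    resized = Image.fromarray(gray).resize((84, 84), Image.BILINEAR)
    return np.asarray(resized)
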
9 changes: 4 additions & 5 deletions examples/maze/maze_env.py
@@ -3,7 +3,6 @@
Author: Vincent Francois-Lavet
"""
import numpy as np
import cv2

from deer.base_classes import Environment

@@ -106,7 +105,7 @@ def act(self, action):
self._episode_steps += 1
action = self._actions[action]

self.reward = -0.1
reward = -0.1

if(action==0):
if([self._pos_agent[0]+1,self._pos_agent[1]] not in self._pos_walls):
@@ -122,11 +121,11 @@ def act(self, action):
self._pos_agent[1]=self._pos_agent[1]-1

if (self._pos_agent in self._pos_rewards):
self.reward = 1
reward = 1
self._pos_rewards.remove(self._pos_agent)

self._mode_score += self.reward
return self.reward
self._mode_score += reward
return reward


def summarizePerformance(self, test_data_set, learning_algo, *args, **kwargs):
3 changes: 1 addition & 2 deletions examples/test_CRAR/catcher_env.py
@@ -2,12 +2,11 @@
"""
import numpy as np
import cv2

from deer.base_classes import Environment

import matplotlib
matplotlib.use('qt5agg')
# matplotlib.use('qt5agg')
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
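
Note: hard-coding matplotlib.use('qt5agg') fails on machines without the Qt bindings, which is presumably why it is commented out here. A minimal fallback sketch, assuming PyQt5 is the binding you would want when it is available:

import matplotlib

# Prefer an interactive Qt window when PyQt5 is installed; otherwise fall
# back to the file-only 'agg' backend so headless runs still work.
try:
    import PyQt5  # noqa: F401  (imported only to check that Qt is available)
    matplotlib.use('qt5agg')
except ImportError:
    matplotlib.use('agg')
import matplotlib.pyplot as plt
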
2 changes: 1 addition & 1 deletion examples/test_CRAR/run_catcher.py
@@ -10,7 +10,7 @@

from deer.default_parser import process_args
from deer.agent import NeuralAgent
from deer.learning_algos.CRAR_modif_keras import CRAR
from deer.learning_algos.CRAR_keras import CRAR
from catcher_env import MyEnv as catcher_env
import deer.experiment.base_controllers as bc

2 changes: 1 addition & 1 deletion examples/test_CRAR/run_simple_maze.py
@@ -48,7 +48,7 @@ class Defaults:
EPSILON_MIN = 1.0
EPSILON_DECAY = 10000
UPDATE_FREQUENCY = 1
REPLAY_MEMORY_SIZE = 1000000
REPLAY_MEMORY_SIZE = 1000000 #replacing with 200000 works just fine (in case you don't have 18 GB of memory)
BATCH_SIZE = 32
FREEZE_INTERVAL = 1000
DETERMINISTIC = False
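
Note: a back-of-the-envelope estimate of why the replay memory size matters. The (48, 48) float32 observation shape is an assumption for illustration; the real footprint depends on the environment and on everything else the DataSet stores per transition.

import numpy as np

def replay_memory_bytes(size, obs_shape=(48, 48), dtype=np.float32):
    """Approximate bytes needed to store `size` observations of shape `obs_shape`."""
    return size * int(np.prod(obs_shape)) * np.dtype(dtype).itemsize

print(replay_memory_bytes(1000000) / 1e9)  # ~9.2 GB for the observations alone
print(replay_memory_bytes(200000) / 1e9)   # ~1.8 GB
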
9 changes: 4 additions & 5 deletions examples/test_CRAR/simple_maze_env.py
@@ -2,13 +2,12 @@
"""
import numpy as np
import cv2

from deer.base_classes import Environment

import matplotlib
#matplotlib.use('agg')
matplotlib.use('qt5agg')
# matplotlib.use('qt5agg')
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
@@ -95,10 +94,10 @@ def act(self, action):
self._pos_agent[1]=self._pos_agent[1]+1

# There is no reward in this simple environment
self.reward = 0
reward = 0

self._mode_score += self.reward
return self.reward
self._mode_score += reward
return reward

def summarizePerformance(self, test_data_set, learning_algo, *args, **kwargs):
""" Plot of the low-dimensional representation of the environment built by the model
