-
Notifications
You must be signed in to change notification settings - Fork 1
/
envs.py
69 lines (58 loc) · 1.89 KB
/
envs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
import matplotlib.pyplot as plt
"""
init()
reset()
render()
step(action) -> obs, reward, done, garbage
"""
class ObservationSpace:
    """Minimal stand-in for an observation-space descriptor.

    Only records the shape it is given so that callers can read it back
    through the ``shape`` attribute.
    """

    def __init__(self, shape):
        # Stored as given; no validation or conversion is performed.
        self.shape = shape
class ActionSpace:
    """Minimal stand-in for a discrete action-space descriptor.

    Only records the value it is given so that callers can read it back
    through the ``n`` attribute.
    """

    def __init__(self, n):
        # Stored as given; no validation or conversion is performed.
        self.n = n
class GradEnv:
    """Gym-style grid environment over a diagonal brightness gradient.

    The world is a ``size`` x ``size`` float array whose cell at
    ``(row, col)`` holds ``(row + col) * 2`` limited to the range [1, 255].
    The player starts at the centre cell and moves one cell per step. The
    reward for a step is the brightness of the cell the player ends on,
    divided by 255 and multiplied by 10. An episode is done once
    ``max_steps`` steps have been taken.

    Actions:
        0: row + 1
        1: row - 1
        2: column + 1
        3: column - 1

    Args:
        size: Side length of the square grid (default 50).
        max_steps: Number of steps after which ``get_done`` reports True
            (default 100).

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """

    def __init__(self, size=50, max_steps=100):
        if size < 1:
            raise ValueError(f'size must be at least 1, got {size}')
        self.size = size
        self.max_steps = max_steps
        self.reset()

    def reset(self):
        """Start a new episode and return the initial observation.

        Rebuilds the gradient, puts the player on the centre cell, resets the
        step counter and (re)creates ``action_space`` / ``observation_space``.

        Returns:
            The observation array of shape ``(size, size, 1)``.
        """
        self.time = 0
        # np.clip takes (values, min, max): the gradient is the value being
        # limited, 1 and 255 are the bounds.
        self.array = np.fromfunction(
            lambda r, c: np.clip((r + c) * 2, 1, 255),
            (self.size, self.size),
        )
        centre = self.size // 2
        self.player = np.array([centre, centre])
        self.action_space = ActionSpace(4)
        observation = self.get_array(reshape=True)
        self.observation_space = ObservationSpace(observation.shape)
        return observation

    def render(self):
        """Briefly show the current observation as a grayscale image."""
        # @source https://stackoverflow.com/questions/3823752/display-image-as-grayscale-using-matplotlib
        plt.imshow(self.get_array(reshape=False), cmap='gray', vmin=0, vmax=255)
        plt.colorbar()
        # Interactive mode keeps show() from blocking; the figure is displayed
        # for the pause duration and then closed.
        plt.ion()
        plt.show()
        plt.pause(0.1)
        plt.close()

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Args:
            action: One of 0, 1, 2, 3 (see the class docstring).

        Returns:
            A tuple ``(observation, reward, done, None)``.

        Raises:
            ValueError: If ``action`` is not one of the four valid actions.
                The environment state is left untouched in that case.
        """
        # Resolve the move first so that an invalid action cannot consume a
        # time step or move the player.
        if action == 0:
            axis, delta = 0, 1
        elif action == 1:
            axis, delta = 0, -1
        elif action == 2:
            axis, delta = 1, 1
        elif action == 3:
            axis, delta = 1, -1
        else:
            raise ValueError(f'{action} is not valid')

        self.time += 1
        self.player[axis] += delta
        self.clip_player()
        return self.get_array(reshape=True), self.get_reward(), self.get_done(), None

    def get_array(self, reshape):
        """Return a copy of the grid with the player's cell set to 255.

        Args:
            reshape: If true, append a trailing axis of length 1 to the
                returned array; otherwise return it with the grid's shape.
        """
        array = np.copy(self.array)
        array[tuple(self.player)] = 255
        if reshape:
            # Derive the target shape from the grid instead of hard-coding it.
            array = array.reshape(array.shape + (1,))
        return array

    def get_reward(self):
        """Return the brightness under the player, divided by 255, times 10."""
        # Read from the raw gradient, not the observation, because the
        # observation overwrites the player's cell with 255.
        return self.array[tuple(self.player)] / 255.0 * 10.0

    def get_done(self):
        """Return True once ``max_steps`` steps have been taken."""
        return self.time >= self.max_steps

    def clip_player(self):
        """Clamp the player's position to the grid bounds."""
        # Bound each coordinate by its own axis length.
        upper = np.array(self.array.shape) - 1
        self.player = np.clip(self.player, 0, upper)