-
Notifications
You must be signed in to change notification settings - Fork 0
/
env.py
55 lines (49 loc) · 2.01 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from unityagents import UnityEnvironment
import numpy as np
class CollectBanana():
    """Gym-style wrapper around a visual Unity environment with frame stacking.

    Each call to :meth:`reset` or :meth:`step` reads the first visual
    observation of the default brain, converts it from NHWC to channel-first
    layout and stacks it with the most recent previous frames of the *current*
    episode.  The resulting ``state`` has layout NCDHW with shape
    ``(1, channels, nframes, height, width)``; depth index 0 is the newest
    frame and slots for which no frame exists yet are zero-filled.

    Only the first agent is used (rewards/dones are read at index 0).
    NOTE(review): the original comment states observations are (1, 84, 84, 3);
    the code itself does not depend on that exact size.
    """

    def __init__(self, file_name, nframes=4):
        """Launch the Unity environment and produce the initial state.

        Args:
            file_name: Path of the Unity environment binary, forwarded to
                ``UnityEnvironment``.
            nframes: Number of consecutive frames stacked into one state
                (defaults to 4, the previously hard-coded depth).

        Raises:
            ValueError: If ``nframes`` is smaller than 1.
        """
        # Validate before launching Unity so a bad argument does not leave an
        # environment process behind.
        if nframes < 1:
            raise ValueError("nframes must be at least 1")
        self.nframes = nframes
        self.base = UnityEnvironment(file_name)
        # get the default brain
        self.brain_name = self.base.brain_names[0]
        self.brain = self.base.brains[self.brain_name]
        self.action_size = self.brain.vector_action_space_size
        # reset() records train_mode, clears the history and builds self.state
        self.reset(True)
        self.state_size = self.state.shape

    def get_state(self):
        """Rebuild ``self.state`` from the latest ``self.env_info``.

        The newest frame is pushed to the front of the per-episode history,
        which is capped at ``nframes`` entries.

        Returns:
            The stacked state array (also stored in ``self.state``).
        """
        # Rearrange from NHWC to NCHW and drop the single-agent batch axis.
        frame = np.transpose(
            self.env_info.visual_observations[0], (0, 3, 1, 2)
        )[0]
        # Newest first; older frames beyond the stack depth are discarded.
        self._frames = [frame] + self._frames[:self.nframes - 1]

        channels, height, width = frame.shape
        # NCDHW; slots without a frame yet stay zero.
        self.state = np.zeros((1, channels, self.nframes, height, width))
        for depth, past_frame in enumerate(self._frames):
            self.state[0, :, depth, :, :] = past_frame
        return self.state

    def reset(self, train_mode):
        """Start a new episode and return its first stacked state.

        The frame history is cleared so that frames from the previous episode
        never leak into the states of the new one.

        Args:
            train_mode: Forwarded to ``UnityEnvironment.reset`` and remembered
                in ``self.train_mode``.
        """
        self.train_mode = train_mode
        self._frames = []
        self.env_info = self.base.reset(train_mode=train_mode)[self.brain_name]
        return self.get_state()

    def render(self):
        """No-op; present only for Gym-style API compatibility."""
        pass

    def step(self, action):
        """Send ``action`` to the environment and advance one step.

        Returns:
            A ``(state, reward, done, info)`` tuple; ``info`` is always
            ``None``.
        """
        # send the action to the environment
        self.env_info = self.base.step(action)[self.brain_name]
        state = self.get_state()
        reward = self.env_info.rewards[0]  # reward of the first agent
        done = self.env_info.local_done[0]  # whether the episode has finished
        return state, reward, done, None  # info is none

    def close(self):
        """Shut down the underlying Unity environment."""
        self.base.close()