Remove reward on first timestep
cswinter committed Oct 19, 2019
1 parent b83a1ff commit 25909ee
Showing 2 changed files with 30 additions and 23 deletions.
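
The substantive change is in how per-environment rewards are derived from the game score: the running score now starts as None instead of 0, so the first observation after a reset only establishes a baseline and pays out no reward on that timestep. A minimal, self-contained sketch of this pattern (illustrative only, not code from the repository):

    # Sketch of the score-delta reward bookkeeping this commit switches to.
    # prev_score = None marks "no baseline yet"; the first step only sets the
    # baseline and therefore yields a reward of exactly zero.
    class ScoreDeltaReward:
        def __init__(self):
            self.prev_score = None

        def step(self, score: float) -> float:
            if self.prev_score is None:
                self.prev_score = score  # first timestep after reset
            reward = score - self.prev_score
            self.prev_score = score
            return reward

With the previous initialization to 0, an episode whose score starts at a nonzero value would hand the agent that entire value as a spurious reward on its first step; initializing to None folds it into the baseline instead.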
17 changes: 12 additions & 5 deletions gym_codecraft/envs/codecraft_vec_env.py
@@ -82,7 +82,7 @@ def __init__(self, num_envs, num_self_play, objective, action_delay, stagger=Tru
         self.stagger = stagger
         self.fair = fair
         self.game_length = 3 * 60 * 60
-        self.custom_map = lambda: None
+        self.custom_map = lambda _: None
         self.last_map = None
         self.randomize = randomize
         if objective == Objective.ARENA_TINY:
@@ -141,12 +141,12 @@ def reset(self):
             self.games.append((game_id, 0))
             self.eplen.append(1)
             self.eprew.append(0)
-            self.score.append(0)
+            self.score.append(None)
             if self_play:
                 self.games.append((game_id, 1))
                 self.eplen.append(1)
                 self.eprew.append(0)
-                self.score.append(0)
+                self.score.append(None)
         return self.observe()[0]

     def step_async(self, actions):
@@ -212,6 +212,8 @@ def observe(self):
             else:
                 raise Exception(f"Unknown objective {self.objective}")

+            if self.score[i] is None:
+                self.score[i] = score
             reward = score - self.score[i]
             self.score[i] = score
             self.eprew[i] += reward
@@ -232,10 +234,15 @@
                 # obs[stride * i:stride * (i + 1)] = codecraft.observation_to_np(observation)

                 dones.append(1.0)
-                infos.append({'episode': {'r': self.eprew[i], 'l': self.eplen[i], 'index': i}})
+                infos.append({'episode': {
+                    'r': self.eprew[i],
+                    'l': self.eplen[i],
+                    'index': i,
+                    'score': self.score[i],
+                }})
                 self.eplen[i] = 1
                 self.eprew[i] = 0
-                self.score[i] = 0
+                self.score[i] = None
             else:
                 self.eplen[i] += 1
                 dones.append(0.0)
36 changes: 18 additions & 18 deletions main.py
@@ -264,9 +264,9 @@ def eval(policy, hps, device, total_steps):
         stagger=False,
         fair=True)

-    returns = []
-    returnsr = []
-    returns1m = []
+    scores = []
+    scores_r = []
+    scores_1m = []
     lengths = []
     obs = env.reset()
     evens = list([2 * i for i in range(hps.eval_envs // 2)])
@@ -294,36 +294,36 @@ def eval(policy, hps, device, total_steps):
         for info in infos:
             index = info['episode']['index']
             if index in policy_envs:
-                ret = info['episode']['r']
+                score = info['episode']['score']
                 length = info['episode']['l']
-                returns.append(ret)
+                scores.append(score)
                 lengths.append(length)
                 if index % 2 == 0:
                     if index + 1 in opp_random_envs:
-                        returnsr.append(ret)
+                        scores_r.append(score)
                     else:
-                        returns1m.append(ret)
+                        scores_1m.append(score)
                 else:
                     if index - 1 in opp_random_envs:
-                        returnsr.append(ret)
+                        scores_r.append(score)
                     else:
-                        returns1m.append(ret)
+                        scores_1m.append(score)

     env.close()

-    returns = np.array(returns)
-    returnsr = np.array(returnsr)
-    returns1m = np.array(returns1m)
+    scores = np.array(scores)
+    scores_r = np.array(scores_r)
+    scores_1m = np.array(scores_1m)

     wandb.log({
-        'eval_mean_ret': returns.mean(),
-        'eval_max_ret': returns.max(),
-        'eval_min_ret': returns.min(),
-        'eval_mean_ret_vs_random': returnsr.mean(),
-        'eval_mean_ret_vs_1M': returns1m.mean()
+        'eval_mean_score': scores.mean(),
+        'eval_max_score': scores.max(),
+        'eval_min_score': scores.min(),
+        'eval_mean_score_vs_random': scores_r.mean(),
+        'eval_mean_score_vs_1M': scores_1m.mean()
     }, step=total_steps)

-    print(f'Eval: {returns.mean()}')
+    print(f'Eval: {scores.mean()}')


 def load_policy(name):
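For context, a hedged sketch of how the new 'score' field attached to each finished episode's info dict might be aggregated into the evaluation statistics logged above; summarize_scores and episode_infos are illustrative names, not identifiers from main.py:

    import numpy as np

    # Illustrative only: collect the per-episode 'score' values that
    # observe() now reports and reduce them to the logged statistics.
    def summarize_scores(episode_infos):
        scores = np.array([info['episode']['score'] for info in episode_infos])
        return {
            'eval_mean_score': scores.mean(),
            'eval_max_score': scores.max(),
            'eval_min_score': scores.min(),
        }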
