
Commit

Merge pull request #6 from lolz0r/master
Made compatible with python 2.7, added option for ADAM optimizer, add…
atgambardella committed Jun 1, 2017
2 parents 8f76dde + 55e28dd commit dda74f4
Showing 2 changed files with 85 additions and 19 deletions.
main.py — 3 changes: 3 additions & 0 deletions
@@ -18,6 +18,8 @@
help='learning rate decay')
parser.add_argument('--sigma', type=float, default=0.05, metavar='SD',
help='noise standard deviation')
parser.add_argument('--useAdam', action='store_true',
help='bool to determine if to use adam optimizer')
parser.add_argument('--wd', type=float, default=0.996, metavar='WD',
help='amount of weight decay')
parser.add_argument('--n', type=int, default=40, metavar='N',
@@ -38,6 +40,7 @@
help='Just render the env, no training')



if __name__ == '__main__':
args = parser.parse_args()
assert args.n % 2 == 0
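For reference, a minimal sketch (not part of the commit) of how an action='store_true' flag such as the new --useAdam option behaves when parsed; the parser definition mirrors the diff above:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--useAdam', action='store_true',
                    help='bool to determine if to use adam optimizer')

print(parser.parse_args([]).useAdam)             # False when the flag is omitted
print(parser.parse_args(['--useAdam']).useAdam)  # True when the flag is given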
train.py — 101 changes: 82 additions & 19 deletions
@@ -5,13 +5,17 @@
import numpy as np

import torch
import torch.legacy.optim as legacyOptim

import torch.nn.functional as F
import torch.multiprocessing as mp
from torch.autograd import Variable

from envs import create_atari_env
from model import ES

import matplotlib.pyplot as plt


def do_rollouts(args, models, random_seeds, return_queue, env, are_negative):
"""
@@ -73,6 +77,11 @@ def perturb_model(args, model, random_seed, env):
anti_v += torch.from_numpy(args.sigma*-eps).float()
return [new_model, anti_model]

optimConfig = []
averageReward = []
maxReward = []
minReward = []
episodeCounter = []

def gradient_update(args, synced_model, returns, random_seeds, neg_list,
num_eps, num_frames, chkpt_dir, unperturbed_results):
@@ -84,12 +93,12 @@ def fitness_shaping(returns):
sorted_returns_backwards = sorted(returns)[::-1]
lamb = len(returns)
shaped_returns = []
denom = sum([max(0, math.log2(lamb/2 + 1) -
math.log2(sorted_returns_backwards.index(r) + 1))
denom = sum([max(0, math.log(lamb/2 + 1, 2) -
math.log(sorted_returns_backwards.index(r) + 1), 2)
for r in returns])
for r in returns:
num = max(0, math.log2(lamb/2 + 1) -
math.log2(sorted_returns_backwards.index(r) + 1))
num = max(0, math.log(lamb/2 + 1, 2) -
math.log(sorted_returns_backwards.index(r) + 1), 2)

Review comment from lolz0r (Contributor), Jun 1, 2017, on the line above:

    the (math.log(..., 2)) is being written as (math.log(...), 2) ... it is a wonder this worked at all! I will have to fix this when I get home!
shaped_returns.append(num/denom + 1/lamb)
return shaped_returns
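As the review comment above points out, the intended Python 2.7 compatible form is the two-argument math.log(x, 2) (math.log2 does not exist in Python 2.7), but the closing parenthesis lands before the base, so the trailing 2 becomes an extra argument to max(). A small sketch, not from the commit, showing how the two forms differ for one shaped-return term:

import math

# One fitness-shaping numerator for a population of lamb = 6 perturbations,
# evaluated for the perturbation ranked 5th best (zero-based index 4).
lamb, rank_index = 6, 4

intended   = max(0, math.log(lamb/2 + 1, 2) - math.log(rank_index + 1, 2))
as_written = max(0, math.log(lamb/2 + 1, 2) - math.log(rank_index + 1), 2)

print(intended)    # 0.0 -- low-ranked perturbations are meant to get zero weight
print(as_written)  # 2   -- the stray 2 sits inside max(), so the term never falls below 2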

@@ -120,26 +129,80 @@ def unperturbed_rank(returns, unperturbed_results):
'Learning rate: %f\n'
'Total num frames seen: %d\n'
'Unperturbed reward: %f\n'
'Unperturbed rank: %s\n\n' %
'Unperturbed rank: %s\n'
'Using Adam: %r\n\n' %
(num_eps, np.mean(returns), np.var(returns), max(returns),
min(returns), batch_size,
args.max_episode_length, args.sigma, args.lr, num_frames,
unperturbed_results, rank_diag))
unperturbed_results, rank_diag, args.useAdam))

averageReward.append(np.mean(returns))
episodeCounter.append(num_eps)
maxReward.append(max(returns))
minReward.append(min(returns))

pltAvg, = plt.plot(episodeCounter, averageReward, label='average')
pltMax, = plt.plot(episodeCounter, maxReward, label='max')
pltMin, = plt.plot(episodeCounter, minReward, label='min')

plt.ylabel('rewards')
plt.xlabel('episode num')
plt.legend(handles=[pltAvg, pltMax,pltMin])

fig1 = plt.gcf()

plt.draw()
fig1.savefig('graph.png', dpi=100)

# For each model, generate the same random numbers as we did
# before, and update parameters. We apply weight decay once.
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
v += torch.from_numpy(args.lr/(args.n*args.sigma) *
(reward*multiplier*eps)).float()
for k, v in synced_model.es_params():
v *= args.wd
args.lr *= args.lr_decay
torch.save(synced_model.state_dict(),
os.path.join(chkpt_dir, 'latest.pth'))
if args.useAdam:
globalGrads = None
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]

localGrads = []
idx = 0
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
grad = torch.from_numpy((args.n*args.sigma) * (reward*multiplier*eps)).float()

localGrads.append(grad)

if len(optimConfig) == idx:
optimConfig.append({ 'learningRate' : args.lr })
idx = idx + 1

if globalGrads == None:
globalGrads = localGrads
else:
for i in range(len(globalGrads)):
globalGrads[i] = torch.add(globalGrads[i], localGrads[i])

idx = 0
for k, v in synced_model.es_params():
r, _ = legacyOptim.adam( lambda x: (1, -globalGrads[idx]), v , optimConfig[idx])
v.copy_(r)
idx = idx + 1
else:
# For each model, generate the same random numbers as we did
# before, and update parameters. We apply weight decay once.
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
v += torch.from_numpy(args.lr/(args.n*args.sigma) *
(reward*multiplier*eps)).float()
for k, v in synced_model.es_params():
v *= args.wd
args.lr *= args.lr_decay

#torch.save(synced_model.state_dict(),
# os.path.join(chkpt_dir, 'latest.pth'))
return synced_model
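Written out, the non-Adam (else) branch above performs the plain evolution-strategies update described in its comment, followed by a single multiplicative weight-decay step. Per parameter tensor v, with shaped return s_i, antithetic sign m_i = +/-1, and the re-generated noise eps_i:

    v  <-  args.wd * ( v + args.lr / (args.n * args.sigma) * sum_i( s_i * m_i * eps_i ) )

args.lr is then scaled by args.lr_decay, so the step size shrinks every generation.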


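The Adam branch feeds the negated, summed ES gradient into torch.legacy.optim.adam. The torch.legacy namespace is no longer available in recent PyTorch releases; below is a rough sketch of an equivalent step with the current torch.optim API, where params and global_grads are hypothetical stand-ins for synced_model.es_params() and the accumulated globalGrads:

import torch

params = [torch.zeros(8, requires_grad=True)]   # stand-in for the model's parameter tensors
global_grads = [torch.randn(8)]                 # stand-in for the summed ES gradient estimates
optimizer = torch.optim.Adam(params, lr=1e-3)

optimizer.zero_grad()
for p, g in zip(params, global_grads):
    p.grad = -g   # negate so the minimizing optimizer ascends the estimated reward
optimizer.step()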
