Made compatible with python 2.7, added option for ADAM optimizer, added enhanced progress plotting via matplotlib
kinke committed Jun 1, 2017
1 parent 8f76dde commit eb2a1e5
Showing 2 changed files with 85 additions and 19 deletions.
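A note on the Python 2.7 compatibility part of this change: math.log2 exists only in Python 3, so the diff below rewrites those calls with the two-argument math.log. A minimal sketch of the equivalent call, using a hypothetical log2_compat helper that is not part of the commit:

import math

def log2_compat(x):
    # math.log2(x) is Python 3 only; math.log(x, 2) also works on Python 2.7.
    return math.log(x, 2)

print(log2_compat(8))  # prints 3.0 on both Python 2.7 and 3.x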
3 changes: 3 additions & 0 deletions main.py
@@ -18,6 +18,8 @@
help='learning rate decay')
parser.add_argument('--sigma', type=float, default=0.05, metavar='SD',
help='noise standard deviation')
parser.add_argument('--useAdam', action='store_true',
help='whether to use the Adam optimizer')
parser.add_argument('--wd', type=float, default=0.996, metavar='WD',
help='amount of weight decay')
parser.add_argument('--n', type=int, default=40, metavar='N',
@@ -38,6 +40,7 @@
help='Just render the env, no training')



if __name__ == '__main__':
args = parser.parse_args()
assert args.n % 2 == 0
101 changes: 82 additions & 19 deletions train.py
@@ -5,13 +5,17 @@
import numpy as np

import torch
import torch.legacy.optim as legacyOptim

import torch.nn.functional as F
import torch.multiprocessing as mp
from torch.autograd import Variable

from envs import create_atari_env
from model import ES

import matplotlib.pyplot as plt


def do_rollouts(args, models, random_seeds, return_queue, env, are_negative):
"""
@@ -73,6 +77,11 @@ def perturb_model(args, model, random_seed, env):
anti_v += torch.from_numpy(args.sigma*-eps).float()
return [new_model, anti_model]

optimConfig = []
averageReward = []
maxReward = []
minReward = []
episodeCounter = []

def gradient_update(args, synced_model, returns, random_seeds, neg_list,
num_eps, num_frames, chkpt_dir, unperturbed_results):
@@ -84,12 +93,12 @@ def fitness_shaping(returns):
sorted_returns_backwards = sorted(returns)[::-1]
lamb = len(returns)
shaped_returns = []
denom = sum([max(0, math.log2(lamb/2 + 1) -
math.log2(sorted_returns_backwards.index(r) + 1))
denom = sum([max(0, math.log(lamb/2 + 1, 2) -
math.log(sorted_returns_backwards.index(r) + 1, 2))
for r in returns])
for r in returns:
num = max(0, math.log2(lamb/2 + 1) -
math.log2(sorted_returns_backwards.index(r) + 1))
num = max(0, math.log(lamb/2 + 1, 2) -
math.log(sorted_returns_backwards.index(r) + 1, 2))
shaped_returns.append(num/denom + 1/lamb)
return shaped_returns

@@ -120,26 +129,80 @@ def unperturbed_rank(returns, unperturbed_results):
'Learning rate: %f\n'
'Total num frames seen: %d\n'
'Unperturbed reward: %f\n'
'Unperturbed rank: %s\n\n' %
'Unperturbed rank: %s\n'
'Using Adam: %r\n\n' %
(num_eps, np.mean(returns), np.var(returns), max(returns),
min(returns), batch_size,
args.max_episode_length, args.sigma, args.lr, num_frames,
unperturbed_results, rank_diag))
unperturbed_results, rank_diag, args.useAdam))

averageReward.append(np.mean(returns))
episodeCounter.append(num_eps)
maxReward.append(max(returns))
minReward.append(min(returns))

pltAvg, = plt.plot(episodeCounter, averageReward, label='average')
pltMax, = plt.plot(episodeCounter, maxReward, label='max')
pltMin, = plt.plot(episodeCounter, minReward, label='min')

plt.ylabel('rewards')
plt.xlabel('episode num')
plt.legend(handles=[pltAvg, pltMax, pltMin])

fig1 = plt.gcf()

plt.draw()
fig1.savefig('graph.png', dpi=100)

# For each model, generate the same random numbers as we did
# before, and update parameters. We apply weight decay once.
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
v += torch.from_numpy(args.lr/(args.n*args.sigma) *
(reward*multiplier*eps)).float()
for k, v in synced_model.es_params():
v *= args.wd
args.lr *= args.lr_decay
torch.save(synced_model.state_dict(),
os.path.join(chkpt_dir, 'latest.pth'))
if args.useAdam:
globalGrads = None
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]

localGrads = []
idx = 0
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
grad = torch.from_numpy((args.n*args.sigma) * (reward*multiplier*eps)).float()

localGrads.append(grad)

if len(optimConfig) == idx:
optimConfig.append({ 'learningRate' : args.lr })
idx = idx + 1

if globalGrads is None:
globalGrads = localGrads
else:
for i in range(len(globalGrads)):
globalGrads[i] = torch.add(globalGrads[i], localGrads[i])

idx = 0
for k, v in synced_model.es_params():
r, _ = legacyOptim.adam(lambda x: (1, -globalGrads[idx]), v, optimConfig[idx])
v.copy_(r)
idx = idx + 1
else:
# For each model, generate the same random numbers as we did
# before, and update parameters. We apply weight decay once.
for i in range(args.n):
np.random.seed(random_seeds[i])
multiplier = -1 if neg_list[i] else 1
reward = shaped_returns[i]
for k, v in synced_model.es_params():
eps = np.random.normal(0, 1, v.size())
v += torch.from_numpy(args.lr/(args.n*args.sigma) *
(reward*multiplier*eps)).float()
for k, v in synced_model.es_params():
v *= args.wd
args.lr *= args.lr_decay

#torch.save(synced_model.state_dict(),
# os.path.join(chkpt_dir, 'latest.pth'))
return synced_model
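For readers who want the Adam branch in isolation, below is a minimal, self-contained sketch of an Adam-style ES update in the spirit of the new --useAdam path. It is an illustration under stated assumptions, not the committed code: it uses torch.optim.Adam instead of the torch.legacy.optim.adam call above, and the toy nn.Linear policy, seed handling, and shaped_returns values are placeholders.

import numpy as np
import torch
import torch.nn as nn

# Toy stand-ins (not from the commit): a small policy and ES hyperparameters.
model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
n, sigma = 40, 0.05  # mirrors the --n and --sigma defaults

# Mirrored (antithetic) pairs share a seed; the returns here are random placeholders.
base_seeds = [np.random.randint(2 ** 30) for _ in range(n // 2)]
seeds = [s for s in base_seeds for _ in range(2)]
neg_list = [i % 2 == 1 for i in range(n)]
shaped_returns = np.random.rand(n)

optimizer.zero_grad()
for seed, is_neg, reward in zip(seeds, neg_list, shaped_returns):
    np.random.seed(seed)  # regenerate the same noise used for this rollout
    multiplier = -1.0 if is_neg else 1.0
    for p in model.parameters():
        eps = np.random.normal(0, 1, p.size())
        # ES ascent direction for this perturbation; negate because Adam minimizes.
        contrib = -(reward * multiplier / (n * sigma)) * eps
        g = torch.from_numpy(contrib).float()
        p.grad = g if p.grad is None else p.grad + g
optimizer.step()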


