-
Notifications
You must be signed in to change notification settings - Fork 1
/
blocksworld2-batch-boltzmann.py
executable file
·235 lines (193 loc) · 13.4 KB
/
blocksworld2-batch-boltzmann.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse, glob, os, random, shutil, subprocess, sys, thread, time
import pp
# Root directory; each experiment configuration gets a sub-directory here.
g_dir = 'experiment-bw2-boltzmann'
# Plotter scripts that are run over each configuration's '*.out' files.
g_plotters = ['./blocksworld2.py']
# Flipped to True when a finished experiment reports a nonzero error code.
g_error = False
# Command-line prefix shared by every experiment configuration.
g_base_command = "./blocks_world_2 --output experiment --discount-rate 0.9 --eligibility-trace-decay-rate 0.3 --learning-rate 0.03 --secondary-learning-rate 0.01 --policy on-policy --num-blocks-min 3 --num-blocks-max 5 --exploration boltzmann --inverse-temperature 50 --inverse-temperature-episodic-increment 5"


def _build_ep_tuples():
    """Return the ordered list of (name, extra command-line arguments) pairs.

    The table consists of two groups of 16 entries: one for the plain rule
    set and one (names suffixed with '-d') for the
    'distractors-but-not-defective' rule set.  Within a group the order is:
    'lfa', the three split-only entries, then three entries for each of the
    'none', 'bkls', 'bst' and 'c50' variants.
    """
    rule_sets = (
        ('', 'rules/blocks-world-2'),
        ('-d', 'rules/blocks-world-2-distractors-but-not-defective'),
    )
    criteria = ('value', 'catde', 'policy')
    resplit_variants = (
        ('none', '--resplit-bias none'),
        ('bkls', '--resplit-bias blacklist'),
        ('bst', '--resplit-bias boost'),
        ('c50', '--resplit-bias boost --concrete-update-count 50'),
    )

    table = []
    for suffix, rules_stem in rule_sets:
        rules_flag = ' --rules ' + rules_stem + '.carli'
        # The 'lfa' entries use the '-nonrrl' rule file and keep the leading
        # space in their argument string.
        table.append(('lfa' + suffix,
                      ' --rules ' + rules_stem + '-nonrrl.carli'))
        for criterion in criteria:
            table.append((
                criterion + '-none' + suffix,
                '--split-test ' + criterion
                + ' --unsplit-test none --split-update-count 20'
                + rules_flag))
        for tag, bias_flags in resplit_variants:
            for criterion in criteria:
                table.append((
                    criterion + '-' + criterion + '-' + tag + suffix,
                    '--split-test ' + criterion
                    + ' --unsplit-test ' + criterion
                    + ' --split-update-count 20 --unsplit-update-count 30 '
                    + bias_flags + rules_flag))
    return table


g_ep_tuples = _build_ep_tuples()
# Command-line interface: three optional integer settings.
parser = argparse.ArgumentParser(description='Run Blocks World 2 experiments.')
parser.add_argument(
    '-j', '--jobs',
    action='store', type=int, metavar='N',
    help='number of experiments to run in parallel')
parser.add_argument(
    '-r', '--runs',
    action='store', type=int, metavar='N', default=1,
    help='number of runs per experiment')
parser.add_argument(
    '-n', '--num-steps',
    action='store', type=int, metavar='N', default=50000,
    help='number of steps per run')
args = parser.parse_args()

# Without an explicit -j the job count becomes the string 'autodetect',
# which is what pp.Server is later constructed with.
args.jobs = 'autodetect' if args.jobs is None else args.jobs
# Make sure the output root exists before anything is written beneath it.
if not os.path.isdir(g_dir):
    os.mkdir(g_dir)

# Seeds are kept in '<g_dir>/seeds', one integer per line.  An existing file
# is reused; otherwise args.runs fresh seeds are drawn and written out.
seeds = []
seeds_file = g_dir + '/seeds'
if os.path.isfile(seeds_file):
    # 'with' closes the file even when a line fails to parse as an integer.
    with open(seeds_file, 'r') as f:
        # Blank lines (e.g. a stray trailing newline) are skipped instead of
        # crashing int().
        seeds = [int(line) for line in f if line.strip()]

if len(seeds) != args.runs:
    if seeds:
        # A non-empty seed file that disagrees with --runs is an error;
        # the stored seeds are never silently replaced.
        raise Exception('Number of seeds differs from number of runs.')
    seeds = [random.randint(0, 65535) for _ in range(args.runs)]
    with open(seeds_file, 'w') as f:
        f.writelines(str(seed) + '\n' for seed in seeds)

# Single-argument print(...) behaves the same as the Python 2 print statement.
print(str(seeds) + '\n')
class Experiment:
    """One invocation of an experiment command with a fixed seed and outputs.

    Attributes:
        num_steps: value passed as '--num-steps'.
        seed: value passed as '--seed'.
        stderr, stdout: paths passed as '--stderr' / '--stdout'.
        rules: path passed as '--rules-out'.
        experiment: the command line (program plus options) as one string;
            it is split on whitespace to form the start of the argv.
        vfm: optional path passed as '--value-function-map-filename';
            omitted from the command when falsy.
        errorcode: 0 until run() observes a nonzero exit status.
    """

    def __init__(self, num_steps, seed, stderr, stdout, rules, experiment, vfm):
        self.num_steps = num_steps
        self.seed = seed
        self.stderr = stderr
        self.stdout = stdout
        self.rules = rules
        self.experiment = experiment
        self.vfm = vfm
        self.errorcode = 0

    def get_args(self):
        """Return the full argv list for this run."""
        argv = self.experiment.split()
        argv += ['--num-steps', str(self.num_steps),
                 '--seed', str(self.seed),
                 '--stderr', self.stderr,
                 '--stdout', self.stdout,
                 '--rules-out', self.rules]
        if self.vfm:
            argv += ['--value-function-map-filename', self.vfm]
        return argv

    def print_args(self):
        """Return the argv joined with single spaces (for display only)."""
        return ' '.join(self.get_args())

    def run(self):
        """Execute the command and return self.

        A nonzero exit status is recorded in self.errorcode rather than
        raised, so the caller that receives the returned object can inspect
        it.  No module-level flag is touched here: the previous
        'g_error = True' only bound a local name and had no effect.
        """
        try:
            subprocess.check_call(self.get_args(),
                                  stdin=None, stdout=None, stderr=None)
        except subprocess.CalledProcessError as e:
            # check_call never captures output, so e.output is always None
            # and is left out of the message.
            print('Called Process Error: %s = %s' % (e.cmd, e.returncode))
            self.errorcode = e.returncode
        return self
def resolution(split):
    """Return 'SxS' where S = 2 ** ((split - 1) // 2).

    Floor division is spelled '//' so the result does not depend on the
    interpreter's '/' semantics for integers.
    """
    depth = (split - 1) // 2
    size = str(pow(2, depth))
    return size + 'x' + size
# Build one output directory per experiment configuration and one Experiment
# per (configuration, seed) pair.  g_dirs is index-aligned with g_ep_tuples.
g_dirs = []
g_experiments = []
for ep_tuple in g_ep_tuples:
    dir = g_dir + '/' + ep_tuple[0]
    if not os.path.isdir(dir):
        os.mkdir(dir)
    g_dirs.append(dir)

    command = g_base_command + ' ' + ep_tuple[1]
    for seed in seeds:
        # All per-run files share the stem '<dir>/blocksworld-<seed>'.
        stem = dir + '/blocksworld-' + str(seed)
        stderr = stem + '.err'
        stdout = stem + '.out'
        rules = stem + '.carli'
        # Computed but not passed on: the Experiment below is given None
        # for its value-function map.
        vfm = stem + '.vfm'
        experiment = Experiment(args.num_steps, seed, stderr, stdout, rules,
                                command, None)
        g_experiments.append(experiment)
        print(experiment.print_args())
class Progress:
    """Lock-protected tally of how many jobs of each group have finished.

    Attributes:
        lock: guards `finished` (and the g_error update in just_finished).
        count: group name -> number of jobs registered for that group.
        finished: group name -> number of jobs reported as finished.

    The group name of a job is its `.experiment` attribute.
    """

    def __init__(self, experiments):
        """Register every object in `experiments` under its `.experiment`."""
        # threading.Lock is the public spelling of thread.allocate_lock and
        # is importable under the same name on Python 2 and 3.
        import threading
        self.lock = threading.Lock()
        self.count = {}
        self.finished = {}
        for experiment in experiments:
            group = experiment.experiment
            self.count[group] = self.count.get(group, 0) + 1
            self.finished.setdefault(group, 0)

    def just_finished(self, experiment):
        """Record a finished experiment; a nonzero errorcode sets g_error.

        A failed experiment is not counted as finished.
        """
        global g_error
        # 'with' releases the lock even if the body raises, so a failure
        # here cannot leave every later caller blocked on acquire().
        with self.lock:
            if experiment.errorcode:
                g_error = True
            else:
                self.finished[experiment.experiment] += 1

    def just_finished_plot(self, args):
        """Record one finished job in the 'plots' group (`args` is unused)."""
        with self.lock:
            self.finished['plots'] += 1

    def all_finished(self, experiment):
        """Return True once group `experiment` has finished >= registered jobs.

        Raises KeyError for a group that was never registered; the lock is
        released in that case as well.
        """
        with self.lock:
            num = self.count[experiment]
            fin = self.finished[experiment]
        return fin >= num
class Plots:
    """Placeholder job record whose group name is always 'plots'."""

    def __init__(self):
        self.experiment = 'plots'


# One placeholder per (experiment configuration, plotter) pair; these are
# handed to Progress together with the real experiments.
plots = [Plots() for _ in range(len(g_ep_tuples) * len(g_plotters))]
def syscall(args):
    """Run `args` as a child process and discard its stdout and stderr.

    Raises:
        subprocess.CalledProcessError: if the child exits with nonzero status.
    """
    # Imported locally so the function does not rely on a module-level
    # 'os'; the submit() call that dispatches this function names only
    # 'subprocess' and 'thread' as required modules.
    # NOTE(review): presumably pp imports just those in the worker - confirm.
    import os
    # check_call never reads from pipes, so stdout=PIPE / stderr=PIPE can
    # block the child forever once a pipe buffer fills.  Sending the output
    # to the null device discards it without that risk.
    with open(os.devnull, 'w') as devnull:
        subprocess.check_call(args, stderr=devnull, stdout=devnull)
def take_fives(group):
    """Poll every five seconds until `group` is finished, then wait on it.

    Job-server statistics are printed before each check.  As soon as
    g_error is set the function returns without waiting on the group.
    """
    while True:
        if g_error:
            return
        job_server.print_stats()
        if progress.all_finished(group):
            break
        time.sleep(5)
    # g_error is re-read here: it may have been set after the last check.
    if not g_error:
        job_server.wait(group)
job_server = pp.Server(args.jobs)
progress = Progress(g_experiments + plots)
start_time = time.time()

# Submit every experiment up front.  Each job's group is the experiment's
# full command string, which is also the key take_fives() polls on below.
jobs = []
for experiment in g_experiments:
    jobs.append(job_server.submit(
        Experiment.run, (experiment,), (), ('subprocess', 'thread'),
        callback=progress.just_finished, group=experiment.experiment))

# For each configuration in turn: wait for its runs, then queue one plot job
# per plotter over that configuration's '*.out' files.
for ep_tuple, dir in zip(g_ep_tuples, g_dirs):
    take_fives(g_base_command + ' ' + ep_tuple[1])
    for plotter in g_plotters:
        args = [plotter] + glob.glob(dir + '/*.out')
        jobs.append(job_server.submit(
            syscall, (args,), (), ('subprocess', 'thread',),
            callback=progress.just_finished_plot, group='plots'))

take_fives('plots')

# Joining with single spaces reproduces the output of the comma-separated
# Python 2 print statement.
print(' '.join(['Total time elapsed: ', str(time.time() - start_time), 'seconds']))