-
Notifications
You must be signed in to change notification settings - Fork 1
/
repl.py
253 lines (220 loc) · 10.7 KB
/
repl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/env python
r'''
python repl.py --response runs/response/model.pkl \
--selection runs/selection/model.pkl \
'''
from stanza.research import config
config.redirect_output() # NOQA: imports not at top of file
import random
import pickle
from stanza.research import output
from stanza.monitoring import progress
import agents
from vectorizers import all_possible_subcounts
import thutils
parser = config.get_options_parser()
parser.add_argument('--agent_a', default='HumanAgent', choices=agents.AGENTS,
help='Class name for agent A in dialogue simulation.')
parser.add_argument('--agent_b', default='TwoModelAgent', choices=agents.AGENTS,
help='Class name for agent B in dialogue simulation.')
parser.add_argument('--device_a', default=[], nargs='*',
help='The devices to use in PyTorch for model A ("cpu" or "gpu[0-n]"). If None, '
'pick a free-ish device automatically. Should be the same number and '
'appear in the same order as in --load_a.')
parser.add_argument('--device_b', default=[], nargs='*',
help='The devices to use in PyTorch for model B ("cpu" or "gpu[0-n]"). If None, '
'pick a free-ish device automatically. Should be the same number and '
'appear in the same order as in --load_b.')
parser.add_argument('--load_a', metavar='MODEL_FILE', default=[], nargs='*',
help='Model pickle file to load for agent A (two files as different arguments ='
' response.pkl selection.pkl in the case of TwoModelAgent).')
parser.add_argument('--load_b', metavar='MODEL_FILE', default=[], nargs='*',
help='Model pickle file to load for agent B (two files as different arguments ='
' response.pkl selection.pkl in the case of TwoModelAgent).')
parser.add_argument('--goal_directed_a', type=config.boolean, default=False,
help='If `True`, use goal-directed decoding (maximum reward over rollouts) '
'in model A.')
parser.add_argument('--goal_directed_b', type=config.boolean, default=False,
help='If `True`, use goal-directed decoding (maximum reward over rollouts) '
'in model B.')
parser.add_argument('--both_sides_a', type=config.boolean, default=False,
help='If `True`, the response model for A should simulate both sides of the '
'dialogue goal-directed decoding (requires --goal_directed_a).')
parser.add_argument('--both_sides_b', type=config.boolean, default=False,
help='If `True`, the response model for B should simulate both sides of the '
'dialogue goal-directed decoding (requires --goal_directed_b).')
parser.add_argument('--contexts', metavar='CONTEXT_FILE', default='data/selfplay.txt',
help='Text file giving game contexts, each consisting of two lines in the '
'format "na va nb vb nc vc".')
parser.add_argument('--verbosity', type=int, default=0,
help='Amount of debugging output to print: >=5 = show all model outputs, '
"<5 = don't.")
def repl():
options = config.options()
models_a = []
models_b = []
assert len(options.load_a) == len(options.device_a), (options.load_a, options.device_a)
assert len(options.load_b) == len(options.device_b), (options.load_b, options.device_b)
assert (options.goal_directed_a or not options.both_sides_a), \
'--both_sides_a requires --goal_directed_a'
assert (options.goal_directed_b or not options.both_sides_b), \
'--both_sides_b requires --goal_directed_b'
for a_filename, device in zip(options.load_a, options.device_a):
if options.verbosity >= 2:
print(f'Loading {a_filename}...')
with open(a_filename, 'rb') as infile:
with thutils.device_context(device):
models_a.append(pickle.load(infile))
assert norm_device(models_a[-1].options.device) == norm_device(device), \
(a_filename, models_a[-1].options.device)
for b_filename, device in zip(options.load_b, options.device_b):
if options.verbosity >= 2:
print(f'Loading {b_filename}...')
with open(b_filename, 'rb') as infile:
with thutils.device_context(device):
models_b.append(pickle.load(infile))
assert norm_device(models_b[-1].options.device) == norm_device(device), \
(b_filename, models_b[-1].options.device)
agent_a = agents.AGENTS[options.agent_a](options, models_a)
agent_b = agents.AGENTS[options.agent_b](options, models_b)
agent_a.start()
agent_b.start()
games = []
outcomes_a = []
dialogues = []
deals_a = []
generate_games(options.contexts)
try:
if options.verbosity >= 1:
progress.start_task('Game', len(GAMES))
for i, game in enumerate(generate_games(options.contexts)):
if options.verbosity >= 1:
progress.progress(i)
if options.verbosity >= 3:
print('[Game {}]'.format(i))
games.append(game)
agent_a.new_game(game)
agent_b.new_game([game[0], game[2], game[1]])
dialogue = []
proposal_a = None
a_goes = (random.randint(0, 1) == 1)
while True:
current_agent = agent_a if a_goes else agent_b
other_agent = agent_b if a_goes else agent_a
goal_dir = options.goal_directed_a if a_goes else options.goal_directed_b
both_sides = options.both_sides_a if a_goes else options.both_sides_b
response = current_agent.act(goal_directed=goal_dir, both_sides=both_sides)
current_agent.commit(response)
other_agent.observe(response)
if isinstance(response, list):
response = response if a_goes else agents.invert_proposal(response, game)
if proposal_a is None:
proposal_a = response
else:
break
else:
dialogue.append(('A: ' if a_goes else 'B: ') + response)
a_goes = not a_goes
outcome_a, outcome_b = agents.compute_outcome(game, proposal_a, response)
agent_a.outcome(outcome_a)
agent_b.outcome(outcome_b)
outcomes_a.append(outcome_a)
dialogues.append(dialogue)
if outcome_a[0] == agents.AGREE:
deals_a.append(proposal_a)
else:
deals_a.append(None)
except KeyboardInterrupt:
pass
finally:
if options.verbosity >= 1:
progress.end_task()
analyze_games(games, outcomes_a, dialogues, deals_a)
GAMES = []
def generate_games(context_filename):
global GAMES
if not GAMES:
with open(context_filename, 'r') as infile:
lines = [[int(e) for e in line.split()]
for line in infile
if line.strip()]
for i in range(0, len(lines), 2):
pair = lines[i:i+2]
if len(pair) != 2:
continue
a, b = pair
assert [a[0], a[2], a[4]] == [b[0], b[2], b[4]], (a, b)
GAMES.append([[a[0], a[2], a[4]],
[a[1], a[3], a[5]],
[b[1], b[3], b[5]]])
random.shuffle(GAMES)
return iter(GAMES)
def analyze_games(games, outcomes, dialogues, deals_a):
import numpy as np
results = {}
results['sim.num_games'] = len(games)
agreement = [int(outcome[0] == agents.AGREE) for outcome in outcomes]
results['sim.agreement.mean'] = float(np.mean(agreement))
results['sim.agreement.std'] = float(np.std(agreement))
results['sim.agreement.sum'] = float(np.sum(agreement))
rewards_a = [outcome[1] for outcome in outcomes]
results['sim.rewards_a.mean'] = float(np.mean(rewards_a))
results['sim.rewards_a.std'] = float(np.std(rewards_a))
results['sim.rewards_a.sum'] = float(np.sum(rewards_a))
rewards_a_agree = [outcome[1] for outcome in outcomes if outcome[0] == agents.AGREE]
results['sim.rewards_a_agree.mean'] = float(np.mean(rewards_a_agree))
results['sim.rewards_a_agree.std'] = float(np.std(rewards_a_agree))
results['sim.rewards_a_agree.sum'] = float(np.sum(rewards_a_agree))
rewards_b = [outcome[2] for outcome in outcomes]
results['sim.rewards_b.mean'] = float(np.mean(rewards_b))
results['sim.rewards_b.std'] = float(np.std(rewards_b))
results['sim.rewards_b.sum'] = float(np.sum(rewards_b))
rewards_b_agree = [outcome[2] for outcome in outcomes if outcome[0] == agents.AGREE]
results['sim.rewards_b_agree.mean'] = float(np.mean(rewards_b_agree))
results['sim.rewards_b_agree.std'] = float(np.std(rewards_b_agree))
results['sim.rewards_b_agree.sum'] = float(np.sum(rewards_b_agree))
pareto_optimal = [int(is_pareto_optimal(game, deal_a))
for game, deal_a, outcome_a in zip(games, deals_a, outcomes)
if outcome_a[0] == agents.AGREE]
results['sim.pareto_optimal.mean'] = float(np.mean(pareto_optimal))
results['sim.pareto_optimal.std'] = float(np.std(pareto_optimal))
results['sim.pareto_optimal.sum'] = float(np.sum(pareto_optimal))
output.output_results(results, 'sim')
config.dump_pretty(results, 'results.sim.json')
def is_pareto_optimal(game, deal_a):
'''
>>> is_pareto_optimal([[2, 2, 2], [3, 2, 0], [0, 3, 2]], [2, 2, 0])
True
>>> is_pareto_optimal([[2, 2, 2], [3, 2, 0], [0, 3, 2]], [0, 2, 0])
False
>>> is_pareto_optimal([[4, 1, 1], [1, 0, 6], [0, 6, 4]], [4, 0, 1])
True
>>> is_pareto_optimal([[4, 1, 1], [1, 0, 6], [0, 6, 4]], [4, 0, 0])
True
>>> is_pareto_optimal([[4, 1, 1], [1, 0, 6], [0, 6, 4]], [1, 0, 1])
False
'''
counts, values_a, values_b = game
_, reward_a, reward_b = agents.compute_outcome(game, deal_a, deal_a)[0]
for alt_deal in all_possible_subcounts(game[0]):
_, alt_reward_a, alt_reward_b = agents.compute_outcome(game, alt_deal, alt_deal)[0]
if alt_reward_a > reward_a and alt_reward_b >= reward_b:
return False
if alt_reward_a >= reward_a and alt_reward_b > reward_b:
return False
return True
def norm_device(device):
if device is None:
device = ''
device = device.lower()
if device.startswith('cuda'):
device = 'gpu' + device[4:]
if device in ('cpu', 'none'):
device = ''
return device
if __name__ == '__main__':
try:
repl()
except KeyboardInterrupt:
print('')
pass