-
Notifications
You must be signed in to change notification settings - Fork 0
/
lifter_diver.py
156 lines (130 loc) · 5.76 KB
/
lifter_diver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import world
import random
import math
from heapq import *
def wrc(weighted_choices):
total_weight = sum(w for (w, c) in weighted_choices)
x = random.random()*total_weight
for (w, c) in weighted_choices:
x -= w
if x < 0:
return c
return c
def world_to_map_str(w):
return ''.join(''.join(row) for row in w.map)
class Node(object):
def __init__(self, parent_node, w, command_history):
self.parent_node = parent_node
self.world = w
self.command_history = command_history
self.unexplored_commands = list(self.world.valid_moves())
if 'A' in self.unexplored_commands:
self.unexplored_commands.remove('A') # safe optimization
#print 'unexplored_commands', self.unexplored_commands
self.child_nodes = {} # map from command character to node
self.score = self.world.score()
self.max_child_score = self.score
def pprint(self, indent=0, depth_left=-1):
print '%s[%s] %d %d %s' % (' '*indent, self.command_history, self.score, self.max_child_score, 'DONE' if self.world.is_done() else '')
if depth_left == 0:
return
for cmd, child in self.child_nodes.items():
if child:
child.pprint(indent+2, depth_left-1)
else:
print '%s[%s] None' % (' '*(indent+2), self.command_history+cmd)
if __name__ == "__main__":
initial_world = world.read_world([])
best_score = 0
best_commands = ''
node_count = 0
map_to_node = {} # key is stringified map, value is node
debug_mode = False
def debug(s):
if debug_mode:
print s
def add_node(parent, w, map_str, command_history):
global node_count, best_score, best_commands
n = Node(parent, w, command_history)
if best_score is None or n.score > best_score:
print 'NEWBEST'
best_score = n.score
best_commands = n.command_history
map_to_node[map_str] = n
node_count += 1
# if not w.is_done():
# explorable_nodes.append(n)
# heappush(explore_heapq, (-n.score, n))
return n
def attempt_dive(start):
cursor = start
greedy_mode = False
while True:
if cursor.world.is_done():
return None # failed dive
if random.random() < 0.05:
greedy_mode = True
if greedy_mode:
scored_children = [(c.max_child_score, c) for c in cursor.child_nodes.values() if c]
if scored_children:
scored_children.sort(reverse=True)
cursor = scored_children[0][1]
continue
diveable_children = [c for c in cursor.child_nodes.values() if c]
num_possible_children = len(diveable_children) + len(cursor.unexplored_commands)
if num_possible_children == 0:
return None
if random.random() < (float(len(diveable_children))/num_possible_children):
cursor = random.choice(diveable_children)
else:
break # just stay here and explore one of the unexplored possibilities
return cursor
root = add_node(None, initial_world, world_to_map_str(initial_world), '')
itercount = 0
while True:
if debug_mode or ((itercount % 1000) == 0):
print '%d nodes' % node_count
print 'best score %d for [%s]' % (best_score, best_commands)
#root.pprint(indent=0, depth_left=3)
itercount += 1
while True:
dive_result = attempt_dive(root)
if dive_result:
debug('dive result [%s]' % dive_result.command_history)
from_node = dive_result
break
debug('dive failed')
next_command = random.choice(from_node.unexplored_commands)
from_node.unexplored_commands.remove(next_command)
debug(' trying command %s from node %s' % (next_command, from_node.command_history))
assert next_command not in from_node.child_nodes
next_world = from_node.world.move(next_command)
# see if next world is already in some node
next_map_str = world_to_map_str(next_world)
matched_node = map_to_node.get(next_map_str)
if matched_node is not None and next_world.num_moves >= matched_node.world.num_moves:
# this command lead to a map we've already seen, with more moves, so it's useless
debug(' dominated by [%s]' % matched_node.command_history)
from_node.child_nodes[next_command] = None # mark this edge as useless
else:
# we're going to make a new node
debug(' adding new node for command %s' % next_command)
new_node = add_node(from_node, next_world, next_map_str, from_node.command_history+next_command)
from_node.child_nodes[next_command] = new_node
# if we outdid another node, need to make its parent point to None instead of it
if matched_node:
for c, n in matched_node.parent_node.child_nodes.iteritems():
if n is matched_node:
debug('reassigning parent [%s] edge %s to be None, outdid by [%s]' % (matched_node.parent_node.command_history, c, new_node.command_history))
matched_node.parent_node.child_nodes[c] = None
break
else:
assert False, "didn't find it"
# update max scores up chain as necessary
ms = new_node.score
p = new_node.parent_node
while True:
if not p or ms <= p.max_child_score:
break
p.max_child_score = ms
p = p.parent_node