Skip to content

Commit

Permalink
fixed a critical error and improved the fault injector. the test works…
Browse files Browse the repository at this point in the history
… like a charm now
  • Loading branch information
Andrei Savu committed May 23, 2011
1 parent 51d909e commit 13471cd
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
6 changes: 3 additions & 3 deletions README
Expand Up @@ -86,7 +86,7 @@ Start a ZooKeeper quorum and the fault injector:
$ cd test_5_trunk $ cd test_5_trunk
$ ./clean.sh $ ./clean.sh
$ ./start.sh $ ./start.sh
$ python fail.py $ python fail.py leader


Push 500 elements: Push 500 elements:


Expand All @@ -96,8 +96,8 @@ Check 500 elements (also checks that the sequence is strictly increasing):


$ ./check_all.py $ ./check_all.py


It should work like a charm most of the time! :) Keep in mind that you It should work like a charm! :) Keep in mind that you are not going to experience
are not going to experience this amount of critical failures in production. this amount of critical failures in production.


License License
------- -------
Expand Down
20 changes: 18 additions & 2 deletions test_5_trunk/fail.py
Expand Up @@ -16,13 +16,29 @@


def main(): def main():
nodes = get_nodes() nodes = get_nodes()
leader = True if len(sys.argv) == 2 and sys.argv[1] == 'leader' else False
while True: while True:
n = random.choice(nodes) if leader:
n = find_leader(nodes)
else:
n = random.choice(nodes)
kill_node(n) kill_node(n)
time.sleep(5)
start_node(n) start_node(n)
wait_join_cluster(n) wait_join_cluster(n)
time.sleep(1) time.sleep(5)


def find_leader(nodes, timeout=5.0):
    """ Return the (host, port) pair of the node that reports itself as leader.

    Each node is probed with the 'stat' command; the first one whose reply
    contains 'Mode: leader' is returned exactly as it appears in `nodes`.

    nodes   -- iterable of (host, port) pairs; port may be an int or a
               numeric string
    timeout -- per-socket timeout in seconds, so that a hung node cannot
               block the probe forever

    Returns None when no node answers as leader (e.g. an election is still
    in progress or every node is unreachable).
    """
    for host, port in nodes:
        try:
            s = socket.socket()
        except socket.error:
            # could not even allocate a socket - nothing to close, try next
            continue

        reply = b''
        try:
            s.settimeout(timeout)
            s.connect((host, int(port)))
            s.sendall(b'stat')
            # The 'Mode:' line comes after the list of connected clients, so
            # a single recv(1024) may stop short of it. Read until the peer
            # closes the connection (or we already have the answer).
            while b'Mode: leader' not in reply:
                chunk = s.recv(1024)
                if not chunk:
                    break
                reply += chunk
        except socket.error:
            # node is down / restarting / timed out - judge it on whatever
            # we managed to read and move on to the next one
            pass
        finally:
            s.close()

        if b'Mode: leader' in reply:
            return (host, port)

    return None

def kill_node(n): def kill_node(n):
print 'stoping node %s ...' % (n,) print 'stoping node %s ...' % (n,)
host, port = n host, port = n
Expand Down
6 changes: 3 additions & 3 deletions zkmq.py
Expand Up @@ -78,9 +78,9 @@ def __getattr__(self, name):
""" Pass-Through with connection handle and retry on ConnectionLossException """ """ Pass-Through with connection handle and retry on ConnectionLossException """
value = getattr(zookeeper, name) value = getattr(zookeeper, name)
if callable(value): if callable(value):
if name in ('create', 'set'): # if name in ('create', 'set'):
# this may be too much - needs testing # this may be too much - needs testing
value = self._force_async(value) # value = self._force_async(value)
return functools.partial( return functools.partial(
retry_on(zookeeper.ConnectionLossException)(value), retry_on(zookeeper.ConnectionLossException)(value),
self._handle self._handle
Expand Down Expand Up @@ -108,7 +108,7 @@ def ensure_exists(self, name, data = ''):
def create_sequence(self, name, data): def create_sequence(self, name, data):
""" A safe way of creating an ephemeral node. Worst case scenario """ A safe way of creating an ephemeral node. Worst case scenario
you will end-up creating multiple empty znodes """ you will end-up creating multiple empty znodes """
name = self.create(name, data, name = self.create(name, '',
[ZOO_OPEN_ACL_UNSAFE], [ZOO_OPEN_ACL_UNSAFE],
zookeeper.SEQUENCE zookeeper.SEQUENCE
) )
Expand Down

0 comments on commit 13471cd

Please sign in to comment.