Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
brucespang committed May 6, 2014
0 parents commit 02384d0
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*\~
*.pyc
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Avalanche

Avalanche is a script that injects random, repeatable network faults on specific ports. It is useful for testing distributed systems.

## Running

    sudo ./avalanche

## Possible Faults:

Every `delay` seconds, Avalanche injects a fault with the probability given by `p_fault` in settings.py; otherwise no fault is active for that interval. When a fault is injected, one of the following is picked according to the per-fault probabilities in the `faults` setting of settings.py:

- High latency
- 100% packet loss
- Smaller percentage of packet loss
- Reorder packets
116 changes: 116 additions & 0 deletions avalanche
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#! /usr/bin/env python

import os
import sys
import json
import time
import atexit
import random
import logging
import settings
import subprocess
from optparse import OptionParser

VERSION = "0.1.0"

log = logging.getLogger('avalanche')
log.addHandler(logging.StreamHandler())

def die(msg):
    """Log msg at ERROR level and terminate the process with exit status 1.

    Termination is done by raising SystemExit, so callers never get a
    return value back from this function.
    """
    log.error(msg)
    # sys.exit instead of the bare exit(): the latter is a convenience that
    # the site module adds for interactive sessions and is not guaranteed to
    # exist (for example under `python -S`).
    sys.exit(1)

def call(cmd, exit_on_fail=True):
    """Run cmd through the shell and return its exit status.

    The command line is logged at DEBUG level before it runs, and the
    child's stderr is redirected with subprocess.STDOUT. When the status is
    non-zero and exit_on_fail is true, die() is invoked instead of returning.
    """
    log.debug(cmd)
    status = subprocess.call(cmd, shell=True, stderr=subprocess.STDOUT)
    failed = status != 0
    if failed and exit_on_fail:
        die("error: subprocess returned %d (not 0)" % status)
    return status

def tc(iface, args):
    """Return the tc command line that adds a qdisc on iface.

    The qdisc is added under parent 1:3 with handle 30:, followed by args
    (the qdisc specification). The command is only built, not executed.
    """
    prefix = "tc qdisc add dev %s parent 1:3 handle 30:" % (iface,)
    return "%s %s" % (prefix, args)

def generate_fault():
    """Randomly decide whether to inject a fault and, if so, which one.

    settings.faults may be a list of fault classes (each equally likely) or
    a dict mapping fault class -> probability. One number r is drawn with
    random.uniform(0, 1) and compared against the running sum of
    settings.p_fault * probability, so each fault is picked with
    probability p_fault * probability and, when the probabilities sum to 1,
    some fault is picked with probability p_fault.

    Returns a new instance of the chosen fault class, or None when no fault
    should be injected (which is always the case for an empty settings.faults).
    Calls die() when settings.faults is neither a list nor a dict.
    """
    if isinstance(settings.faults, list):
        # Walk the list in the order it was written. The division is only
        # evaluated per element, so an empty list simply yields no faults.
        count = len(settings.faults)
        weighted = [(fault, 1.0 / count) for fault in settings.faults]
    elif isinstance(settings.faults, dict):
        # The keys are classes, and on interpreters without ordered dicts
        # their iteration order follows id-based hashes that can differ from
        # run to run. Sort by class name so a given seed always produces the
        # same sequence of faults.
        weighted = sorted(
            settings.faults.items(),
            key=lambda item: getattr(item[0], "__name__", repr(item[0])),
        )
    else:
        die("can't parse faults")

    # Each fault owns a slice of [0, p_fault] whose width is proportional to
    # its probability; pick the fault whose slice r lands in. If r falls
    # beyond the last slice, no fault is injected.
    r = random.uniform(0, 1)
    threshold = 0
    for fault, probability in weighted:
        threshold += settings.p_fault * probability
        if threshold >= r:
            return fault()
    return None

def clear_faults():
    """Delete the root qdisc on every interface listed in settings.interfaces.

    Nothing is executed when settings.debug is set. A failing tc command is
    tolerated (exit_on_fail=False) rather than ending the program.
    """
    if not settings.debug:
        for dev in settings.interfaces:
            call("tc qdisc del dev %s root" % dev, exit_on_fail=False)

def cleanup(active_faults):
    """Exit handler: undo any injected faults, then log the shutdown.

    active_faults is the collection of currently applied faults; the
    qdiscs are only cleared when it is non-empty.
    """
    needs_clearing = bool(active_faults)
    if needs_clearing:
        log.info("Cleaning up...")
        clear_faults()
    log.info("Exiting.")

if __name__ == "__main__":
    # Entry point: parse options, validate the settings, then loop forever,
    # replacing the active fault (if any) every settings.delay seconds.
    parser = OptionParser()
    parser.add_option("-d", "--debug", dest="debug", default=False,
                      action="store_true", help="log the faults, but do not inject them")
    parser.add_option("-v", "--version", dest="version", default=False,
                      action="store_true", help="print the avalanche version and exit")
    (opts, args) = parser.parse_args()

    if opts.version:
        # The call form with a single argument works on Python 2 and 3.
        print(VERSION)
        sys.exit(0)

    log.setLevel(settings.log_level)

    if opts.debug:
        settings.debug = opts.debug
        log.setLevel(logging.DEBUG)

    # generate_fault() weights a list of faults uniformly, so only an
    # explicit dict of probabilities needs checking. Compare with a
    # tolerance: float sums are rarely exact (ten entries of 0.1 add up to
    # 0.9999999999999999, not 1).
    if isinstance(settings.faults, dict):
        total = sum(settings.faults.values())
        if abs(total - 1.0) > 1e-9:
            die("fault probabilities don't sum to 1")

    active_faults = []
    # The lambda reads the module-level name when it runs at exit, so it
    # sees whatever list the loop below has most recently bound.
    atexit.register(lambda: cleanup(active_faults))

    log.info("Starting Avalanche v%s" % (VERSION))
    # settings.delay is handed to time.sleep() below, i.e. it is in seconds.
    log.info("seed=%d,delay=%ss,ports=%s" % (settings.seed, settings.delay, str(settings.ports)))

    # Seed once so the sequence of random draws is repeatable.
    random.seed(settings.seed)
    while True:
        # Remove the previous round's fault before deciding on a new one.
        if active_faults:
            clear_faults()
            active_faults = []

        fault = generate_fault()
        if fault:
            active_faults.append(fault)
            log.info("fault: %s" % (fault.desc()))

            if not settings.debug:
                for iface in settings.interfaces:
                    # Root prio qdisc, the fault's qdisc under band 1:3, and
                    # filters steering traffic to/from each port into 1:3.
                    call("tc qdisc add dev %s root handle 1: prio" % (iface))
                    call(tc(iface, fault.action()))
                    for port in settings.ports:
                        call("tc filter add dev %s parent 1:0 protocol ip u32 match ip dport %d 0xffff flowid 1:3" % (iface, port))
                        call("tc filter add dev %s parent 1:0 protocol ip u32 match ip sport %d 0xffff flowid 1:3" % (iface, port))
        else:
            log.info("fault: none")

        time.sleep(settings.delay)
55 changes: 55 additions & 0 deletions faults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import random

class Partition:
    """Partition the current server from all other servers."""

    def desc(self):
        """Return the human-readable name of this fault."""
        return "network partition"

    def action(self):
        """Return the netem clause for this fault: drop every packet."""
        return "netem loss 100%"

class PacketLoss:
    """Drop packets with some probability."""

    def __init__(self):
        # Percentage chance of dropping a packet, drawn once per instance
        # from the inclusive range 5..10.
        lowest, highest = 5, 10
        self.loss = random.randint(lowest, highest)

    def desc(self):
        """Return a human-readable description including the loss rate."""
        percent = self.loss
        return "drop packets with probability %d%%" % percent

    def action(self):
        """Return the netem clause that applies the chosen loss rate."""
        percent = self.loss
        return "netem loss %d%%" % percent

class Latency:
    """Add latency to all packets."""

    def __init__(self):
        # Per-packet delay in ms, drawn once per instance from the
        # inclusive range 100..1000.
        shortest, longest = 100, 1000
        self.latency = random.randint(shortest, longest)

    def desc(self):
        """Return a human-readable description including the delay."""
        millis = self.latency
        return "delay of %dms" % millis

    def action(self):
        """Return the netem clause that applies the chosen delay."""
        millis = self.latency
        return "netem delay %dms" % millis

class Reorder:
    """Reorder packets."""

    def __init__(self):
        # probability of reordering a packet (the only randomized value)
        self.reorder = random.randint(10, 75)

        # initial packet delay, in ms
        self.delay = 10

        # probability of continuing the delay
        self.correlation = 50

    def _values(self):
        # Values shared by action() and desc(): the delay, the complement
        # of the reorder probability, and the correlation.
        return (self.delay, 100 - self.reorder, self.correlation)

    def desc(self):
        """Return a human-readable description of the reorder settings."""
        template = "reorder after delay of %dms with probability %d and correlation %d"
        return template % self._values()

    def action(self):
        """Return the netem delay/reorder clause for this fault."""
        template = "netem delay %sms reorder %d%% %d%%"
        return template % self._values()
32 changes: 32 additions & 0 deletions settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging
from faults import *

# Seed for the random number generator. This makes tests repeatable.
seed = 1

# Time between faults, in seconds (the avalanche script sleeps this long
# between rounds).
delay = 1

# Probability of a fault occurring in a given round.
p_fault = 0.5

# If debug is true, log which fault we would inject, but don't inject it.
debug = False

# Only inject faults on these network interfaces.
interfaces = ["eth0"]

# Only inject faults on traffic to or from these ports.
ports = [2001]

# Level of logging.
log_level = logging.INFO

# Faults to choose from. If it's a list, every fault is equally likely.
# If it's a dict, each value is the probability of that fault class being
# picked, given that a fault is injected; the avalanche script refuses to
# start unless these values sum to 1.
faults = {
    Partition: 0.2,
    PacketLoss: 0.2,
    Latency: 0.3,
    Reorder: 0.3,
}

0 comments on commit 02384d0

Please sign in to comment.