This repository has been archived by the owner on Jan 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 02384d0
Showing
5 changed files
with
221 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*\~ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Avalanche | ||
|
||
Avalanche is a script that injects random, repeatable network faults on specific ports. It is useful for testing distributed systems. | ||
|
||
## Running | ||
|
||
sudo ./avalanche | ||
|
||
## Possible Faults: | ||
|
||
On each round, Avalanche injects a fault with the probability given by `p_fault` in settings.py. When a fault is injected, it is picked from the following list according to the per-fault probabilities in the `faults` setting of settings.py: | ||
|
||
- High latency | ||
- 100% packet loss | ||
- Smaller percentage of packet loss | ||
- Reorder packets |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#! /usr/bin/env python | ||
|
||
import os | ||
import sys | ||
import json | ||
import time | ||
import atexit | ||
import random | ||
import logging | ||
import settings | ||
import subprocess | ||
from optparse import OptionParser | ||
|
||
VERSION = "0.1.0" | ||
|
||
log = logging.getLogger('avalanche') | ||
log.addHandler(logging.StreamHandler()) | ||
|
||
def die(msg):
    """Log ``msg`` at error level and terminate the process with status 1.

    Args:
        msg: human-readable description of the fatal condition.

    Raises:
        SystemExit: always, with exit code 1.
    """
    log.error(msg)
    # Use sys.exit rather than the bare ``exit`` helper: the latter is
    # installed by the ``site`` module for interactive sessions and is not
    # guaranteed to be present (for example under ``python -S``).
    sys.exit(1)
|
||
def call(cmd, exit_on_fail=True):
    """Run ``cmd`` through the shell and return its exit status.

    Args:
        cmd: the command line to execute, as a single string.
        exit_on_fail: when true, a non-zero exit status aborts the whole
            program via ``die`` instead of being returned.

    Returns:
        The integer exit status reported by ``subprocess.call``.
    """
    log.debug(cmd)
    # stderr is folded into stdout so command output appears in one stream.
    status = subprocess.call(cmd, shell=True, stderr=subprocess.STDOUT)
    failed = status != 0
    if failed and exit_on_fail:
        die("error: subprocess returned %d (not 0)"%(status))
    return status
|
||
def tc(iface, args):
    """Build the ``tc qdisc add`` command line for one interface.

    Args:
        iface: network device name substituted after ``dev``.
        args: qdisc specification appended after ``handle 30:``.

    Returns:
        The complete command as a string; nothing is executed here.
    """
    template = "tc qdisc add dev %s parent 1:3 handle 30: %s"
    return template % (iface, args)
|
||
def generate_fault():
    """Randomly decide whether to inject a fault this round and, if so, which.

    A fault is selected with overall probability ``settings.p_fault``. Each
    candidate in ``settings.faults`` is weighted by its configured
    probability when ``settings.faults`` is a dict, or uniformly when it is
    a list.

    Returns:
        The result of calling the selected entry of ``settings.faults``, or
        ``None`` when no fault is selected.
    """
    if isinstance(settings.faults, list):
        # Preserve the list's own order; every entry gets the same weight.
        weighted = [(f, 1.0/len(settings.faults)) for f in settings.faults]
    elif isinstance(settings.faults, dict):
        # Walk the candidates in a fixed, name-sorted order. Dict iteration
        # order is not guaranteed on older Pythons, and without a stable
        # order the same seed could map to different faults between runs.
        weighted = sorted(settings.faults.items(),
                          key=lambda item: item[0].__name__)
    else:
        die("can't parse faults")

    # Each candidate owns a slice of [0, p_fault] proportional to its weight;
    # pick the candidate whose slice contains the random draw. A draw that
    # lands beyond p_fault selects nothing.
    r = random.uniform(0, 1)
    s = 0
    for fault, p in weighted:
        s += settings.p_fault*p
        if s >= r:
            return fault()
    return None
|
||
def clear_faults():
    """Delete the root qdisc on every configured interface.

    Does nothing when ``settings.debug`` is set. A failing ``tc`` command is
    tolerated rather than treated as fatal.
    """
    if not settings.debug:
        for dev in settings.interfaces:
            call("tc qdisc del dev %s root"%(dev), exit_on_fail=False)
|
||
def cleanup(active_faults):
    """Exit hook: undo any injected faults, then log that we are exiting.

    Args:
        active_faults: collection of currently injected faults; when it is
            empty (falsy) no cleanup commands are run.
    """
    if active_faults:
        log.info("Cleaning up...")
        clear_faults()
    # NOTE(review): the copy under review had lost its indentation; the
    # "Exiting." message is assumed to be logged unconditionally -- confirm.
    log.info("Exiting.")
|
||
if __name__ == "__main__":
    # Entry point: parse options, validate the configuration, then loop
    # forever replacing the active fault once per round.
    parser = OptionParser()
    parser.add_option("-d", "--debug", dest="debug", default=False,
            action="store_true", help="log the faults, but do not inject them")
    parser.add_option("-v", "--version", dest="version", default=False,
            action="store_true", help="print the avalanche version and exit")
    (opts, args) = parser.parse_args()

    if opts.version:
        # The function-call form works under both Python 2 and Python 3.
        print(VERSION)
        sys.exit(0)

    log.setLevel(settings.log_level)

    if opts.debug:
        settings.debug = opts.debug
        log.setLevel(logging.DEBUG)

    # settings.faults may be a list (uniform weights) or a dict of explicit
    # probabilities; only the dict form has probabilities to validate.
    if isinstance(settings.faults, dict):
        total = sum(settings.faults.values())
        # Compare with a tolerance: sums of decimal fractions such as 0.1
        # are rarely exactly 1 in binary floating point.
        if abs(total - 1) > 1e-9:
            die("fault probabilities don't sum to 1")

    active_faults = []
    # The lambda reads the module-level name when the hook fires, so it sees
    # whatever list is bound to ``active_faults`` at exit time.
    atexit.register(lambda: cleanup(active_faults))

    log.info("Starting Avalanche v%s"%(VERSION))
    # settings.delay is passed to time.sleep below, i.e. it is in seconds.
    log.info("seed=%d,delay=%ss,ports=%s"%(settings.seed, settings.delay,str(settings.ports)))

    random.seed(settings.seed)
    while True:
        # Remove the previous round's fault before choosing a new one.
        if active_faults:
            clear_faults()
            active_faults = []

        fault = generate_fault()
        if fault:
            active_faults.append(fault)
            log.info("fault: %s"%(fault.desc()))

            if not settings.debug:
                for iface in settings.interfaces:
                    # Root prio qdisc, the fault qdisc on band 1:3, then
                    # filters steering the configured ports into that band.
                    call("tc qdisc add dev %s root handle 1: prio"%(iface))
                    call(tc(iface, fault.action()))
                    for port in settings.ports:
                        call("tc filter add dev %s parent 1:0 protocol ip u32 match ip dport %d 0xffff flowid 1:3"%(iface, port))
                        call("tc filter add dev %s parent 1:0 protocol ip u32 match ip sport %d 0xffff flowid 1:3"%(iface, port))
        else:
            log.info("fault: none")

        time.sleep(settings.delay)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import random | ||
|
||
class Partition:
    """Fault that partitions the current server from all other servers."""

    # netem arguments that drop every packet
    _NETEM_ARGS = "netem loss 100%"
    # text shown in the log when this fault is chosen
    _DESCRIPTION = "network partition"

    def action(self):
        """Return the netem arguments implementing this fault."""
        return self._NETEM_ARGS

    def desc(self):
        """Return a short human-readable description of this fault."""
        return self._DESCRIPTION
|
||
class PacketLoss:
    """Fault that drops packets with some probability."""

    def __init__(self):
        # Percentage probability of dropping a packet, drawn once per fault.
        loss_percent = random.randint(5, 10)
        self.loss = loss_percent

    def action(self):
        """Return the netem arguments implementing this fault."""
        netem_args = "netem loss %d%%"
        return netem_args % (self.loss,)

    def desc(self):
        """Return a short human-readable description of this fault."""
        text = "drop packets with probability %d%%"
        return text % (self.loss,)
|
||
class Latency:
    """Fault that adds latency to all packets."""

    def __init__(self):
        # Per-packet delay in milliseconds, drawn once per fault.
        delay_ms = random.randint(100, 1000)
        self.latency = delay_ms

    def action(self):
        """Return the netem arguments implementing this fault."""
        netem_args = "netem delay %dms"
        return netem_args % (self.latency,)

    def desc(self):
        """Return a short human-readable description of this fault."""
        text = "delay of %dms"
        return text % (self.latency,)
|
||
class Reorder:
    """Fault that reorders packets."""

    def __init__(self):
        # initial packet delay
        self.delay = 10

        # probability of continuing the delay
        self.correlation = 50

        # probability of reordering a packet (the only random parameter)
        self.reorder = random.randint(10, 75)

    def action(self):
        """Return the netem arguments implementing this fault."""
        percent = 100-self.reorder
        netem_args = "netem delay %sms reorder %d%% %d%%"
        return netem_args % (self.delay, percent, self.correlation)

    def desc(self):
        """Return a short human-readable description of this fault."""
        percent = 100-self.reorder
        text = "reorder after delay of %dms with probability %d and correlation %d"
        return text % (self.delay, percent, self.correlation)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import logging | ||
from faults import * | ||
|
||
# Seed for the random number generator. This makes tests repeatable:
# the same seed reproduces the same sequence of random draws.
seed = 1

# Time between faults, in seconds (the main loop sleeps this long per round).
delay = 1

# Probability of a fault occurring in any given round.
p_fault = 0.5

# If debug is true, log which fault we would inject, but don't inject it.
debug = False

# Only inject faults on these network interfaces.
interfaces = ["eth0"]

# Only inject faults on traffic whose source or destination port is listed.
ports = [2001]

# Level of logging.
log_level = logging.INFO

# Faults to choose from. If it's a list, the probability of each fault is
# uniform. If it's a dict, the probability of each fault is the dict value;
# the values of a dict are expected to sum to 1.
faults = {
    Partition: 0.2,
    PacketLoss: 0.2,
    Latency: 0.3,
    Reorder: 0.3,
}