-
Notifications
You must be signed in to change notification settings - Fork 3
/
eval_gamma.py
122 lines (104 loc) · 4.25 KB
/
eval_gamma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
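Evaluate gamma-distribution changepoint detection (cpt_gamma) with different shape settings.

Each trace is also evaluated with cpt_np and cpt_poisson, under every penalty in PENALTY.
The resulting scores are written to a ';'-separated output file.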
"""
import pandas as pd
import os
from localutils import changedetect as dc, benchmark as bch, misc as ms
import logging
import ConfigParser
import traceback
import multiprocessing
import argparse
import numpy as np
# Detection methods under evaluation. For the 'cpt_gamma%<x>' entries, worker()
# splits the name at '%': 'adpt' makes worker() derive the shape from the trace
# itself, any other <x> is converted with ms.type_convert and passed to
# cpt_gamma as its shape. The remaining entries are looked up by name in
# localutils.changedetect.
METHOD = [
    'cpt_gamma%1',
    'cpt_gamma%10',
    'cpt_gamma%20',
    'cpt_gamma%30',
    'cpt_gamma%50',
    'cpt_gamma%80',
    'cpt_gamma%adpt',
    'cpt_np',
    'cpt_poisson',
]

# Penalty names; each one is passed as the second argument of every detection
# method.
PENALTY = [
    "AIC",
    "BIC",
    "MBIC",
    "Hannan-Quinn",
]

# Window size handed to bch.evaluation_window_weighted.
WINDOW = 2

# Third positional argument of every detection call
# (presumably the minimum segment length -- confirm in localutils.changedetect).
MINSEGLEN = 3
def worker(f):
    """Benchmark every METHOD/PENALTY combination on a single trace file.

    The file is read as a ';'-separated CSV that provides an 'rtt' column
    (the series handed to the detection methods) and a 'cp' column (rows
    whose value equals 1 are the labelled changepoints).

    Args:
        f: path of the trace file.

    Returns:
        A list holding one tuple per (method, penalty) pair, laid out as
        (file base name, trace length, number of labelled changes, tp, fp,
        fn, precision, recall, score, dis, method, penalty).
    """
    f_base = os.path.basename(f)
    r = []
    logging.info("handling %s", f)

    trace = pd.read_csv(f, sep=';')
    # A string in 'rtt' means the values were not parsed as numbers with '.'
    # as decimal mark; read the file again using ',' instead.
    if isinstance(trace['rtt'][0], str):
        trace = pd.read_csv(f, sep=';', decimal=',')
    rtt = trace['rtt']

    # labelled changepoints expressed as 0-based row positions
    fact = [i for i, v in enumerate(trace['cp']) if v == 1]
    logging.debug("%s : change counts %d", f_base, len(fact))

    for m in METHOD:
        # Resolve the callable and its extra arguments once per method; they
        # do not depend on the penalty, so there is no reason to redo this
        # (in particular the pass over the whole trace) for each penalty.
        if 'gamma' in m:
            shape_spec = m.split('%')[1]
            if 'adpt' in shape_spec:
                # Adaptive shape: square root of the mean of the rtt values
                # lying strictly between 0 and 1000.
                # NOTE(review): if no value falls in that range the mean is
                # NaN and NaN is passed on as shape.
                shape = np.sqrt(np.mean([i for i in rtt if 0 < i < 1000]))
            else:
                shape = ms.type_convert(shape_spec)
            method_caller = dc.cpt_gamma
            extra = {'shape': shape}
        else:
            method_caller = getattr(dc, m)
            extra = {}

        for p in PENALTY:
            logging.info("%s: evaluating %s with %s", f_base, m, p)
            detect = method_caller(rtt, p, MINSEGLEN, **extra)
            b = bch.evaluation_window_weighted(rtt, fact, detect, WINDOW)
            r.append((f_base, len(trace), len(fact),
                      b['tp'], b['fp'], b['fn'],
                      b['precision'], b['recall'], b['score'], b['dis'], m, p))
            logging.debug('%r', b)
    return r
def worker_wrapper(args):
    """Run worker(args), reporting any failure before passing it on.

    When worker raises, a critical message is logged and the traceback is
    printed to stderr; the exception is then re-raised unchanged. Presumably
    this exists so that failures inside pool processes leave a readable trace.

    Args:
        args: the argument forwarded to worker (a trace file path).

    Returns:
        Whatever worker returns.
    """
    try:
        outcome = worker(args)
    except Exception:
        logging.critical("Exception in worker.")
        traceback.print_exc()
        raise
    else:
        return outcome
def main():
    """Evaluate all traces of a directory in parallel and save the scores.

    The output folder is read from option 'data' of section 'dir' in the
    file ./config. The trace directory (-d/--directory) and the output file
    name (-f/--filename) come from the command line; both are required.
    Every file in the trace directory that ends with '.csv' and does not
    start with '~' is handed to worker_wrapper through a process pool, and
    the collected rows are written, ';'-separated with a header line, to
    <data folder>/<filename>.

    The function returns early (after logging or printing a message) when
    the config file, the data folder, a command line argument or the trace
    directory is missing.
    """
    # logging setting
    logging.basicConfig(filename='cpt_evaluation.log', level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S %z')

    # load data collection configuration from config file in the same folder
    config = ConfigParser.ConfigParser()
    if not config.read('./config'):
        logging.critical("Config file ./config is missing.")
        return

    # load the configured directory where collected data shall be saved
    try:
        data_dir = config.get("dir", "data")
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        logging.critical("config for data storage is not right.")
        return

    # check if the directory is there
    if not os.path.exists(data_dir):
        logging.critical("data folder %s does not exisit." % data_dir)
        return

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--directory",
                        help="benchmark changepoint methods using the traces from the specified directory.",
                        action="store")
    parser.add_argument("-f", "--filename",
                        help="file name for output.",
                        action="store")
    args = parser.parse_args()

    if not args.directory or not args.filename:
        # The parsed namespace has no 'help' attribute; the usage text has to
        # be requested from the parser itself.
        parser.print_help()
        return

    trace_dir = args.directory
    outfile = args.filename

    if not os.path.exists(trace_dir):
        print("%s doesn't existe." % trace_dir)
        # Nothing can be listed in a missing directory, so stop here.
        return

    files = [os.path.join(trace_dir, name)
             for name in os.listdir(trace_dir)
             if name.endswith('.csv') and not name.startswith('~')]

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    try:
        res = pool.map(worker_wrapper, files)
    finally:
        # map() only returns once every task is done, so terminating here
        # loses no work; on failure it stops the remaining tasks. Either way
        # the worker processes are released.
        pool.terminate()
        pool.join()

    header = ['file', 'len', 'changes', 'tp', 'fp', 'fn',
              'precision', 'recall', 'score', 'dis', 'method', 'penalty']
    with open(os.path.join(data_dir, outfile), 'w') as out:
        out.write(';'.join(header) + '\n')
        # res holds one list of result tuples per trace file
        for ck in res:
            for line in ck:
                out.write(";".join([str(i) for i in line]) + '\n')
# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()