Merge 5dfad36 into 9e00e46
Duncan Macleod committed May 17, 2016
2 parents 9e00e46 + 5dfad36 commit 1becccf
Showing 3 changed files with 167 additions and 44 deletions.
47 changes: 32 additions & 15 deletions bin/hveto
@@ -34,6 +34,7 @@ import datetime
import sys
from socket import getfqdn
from getpass import getuser
from math import ceil

try:
import configparser
@@ -341,26 +342,34 @@ while True:
logger.info("-- Processing round %d --" % round.n)

# calculate significances for this round
def _find_max_significance(channel):
return core.find_max_significance(primary['time'], auxiliary[channel],
channel, snrs, windows,
round.livetime)
def _find_max_significance(channels):
aux = dict((c, auxiliary[c]) for c in channels)
return core.find_max_significance(primary, aux, pchannel,
snrs, windows, round.livetime)
if args.nproc > 1: # multiprocessing
pool = multiprocessing.Pool(processes=args.nproc)
results = pool.map(_find_max_significance, auxiliary.keys())
# separate channel list into chunks and process each chunk
allchans = auxiliary.keys()
pool = multiprocessing.Pool(processes=min(args.nproc, len(allchans)))
n = int(ceil(len(allchans) / float(args.nproc)))  # float division so ceil rounds up
chunks = [allchans[i:i+n] for i in range(0, len(allchans), n)]
results = pool.map(_find_max_significance, chunks)
pool.close()
winners, sigsets = zip(*results)
# find the overall winner among the per-chunk winners
winner = sorted(winners, key=lambda w: w.significance)[-1]
# merge the per-chunk significance dicts into one
newsignificances = sigsets[0]
for subdict in sigsets[1:]:
newsignificances.update(subdict)
else: # single process
results = map(_find_max_significance, auxiliary.keys())
winner, newsignificances = core.find_max_significance(
primary, auxiliary, pchannel, snrs, windows, round.livetime)
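
The chunked map introduced above is a generic pattern; here is a minimal, hedged sketch of it in isolation, with a hypothetical `_work` function standing in for `_find_max_significance` (the rounded-up chunk size keeps the chunk count at or below `nproc`):

import multiprocessing
from math import ceil

def _work(chunk):
    # hypothetical stand-in for _find_max_significance
    return sum(chunk)

def map_in_chunks(items, nproc):
    # float division so ceil rounds up: at most nproc chunks
    n = int(ceil(len(items) / float(nproc)))
    chunks = [items[i:i + n] for i in range(0, len(items), n)]
    pool = multiprocessing.Pool(processes=min(nproc, len(chunks)))
    try:
        return pool.map(_work, chunks)
    finally:
        pool.close()
        pool.join()

if __name__ == '__main__':
    print(map_in_chunks(list(range(10)), 3))  # [6, 22, 17]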

# find winner
winner = sorted(results, key=lambda x: x.significance)[-1]
logger.info("Round %d winner: %s" % (round.n, winner.name))

# plot significance drop here for the last round
# only now do we actually have the new data to calculate significance
# drop
newsignificances = dict([(c, results[i].significance)
for i, c in enumerate(auxiliary)])
if round.n > 1:
png = pngname % 'SIG_DROP'
plot.significance_drop(png, oldsignificances, newsignificances,
@@ -371,8 +380,8 @@

# break out of the loop if the significance is below the stopping point
if winner.significance < minsig:
logger.info("  Maximum significance below stopping point")
logger.debug(" (%.2f < %.2f)" % (winner.significance, minsig))
logger.info("Maximum significance below stopping point")
logger.debug(" (%.2f < %.2f)" % (winner.significance, minsig))
logger.info("-- Rounds complete! --")
break

@@ -389,6 +398,7 @@ while True:
description="winner=%s, window=%s, snr=%s" % (
winner.name, winner.window, winner.snr))
segments[flag.name] = flag
logger.debug("Generated veto segments for round %d" % round.n)

# link events before veto for plotting
before = primary
@@ -398,7 +408,9 @@ while True:
primary, vetoed = core.veto(primary, round.vetoes)
pevents.append(primary)
pvetoed.append(vetoed)
logger.debug("Applied vetoes to primary")
use = core.count_used(vetoed, round.vetoes)
logger.debug("Calculated use percentage")

# record results
round.winner = winner
@@ -417,9 +429,10 @@ while True:

for channel in auxiliary:
auxiliary[channel], v = core.veto(auxiliary[channel], round.vetoes)
logger.debug("Applied vetoes to auxiliary channels")

# log results
logger.info(""" Results for round %d
logger.info("""Results for round %d
winner : %s
significance : %s
snr : %s
@@ -450,7 +463,11 @@ cum. deadtime : %s""" % (
['WINNER', 'VETOED', 'RAW'],
[winner.events, vetoed, primary]):
f = trigfile % tag
write_ascii(f, arr)
if 'frequency' in arr.dtype.fields:
columns = ['time', 'frequency', 'snr']
else:
columns = ['time', 'mchirp', 'snr']
write_ascii(f, arr, columns=columns)
logger.debug("Round %d %s events written to %s"
% (round.n, tag.lower(), f))
round.files[tag] = f
125 changes: 105 additions & 20 deletions hveto/core.py
@@ -19,10 +19,13 @@
"""Core of the HierarchichalVeto algorithm
"""

import itertools
import re
from math import (log, exp, log10)
from bisect import (bisect_left, bisect_right)

import numpy
from numpy.lib.recfunctions import stack_arrays

from scipy.special import (gammainc, gammaln)

@@ -70,8 +73,68 @@ def deadtime(self):

# -- core methods ------------------------------------------------------------

def find_max_significance(primary, auxiliary, channel,
snrs, windows, livetime):
def find_all_coincidences(triggers, channel, snrs, windows):
"""Find the number of coincs between each auxiliary channel and the primary
Parameters
----------
primary : `numpy.ndarray`
an array of times for the primary channel
auxiliary : `numpy.recarray`
an array of triggers for a set of auxiliary channels
snrs : `list` of `float`
the SNR thresholds to use
window : `list` of `float`
the time windows to use
"""
windows = sorted(windows, reverse=True)
snrs = sorted(snrs)
coincs = dict((p, {}) for p in itertools.product(windows, snrs))

for i, x in enumerate(triggers):
if x['channel'] != channel:
continue
t = x['time']
channels = dict((key, set()) for key in coincs)
j = i - 1
segs = [Segment(t-dt/2., t+dt/2.) for dt in windows]

# define coincidence test
def add_if_coinc(event):
if event['channel'] == channel:
return
in_seg = filter(lambda s: event['time'] in s, segs)
if not in_seg: # no triggers in window
return
for k, w in enumerate(in_seg):
for snr in filter(lambda s: event['snr'] >= s, snrs):
channels[(windows[k], snr)].add(event['channel'])
return 1

# search left half-window
while j >= 0:
if not add_if_coinc(triggers[j]):
break
j -= 1
j = i + 1
# search right half-window
while j < triggers.shape[0]:
if not add_if_coinc(triggers[j]):
break
j += 1

# count 'em up
for p, cset in channels.items():
for c in cset:
try:
coincs[p][c] += 1
except KeyError:
coincs[p][c] = 1

return coincs
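
For intuition, a brute-force reference sketch of the same per-channel counts (mine, not part of the commit); it is quadratic where the function above exploits the time-sorted array to walk outward from each primary trigger:

import itertools

def count_coincs_brute(primary_times, aux, windows, snrs):
    # aux maps channel name -> list of (time, snr) pairs
    coincs = dict((p, {}) for p in itertools.product(windows, snrs))
    for t in primary_times:
        for (w, s), counts in coincs.items():
            for chan, events in aux.items():
                if any(abs(et - t) <= w / 2. and esnr >= s
                       for et, esnr in events):
                    counts[chan] = counts.get(chan, 0) + 1
    return coincs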


def find_max_significance(primary, auxiliary, channel, snrs, windows, livetime):
"""Find the maximum Hveto significance for this primary-auxiliary pair
Parameters
@@ -91,19 +154,29 @@ def find_max_significance(primary, auxiliary, channel,
the parameters and segments generated by the (snr, dt) with the
highest significance
"""
winner = HvetoWinner(name=channel, significance=-1)
if isinstance(primary, numpy.recarray):
primary = primary['time']
for snr in snrs:
b = auxiliary[auxiliary['snr'] >= snr]
for dt in windows:
_, sig = coinc_significance(
primary, b['time'], dt=dt, livetime=livetime)
rec = stack_arrays([primary] + auxiliary.values(), usemask=False,
asrecarray=True, autoconvert=True)
rec.sort(order='time')
coincs = find_all_coincidences(rec, channel, snrs, windows)
winner = HvetoWinner(name='unknown', significance=-1)
sigs = dict((c, -1) for c in auxiliary)
for p, cdict in coincs.items():
dt, snr = p
for chan in cdict:
mu = (primary.shape[0] * (auxiliary[chan]['snr'] >= snr).sum() *
dt / livetime)
try:
sig = significance(coincs[p][chan], mu)
except KeyError:
sig = 0  # no coincidences recorded for this channel
if sig > sigs[chan]:
sigs[chan] = sig
if sig > winner.significance:
winner.significance = sig
winner.name = chan
winner.snr = snr
winner.window = dt
return winner
winner.significance = sig
return winner, sigs
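
As read from the loop above, `mu` is the expected number of chance coincidences (primary count times above-threshold auxiliary count times window over livetime), and `significance(n, mu)` scores how surprising `n` observed coincidences are under a Poisson model. A hedged sketch of that statistic, assuming the usual tail-probability form (the real `hveto.core.significance` uses `gammainc`/`gammaln` with extra handling for extreme values):

from math import log10
from scipy.special import gammainc

def poisson_significance(n, mu):
    # for integer n >= 1, P(X >= n | Poisson(mu)) equals gammainc(n, mu),
    # the regularized lower incomplete gamma function
    if n == 0:
        return 0.
    p = gammainc(n, mu)
    return -log10(p) if p > 0 else float('inf')

print(poisson_significance(10, 1.5))  # ~5.4: 10 coincs when only 1.5 expected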


class HvetoWinner(object):
@@ -214,20 +287,32 @@ def veto(table, segmentlist):
"""Remove events from a table based on a segmentlist
"""
times = table['time']
def _not_vetoed(t):
return t not in segmentlist
try:
keep = numpy.vectorize(_not_vetoed)(table['time'])
except IndexError: # empty table
return table, table
a = segmentlist[0][0]
b = segmentlist[-1][1]
keep = numpy.ones(times.shape[0], dtype=bool)
for i, t in enumerate(times):
if t < a:
continue
if t > b:
break
if t in segmentlist:
keep[i] = False
return table[keep], table[~keep]
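
A standalone sketch of the bounded scan that replaces the old `numpy.vectorize` test, assuming time-sorted input and plain `(start, end)` pairs in place of the segment type the module actually uses:

import numpy

def split_by_segments(times, segments):
    # times sorted ascending; segments a sorted list of (start, end)
    a, b = segments[0][0], segments[-1][1]
    keep = numpy.ones(len(times), dtype=bool)
    for i, t in enumerate(times):
        if t < a:  # before the first segment: cannot be vetoed
            continue
        if t > b:  # past the last segment: nothing left to veto
            break
        if any(s <= t < e for s, e in segments):
            keep[i] = False
    return keep

times = numpy.array([1., 5., 9.])
keep = split_by_segments(times, [(4., 6.)])
print(times[keep], times[~keep])  # [1. 9.] [5.]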


def count_used(table, segmentlist):
"""Calculate the number of segments overlapping a set of times
"""
use = 0
times = table['time']
segmentlist = sorted(segmentlist, key=lambda x: x[0])
i = 0
for seg in segmentlist:
if veto(table, type(segmentlist)([seg]))[1].size:
use += 1
for j, t in enumerate(times[i:]):
if t < seg[0]:
continue
if t < seg[1]:
use += 1
i += j
break
return use
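
Equivalently, `count_used` is the number of veto segments that caught at least one event; a brute-force reference (half-open intervals assumed here, where the real segment type may be closed):

def count_used_ref(times, segments):
    return sum(1 for start, end in segments
               if any(start <= t < end for t in times))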
39 changes: 30 additions & 9 deletions hveto/triggers.py
@@ -68,6 +68,8 @@ def get_triggers(channel, etg, segments, cache=None, snr=None, frange=None,
if issubclass(Table, key):
columns = COLUMNS[key][:]
break
if 'channel' in columns:
columns.remove('channel')  # list.pop takes an index; remove by value

# find triggers
if cache is None:
@@ -95,25 +97,42 @@

# format table as numpy.recarray
recarray = trigs.to_recarray(columns=columns)
addfields = {}
dropfields = []

# rename columns for convenience later
if tablename.endswith('_inspiral'):
tcols = ['end_time', 'end_time_ns']
elif tablename.endswith('_burst'):
tcols = ['peak_time', 'peak_time_ns']
# append channel to all events
columns.append('channel')
addfields['channel'] = numpy.repeat(channel, recarray.shape[0])

# rename frequency column
if tablename.endswith('_burst'):
recarray = recfunctions.rename_fields(
recarray, {'peak_frequency': 'frequency'})
idx = columns.index('peak_frequency')
columns.pop(idx)
columns.insert(idx, 'frequency')

# map time to its own column
if tablename.endswith('_inspiral'):
tcols = ['end_time', 'end_time_ns']
elif tablename.endswith('_burst'):
tcols = ['peak_time', 'peak_time_ns']
else:
tcols = None
if tcols:
times = recarray[tcols[0]] + recarray[tcols[1]] * 1e-9
recarray = recfunctions.rec_append_fields(
recarray, 'time', times, times.dtype)
recarray = recfunctions.rec_drop_fields(recarray, tcols)
addfields['time'] = times
dropfields.extend(tcols)
columns = ['time'] + columns[2:]

# add and remove fields as required
if addfields:
names, data = zip(*addfields.items())
recarray = recfunctions.rec_append_fields(recarray, names, data)
recarray = recfunctions.rec_drop_fields(recarray, dropfields)

# sort by time
if 'time' in recarray.dtype.fields:
recarray.sort(order='time')

# filter
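
The batch append/drop pattern used in `get_triggers` above can be sketched in isolation; the two-row table and field values below are purely illustrative:

import numpy
from numpy.lib import recfunctions

rec = numpy.rec.fromarrays(
    [numpy.array([100, 101]), numpy.array([250000000, 500000000])],
    names=['peak_time', 'peak_time_ns'])
addfields = {
    'time': rec['peak_time'] + rec['peak_time_ns'] * 1e-9,
    'channel': numpy.repeat('L1:TEST-CHANNEL', rec.shape[0]),
}
names, data = zip(*addfields.items())
rec = recfunctions.rec_append_fields(rec, names, data)
rec = recfunctions.rec_drop_fields(rec, ['peak_time', 'peak_time_ns'])
print(rec.dtype.names)  # ('time', 'channel'), order may vary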
Expand Down Expand Up @@ -174,7 +193,7 @@ def find_auxiliary_channels(etg, gps='*', ifo='*', cache=None):
return sorted(out)


def write_ascii(outfile, recarray, fmt='%s', **kwargs):
def write_ascii(outfile, recarray, fmt='%s', columns=None, **kwargs):
"""Write a `numpy.recarray` to file as ASCII
Parameters
@@ -191,6 +210,8 @@ def write_ascii(outfile, recarray, fmt='%s', **kwargs):
numpy.savetxt
for details on the writer, including the `fmt` keyword argument
"""
if columns:
recarray = recarray[columns]
kwargs.setdefault('header', ' '.join(recarray.dtype.names))
numpy.savetxt(outfile, recarray, fmt=fmt, **kwargs)
return outfile
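
Hypothetical usage of the new `columns` keyword (the file name and `events` recarray are assumed for illustration):

# write only the time/frequency/snr columns of a trigger recarray
write_ascii('H1-HVETO_TRIGS.txt', events, columns=['time', 'frequency', 'snr'])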
