Merge 5dfad36 into 9e00e46
Duncan Macleod committed May 17, 2016
2 parents 9e00e46 + 5dfad36 commit 1becccf
Showing 3 changed files with 167 additions and 44 deletions.
47 changes: 32 additions & 15 deletions bin/hveto
@@ -34,6 +34,7 @@ import datetime
import sys
from socket import getfqdn
from getpass import getuser
from math import ceil

try:
import configparser
@@ -341,26 +342,34 @@ while True:
logger.info("-- Processing round %d --" % round.n)

# calculate significances for this round
def _find_max_significance(channel):
return core.find_max_significance(primary['time'], auxiliary[channel],
channel, snrs, windows,
round.livetime)
def _find_max_significance(channels):
aux = dict((c, auxiliary[c]) for c in channels)
return core.find_max_significance(primary, aux, pchannel,
snrs, windows, round.livetime)
if args.nproc > 1: # multiprocessing
pool = multiprocessing.Pool(processes=args.nproc)
results = pool.map(_find_max_significance, auxiliary.keys())
# separate channel list into chunks and process each chunk
allchans = auxiliary.keys()
pool = multiprocessing.Pool(processes=min(args.nproc, len(allchans)))
n = int(ceil(len(allchans) / float(args.nproc)))  # float division so ceil rounds up
chunks = [allchans[i:i+n] for i in range(0, len(allchans), n)]
results = pool.map(_find_max_significance, chunks)
pool.close()
winners, sigsets = zip(*results)
# find the overall winner among the per-chunk winners
winner = sorted(winners, key=lambda w: w.significance)[-1]
# merge the per-chunk significance dicts into one
newsignificances = sigsets[0]
for subdict in sigsets[1:]:
newsignificances.update(subdict)
else: # single process
results = map(_find_max_significance, auxiliary.keys())
winner, newsignificances = core.find_max_significance(
primary, auxiliary, pchannel, snrs, windows, round.livetime)
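
The chunked map introduced above is a generic pattern; here is a minimal, hedged sketch of it in isolation, with a hypothetical `_work` function standing in for `_find_max_significance` (the rounded-up chunk size keeps the chunk count at or below `nproc`):

import multiprocessing
from math import ceil

def _work(chunk):
    # hypothetical stand-in for _find_max_significance
    return sum(chunk)

def map_in_chunks(items, nproc):
    # float division so ceil rounds up: at most nproc chunks
    n = int(ceil(len(items) / float(nproc)))
    chunks = [items[i:i + n] for i in range(0, len(items), n)]
    pool = multiprocessing.Pool(processes=min(nproc, len(chunks)))
    try:
        return pool.map(_work, chunks)
    finally:
        pool.close()
        pool.join()

if __name__ == '__main__':
    print(map_in_chunks(list(range(10)), 3))  # [6, 22, 17]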

# find winner
winner = sorted(results, key=lambda x: x.significance)[-1]
logger.info("Round %d winner: %s" % (round.n, winner.name))

# plot significance drop here for the last round
# only now do we actually have the new data to calculate significance
# drop
newsignificances = dict([(c, results[i].significance)
for i, c in enumerate(auxiliary)])
if round.n > 1:
png = pngname % 'SIG_DROP'
plot.significance_drop(png, oldsignificances, newsignificances,
@@ -371,8 +380,8 @@

# break out of the loop if the significance is below the stopping point
if winner.significance < minsig:
logger.info("  Maximum significance below stopping point")
logger.debug(" (%.2f < %.2f)" % (winner.significance, minsig))
logger.info("Maximum significance below stopping point")
logger.debug(" (%.2f < %.2f)" % (winner.significance, minsig))
logger.info("-- Rounds complete! --")
break

@@ -389,6 +398,7 @@ while True:
description="winner=%s, window=%s, snr=%s" % (
winner.name, winner.window, winner.snr))
segments[flag.name] = flag
logger.debug("Generated veto segments for round %d" % round.n)

# link events before veto for plotting
before = primary
@@ -398,7 +408,9 @@ while True:
primary, vetoed = core.veto(primary, round.vetoes)
pevents.append(primary)
pvetoed.append(vetoed)
logger.debug("Applied vetoes to primary")
use = core.count_used(vetoed, round.vetoes)
logger.debug("Calculated use percentage")

# record results
round.winner = winner
@@ -417,9 +429,10 @@ while True:

for channel in auxiliary:
auxiliary[channel], v = core.veto(auxiliary[channel], round.vetoes)
logger.debug("Applied vetoes to auxiliary channels")

# log results
logger.info(""" Results for round %d
logger.info("""Results for round %d
winner : %s
significance : %s
snr : %s
@@ -450,7 +463,11 @@ cum. deadtime : %s""" % (
['WINNER', 'VETOED', 'RAW'],
[winner.events, vetoed, primary]):
f = trigfile % tag
write_ascii(f, arr)
if 'frequency' in arr.dtype.fields:
columns = ['time', 'frequency', 'snr']
else:
columns = ['time', 'mchirp', 'snr']
write_ascii(f, arr, columns=columns)
logger.debug("Round %d %s events written to %s"
% (round.n, tag.lower(), f))
round.files[tag] = f
125 changes: 105 additions & 20 deletions hveto/core.py
@@ -19,10 +19,13 @@
"""Core of the HierarchichalVeto algorithm
"""

import itertools
import re
from math import (log, exp, log10)
from bisect import (bisect_left, bisect_right)

import numpy
from numpy.lib.recfunctions import stack_arrays

from scipy.special import (gammainc, gammaln)

@@ -70,8 +73,68 @@ def deadtime(self):

# -- core methods ------------------------------------------------------------

def find_max_significance(primary, auxiliary, channel,
snrs, windows, livetime):
def find_all_coincidences(triggers, channel, snrs, windows):
"""Find the number of coincs between each auxiliary channel and the primary
Parameters
----------
primary : `numpy.ndarray`
an array of times for the primary channel
auxiliary : `numpy.recarray`
an array of triggers for a set of auxiliary channels
snrs : `list` of `float`
the SNR thresholds to use
window : `list` of `float`
the time windows to use
"""
windows = sorted(windows, reverse=True)
snrs = sorted(snrs)
coincs = dict((p, {}) for p in itertools.product(windows, snrs))

for i, x in enumerate(triggers):
if x['channel'] != channel:
continue
t = x['time']
channels = dict((key, set()) for key in coincs)
j = i - 1
segs = [Segment(t-dt/2., t+dt/2.) for dt in windows]

# define coincidence test
def add_if_coinc(event):
if event['channel'] == channel:
return
in_seg = filter(lambda s: event['time'] in s, segs)
if not in_seg: # no triggers in window
return
for k, w in enumerate(in_seg):
for snr in filter(lambda s: event['snr'] >= s, snrs):
channels[(windows[k], snr)].add(event['channel'])
return 1

# search left half-window
while j >= 0:
if not add_if_coinc(triggers[j]):
break
j -= 1
j = i + 1
# search right half-window
while j < triggers.shape[0]:
if not add_if_coinc(triggers[j]):
break
j += 1

# count 'em up
for p, cset in channels.items():
for c in cset:
try:
coincs[p][c] += 1
except KeyError:
coincs[p][c] = 1

return coincs
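
For intuition, a brute-force reference sketch of the same per-channel counts (mine, not part of the commit); it is quadratic where the function above exploits the time-sorted array to walk outward from each primary trigger:

import itertools

def count_coincs_brute(primary_times, aux, windows, snrs):
    # aux maps channel name -> list of (time, snr) pairs
    coincs = dict((p, {}) for p in itertools.product(windows, snrs))
    for t in primary_times:
        for (w, s), counts in coincs.items():
            for chan, events in aux.items():
                if any(abs(et - t) <= w / 2. and esnr >= s
                       for et, esnr in events):
                    counts[chan] = counts.get(chan, 0) + 1
    return coincs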


def find_max_significance(primary, auxiliary, channel, snrs, windows, livetime):
"""Find the maximum Hveto significance for this primary-auxiliary pair
Parameters
@@ -91,19 +154,29 @@ def find_max_significance(primary, auxiliary, channel,
the parameters and segments generated by the (snr, dt) with the
highest significance
"""
winner = HvetoWinner(name=channel, significance=-1)
if isinstance(primary, numpy.recarray):
primary = primary['time']
for snr in snrs:
b = auxiliary[auxiliary['snr'] >= snr]
for dt in windows:
_, sig = coinc_significance(
primary, b['time'], dt=dt, livetime=livetime)
rec = stack_arrays([primary] + auxiliary.values(), usemask=False,
asrecarray=True, autoconvert=True)
rec.sort(order='time')
coincs = find_all_coincidences(rec, channel, snrs, windows)
winner = HvetoWinner(name='unknown', significance=-1)
sigs = dict((c, -1) for c in auxiliary)
for p, cdict in coincs.items():
dt, snr = p
for chan in cdict:
mu = (primary.shape[0] * (auxiliary[chan]['snr'] >= snr).sum() *
dt / livetime)
try:
sig = significance(coincs[p][chan], mu)
except KeyError:
sig = 0  # no coincidences recorded for this channel
if sig > sigs[chan]:
sigs[chan] = sig
if sig > winner.significance:
winner.significance = sig
winner.name = chan
winner.snr = snr
winner.window = dt
return winner
winner.significance = sig
return winner, sigs
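
As read from the loop above, `mu` is the expected number of chance coincidences (primary count times above-threshold auxiliary count times window over livetime), and `significance(n, mu)` scores how surprising `n` observed coincidences are under a Poisson model. A hedged sketch of that statistic, assuming the usual tail-probability form (the real `hveto.core.significance` uses `gammainc`/`gammaln` with extra handling for extreme values):

from math import log10
from scipy.special import gammainc

def poisson_significance(n, mu):
    # for integer n >= 1, P(X >= n | Poisson(mu)) equals gammainc(n, mu),
    # the regularized lower incomplete gamma function
    if n == 0:
        return 0.
    p = gammainc(n, mu)
    return -log10(p) if p > 0 else float('inf')

print(poisson_significance(10, 1.5))  # ~5.4: 10 coincs when only 1.5 expected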


class HvetoWinner(object):
@@ -214,20 +287,32 @@ def veto(table, segmentlist):
"""Remove events from a table based on a segmentlist
"""
times = table['time']
def _not_vetoed(t):
return t not in segmentlist
try:
keep = numpy.vectorize(_not_vetoed)(table['time'])
except IndexError: # empty table
return table, table
a = segmentlist[0][0]
b = segmentlist[-1][1]
keep = numpy.ones(times.shape[0], dtype=bool)
for i, t in enumerate(times):
if t < a:
continue
if t > b:
break
if t in segmentlist:
keep[i] = False
return table[keep], table[~keep]
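
A standalone sketch of the bounded scan that replaces the old `numpy.vectorize` test, assuming time-sorted input and plain `(start, end)` pairs in place of the segment type the module actually uses:

import numpy

def split_by_segments(times, segments):
    # times sorted ascending; segments a sorted list of (start, end)
    a, b = segments[0][0], segments[-1][1]
    keep = numpy.ones(len(times), dtype=bool)
    for i, t in enumerate(times):
        if t < a:  # before the first segment: cannot be vetoed
            continue
        if t > b:  # past the last segment: nothing left to veto
            break
        if any(s <= t < e for s, e in segments):
            keep[i] = False
    return keep

times = numpy.array([1., 5., 9.])
keep = split_by_segments(times, [(4., 6.)])
print(times[keep], times[~keep])  # [1. 9.] [5.]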


def count_used(table, segmentlist):
"""Calculate the number of segments overlapping a set of times
"""
use = 0
times = table['time']
segmentlist = sorted(segmentlist, key=lambda x: x[0])
i = 0
for seg in segmentlist:
if veto(table, type(segmentlist)([seg]))[1].size:
use += 1
for j, t in enumerate(times[i:]):
if t < seg[0]:
continue
if t < seg[1]:
use += 1
i += j
break
return use
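
Equivalently, `count_used` is the number of veto segments that caught at least one event; a brute-force reference (half-open intervals assumed here, where the real segment type may be closed):

def count_used_ref(times, segments):
    return sum(1 for start, end in segments
               if any(start <= t < end for t in times))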
39 changes: 30 additions & 9 deletions hveto/triggers.py
@@ -68,6 +68,8 @@ def get_triggers(channel, etg, segments, cache=None, snr=None, frange=None,
if issubclass(Table, key):
columns = COLUMNS[key][:]
break
if 'channel' in columns:
columns.remove('channel')  # list.pop takes an index; remove by value

# find triggers
if cache is None:
@@ -95,25 +97,42 @@

# format table as numpy.recarray
recarray = trigs.to_recarray(columns=columns)
addfields = {}
dropfields = []

# rename columns for convenience later
if tablename.endswith('_inspiral'):
tcols = ['end_time', 'end_time_ns']
elif tablename.endswith('_burst'):
tcols = ['peak_time', 'peak_time_ns']
# append channel to all events
columns.append('channel')
addfields['channel'] = numpy.repeat(channel, recarray.shape[0])

# rename frequency column
if tablename.endswith('_burst'):
recarray = recfunctions.rename_fields(
recarray, {'peak_frequency': 'frequency'})
idx = columns.index('peak_frequency')
columns.pop(idx)
columns.insert(idx, 'frequency')

# map time to its own column
if tablename.endswith('_inspiral'):
tcols = ['end_time', 'end_time_ns']
elif tablename.endswith('_burst'):
tcols = ['peak_time', 'peak_time_ns']
else:
tcols = None
if tcols:
times = recarray[tcols[0]] + recarray[tcols[1]] * 1e-9
recarray = recfunctions.rec_append_fields(
recarray, 'time', times, times.dtype)
recarray = recfunctions.rec_drop_fields(recarray, tcols)
addfields['time'] = times
dropfields.extend(tcols)
columns = ['time'] + columns[2:]

# add and remove fields as required
if addfields:
names, data = zip(*addfields.items())
recarray = recfunctions.rec_append_fields(recarray, names, data)
recarray = recfunctions.rec_drop_fields(recarray, dropfields)

# sort by time
if 'time' in recarray.dtype.fields:
recarray.sort(order='time')

# filter
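
The batch append/drop pattern used in `get_triggers` above can be sketched in isolation; the two-row table and field values below are purely illustrative:

import numpy
from numpy.lib import recfunctions

rec = numpy.rec.fromarrays(
    [numpy.array([100, 101]), numpy.array([250000000, 500000000])],
    names=['peak_time', 'peak_time_ns'])
addfields = {
    'time': rec['peak_time'] + rec['peak_time_ns'] * 1e-9,
    'channel': numpy.repeat('L1:TEST-CHANNEL', rec.shape[0]),
}
names, data = zip(*addfields.items())
rec = recfunctions.rec_append_fields(rec, names, data)
rec = recfunctions.rec_drop_fields(rec, ['peak_time', 'peak_time_ns'])
print(rec.dtype.names)  # ('time', 'channel'), order may vary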
Expand Down Expand Up @@ -174,7 +193,7 @@ def find_auxiliary_channels(etg, gps='*', ifo='*', cache=None):
return sorted(out)


def write_ascii(outfile, recarray, fmt='%s', **kwargs):
def write_ascii(outfile, recarray, fmt='%s', columns=None, **kwargs):
"""Write a `numpy.recarray` to file as ASCII
Parameters
@@ -191,6 +210,8 @@ def write_ascii(outfile, recarray, fmt='%s', **kwargs):
numpy.savetxt
for details on the writer, including the `fmt` keyword argument
"""
if columns:
recarray = recarray[columns]
kwargs.setdefault('header', ' '.join(recarray.dtype.names))
numpy.savetxt(outfile, recarray, fmt=fmt, **kwargs)
return outfile
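
Hypothetical usage of the new `columns` keyword (the file name and `events` recarray are assumed for illustration):

# write only the time/frequency/snr columns of a trigger recarray
write_ascii('H1-HVETO_TRIGS.txt', events, columns=['time', 'frequency', 'snr'])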
