diff --git a/cnvlib/bintest.py b/cnvlib/bintest.py
new file mode 100644
index 00000000..60ed9f7d
--- /dev/null
+++ b/cnvlib/bintest.py
@@ -0,0 +1,93 @@
+"""Z-test for single-bin copy number alterations."""
+import logging
+
+import numpy as np
+import pandas as pd
+from scipy.stats import norm
+
+from . import params, segfilters
+
+
+def do_bintest(cnarr, segments=None, alpha=0.005, target_only=False):
+    """Get a probability for each bin based on its Z-score.
+
+    Adds a column w/ p-values to the input .cnr.
+
+    Returns either (without `segments`) bins where the probability < `alpha`, or
+    (with `segments`) segments with those significant bin regions spiked in.
+    """
+    cnarr = cnarr.copy()
+    # Subtract segment means, if given, to report only the CNA bins that
+    # weren't already detected (including exon-size CNAs within a 
+    # larger-scale, smaller-amplitude CNA)
+    cnarr['log2'] = cnarr.residuals(segments)
+
+    if target_only:
+        antitarget_idx = cnarr['gene'].isin(params.ANTITARGET_ALIASES)
+        if antitarget_idx.any():
+            logging.info("Ignoring %d off-target bins", antitarget_idx.sum())
+            # NB: bins no longer match the original input
+            cnarr = cnarr[~antitarget_idx]
+
+    cnarr['p_bintest'] = z_prob(cnarr)
+    is_sig = cnarr['p_bintest'] < alpha
+    logging.info("Significant hits in {}/{} bins ({:.3g}%)"
+                 .format(is_sig.sum(), len(is_sig),
+                         100 * is_sig.sum() / len(is_sig)))
+
+    if segments:
+        if is_sig.any():
+            # Splice significant hits into the given segments
+            # NB: residuals() above ensures hits all occur within segments
+            cnarr['is_sig'] = is_sig
+            chunks = []
+            for segment, seghits in cnarr.by_ranges(segments, keep_empty=True):
+                if seghits['is_sig'].any():
+                    # Merge each run of adjacent non-significant bins within this
+                    # segment, leaving the significant hits as single-bin segments
+                    levels = seghits['is_sig'].cumsum() * seghits['is_sig']
+                    chunks.append(seghits.data
+                                  .assign(_levels=levels)
+                                  .groupby('_levels', sort=False)
+                                  .apply(segfilters.squash_region)
+                                  .reset_index(drop=True))
+                else:
+                    # Keep this segment as-is
+                    chunks.append(pd.DataFrame.from_records([segment],
+                                                            columns=segments.data.columns))
+            return cnarr.as_dataframe(pd.concat(chunks, sort=False))
+        else:
+            # Nothing to do
+            return segments
+    else:
+        # May be empty
+        hits = cnarr[is_sig]
+        return hits
+
+
+def z_prob(cnarr):
+    """Calculate z-test p-value at each bin."""
+    # Bin weights ~ 1-variance; bin log2 values already centered at 0.0
+    sd = np.sqrt(1 - cnarr['weight'])
+    # Convert to Z-scores
+    z = cnarr['log2'] / sd
+    # Two-sided survival function (1-CDF) probability
+    p = 2. * norm.cdf(-np.abs(z))
+    # Similar to the above -- which is better?
+    # p2 = 2 * norm.pdf(cnarr['log2'], loc=0, scale=sd)
+    # if not np.allclose(p, p2):
+    #     print("Max diff:", np.abs(p - p2).max())
+    #     print("Median diff:", np.median(np.abs(p - p2)))
+    #     print("Ratio:", (p / p2).mean())
+    # Correct for multiple hypothesis tests
+    return p_adjust_bh(p)
+
+
+def p_adjust_bh(p):
+    """Benjamini-Hochberg p-value correction for multiple hypothesis testing."""
+    p = np.asfarray(p)
+    by_descend = p.argsort()[::-1]
+    by_orig = by_descend.argsort()
+    steps = float(len(p)) / np.arange(len(p), 0, -1)
+    q = np.minimum(1, np.minimum.accumulate(steps * p[by_descend]))
+    return q[by_orig]
diff --git a/cnvlib/commands.py b/cnvlib/commands.py
index edd1ba95..57384f05 100644
--- a/cnvlib/commands.py
+++ b/cnvlib/commands.py
@@ -26,10 +26,10 @@
 from skgenome import tabio, GenomicArray as _GA
 from skgenome.rangelabel import to_label
 
-from . import (access, antitarget, autobin, batch, call, core, coverage,
-               diagram, export, fix, heatmap, import_rna, importers, metrics,
-               parallel, reference, reports, scatter, segmentation, segmetrics,
-               target)
+from . import (access, antitarget, autobin, batch, bintest, call, core,
+               coverage, diagram, export, fix, heatmap, import_rna, importers,
+               metrics, parallel, reference, reports, scatter, segmentation,
+               segmetrics, target)
 from .cmdutil import (load_het_snps, read_cna, verify_sample_sex,
                       write_tsv, write_text, write_dataframe)
 
@@ -1360,6 +1360,34 @@ def _cmd_segmetrics(args):
 P_segmetrics.set_defaults(func=_cmd_segmetrics)
 
 
+# bintest -----------------------------------------------------------------------
+
+do_bintest = public(bintest.do_bintest)
+
+def _cmd_bintest(args):
+    """Z-test for single-bin copy number alterations."""
+    cnarr = read_cna(args.cnarray)
+    segments = read_cna(args.segment) if args.segment else None
+    sig = do_bintest(cnarr, segments, args.alpha, args.target)
+    if len(sig):
+        tabio.write(sig, args.output or sys.stdout)
+
+
+P_bintest = AP_subparsers.add_parser('bintest', help=_cmd_bintest.__doc__)
+P_bintest.add_argument('cnarray',
+        help="Bin-level log2 ratios (.cnr file), as produced by 'fix'.")
+P_bintest.add_argument('-s', '--segment', metavar="FILENAME",
+        help="""Segmentation calls (.cns), the output of the
+                'segment' command).""")
+P_bintest.add_argument("-a", "--alpha", type=float, default=0.005,
+        help="Significance threhold. [Default: %(default)s]")
+P_bintest.add_argument("-t", "--target", action="store_true",
+        help="Test target bins only; ignore off-target bins.")
+P_bintest.add_argument("-o", "--output",
+        help="Output filename.")
+P_bintest.set_defaults(func=_cmd_bintest)
+
+
 # _____________________________________________________________________________
 # Other I/O and compatibility
 
diff --git a/cnvlib/segfilters.py b/cnvlib/segfilters.py
index 8a1d6610..ac32715f 100644
--- a/cnvlib/segfilters.py
+++ b/cnvlib/segfilters.py
@@ -106,6 +106,9 @@ def squash_region(cnarr):
             else:
                 out['cn1'] = np.median(cnarr['cn1'])
             out['cn2'] = out['cn'] - out['cn1']
+    if 'p_bintest' in cnarr:
+        # Only relevant for single-bin segments, but this seems safe/conservative
+        out['p_bintest'] = cnarr['p_bintest'].max()
     return pd.DataFrame(out)
 
 
diff --git a/cnvlib/ztest.py b/cnvlib/ztest.py
deleted file mode 100644
index 3357f141..00000000
--- a/cnvlib/ztest.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""Z-test for single-bin copy number alterations."""
-import logging
-
-import numpy as np
-from scipy.stats import norm
-
-from . import params
-
-
-def do_ztest(cnarr, segments=None, is_male_reference=False,
-             is_sample_female=None, alpha=0.005, target_only=False):
-    """Get a probability for each bin based on its Z-score.
-
-    Return bins where the probability < `alpha`.
-    """
-    if segments:
-        # Subtract segment means to report only the CNA bins that weren't
-        # already detected (including exon-size CNAs within a larger-scale,
-        # smaller-amplitude CNA)
-        cnarr['log2'] = cnarr.residuals(segments)
-    else:
-        # Account for non-diploid sex chromosomes
-        cnarr['log2'] -= cnarr.expect_flat_log2(is_male_reference)
-        if is_sample_female:
-            # chrX has same ploidy as autosomes; chrY is just unusable noise
-            cnarr = cnarr[cnarr.chromosome != cnarr._chr_y_label]
-        else:
-            # 1/2 #copies of each sex chromosome
-            is_chr_xy = cnarr.chromosome.isin((cnarr._chr_x_label,
-                                               cnarr._chr_y_label))
-            cnarr[is_chr_xy, 'log2'] += 1.0
-
-    if target_only:
-        antitarget_idx = cnarr['gene'].isin(params.ANTITARGET_ALIASES)
-        if antitarget_idx.any():
-            logging.info("Ignoring", antitarget_idx.sum(), "off-target bins")
-            cnarr = cnarr[~antitarget_idx]
-
-    cnarr['ztest'] = z_prob(cnarr)
-    is_sig = cnarr['ztest'] < alpha
-    logging.info("Significant hits in {}/{} bins ({:.3g}%)"
-                 .format(is_sig.sum(), len(is_sig),
-                         100 * is_sig.sum() / len(is_sig)))
-    return cnarr[is_sig]
-
-
-def z_prob(cnarr):
-    # Bin weights ~ 1-variance; bin log2 values already centered at 0.0
-    sd = np.sqrt(1 - cnarr['weight'])
-    # Convert to Z-scores
-    z = cnarr['log2'] / sd
-    # Two-sided survival function (1-CDF) probability
-    p = 2. * norm.cdf(-np.abs(z))
-    # Similar to the above -- which is better?
-    # p2 = 2 * norm.pdf(cnarr['log2'], loc=0, scale=sd)
-    # if not np.allclose(p, p2):
-    #     print("Max diff:", np.abs(p - p2).max())
-    #     print("Median diff:", np.median(np.abs(p - p2)))
-    #     print("Ratio:", (p / p2).mean())
-    # Correct for multiple hypothesis tests
-    return p_adjust_bh(p)
-
-
-def p_adjust_bh(p):
-    """Benjamini-Hochberg p-value correction for multiple hypothesis testing."""
-    p = np.asfarray(p)
-    by_descend = p.argsort()[::-1]
-    by_orig = by_descend.argsort()
-    steps = float(len(p)) / np.arange(len(p), 0, -1)
-    q = np.minimum(1, np.minimum.accumulate(steps * p[by_descend]))
-    return q[by_orig]
diff --git a/scripts/cnv_ztest.py b/scripts/cnv_ztest.py
deleted file mode 100755
index a92bdbd4..00000000
--- a/scripts/cnv_ztest.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-"""Z-test for single-bin copy number alterations."""
-import argparse
-import logging
-import sys
-
-import cnvlib
-from cnvlib.cmdutil import verify_sample_sex
-from cnvlib.ztest import do_ztest
-from skgenome import tabio
-
-logging.basicConfig(level=logging.INFO, format="%(message)s")
-
-
-def _cmd_ztest(args):
-    cnarr = cnvlib.read(args.cnarr)
-    if args.segment:
-        segments = cnvlib.read(args.segment)
-        is_sample_female = None
-    else:
-        segments = None
-        is_sample_female = verify_sample_sex(cnarr, args.sample_sex,
-                                             args.male_reference)
-    sig = do_ztest(cnarr, segments, args.male_reference, is_sample_female,
-                   args.alpha, args.target)
-    if len(sig):
-        tabio.write(sig, args.output or sys.stdout)
-
-
-
-if __name__ == '__main__':
-    logging.basicConfig(level=logging.INFO, format="%(message)s")
-
-    AP = argparse.ArgumentParser(description=__doc__)
-    AP.add_argument("cnarr", help=".cnr file")
-    AP.add_argument('-s', '--segment', metavar="FILENAME",
-                    help="""Segmentation calls (.cns), the output of the
-                    'segment' command).""")
-
-    AP.add_argument("-a", "--alpha", type=float, default=0.005,
-                    help="Significance threhold. [Default: %(default)s]")
-    AP.add_argument("-t", "--target", action="store_true",
-                    help="Test target bins only; ignore off-target bins.")
-    AP.add_argument('-y', '--male-reference', '--haploid-x-reference',
-                    action='store_true',
-                    help="""Assume inputs were normalized to a male reference
-                    (i.e. female samples will have +1 log-coverage of chrX;
-                    otherwise male samples would have -1 chrX).""")
-    AP.add_argument('-x', '--sample-sex',
-                    choices=('m', 'y', 'male', 'Male',
-                             'f', 'x', 'female', 'Female'),
-                    help="""Specify the sample's chromosomal sex as male or
-                    female.  (Otherwise guessed from X and Y coverage).""")
-
-    AP.add_argument("-o", "--output",
-                    help="Output filename.")
-    _cmd_ztest(AP.parse_args())
diff --git a/test/bintest.makefile b/test/bintest.makefile
new file mode 100644
index 00000000..a605868e
--- /dev/null
+++ b/test/bintest.makefile
@@ -0,0 +1,15 @@
+# bin-level testing
+# Pick up from clustering .cnr results
+
+cnr_of_interest=TR_101.clustered.cnr TR_77.clustered.cnr TR_55.clustered.cnr TR_42.clustered.cnr
+cns_of_interest=$(cnr_of_interest:.cnr=.cns)
+# TR_101.clustered.cnr TR_77.clustered.cnr TR_55.clustered.cnr TR_42.clustered.cnr
+out_cns=$(cnr_of_interest:.cnr=.bintest.cns)
+
+all: $(out_cns)
+
+$(out_cns): %.bintest.cns: %.cns %.cnr
+	cnvkit.py bintest -s $^ -t -o $@
+
+$(cns_of_interest): %.cns: %.cnr
+	cnvkit.py segment $< -o $@
diff --git a/test/test_cnvlib.py b/test/test_cnvlib.py
index 6abb3787..df95940a 100755
--- a/test/test_cnvlib.py
+++ b/test/test_cnvlib.py
@@ -13,10 +13,10 @@
 
 import cnvlib
 # Import all modules as a smoke test
-from cnvlib import (access, antitarget, autobin, batch, cnary, commands, core,
-                    coverage, diagram, export, fix, import_rna, importers,
-                    metrics, params, plots, reference, reports, segmentation,
-                    segmetrics, smoothing, vary)
+from cnvlib import (access, antitarget, autobin, batch, bintest, cnary,
+                    commands, core, coverage, diagram, export, fix, import_rna,
+                    importers, metrics, params, plots, reference, reports,
+                    segmentation, segmetrics, smoothing, vary)
 
 
 class CNATests(unittest.TestCase):