Merged
4 changes: 2 additions & 2 deletions .devcontainer/Dockerfile
@@ -12,8 +12,8 @@ COPY environment-dev.yml /tmp/
RUN mamba env create -f /tmp/environment-dev.yml && mamba clean -afy

# Make conda env the default
-ENV PATH=/opt/conda/envs/cnvkit-dev/bin:$PATH
-SHELL ["conda", "run", "-n", "cnvkit-dev", "/bin/bash", "-c"]
+ENV PATH=/opt/conda/envs/cnvkit/bin:$PATH
+SHELL ["conda", "run", "-n", "cnvkit", "/bin/bash", "-c"]

# Install CNVkit in editable mode will happen via postCreateCommand
WORKDIR /workspaces/cnvkit
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
@@ -8,7 +8,7 @@
"customizations": {
"vscode": {
"settings": {
-"python.defaultInterpreterPath": "/opt/conda/envs/cnvkit-dev/bin/python",
+"python.defaultInterpreterPath": "/opt/conda/envs/cnvkit/bin/python",
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": [
"test"
10 changes: 6 additions & 4 deletions CLAUDE.md
@@ -13,6 +13,7 @@ CNVkit is a command-line toolkit and Python library for detecting copy number va

- **Bug fixes and new features**: Write a failing test first, then implement.
- **Edge cases**: Before finishing, verify behavior for empty inputs, NaN/missing values, and single-element arrays.
+- **NaN weight safety**: `np.average(values, weights=w)` and `np.sum(w)` propagate NaN; use `np.nansum` for weight sums and filter with `~np.isnan(w)` before `np.average`. Note that `pandas.Series.sum()` skips NaN by default -- prefer explicit `np.nansum` for clarity.
- **User-facing changes**: Update the relevant docs in `doc/*.rst`.
- **Clinical impact**: When reviewing changes, consider whether the changeset alters numerical output or output file formats (`.cnr`, `.cns`, `.cnn`, SEG, VCF). Flag any such changes explicitly, as downstream clinical pipelines may depend on exact output stability.
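The NaN-weight hazard described in the guideline above can be shown in a few lines. This is a minimal illustration with made-up array values, not CNVkit code:

```python
import numpy as np

values = np.array([0.5, 1.0, 2.0])
weights = np.array([1.0, np.nan, 3.0])

# A single NaN weight poisons both the plain sum and the weighted average
assert np.isnan(weights.sum())
assert np.isnan(np.average(values, weights=weights))

# Safe variants: nansum for totals, a boolean mask before np.average
total = float(np.nansum(weights))                               # 4.0
valid = ~np.isnan(weights)
avg = float(np.average(values[valid], weights=weights[valid]))  # 1.625
```

Note that `pandas.Series.sum()` would silently skip the NaN here, which is why the guideline asks for explicit `np.nansum`.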

@@ -24,11 +25,11 @@ Use 'bd' for task tracking.

**Conda (recommended):**
```bash
-conda env create -f environment-dev.yml # Python 3.11, all deps, R, testing tools
-conda activate cnvkit-dev
+conda env create -f environment-dev.yml # All deps, R, testing tools
+conda activate cnvkit
```

-**Note:** The conda env must be activated before running pytest, mypy, or other dev tools -- they are not installed globally. The conda env includes R with DNAcopy for segmentation.
+**Note:** The conda env must be activated before running pytest, mypy, or other dev tools -- they are not installed globally. The conda env includes R with DNAcopy for segmentation. Use `conda activate cnvkit && <command>` in scripts; `conda run` is unreliable here.

### Testing

@@ -63,6 +64,7 @@ mypy cnvlib/batch.py # Check a single file
- Generator functions must use `-> Generator[YieldType, SendType, ReturnType]`, not `-> Iterator` or `-> None`
- When a variable changes type (e.g. `str` → `list[int]`), rename it to avoid shadowing (e.g. `copies` → `copy_strs`)
- Use `assert x is not None` to narrow `Optional` types when control flow guarantees non-None
+- `numpy.bool_` is not assignable to `bool` in mypy -- use `# type: ignore[assignment]` or widen parameters to `bool | bool_ | None`

### Code Quality & Security

@@ -114,7 +116,7 @@ Core dependencies are declared in `requirements/core.txt`; `min.txt` pins exact
- All `zip()` calls use explicit `strict=True` or `strict=False` (PEP 618)

### Imports in Tests
-- `test/test_commands.py` has a top-level `from cnvlib import (...)` block that serves as both a smoke test and the shared import set. Add new `cnvlib` submodule imports there rather than as local imports inside individual test methods.
+- `test/test_commands.py` and `test/test_cnvlib.py` each have a top-level `from cnvlib import (...)` block that serves as both a smoke test and the shared import set. Add new `cnvlib` submodule imports there rather than as local imports inside individual test methods.

### Variable Naming
- The codebase uses `bam_fname` or `sample_fname` for file paths that can be either BAM or bedGraph files
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -29,7 +29,7 @@ Thank you for your interest in contributing to CNVkit! This document provides gu
**Option A: Conda (recommended)**
```bash
conda env create -f environment-dev.yml
-conda activate cnvkit-dev
+conda activate cnvkit
pip install -e '.[test]'
```

2 changes: 1 addition & 1 deletion README.rst
@@ -196,7 +196,7 @@ Quick start for development::

# Option 1: Using conda (recommended)
conda env create -f environment-dev.yml
-conda activate cnvkit-dev
+conda activate cnvkit
pip install -e '.[test]'

# Option 2: Using pip
2 changes: 1 addition & 1 deletion cnvlib/cnary.py
@@ -358,7 +358,7 @@ def squash_rows(name, rows):
def shift_xx(
self,
is_haploid_x_reference: bool = False,
-is_xx: bool_ | None = None,
+is_xx: bool | bool_ | None = None,
diploid_parx_genome: str | None = None,
) -> CopyNumArray:
"""Adjust chrX log2 ratios to match the ploidy of the reference sex.
17 changes: 14 additions & 3 deletions cnvlib/reports.py
@@ -136,7 +136,7 @@ def do_genemetrics(
min_probes: int = 3,
skip_low: bool = False,
is_haploid_x_reference: bool = False,
-is_sample_female: None = None,
+is_sample_female: bool | None = None,
diploid_parx_genome: str | None = None,
location_stats: tuple[()] | list[str] = (),
spread_stats: tuple[()] | list[str] = (),
@@ -334,6 +334,7 @@ def compute_gene_stats(
"""Compute statistics for bins within a gene.

Similar to segmetrics.do_segmetrics, but for gene-level bins.
+Bins with NaN weights are excluded before computing interval statistics.

Parameters
----------
@@ -398,8 +399,14 @@
func = stat_funcs[statname]
stats_dict[statname] = func(deviations)

-# Interval statistics
+# Interval statistics -- filter NaN weights to avoid poisoning np.average
+weights = bins["weight"].to_numpy() if "weight" in bins else np.ones(len(bins_log2))
+valid = ~np.isnan(weights)
+if not valid.all():
+    bins_log2 = bins_log2[valid]
+    weights = weights[valid]
+if len(bins_log2) == 0:
+    return stats_dict
if "ci" in interval_stats:
ci_lo, ci_hi = stat_funcs["ci"](bins_log2, weights)
stats_dict["ci_lo"] = ci_lo
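The filtering step added above follows a common mask-then-bail-out pattern: drop bins whose weight is NaN, and return early if nothing survives. As a standalone sketch (the helper name and return convention are illustrative, not cnvlib API):

```python
import numpy as np

def drop_nan_weights(log2, weights):
    """Return (log2, weights) with NaN-weight bins removed; may be empty."""
    valid = ~np.isnan(weights)
    return log2[valid], weights[valid]

log2 = np.array([-0.1, 0.2, 0.05])
weights = np.array([0.9, np.nan, 0.8])
log2_f, w_f = drop_nan_weights(log2, weights)
# Only the two bins with real weights remain
```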
@@ -424,6 +431,8 @@
) -> Iterator[pd.Series]:
"""Group probe and coverage data by gene.

+Bins with NaN weights are excluded from weight sums and depth averages.

Return an iterable of genes, in chromosomal order, associated with their
location and coverages:

@@ -443,13 +452,15 @@
outrow["probes"] = len(rows)
if "weight" in rows:
wt = rows["weight"]
-outrow["weight"] = wt.sum()
+outrow["weight"] = float(np.nansum(wt.to_numpy()))
if "depth" in rows:
+valid = ~np.isnan(wt)
+if valid.any():
+    outrow["depth"] = np.average(
+        rows["depth"][valid], weights=wt[valid]
+    )
+else:
+    outrow["depth"] = rows["depth"].mean()
elif "depth" in rows:
outrow["depth"] = rows["depth"].mean()

33 changes: 23 additions & 10 deletions cnvlib/segfilters.py
@@ -100,6 +100,9 @@ def enumerate_changes(levels: Series) -> Series:
def squash_region(cnarr: DataFrame) -> DataFrame:
"""Reduce a CopyNumArray to 1 row, keeping fields sensible.

+Bins with NaN weights are excluded from weighted averages and medians;
+if all weights are NaN, falls back to unweighted aggregation.

All columns present in the input are preserved. Core columns (chromosome,
start, end, log2, gene, probes, weight) and well-known optional columns
(depth, baf, cn, cn1/cn2, p_bintest) use purpose-built aggregation. Any
@@ -109,11 +112,13 @@
segments.
"""
assert "weight" in cnarr
-region_weight = cnarr["weight"].sum()
+wt = cnarr["weight"].to_numpy()
+valid_wt = ~np.isnan(wt)
+region_weight = float(np.nansum(wt))

def _wavg(col: str) -> float:
-if region_weight > 0:
-    return float(np.average(cnarr[col], weights=cnarr["weight"]))
+if region_weight > 0 and valid_wt.any():
+    return float(np.average(cnarr[col].to_numpy()[valid_wt], weights=wt[valid_wt]))
return float(np.mean(cnarr[col]))

out: dict = {
@@ -130,13 +135,15 @@ def _wavg(col: str) -> float:
if "baf" in cnarr:
out["baf"] = _wavg("baf")
if "cn" in cnarr:
-if region_weight > 0:
-    out["cn"] = weighted_median(cnarr["cn"], cnarr["weight"])
+if region_weight > 0 and valid_wt.any():
+    cn_arr = cnarr["cn"].to_numpy()
+    out["cn"] = weighted_median(cn_arr[valid_wt], wt[valid_wt])
else:
out["cn"] = np.median(cnarr["cn"])
if "cn1" in cnarr:
-if region_weight > 0:
-    out["cn1"] = weighted_median(cnarr["cn1"], cnarr["weight"])
+if region_weight > 0 and valid_wt.any():
+    cn1_arr = cnarr["cn1"].to_numpy()
+    out["cn1"] = weighted_median(cn1_arr[valid_wt], wt[valid_wt])
else:
out["cn1"] = np.median(cnarr["cn1"])
out["cn2"] = out["cn"] - out["cn1"]
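For reference, a weighted median over the surviving bins can be computed by sorting the values and finding where the cumulative weight crosses half the total. The codebase ships its own `weighted_median` (used in the diff above); the sketch below only illustrates the idea, and tie-breaking details may differ:

```python
import numpy as np

def weighted_median_sketch(values, weights):
    """Smallest value whose cumulative weight reaches half the total weight."""
    order = np.argsort(values)
    v = np.asarray(values, dtype=float)[order]
    w = np.asarray(weights, dtype=float)[order]
    cum = np.cumsum(w)
    return float(v[np.searchsorted(cum, 0.5 * cum[-1])])
```

With equal weights this reduces to an ordinary median; a heavily weighted bin pulls the result toward its value.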
@@ -185,7 +192,8 @@ def bic(segarr: CopyNumArray) -> CopyNumArray:
Uses the Bayesian Information Criterion to test whether two adjacent
same-chromosome segments are better modeled as separate or merged.
Iteratively merges the pair with the most negative delta-BIC until
-no pair benefits from merging.
+no pair benefits from merging. Segments with NaN or zero weights use
+a median-RSS fallback to avoid poisoning the BIC computation.

Variance is estimated from the ``stdev`` column if present (from
``segmetrics --spread stdev``), otherwise from the ``weight`` column.
@@ -221,8 +229,13 @@
rss = sd**2 * n
elif "weight" in data.columns:
w = data["weight"].to_numpy(dtype=float)
-# Variance ~ n/weight, so RSS = n * variance = n^2/weight
-needs_fallback = w == 0
+# Variance ~ n/weight, so RSS = n * variance = n^2/weight.
+# Segments with zero or NaN weights (e.g. from older .cnr files with
+# missing weight values) can't contribute a meaningful variance
+# estimate, so fall back to the median RSS of valid segments -- this
+# avoids division by zero/NaN while keeping the BIC comparison neutral
+# (neither strongly favoring nor opposing merges involving those segments).
+needs_fallback = (w == 0) | np.isnan(w)
valid = ~needs_fallback
if valid.any():
fallback_rss = float(np.median(n[valid] ** 2 / w[valid]))
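The fallback can be exercised in isolation. Under the patch's model (variance ~ n/weight, so RSS = n^2/weight), segments with zero or NaN weight take the median RSS of the valid ones; the array values below are made up:

```python
import numpy as np

n = np.array([10.0, 20.0, 5.0])   # bins per segment
w = np.array([2.0, 0.0, np.nan])  # per-segment weights

needs_fallback = (w == 0) | np.isnan(w)
valid = ~needs_fallback
rss = np.empty_like(n)
rss[valid] = n[valid] ** 2 / w[valid]            # 10^2 / 2 = 50.0
if valid.any():
    rss[needs_fallback] = np.median(rss[valid])  # neutral fill: 50.0
```

Because the fill is the median of the valid RSS values, it neither inflates nor deflates the BIC delta for pairs that include a degenerate segment.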
11 changes: 8 additions & 3 deletions cnvlib/segmentation/__init__.py
@@ -343,7 +343,7 @@ def transfer_fields(

Segment gene name is the comma-separated list of bin gene names. Segment
weight is the sum of bin weights, and depth is the (weighted) mean of bin
-depths.
+depths. Bins with NaN weights are excluded from the sums and averages.

Also: Post-process segmentation output.

@@ -402,9 +402,14 @@ def make_null_segment(chrom, orig_start, orig_end):

for i, bin_idx in enumerate(iter_slices(cdata, segments.data, "outer", False)):
if bin_weights is not None:
-seg_wt = bin_weights[bin_idx].sum()
+wt = bin_weights[bin_idx]
+# Use nansum so NaN weights don't propagate into .cns output
+seg_wt = float(np.nansum(wt))
if seg_wt > 0:
-seg_dp = np.average(bin_depths[bin_idx], weights=bin_weights[bin_idx])
+valid = ~np.isnan(wt)
+seg_dp = np.average(
+    bin_depths[bin_idx][valid], weights=wt[valid]
+)
else:
seg_dp = 0.0
else:
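The same guard, extracted as a toy computation (array values are invented; the `bin_idx` slicing from the real code is elided):

```python
import numpy as np

bin_depths = np.array([100.0, 200.0, 300.0])
wt = np.array([1.0, np.nan, 1.0])

seg_wt = float(np.nansum(wt))  # 2.0, not NaN
if seg_wt > 0:
    valid = ~np.isnan(wt)
    seg_dp = float(np.average(bin_depths[valid], weights=wt[valid]))  # 200.0
else:
    seg_dp = 0.0
```

Without the mask, the NaN-weighted middle bin would turn both the segment weight and the depth average into NaN in the written `.cns` file.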
2 changes: 1 addition & 1 deletion environment-dev.yml
@@ -1,4 +1,4 @@
-name: cnvkit-dev
+name: cnvkit
channels:
- conda-forge
- bioconda