From 0f1c079fb87d60470aa812f5dfda69d260364fd2 Mon Sep 17 00:00:00 2001 From: Jasper Krauter <12259417+jeeeesper@users.noreply.github.com> Date: Wed, 6 May 2026 11:58:28 +0300 Subject: [PATCH 1/2] feat(py): add typed Python wrapper for tsalign bindings Introduce `tsalign` package with typed dataclasses (`AlignmentRange`, `SimpleAlignmentOp`, etc.), typed `Aligner`/`Alignment` wrapper classes, a high-level `align()` function, and `py.typed` marker. Update README with full API docs and usage examples. Update pyproject.toml to wire up the new package. --- python_bindings/README.md | 132 ++++++++++- python_bindings/pyproject.toml | 2 + python_bindings/python/tsalign/__init__.py | 249 +++++++++++++++++++++ python_bindings/python/tsalign/_types.py | 73 ++++++ python_bindings/python/tsalign/py.typed | 0 python_bindings/src/lib.rs | 2 +- 6 files changed, 453 insertions(+), 5 deletions(-) create mode 100644 python_bindings/python/tsalign/__init__.py create mode 100644 python_bindings/python/tsalign/_types.py create mode 100644 python_bindings/python/tsalign/py.typed diff --git a/python_bindings/README.md b/python_bindings/README.md index 9656383..b10347e 100644 --- a/python_bindings/README.md +++ b/python_bindings/README.md @@ -2,9 +2,133 @@ [![PyPI](https://img.shields.io/pypi/v/tsalign)](https://pypi.org/project/tsalign/) -These bindings are still very minimal and are subject to improvement and/or breaking changes with future versions. +Python bindings for the template switch aligner. Aligns two DNA sequences +while detecting template switches — short-range translocations where a query +region is copied from (or aligns to) a different location, possibly on the +reverse complement strand. -## Usage -Install with `pip install tsalign`. +## Installation -The most important function is `tsalign.align(reference, query, **settings)`. On the object that is returned, you can e.g. call `.stats()` or `.cigar()`. 
+```bash +pip install tsalign +``` + +## Quick start + +```python +import tsalign + +result = tsalign.align("ACGTACGT", "ACGACGT") +print(result.cigar()) # compact alignment string +print(result.stats()) # cost, duration, node counts, … +``` + +## Aligner options + +Create an `Aligner` once and reuse it for many sequences: + +```python +aligner = tsalign.Aligner( + min_length_strategy="preprocess_lookahead", # default: "lookahead" + chaining_strategy="lower_bound", # default: "none" + total_length_strategy="maximise", # default: "maximise" + no_ts=False, # set True for plain gap-affine +) + +result = aligner.align("ACGTACGT", "ACGACGT") +``` + +## Custom cost configuration + +Costs are specified in `.tsa` format. Use `sample_tsa_config/config.tsa` +as a starting point and consult the main repository README for a description +of each parameter. + +```python +aligner = tsalign.Aligner(costs_file="sample_tsa_config/config.tsa") +result = aligner.align("ACGTACGT", "ACGACGT") +``` + +You can also pass the cost string directly: + +```python +with open("my_costs.tsa") as f: + cost_str = f.read() +aligner = tsalign.Aligner(costs=cost_str) +``` + +## Restricting the alignment range + +Use `AlignmentRange` to align only a window of the input sequences: + +```python +from tsalign import Aligner, AlignmentRange + +aligner = Aligner() +result = aligner.align( + "NNNACGTACGTNNN", + "ACGACGT", + range=AlignmentRange(reference_start=3, reference_end=11), +) +print(result.cigar()) +``` + +Individual start/limit keyword arguments are also accepted when `range` is +not provided: + +```python +result = aligner.align( + "NNNACGTACGTNNN", + "ACGACGT", + reference_start=3, + reference_limit=11, +) +``` + +## Working with alignment operations + +`Alignment.alignments()` returns a typed list of `(count, op)` pairs (or `None` if the alignment has no valid target): + +```python +from tsalign import align, TemplateSwitchEntranceOp, TemplateSwitchExitOp + +result = align(reference, query) +for count, op in result.alignments(): + if 
isinstance(op, TemplateSwitchEntranceOp): + print(f"Template switch: {op.direction}, primary={op.primary}, offset={op.first_offset}") + elif isinstance(op, TemplateSwitchExitOp): + print(f"Exit, anti-primary gap: {op.anti_primary_gap}") + else: + # SimpleAlignmentOp — a basic edit in the primary or secondary track + print(f"{count}x {op.kind}") +``` + +## Visualisation + +```python +result = tsalign.align(reference, query) +result.viz_template_switches() # prints ASCII art to stdout +``` + +## Limiting search resources + +```python +result = aligner.align( + reference, + query, + cost_limit=100, # return None if cost would exceed this + memory_limit=500_000, # return None if memory exceeds this number of bytes +) +if result is None: + print("No alignment found within limits") +``` + +## Accepted sequence types + +Any object whose `str()` representation is a valid DNA string (ACGTN) is +accepted — including `Bio.Seq`: + +```python +from Bio.Seq import Seq +result = tsalign.align(Seq("ACGTACGT"), Seq("ACGACGT")) +``` diff --git a/python_bindings/pyproject.toml b/python_bindings/pyproject.toml index 4d18e36..39c96e6 100644 --- a/python_bindings/pyproject.toml +++ b/python_bindings/pyproject.toml @@ -13,3 +13,5 @@ classifiers = [ dynamic = ["version"] [tool.maturin] features = ["pyo3/extension-module"] +module-name = "tsalign._tsalign" +python-source = "python" diff --git a/python_bindings/python/tsalign/__init__.py b/python_bindings/python/tsalign/__init__.py new file mode 100644 index 0000000..819879f --- /dev/null +++ b/python_bindings/python/tsalign/__init__.py @@ -0,0 +1,249 @@ +from __future__ import annotations + +import pathlib +from typing import List, Optional, Tuple, Union + +from tsalign._tsalign import Aligner as _Aligner # type: ignore[import] +from tsalign._tsalign import TSPairwiseAlignment as _TSPairwiseAlignment # type: ignore[import] +from tsalign._types import ( + AlignmentOp, + AlignmentRange, + SimpleAlignmentOp, + TemplateSwitchEntranceOp, + 
TemplateSwitchExitOp, + _parse_op, +) + +_ALIGNER_KWARG_NAMES = frozenset( + {"no_ts", "min_length_strategy", "chaining_strategy", "total_length_strategy", "costs", "costs_file"} +) + +__all__ = [ + "Aligner", + "Alignment", + "align", + "AlignmentRange", + "AlignmentOp", + "SimpleAlignmentOp", + "TemplateSwitchEntranceOp", + "TemplateSwitchExitOp", +] + + +class Alignment: + """Result of a pairwise alignment that may contain template switches. + + Obtain an instance via :meth:`Aligner.align` or the module-level :func:`align`. + """ + + def __init__(self, inner: _TSPairwiseAlignment) -> None: + self._inner = inner + + def cigar(self) -> Optional[str]: + """CIGAR string of the alignment. + + Template switch operations are encoded as extended CIGAR tokens. + Returns ``None`` if no valid alignment target was found. + """ + return self._inner.cigar() + + def stats(self) -> dict: + """Dictionary of alignment statistics. + + Keys include: ``cost``, ``cost_per_base``, ``duration_seconds``, + ``opened_nodes``, ``closed_nodes``, ``template_switch_amount``, and + nested ``result`` and ``sequences`` dicts. + """ + return self._inner.stats() + + def alignments(self) -> Optional[List[Tuple[int, AlignmentOp]]]: + """Compact list of alignment operations. + + Each entry is ``(count, op)`` where ``count`` is the repetition count and + ``op`` is one of: + + - :class:`SimpleAlignmentOp` — a basic edit (match, substitution, + insertion, deletion) in the primary or secondary track. + - :class:`TemplateSwitchEntranceOp` — start of a template switch. + - :class:`TemplateSwitchExitOp` — end of a template switch. + + Returns ``None`` if the alignment has no valid target. 
+ """ + raw = self._inner.alignments() + if raw is None: + return None + return [(count, _parse_op(op)) for count, op in raw] + + def viz_template_switches(self) -> None: + """Print an ASCII visualisation of template switch jumps to stdout.""" + self._inner.viz_template_switches() + + +class Aligner: + """Pairwise DNA sequence aligner with template switch detection. + + Template switches are short-range translocations where a query region + aligns to a different part of the reference, possibly on the reverse + complement strand. The aligner uses A* search under a configurable + gap-affine cost model. + + Parameters + ---------- + no_ts : bool + Disable template switch detection (plain gap-affine alignment). + Default: ``False``. + min_length_strategy : str + Strategy for enforcing the minimum template switch length. + One of ``"none"``, ``"lookahead"`` (default), ``"preprocess_price"``, + ``"preprocess_filter"``, ``"preprocess_lookahead"``. + chaining_strategy : str + A* lower-bound chaining strategy. + One of ``"none"`` (default), ``"lower_bound"``. + total_length_strategy : str + Total template switch length strategy. + One of ``"none"``, ``"maximise"`` (default). + costs : str, optional + Cost configuration as a raw ``.tsa``-format string. + costs_file : str or Path, optional + Path to a ``.tsa`` cost configuration file. + Mutually exclusive with ``costs``. + See ``sample_tsa_config/config.tsa`` for a skeleton. 
+ + Examples + -------- + Default settings:: + + aligner = tsalign.Aligner() + + Custom cost file:: + + aligner = tsalign.Aligner(costs_file="sample_tsa_config/config.tsa") + """ + + def __init__( + self, + *, + no_ts: bool = False, + min_length_strategy: str = "lookahead", + chaining_strategy: str = "none", + total_length_strategy: str = "maximise", + costs: Optional[str] = None, + costs_file: Optional[Union[str, pathlib.Path]] = None, + ) -> None: + if costs is not None and costs_file is not None: + raise ValueError("Provide at most one of 'costs' or 'costs_file'.") + if costs_file is not None: + costs = pathlib.Path(costs_file).read_text() + kwargs: dict = { + "no_ts": no_ts, + "min_length_strategy": min_length_strategy, + "chaining_strategy": chaining_strategy, + "total_length_strategy": total_length_strategy, + } + if costs is not None: + kwargs["costs"] = costs + self._inner = _Aligner(**kwargs) + + def align( + self, + reference: object, + query: object, + *, + reference_name: str = "reference", + query_name: str = "query", + range: Optional[AlignmentRange] = None, + reference_start: Optional[int] = None, + reference_limit: Optional[int] = None, + query_start: Optional[int] = None, + query_limit: Optional[int] = None, + cost_limit: Optional[int] = None, + memory_limit: Optional[int] = None, + ) -> Optional[Alignment]: + """Align two DNA sequences, accounting for template switches. + + Parameters + ---------- + reference : str-like + Reference sequence. Any object with a string representation is + accepted (e.g. ``Bio.Seq``). + query : str-like + Query sequence. + reference_name : str + Label for the reference sequence. Default: ``"reference"``. + query_name : str + Label for the query sequence. Default: ``"query"``. + range : AlignmentRange, optional + Coordinate window to align within. When provided, overrides the + individual ``reference_start`` / ``reference_limit`` / + ``query_start`` / ``query_limit`` arguments. 
+ reference_start : int, optional + Start position in the reference (inclusive). Default: ``0``. + reference_limit : int, optional + End position in the reference (exclusive). Default: full length. + query_start : int, optional + Start position in the query (inclusive). Default: ``0``. + query_limit : int, optional + End position in the query (exclusive). Default: full length. + cost_limit : int, optional + Abandon the search and return ``None`` if the alignment cost would + exceed this value. + memory_limit : int, optional + Abandon the search and return ``None`` if the number of open A* + nodes exceeds this count. + + Returns + ------- + Alignment or None + ``None`` if no valid alignment was found within the given limits. + """ + if range is not None: + reference_start = range.reference_start + reference_limit = range.reference_end + query_start = range.query_start + query_limit = range.query_end + + inner = self._inner.align( + reference, + query, + reference_name, + query_name, + reference_start, + reference_limit, + query_start, + query_limit, + cost_limit, + memory_limit, + ) + return Alignment(inner) if inner is not None else None + + +def align( + reference: object, + query: object, + **kwargs: object, +) -> Optional[Alignment]: + """Align two DNA sequences in a single call. + + A convenience wrapper that creates a temporary :class:`Aligner` and + immediately calls :meth:`~Aligner.align`. + + Keyword arguments that belong to the aligner (``no_ts``, + ``min_length_strategy``, ``chaining_strategy``, ``total_length_strategy``, + ``costs``, ``costs_file``) are forwarded to the constructor; all remaining + keyword arguments are forwarded to :meth:`~Aligner.align`. + + Returns + ------- + Alignment or None + ``None`` if no valid alignment was found within the given limits. 
+ + Examples + -------- + :: + + result = tsalign.align("ACGTACGT", "ACGACGT") + print(result.cigar()) + """ + aligner_kwargs = {k: v for k, v in kwargs.items() if k in _ALIGNER_KWARG_NAMES} + align_kwargs = {k: v for k, v in kwargs.items() if k not in _ALIGNER_KWARG_NAMES} + return Aligner(**aligner_kwargs).align(reference, query, **align_kwargs) diff --git a/python_bindings/python/tsalign/_types.py b/python_bindings/python/tsalign/_types.py new file mode 100644 index 0000000..5cfb433 --- /dev/null +++ b/python_bindings/python/tsalign/_types.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Union + + +@dataclass +class AlignmentRange: + """Coordinate bounds for a pairwise alignment.""" + + reference_start: int = 0 + reference_end: "int | None" = None + query_start: int = 0 + query_end: "int | None" = None + + +@dataclass +class SimpleAlignmentOp: + """A basic edit operation in the primary or secondary alignment track. + + The ``kind`` attribute is one of: ``PrimaryMatch``, ``PrimarySubstitution``, + ``PrimaryInsertion``, ``PrimaryDeletion``, ``PrimaryFlankMatch``, + ``PrimaryFlankSubstitution``, ``PrimaryFlankInsertion``, ``PrimaryFlankDeletion``, + ``SecondaryMatch``, ``SecondarySubstitution``, ``SecondaryInsertion``, + ``SecondaryDeletion``. 
+ """ + + kind: str + + +@dataclass +class TemplateSwitchEntranceOp: + """Entrance into a template switch region.""" + + kind: str # always "TemplateSwitchEntrance" + first_offset: int + primary: str # "Reference" or "Query" + secondary: str # "Reference" or "Query" + direction: str # "Forward" or "Reverse" + equal_cost_range: dict # {min_start, max_start, min_end, max_end} + + +@dataclass +class TemplateSwitchExitOp: + """Exit from a template switch region.""" + + kind: str # always "TemplateSwitchExit" + anti_primary_gap: int + + +AlignmentOp = Union[SimpleAlignmentOp, TemplateSwitchEntranceOp, TemplateSwitchExitOp] + + +def _parse_op(raw: object) -> AlignmentOp: + if isinstance(raw, str): + return SimpleAlignmentOp(kind=raw) + if isinstance(raw, dict): + if "TemplateSwitchEntrance" in raw: + d = raw["TemplateSwitchEntrance"] + return TemplateSwitchEntranceOp( + kind="TemplateSwitchEntrance", + first_offset=d["first_offset"], + primary=d["primary"], + secondary=d["secondary"], + direction=d["direction"], + equal_cost_range=d["equal_cost_range"], + ) + if "TemplateSwitchExit" in raw: + return TemplateSwitchExitOp( + kind="TemplateSwitchExit", + anti_primary_gap=raw["TemplateSwitchExit"]["anti_primary_gap"], + ) + raise ValueError(f"Unknown alignment op: {raw!r}") diff --git a/python_bindings/python/tsalign/py.typed b/python_bindings/python/tsalign/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/python_bindings/src/lib.rs b/python_bindings/src/lib.rs index d450f6a..fdccc62 100644 --- a/python_bindings/src/lib.rs +++ b/python_bindings/src/lib.rs @@ -143,7 +143,7 @@ impl TSAligner { } /// Bindings for the `lib_tsalign` library. 
-#[pymodule] +#[pymodule(name = "_tsalign")] fn tsalign(m: &Bound<'_, PyModule>) -> PyResult<()> { pyo3_log::init(); m.add_class::()?; From 1ee25a36bb00b792e680cb7e326bc7d15b695b70 Mon Sep 17 00:00:00 2001 From: Jasper Krauter <12259417+jeeeesper@users.noreply.github.com> Date: Wed, 6 May 2026 12:07:19 +0300 Subject: [PATCH 2/2] fix(py): fix release tag name for python bindings --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 2c34e63..36d2582 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -113,7 +113,7 @@ jobs: release: name: Release - if: ${{ startsWith(github.ref, 'refs/tags/python_bindings-v') }} + if: ${{ startsWith(github.ref, 'refs/tags/py_lib_tsalign-v') }} runs-on: ubuntu-latest needs: [linux, windows, macos, sources] permissions: