# `sigalign` usage in Python

In [1]:
import json

import pandas as pd
from sigalign import Reference, Aligner

## 1. Build `Reference`

In [2]:
# Target sequences, given as (label, sequence) pairs.
target_sequences = [
    ("target_1", "ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA"),
    ("target_2", "TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"),
]

# Settings passed through as `indexing_option`.
index_options = dict(
    sasr=2,      # Suffix array sampling ratio
    lts=10_000,  # Lookup table size (Bytes)
)

reference = Reference(targets=target_sequences, indexing_option=index_options)

# A `Reference` can also be built from a FASTA file:
# reference = Reference.from_fasta_file(fasta_file_path)

In [3]:
# Summarize the reference: number of targets and its estimated size.
# `estimated_size` is divided by 1024 so the value can be shown with a KiB suffix.
estimated_kib = reference.estimated_size / 1024
print(
    "# Reference Status",
    f" - Num_targets: {reference.num_targets}",
    f" - Estimated_total_bytes: {estimated_kib:.2f} KiB",
    sep="\n",
)

# Reference Status
 - Num_targets: 2
 - Estimated_total_bytes: 31.44 KiB


## 2. Initialize `Aligner`

In [4]:
# Penalties
mismatch_penalty = 4
gap_open_penalty = 6
gap_extend_penalty = 2

# Similarity cutoffs
minimum_length = 50
maximum_penalty_per_length = 0.2

# The arguments are positional, so the order below must be preserved.
aligner = Aligner(
    mismatch_penalty,
    gap_open_penalty,
    gap_extend_penalty,
    minimum_length,
    maximum_penalty_per_length,
)

In [5]:
# Report the aligner's configuration: penalties, similarity cutoffs and mode.
mode_name = "Local" if aligner.is_local_mode else "Semi-global"
print(
    "# Aligner Status",
    "  - Penalties",
    f"    - Mismatch penalty: {aligner.px}",
    f"    - Gap-open penalty: {aligner.po}",
    f"    - Gap-extend penalty: {aligner.pe}",
    "  - Similarity Cutoffs",
    f"    - Minimum length: {aligner.ml}",
    f"    - Maximum penalty per length: {aligner.mpl:.1f}",
    f"  - Mode is {mode_name}",
    sep="\n",
)

# Aligner Status
  - Penalties
    - Mismatch penalty: 4
    - Gap-open penalty: 6
    - Gap-extend penalty: 2
  - Similarity Cutoffs
    - Minimum length: 50
    - Maximum penalty per length: 0.2
  - Mode is Local


## 3. Perform Alignment

In [6]:
# Query sequence to align against the reference
query = "CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA"

# Align the single query string; the outcome is kept in `results` for the cells below
results = aligner.align_query(reference, query)

# A FASTA file can be used instead:
# results = aligner.align_fasta_file(
#     reference,
#     fasta_file_path,
# )

# Print the result object's summary text
print(results)

QueryResult(num_target_results=2)


In [7]:
# Walk over each target's result and list its alignments, numbered from 1.
for target_result in results:
    print(f"# Target index: {target_result.index}")
    for rank, alignment in enumerate(target_result.alignments, start=1):
        # One print call per alignment; lines are joined with newlines.
        print(
            f"  - Result: {rank}",
            f"    - Penalty: {alignment.penalty}",
            f"    - Length: {alignment.length}",
            f"    - Query position: {alignment.query_position}",
            f"    - Target position: {alignment.target_position}",
            sep="\n",
        )

# Target index: 0
  - Result: 1
    - Penalty: 8
    - Length: 60
    - Query position: (0, 60)
    - Target position: (10, 70)
# Target index: 1
  - Result: 1
    - Penalty: 8
    - Length: 51
    - Query position: (10, 60)
    - Target position: (9, 60)


In [8]:
# Convert the results to JSON, then parse that JSON into plain Python objects
# (`json` is imported in the notebook's first cell).
json.loads(results.to_json())

[{'index': 0,
  'label': 'target_1',
  'alignments': [{'penalty': 8,
    'length': 60,
    'query_position': [0, 60],
    'target_position': [10, 70],
    'operations': [{'case': 'M', 'count': 27},
     {'case': 'S', 'count': 1},
     {'case': 'M', 'count': 17},
     {'case': 'S', 'count': 1},
     {'case': 'M', 'count': 14}]}]},
 {'index': 1,
  'label': 'target_2',
  'alignments': [{'penalty': 8,
    'length': 51,
    'query_position': [10, 60],
    'target_position': [9, 60],
    'operations': [{'case': 'M', 'count': 23},
     {'case': 'I', 'count': 1},
     {'case': 'M', 'count': 27}]}]}]

In [9]:
# Convert the results to a table and load it into a pandas DataFrame
# (`pd` is imported in the notebook's first cell).
df = pd.DataFrame(
    results.to_table(),
    # Column names are assigned positionally to the fields of each table row.
    columns=[
        'index', 'label', 'penalty', 'length',
        'qstart', 'qend', 'tstart', 'tend', 'operations',
    ],
)
df

Unnamed: 0,index,label,penalty,length,qstart,qend,tstart,tend,operations
0,0,target_1,8,60,0,60,10,70,M27S1M17S1M14
1,1,target_2,8,51,10,60,9,60,M23I1M27
