# sigalign-py usage

In [1]:
import json

import pandas as pd
from sigalign import SequenceStorage, Reference, Aligner

## 1. Define `SequenceStorage`

In [2]:
# Labelled target sequences, listed in the order they are registered.
records = [
    ("record_1", "ACACAGATCGCAAACTCACAATTGTATTTCTTTGCCACCTGGGCATATACTTTTTGCGCCCCCTCATTTA"),
    ("record_2", "TCTGGGGCCATTGTATTTCTTTGCCAGCTGGGGCATATACTTTTTCCGCCCCCTCATTTACGCTCATCAC"),
]

# Fill an empty storage with one add_record() call per (label, sequence) pair.
ss = SequenceStorage()
for label, sequence in records:
    ss.add_record(label, sequence)

## 2. Build `Reference`

In [3]:
# Index-building options, gathered in one place and expanded as keywords.
# The values chosen here are echoed back by the status cell that follows.
build_options = dict(klt_size=2, sas_ratio=2, comp_block=True)

reference = Reference(ss, **build_options)

In [4]:
# Each printed heading maps to the `reference` attributes listed beneath it.
report_sections = {
    "# Status": ("record_count", "searchable_characters", "estimated_total_bytes"),
    "# Compression level": ("klt_size", "sas_ratio", "bwt_block_size"),
}

for heading, attribute_names in report_sections.items():
    print(heading)
    for attribute_name in attribute_names:
        # Same " - name: value" line format for every attribute.
        print(f" - {attribute_name}: {getattr(reference, attribute_name)}")

# Status
 - record_count: 2
 - searchable_characters: ['A', 'C', 'G', 'T']
 - estimated_total_bytes: 1311
# Compression level
 - klt_size: 2
 - sas_ratio: 2
 - bwt_block_size: 128


## 3. Make `Aligner`

In [5]:
# Positional settings, grouped the way the aligner reports them back
# (`aligner.penalties` and `aligner.cutoffs`).
# NOTE(review): presumably mismatch / gap-open / gap-extend penalties and
# minimum length / maximum penalty per length — confirm against sigalign docs.
penalty_args = (4, 6, 2)
cutoff_args = (50, 0.2)

aligner = Aligner(*penalty_args, *cutoff_args, is_local_mode=True)

In [6]:
# Echo the settings the aligner actually holds, one attribute per line.
for setting in ("penalties", "cutoffs", "pattern_size", "is_local_mode"):
    print(f"{setting}: {getattr(aligner, setting)}")

penalties: [4, 6, 2]
cutoffs: (50, 0.20000000298023224)
pattern_size: 12
is_local_mode: True


## 4. Perform Alignment

In [7]:
# Query sequence to align against the reference built above.
query = "CAAACTCACAATTGTATTTCTTTGCCAGCTGGGCATATACTTTTTCCGCCCCCTCATTTAACTTCTTGGA"

# Run the alignment; `result` is inspected in the cells below via its
# `to_json()` and `to_2d_array()` methods.
result = aligner.align_query(reference, query)

In [8]:
# Parse the JSON serialization back into plain Python lists/dicts so the
# notebook renders the nested structure (record -> alignments -> operations).
# `json` is imported in the notebook's top import cell.
json.loads(result.to_json())

[{'index': 1,
  'label': 'record_2',
  'alignments': [{'penalty': 8,
    'length': 51,
    'position': {'record': [9, 60], 'query': [10, 60]},
    'operations': [{'case': 'Match', 'count': 23},
     {'case': 'Insertion', 'count': 1},
     {'case': 'Match', 'count': 27}]}]},
 {'index': 0,
  'label': 'record_1',
  'alignments': [{'penalty': 8,
    'length': 60,
    'position': {'record': [10, 70], 'query': [0, 60]},
    'operations': [{'case': 'Match', 'count': 27},
     {'case': 'Subst', 'count': 1},
     {'case': 'Match', 'count': 17},
     {'case': 'Subst', 'count': 1},
     {'case': 'Match', 'count': 14}]}]}]

In [9]:
# Labels for the columns of `result.to_2d_array()`, in order.
alignment_columns = [
    'index', 'label', 'penalty', 'length',
    'qstart', 'qend', 'rstart', 'rend', 'operations',
]

# Flat, tabular view of the same result; `pd` is imported in the notebook's
# top import cell.
df = pd.DataFrame(result.to_2d_array(), columns=alignment_columns)
df

Unnamed: 0,index,label,penalty,length,qstart,qend,rstart,rend,operations
0,1,record_2,8,51,10,60,9,60,M23I1M27
1,0,record_1,8,60,0,60,10,70,M27S1M17S1M14
