# Needleman–Wunsch

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from IPython.display import display, Markdown, Latex

In [2]:
# Prepend the parent directory to the module search path.
# Presumably this is what lets the `phylo` imports in the following cells
# resolve when the notebook is run from its own folder -- confirm the layout.
import sys
sys.path.insert(0, '..')

## Load Data

In [3]:
from phylo.main import load_snippets_data, load_snippets_md

In [4]:
# Load the 'mult' snippet set through the two phylo.main loaders.
# NOTE(review): neither `data` nor `md` is referenced by any later cell shown
# in this notebook -- confirm they are still needed, otherwise drop this cell.
data = load_snippets_data('mult')
md = load_snippets_md('mult')

---

## Classy

The Python class `NW` is a convenience wrapper around the module's base functions.

In [5]:
from phylo.needleman_wunsch import NW

In [6]:
# Demo inputs: the second string is the first with its 'e' removed.
s1, s2 = "hello", "hllo"

In [7]:
# Wrap the two strings in an NW instance and run the alignment.
# solve() is called before any result attribute is read in the cells below;
# presumably it is what populates them -- confirm in phylo.needleman_wunsch.
nw = NW(s1, s2)
nw.solve()

#### See results

In [8]:
# Plain-text alignment, one sequence per line; the gap is shown as '-'.
print(nw.aligned_s1, nw.aligned_s2, sep="\n")

hello
h-llo


In [9]:
# aligned_strings is unpacked so each string gets its own display() output.
# In the recorded output the gap is drawn as '▬', not the '-' of aligned_s2.
display(*nw.aligned_strings)

'hello'

'h▬llo'

In [10]:
# Same pair with a connector row between them: in the recorded output '|'
# sits under matching columns and a space under the gap column.
display(*nw.get_aligned_strings_with_connector())

'hello'

'| |||'

'h▬llo'

In [11]:
# Identity as a (fraction string, ratio) pair: ('4/5', 0.8) for this alignment.
nw.identity_score

('4/5', 0.8)

In [12]:
# Gaps as a (fraction string, ratio) pair: ('1/5', 0.2) for this alignment.
nw.gap_score

('1/5', 0.2)

In [13]:
# Printed summary: a '#'-prefixed header with the sequence count, length,
# identity and gap figures, followed by the alignment with its connector row.
print(nw.summarize())


#
# Aligned_sequences: 2
# Length: 5
# Identity:     4/5    (80 %)
# Gaps:         1/5    (20 %)
#

s1	hello
  	| |||
s2	h▬llo



#### See underlying matrix

In [14]:
# Raw result array: each cell prints as a (score, direction) pair.
# Comparing with the nw.matrix output, 'H', 'V', 'D' line up with the arrows
# ←, ↑, ↖, and '.' marks the origin cell.
print(nw.arr)

[[( 0, '.') (-1, 'H') (-2, 'H') (-3, 'H') (-4, 'H') (-5, 'H')]
 [(-1, 'V') ( 1, 'D') ( 0, 'H') (-1, 'H') (-2, 'H') (-3, 'H')]
 [(-2, 'V') ( 0, 'V') ( 0, 'D') ( 1, 'D') ( 0, 'D') (-1, 'H')]
 [(-3, 'V') (-1, 'V') (-1, 'D') ( 1, 'D') ( 2, 'D') ( 1, 'H')]
 [(-4, 'V') (-2, 'V') (-2, 'D') ( 0, 'V') ( 1, 'V') ( 3, 'D')]]


In [15]:
# Labelled view of the same scores and directions, with arrows in place of
# the direction letters; s1 runs across the columns and s2 down the rows.
nw.matrix

Unnamed: 0,Unnamed: 1,h,e,l,l.1,o
,[. 0],[← -1],[← -2],[← -3],[← -4],[← -5]
h,[↑ -1],[↖ 1],[← 0],[← -1],[← -2],[← -3]
l,[↑ -2],[↑ 0],[↖ 0],[↖ 1],[↖ 0],[← -1]
l,[↑ -3],[↑ -1],[↖ -1],[↖ 1],[↖ 2],[← 1]
o,[↑ -4],[↑ -2],[↖ -2],[↑ 0],[↑ 1],[↖ 3]


---

## Functional

The functions underlying `NW` can also be used independently.

In [16]:
from phylo.needleman_wunsch import (
    nw_algo, arr_to_frames, arr_to_table,
    traceback, identity_score, gap_score, 
)

In [17]:
# Input pair for the function-based walkthrough: "hello" and the same word
# without its 'e'.
s1, s2 = "hello", "hllo"

In [18]:
# nw_algo builds the array directly from the two strings; the printed result
# matches the nw.arr output from the class-based run.
arr = nw_algo(s1, s2)
print(arr)

[[( 0, '.') (-1, 'H') (-2, 'H') (-3, 'H') (-4, 'H') (-5, 'H')]
 [(-1, 'V') ( 1, 'D') ( 0, 'H') (-1, 'H') (-2, 'H') (-3, 'H')]
 [(-2, 'V') ( 0, 'V') ( 0, 'D') ( 1, 'D') ( 0, 'D') (-1, 'H')]
 [(-3, 'V') (-1, 'V') (-1, 'D') ( 1, 'D') ( 2, 'D') ( 1, 'H')]
 [(-4, 'V') (-2, 'V') (-2, 'D') ( 0, 'V') ( 1, 'V') ( 3, 'D')]]


In [19]:
# Render the array with s1 and s2 as labels; the recorded output matches the
# nw.matrix output from the class-based run.
arr_to_table(arr, s1, s2)

Unnamed: 0,Unnamed: 1,h,e,l,l.1,o
,[. 0],[← -1],[← -2],[← -3],[← -4],[← -5]
h,[↑ -1],[↖ 1],[← 0],[← -1],[← -2],[← -3]
l,[↑ -2],[↑ 0],[↖ 0],[↖ 1],[↖ 0],[← -1]
l,[↑ -3],[↑ -1],[↖ -1],[↖ 1],[↖ 2],[← 1]
o,[↑ -4],[↑ -2],[↖ -2],[↑ 0],[↑ 1],[↖ 3]


In [20]:
# Only the 'dir' field of the array is passed to traceback, together with the
# original strings; the two values it returns are the aligned sequences
# printed in the next cell.
# NOTE(review): this `traceback` is the phylo function, which shares its name
# with the standard-library `traceback` module.
a, b = traceback(arr['dir'], s1, s2)

In [21]:
# Show the two aligned sequences, one per line.
print(a, b, sep="\n")

hello
h-llo


In [22]:
# Function form of the identity score, computed from the two aligned strings;
# the recorded result equals nw.identity_score from the class-based run.
identity_score(a, b)

('4/5', 0.8)

In [23]:
# Function form of the gap score, computed from the two aligned strings;
# the recorded result equals nw.gap_score from the class-based run.
gap_score(a, b)

('1/5', 0.2)