In [1]:
from edit_dist import EditDistance, match_fuzzy

In [2]:
# Build an EditDistance object for turning the first word ('slap') into the second ('happy').
edit = EditDistance('slap', 'happy')

In [3]:
# Show the two words lined up against each other; '-' marks a gap in one of them.
edit.align()

slap--
-happy


In [4]:
# Print one edit label per aligned position ('del', 'sub', 'non' or 'add').
edit.print_seq()

['del', 'sub', 'non', 'non', 'add', 'add']


In other words, the minimal sequence of edits needed to change `slap` into `happy` is:
- delete (s)
- substitute (h for l)
- none (a in slap matches a in happy)
- none (p in slap matches p in happy)
- add (p)
- add (y)

In [5]:
# Total cost of the edit sequence for the current word pair.
cost = edit.get_cost()
# Bare name on the last line so the notebook displays the value.
cost

5.0

That is, with the default costs (1 point per insertion or deletion, 2 points per substitution, and nothing for a match), the total cost is 5: del + sub + add + add = 1 + 2 + 1 + 1 = 5.

In [6]:
# Reuse the existing `edit` object for a new pair of phrases, then show their alignment.
edit.set_words('mystery utopia', 'blustery dsytopia')
edit.align()

-mystery --utopia
blustery dsytopia


In [7]:
# Print the edit labels for the current phrase pair, then display its total cost.
# NOTE(review): the recorded cost below (5.0) does not agree with the printed
# sequence: 3 'add' + 3 'sub' at the default costs (1 and 2) would total 9.0.
# 5.0 is the cost of the earlier slap/happy pair, so get_cost() may be returning
# a stale value after set_words() -- confirm in edit_dist before trusting this output.
edit.print_seq()
edit.get_cost()

['add', 'sub', 'sub', 'non', 'non', 'non', 'non', 'non', 'non', 'add', 'add', 'sub', 'non', 'non', 'non', 'non', 'non']


5.0

In [8]:
# The misspelled phrase to look up.
found = 'greet jorb'

# Known phrases that `found` might correspond to.
candidates = [
    'great herb',
    'great job',
    'greet bob',
    'grate tarp',
]

# With verbose=True the cost of each candidate is printed as well (see output).
best = match_fuzzy(found, candidates, verbose=True)
print('\nBest match:', best)

greet jorb
  -> great herb: 6.0
  -> great job: 3.0
  -> greet bob: 3.0
  -> grate tarp: 10.0

Best match: ['great job', 'greet bob']


In [9]:
# Same lookup, but with deletions and insertions costing 4 each instead of the default.
best = match_fuzzy(
    found,
    candidates,
    verbose=True,
    del_cost=4,
    add_cost=4,
)
print('\nBest match:', best)

greet jorb
  -> great herb: 6.0
  -> great job: 6.0
  -> greet bob: 6.0
  -> grate tarp: 12.0

Best match: ['great herb', 'great job', 'greet bob']


In [10]:
# DNA alignment: three short sequences, named for the genus they are labelled with.
gorilla = 'ATTGCCGGGTCTCTCAGGAGCTTATAA'
pan = 'ATTCGCCGAGGCTCTTCAGGGCTTATCA'
homo = 'ATTCGCCCGGCTCTTCAGGGGCTTATTA'

# Align the pan sequence against the homo sequence using a separate object.
eddie = EditDistance(pan, homo)
eddie.align()

ATTCGCC-GAGGCTCTTCAGGG-CTTATCA
ATTCGCCCG-G-CTCTTCAGGGGCTTATTA


In [11]:
# Align gorilla against homo. This reuses the `edit` object from the earlier cells
# (not `eddie` from the previous cell), replacing the phrases it held before.
edit.set_words(gorilla, homo)
edit.align()

ATT-GCC-GGGTCTCT-CAGGAG-CTTAT-AA
ATTCGCCCGG--CTCTTCAGG-GGCTTATTA-
