In [1]:
reset -fs

Levenshtein Distance
---

[Solution](https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python)
[Another solution](http://www.geeksforgeeks.org/dynamic-programming-set-5-edit-distance/)

In [6]:
def levenshtein(s1, s2):
    """Takes 2 strings, returns Levenshtein distance.
    
    See https://en.wikipedia.org/wiki/Levenshtein_distance
    """
    
    if len(s1) < len(s2): # If one word is shorter than the other then change the order (bookkeeping to be consistent)
        return levenshtein(s2, s1)
 
    if len(s2) == 0: # Make sure we get a word
        return len(s1) # If not the cost is simply dropping all the letters in one of the words, i.e. the length
 
    previous_row = range(len(s2)+1) # Create an array of length of the second word+1
   
    for i, c1 in enumerate(s1): # Interate through the first word 
        current_row = [i + 1]
        for j, c2 in enumerate(s2): # Interate through the second word
            insertions = previous_row[j + 1] + 1 
            deletions = current_row[j] + 1       # Take the current row value and add cost
            substitutions = previous_row[j] + (c1 != c2) # Take the previous row value and boolean difference
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
 
    return previous_row[-1]

In [7]:
from nltk.metrics.distance import edit_distance 

In [9]:
pairs = [('foo', 'poo'),
         ('intention', 'execution')]

for pair in pairs:
    assert levenshtein(*pair) == edit_distance(*pair)

-----

In [10]:
def levenshtein(s1, s2, cost_sub):
    """Takes 2 strings, returns Levenshtein distance.
    
    See https://en.wikipedia.org/wiki/Levenshtein_distance
    """
    
    if len(s1) < len(s2): # If one word is shorter than the other then change the order (bookkeeping to be consistent)
        return levenshtein(s2, s1)
 
    if len(s2) == 0: # Make sure we get a word
        return len(s1) # If not the cost is simply dropping all the letters in one of the words, i.e. the length
 
    previous_row = range(len(s2)+1) # Create an array of length of the second word+1
   
    for i, c1 in enumerate(s1): # Interate through the first word 
        current_row = [i + 1]
        for j, c2 in enumerate(s2): # Interate through the second word
            insertions = previous_row[j + 1] + 1 
            deletions = current_row[j] + 1       # Take the current row value and add cost
            substitutions = previous_row[j] + (c1 != c2)*cost_sub # Take the previous row value and boolean difference
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
 
    return previous_row[-1]

In [11]:
assert levenshtein('foo', 'poo', cost_sub=2) == 2
assert levenshtein('intention', 'execution', cost_sub=2) == 8

In [None]:
#TODO: Refactor `levenshtein` to use `numpy`. Compare runtime performance.

# Not finished
def levenshtein(source, target):
    if len(source) < len(target):
        return levenshtein(target, source)

    # So now we have len(source) >= len(target).
    if len(target) == 0:
        return len(source)

    # We call tuple() to force strings to be used as sequences
    # ('c', 'a', 't', 's') - numpy uses them as values by default.
    source = np.array(tuple(source))
    target = np.array(tuple(target))

    # We use a dynamic programming algorithm, but with the
    # added optimization that we only need the last two rows
    # of the matrix.
    previous_row = np.arange(target.size + 1)
    for s in source:
        # Insertion (target grows longer than source):
        current_row = previous_row + 1

        # Substitution or matching:
        # Target and source items are aligned, and either
        # are different (cost of 1), or are the same (cost of 0).
        current_row[1:] = np.minimum(
                current_row[1:],
                np.add(previous_row[:-1], target!=s))

        # Deletion (target grows shorter than source):
        current_row[1:] = np.minimum(
                current_row[1:],
                current_row[0:-1] + 1)

        previous_row = current_row

    return previous_row[-1]

Change making problem
--

In [14]:
def make_change(currency=[], value=0):
    """Given a list of coins, find the minimum number of coins and which ones.
    Use dynamic programming.
    
    >>> make_change(coins=[1, 5, 10], value=10)
    (1, [10]) # 1 coin, 10 denomination
    
    >>> make_change(coins=[1, 5, 10], value=15)
    (2, [10, 5]) # 2 coins, 10 and 5 in denomination
    
    >>> make_change(coins=[5, 10], value=3)
    No solution possible
    """

    table = [None]*(value+1) # Initialize the table
    table[0] = [] 
    for i in range(1, value+1):
        for current in currency:
            if current > i: continue
            elif not table[i] or len(table[i - current]) + 1 < len(table[i]):
                if table[i - current] != None:
                    table[i] = table[i - current][:]
                    table[i].append(current)

    if table[-1] != None:
        return len(table[-1]), table[-1] # Number of coins, which coins
    else:
        return 'No solution possible'

In [16]:
assert make_change(currency=[1, 5, 10], value=10) == (1, [10])
assert make_change(currency=[1, 5, 10], value=15) == (2, [10, 5])
assert make_change(currency=[1, 5, 10], value=30) == (3, [10, 10, 10])
assert make_change(currency=[1, 5, 21, 25], value=63) == (3, [21, 21, 21])
assert make_change(currency=[5, 10], value=3) == 'No solution possible'

remeber: [Python Tutor](http://www.pythontutor.com/)

In [25]:
"""__2__) Refactor `make_change` to generate all possible ways of making change (i.e., combinations) from the given coins. Then find minimum valid way. """
# Hint: 
# from itertools import combinations_with_replacement

# Not finished

from itertools import combinations_with_replacement as comb_w_replace

def make_change_combos(currency=[], value=0):
    """Given a list of coins, find the minimum number of coins and which ones.
    Generate all possible combinations and find min valid one.
    """
    valid_combinations = []
    for i in range(1, value//min(currency)+1):
        contenders = [list(x) for x in comb_w_replace(currency, i)]
        for contender in contenders:  
            # Check if valid and unique way to make change
            if (sum(contender) == value) and (tuple(contender) not in set(tuple(_) for _ in valid_combinations)):
                valid_combinations.append(contender)

    # Return the min value (if it exists)
    try:
        winner = sorted(valid_combinations, key=len)[0]
        return len(winner), winner # number of coins, which coins
    except IndexError:
        return "No solution possible"

In [24]:
assert make_change_combos(currency=[1, 5, 10], value=10) == (1, [10])
assert make_change_combos(currency=[1, 5, 10], value=15) == (2, [10, 5])
assert make_change_combos(currency=[1, 5, 10], value=30) == (3, [10, 10, 10])
assert make_change_combos(currency=[1, 5, 21, 25], value=63) == (3, [21, 21, 21])
assert make_change_combos(currency=[5, 10], value=3) == 'No solution possible'


AssertionError: 