In [224]:
import Levenshtein as lev

# Determines if two Levenshtein operations are adjacent based on:
# - Are within 1 index of each other
# - Are of the same type (insert, delete, replace)
def isAdjacent(a, b):
    """Return 1 if operations *a* and *b* are adjacent, otherwise 0.

    Each operation is an indexable record whose element 0 is the operation
    type and whose element 1 is an index (anything ``int()`` accepts).
    Two operations count as adjacent when their types are equal and their
    indices differ by at most 1 (identical indices included).
    """
    kind_a, pos_a = a[0], a[1]
    kind_b, pos_b = b[0], b[1]
    # Type mismatch short-circuits before any index conversion happens.
    if kind_a != kind_b:
        return 0
    distance = abs(int(pos_a) - int(pos_b))
    return 1 if distance <= 1 else 0

# Aggregates adjacent Levenshtein operations into lists
def getCombinedOperations(ops):
    """Group consecutive adjacent operations into lists.

    Walks *ops* in order and starts a new group whenever the current
    operation is not adjacent (per ``isAdjacent``) to the one directly
    before it. Every operation ends up in exactly one group, and groups
    keep the input order.

    Parameters:
        ops: iterable of operation records accepted by ``isAdjacent``.

    Returns:
        A list of lists of operations. An empty input yields ``[]``; a
        single operation yields ``[[op]]``. The returned groups are always
        new lists, never the caller's ``ops`` object itself.
    """
    connorSets = []
    for op in ops:
        # The last element of the open group is the immediately preceding
        # operation, so one comparison decides whether the run continues.
        if connorSets and isAdjacent(connorSets[-1][-1], op):
            connorSets[-1].append(op)
        else:
            connorSets.append([op])
    return connorSets

# Combines aggregated Levenshtein operations into Connor operations
def getConnorOperations(csets, target):
    """Collapse each group of operations into one ``[type, start, end, text]`` entry.

    For every group in *csets*:
      * ``type``  is element 0 of the group's first operation,
      * ``start`` is element 1 of the group's first operation,
      * ``end``   is element 1 of the group's last operation,
      * ``text``  concatenates ``target[op[2]]`` for each operation whose
        type is 'insert' or 'replace' (so it stays empty for other types).

    Element 2 of an operation is used as an index into *target*; presumably
    it is the destination position of the edit -- confirm against the
    producer of the operations.

    Raises IndexError if a group is empty.
    """
    text_bearing = ('insert', 'replace')
    combined = []
    for group in csets:
        head = group[0]
        tail = group[-1]
        text = "".join(
            target[step[2]] for step in group if step[0] in text_bearing
        )
        combined.append([head[0], head[1], tail[1], text])
    return combined
        
#Workflow:
# (1) Get Levenshtein operations
# (2) Aggregate adjacent operations
# (3) Convert to Connor operations
# (4) TODO - All Connor operations can be expressed as a variety of replacements - this is the next step.

# Example input pair: edit `origin` into `target`.
origin = "The quick brown fox"
target = "The lazy dog"

# Step 1: raw edit operations from the Levenshtein package.
ops = lev.editops(origin, target)
# Step 2: group adjacent operations of the same type.
aggregateSets = getCombinedOperations(ops)
# Step 3: collapse each group into a [type, start, end, text] entry;
# the text is read from `target`.
connorOps = getConnorOperations(aggregateSets, target)
# Print one combined operation per line.
for o in connorOps:
    print(o)

['delete', 4, 10, '']
['replace', 11, 14, 'lazy']
['replace', 16, 16, 'd']
['replace', 18, 18, 'g']
