# Resources
* [Overview of diff](https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/)
* [Patience diff intro](https://blog.jcoglan.com/2017/09/19/the-patience-diff-algorithm/#:~:text=What%20it%20really%20is%20is,like%20Myers%20on%20those%20pieces.&text=Patience%20diff%20splits%20this%20problem,exactly%20once%20in%20both%20versions.)

## Python Notes
dictionary insert/delete complexity: O(1)

dictionary lookup: O(1) on average (O(n) only in the worst case of many hash collisions)

list insert/delete complexity: O(n)

list lookup by index: O(1)


## Steps to Find Diff
1. Read files to be compared
2. Define Slice
3. Find unique matching lines
4. Patience-sort the unique matching lines to find the largest set of matches that do not cross each other
5. Divide into smaller slices around those matches and go back to step 3 for each slice

### First read the two files to be compared

In [19]:
# Load each file and break it into a list of lines (line terminators removed).
with open('a.py','r') as f:
    a_file = f.read().split("\n")

with open('b.py','r') as f:
    b_file = f.read().split("\n")

### Define the Slice class (a sub-range of both files after slicing)

In [20]:
class Slice:
    """A paired region of file A and file B that is compared as one unit.

    Attributes are plain copies of the constructor arguments:
        a_low:  position in A where the region starts.
        a_high: upper bound of the region in A (the notebook's initial slice
                passes len(a_file), which suggests an exclusive bound --
                TODO confirm).
        b_low:  position in B where the region starts.
        b_high: upper bound of the region in B (same caveat as a_high).
        a_data: lines of A belonging to the region.
        b_data: lines of B belonging to the region.
    """

    def __init__(self, a_low, a_high, b_low, b_high, a_data, b_data):
        self.a_low = a_low
        # The upper bounds were previously accepted but dropped; keep them so
        # the slice knows where it ends as well as where it starts.
        self.a_high = a_high
        self.b_low = b_low
        self.b_high = b_high
        self.a_data = a_data
        self.b_data = b_data
        

# Initial slice spanning both files in full: the A side is the whole content of
# file one and the B side is the whole content of file two.
start_slice = Slice(0, len(a_file), 0, len(b_file), a_file, b_file)

### Find lines with identical content that appear exactly once in both A and B

In [51]:
def find_unique_match(slice):
    """Return the lines that occur exactly once in A and exactly once in B.

    Args:
        slice: object exposing ``a_data`` / ``b_data`` (the lines of each
            side) and ``a_low`` / ``b_low`` (offsets added to the index of a
            line within its side to form the reported position).

    Returns:
        A list of ``(position in A, position in B, line)`` tuples, in the
        order the lines are first seen in A.
    """
    lines_a = slice.a_data
    lines_b = slice.b_data
    # line text -> [occurrences in A, occurrences in B, position in A, position in B]
    tally = {}

    for offset, text in enumerate(lines_a):
        entry = tally.get(text)
        if entry is None:
            # First sighting in A: remember where it was found.
            tally[text] = [1, 0, slice.a_low + offset, None]
        else:
            entry[0] += 1

    for offset, text in enumerate(lines_b):
        entry = tally.get(text)
        # A line of B that never occurs in A cannot be a match; skip it.
        if entry is not None:
            entry[1] += 1
            entry[3] = slice.b_low + offset

    return [
        (pos_a, pos_b, text)
        for text, (count_a, count_b, pos_a, pos_b) in tally.items()
        if count_a == 1 and count_b == 1
    ]

# Collect the unique matches across the whole of both files.
matches = find_unique_match(start_slice)
# Spot-check a single entry; this raises IndexError when fewer than seven
# matches were found.
print(matches[6])
print(len(matches))
# Uncomment to list every match:
# for match in matches:
#     print(match)


(10, 2, '        csv_file = csv.writer(file)')
26


In [52]:
def patience_sort(matches):
    """Find the longest chain of matches whose positions in B keep increasing.

    The matches are dealt onto patience-sort stacks keyed by their position in
    B. Every match placed on a stack other than the first remembers the top of
    the stack to its left at that moment; following those links back from the
    top of the last stack yields the chain.

    Args:
        matches: sequence of tuples whose item at index 1 is a position in B,
            e.g. the ``(position in A, position in B, line)`` tuples built by
            ``find_unique_match``. Assumed to be ordered by position in A and
            to carry distinct B positions -- TODO confirm for other callers.

    Returns:
        A list of ``(position in B, position in B of its predecessor)`` edges,
        starting from the end of the chain and walking backwards. The first
        match of the chain only appears as the predecessor in the last edge,
        so a chain of k matches yields k - 1 edges. The list is empty when
        ``matches`` is empty or no two matches chain together.

    Side effects:
        Prints every edge after the first as a one-element list while walking
        back, then prints the number of edges (unchanged from the earlier
        behaviour, so existing notebook output stays the same).
    """
    longest_matches = []
    if not matches:
        # Nothing to sort; previously this raised IndexError on matches[0].
        print(len(longest_matches))
        return longest_matches

    # stacks[i] holds the B position currently on top of stack i.
    stacks = [matches[0][1]]
    # B position -> B position that was on top of the stack to its left when
    # the match was placed (its predecessor in the chain).
    previous = {}

    for match in matches[1:]:
        b_pos = match[1]
        stack_index = bi_search(stacks, b_pos)
        if stack_index == -1:
            # Smaller than every stack top: replaces the first stack's top and
            # has no predecessor.
            stacks[0] = b_pos
            continue
        previous[b_pos] = stacks[stack_index]
        if stack_index == len(stacks) - 1:
            # Larger than every stack top: opens a new stack on the right.
            stacks.append(b_pos)
        else:
            # Lands on the stack just right of the closest smaller top.
            stacks[stack_index + 1] = b_pos

    # Walk the predecessor links back from the top of the last stack. When
    # only one stack exists its top has no link, so the walk ends immediately
    # (previously this raised UnboundLocalError on `last`).
    current = stacks[-1]
    while current in previous:
        edge = (current, previous[current])
        if longest_matches:
            print([edge])
        longest_matches.append(edge)
        current = edge[1]

    print(len(longest_matches))
    return longest_matches

def bi_search(stacks, b_pos):
    """Return the index of the last stack top that is smaller than ``b_pos``.

    Args:
        stacks: list of stack tops in ascending order (the order the patience
            sort maintains); the binary search relies on that ordering.
        b_pos: position in B to locate.

    Returns:
        The index of the closest entry strictly smaller than ``b_pos``, or -1
        when no entry is smaller (including when ``stacks`` is empty).
    """
    from bisect import bisect_left

    # bisect_left counts the entries strictly smaller than b_pos; the last of
    # them sits one index before that insertion point.
    return bisect_left(stacks, b_pos) - 1

# Run the chain search on the unique matches collected earlier; the function
# prints its own progress and the number of edges it found.
patience_sort(matches)

[(34, 32)]
[(32, 31)]
[(31, 30)]
[(30, 28)]
[(28, 27)]
[(27, 25)]
[(25, 24)]
[(24, 23)]
[(23, 22)]
[(22, 21)]
[(21, 19)]
[(19, 18)]
[(18, 17)]
[(17, 6)]
[(6, 5)]
[(5, 4)]
[(4, 3)]
[(3, 2)]
[(2, 1)]
[(1, 0)]
21
