In [None]:
#meta 4/17/2023 difflib — Helpers for computing deltas
# Refer to https://docs.python.org/3/library/difflib.html
#Use: apples to oranges

# myUtilities: Strings


- `difflib`: https://docs.python.org/3/library/difflib.html  
helper for computing deltas  
use case: finding deltas in long error strings

In [2]:
import difflib

In [15]:
from difflib import Differ

# A single Differ instance; it is reused by the character-level example further down.
differ_inst = Differ()

# Differ.compare() expects sequences of lines, so each text is split into
# lines with the line endings kept (keepends=True).
string1 = (
    "This is a random string.\n"
    " Lets call it string 1. \n"
    " This is so random\n"
    " "
).splitlines(keepends=True)

string2 = (
    "This is a random string.\n"
    " Lets call it string 2. \n"
    " This is so random.\n"
    " Or mayble not, or is it.\n"
    " "
).splitlines(keepends=True)

# Every delta entry starts with a two-character code:
#   '- ' line only in string1, '+ ' line only in string2,
#   '  ' line common to both, '? ' hint line marking intraline differences.
deltas = [*differ_inst.compare(string1, string2)]
deltas

['  This is a random string.\n',
 '-  Lets call it string 1. \n',
 '?                      ^\n',
 '+  Lets call it string 2. \n',
 '?                      ^\n',
 '-  This is so random\n',
 '+  This is so random.\n',
 '?                   +\n',
 '+  Or mayble not, or is it.\n',
 '   ']

In [16]:
# A minimal character-level example: the two plain strings are passed
# directly to compare(), which then yields one delta entry per character.
str1, str2 = "anya", "any0"
deltas = [*differ_inst.compare(str1, str2)]
deltas

['  a', '  n', '  y', '- a', '+ 0']

In [17]:
# Message template for each ndiff line code; entries with any other code
# are not reported.
action_templates = {
    ' ': 'Common string "{}" to position {}',
    '-': 'Delete "{}" from position {}',
    '+': 'Add "{}" to position {}',
}

for source, target in [(str1, str2)]:
    print('{} => {}'.format(source, target))
    # NOTE: the reported position is the index of the entry in the ndiff
    # output, not a character offset into either string.
    for position, delta in enumerate(difflib.ndiff(source, target)):
        template = action_templates.get(delta[0])
        if template is not None:
            print(template.format(delta[-1], position))
    print()

anya => any0
Common string "a" to position 0
Common string "n" to position 1
Common string "y" to position 2
Delete "a" from position 3
Add "0" to position 4



## Match Strings
For exact string matches

`SequenceMatcher` object methods:  
- `get_matching_blocks`  
- `find_longest_match`

In [3]:
# Two lyric snippets sharing two exact substrings; str2 surrounds them with
# extra text.
verse = "Hey, teacher, leave them kids alone"
refrain = " All in all, it's just another brick in the wall"

str1 = verse + refrain
str2 = (
    "Song by Pink Floyd "
    + verse
    + " Titled Another Brick in the Wall"
    + refrain
    + " part of movie The Wall"
)

In [4]:
# SequenceMatcher finds matching subsequences between str1 (a) and str2 (b).
# autojunk=False switches off the automatic junk heuristic: by default,
# frequently occurring items are treated as junk once the second sequence is
# at least 200 items long, which can hide exact matches in long strings.
match_seq = difflib.SequenceMatcher(a=str1, b=str2, autojunk=False)

# All matching blocks as Match(a, b, size) triples; the last entry is a
# zero-size dummy marking the end of both sequences.
match_seq.get_matching_blocks()

[Match(a=0, b=19, size=35),
 Match(a=35, b=87, size=48),
 Match(a=83, b=158, size=0)]

In [5]:
# Show every matching block together with the text it covers in each string.
for block in match_seq.get_matching_blocks():
    a_end = block.a + block.size
    b_end = block.b + block.size
    print(f"Match object:{block}")
    print(f"Matching sequence list_one: {str1[block.a:a_end]}")
    print(f"Matching sequence list_two: {str2[block.b:b_end]}")
    print()


Match object:Match(a=0, b=19, size=35)
Matching sequence list_one: Hey, teacher, leave them kids alone
Matching sequence list_two: Hey, teacher, leave them kids alone

Match object:Match(a=35, b=87, size=48)
Matching sequence list_one:  All in all, it's just another brick in the wall
Matching sequence list_two:  All in all, it's just another brick in the wall

Match object:Match(a=83, b=158, size=0)
Matching sequence list_one: 
Matching sequence list_two: 



In [8]:
# Longest matching block, searched over the full extent of both strings.
match_longest = match_seq.find_longest_match(
    alo=0, ahi=len(str1), blo=0, bhi=len(str2)
)
print(match_longest)

# Display the matched text by slicing str1 with the block's start and size.
longest_start = match_longest.a
str1[longest_start:longest_start + match_longest.size]

Match(a=35, b=87, size=48)


" All in all, it's just another brick in the wall"