In [22]:
# Examples by Perplexity AI: comparing text with Python's difflib

In [1]:
# Two pip "Requirement already satisfied" log lines that serve as the sample
# inputs for the comparisons in this notebook.
original_text = "Requirement already satisfied: typing-extensions in site-packages (from rich-click<2.0.0,>=1.6.1->redlines) (4.11.0)"
modified_text = "Requirement already satisfied: mdurl~=0.1 in site-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=13.3.5->redlines) (0.1.2)"

In [2]:
import re
from difflib import Differ, SequenceMatcher

# A token is either a run of characters containing no whitespace and no
# parentheses, or a single character from "().?!-"; whitespace that follows a
# token stays attached to it.
tokenizer = re.compile(r"((?:[^()\s]+|[().?!-])\s*)")

def tokenize_text(text):
    """Split ``text`` into tokens that keep their trailing whitespace.

    Parentheses always become tokens of their own. Whitespace that precedes
    the first token is not part of any token and is therefore dropped.

    Args:
        text: The string to tokenize.

    Returns:
        A list of token strings in order of appearance (empty for ``""``).
    """
    return tokenizer.findall(text)

# Tokenize both sample strings so they can be compared token by token.
original_tokens = tokenize_text(original_text)
modified_tokens = tokenize_text(modified_text)

# Each opcode is (tag, i1, i2, j1, j2): original_tokens[i1:i2] corresponds to
# modified_tokens[j1:j2], and tag names the kind of edit between them.
matcher = SequenceMatcher(isjunk=None, a=original_tokens, b=modified_tokens)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    before = original_tokens[i1:i2]
    after = modified_tokens[j1:j2]
    print(f"{tag:8}: {before} -> {after}")

equal   : ['Requirement ', 'already ', 'satisfied: '] -> ['Requirement ', 'already ', 'satisfied: ']
replace : ['typing-extensions '] -> ['mdurl~=0.1 ']
equal   : ['in ', 'site-packages ', '(', 'from '] -> ['in ', 'site-packages ', '(', 'from ']
replace : ['rich-click<2.0.0,>=1.6.1->redlines'] -> ['markdown-it-py>=2.2.0->rich<14.0.0,>=13.3.5->redlines']
equal   : [') ', '('] -> [') ', '(']
replace : ['4.11.0'] -> ['0.1.2']
equal   : [')'] -> [')']


In [3]:
# Differ.compare() diffs item by item. Passing the raw strings would compare
# them character by character, so split on spaces for a word-level diff
# (use .splitlines(keepends=True) instead for a line-level diff).
original_words = original_text.split(' ')
modified_words = modified_text.split(' ')

d = Differ()
comp = d.compare(original_words, modified_words)
list(comp)

# difflib.ndiff(original_words, modified_words) yields the same lines.

['  Requirement',
 '  already',
 '  satisfied:',
 '- typing-extensions',
 '+ mdurl~=0.1',
 '  in',
 '  site-packages',
 '  (from',
 '- rich-click<2.0.0,>=1.6.1->redlines)',
 '- (4.11.0)',
 '+ markdown-it-py>=2.2.0->rich<14.0.0,>=13.3.5->redlines)',
 '+ (0.1.2)']

In [4]:
from difflib import SequenceMatcher

def similarity(a, b):
    """Return the difflib similarity ratio of two sequences.

    The ratio is ``2.0 * matches / total``, where ``total`` is the combined
    length of both sequences; identical inputs give ``1.0``.
    ``SequenceMatcher`` also offers ``quick_ratio()`` and
    ``real_quick_ratio()``, which return upper bounds on this value.

    Args:
        a: First sequence (for example a string).
        b: Second sequence.

    Returns:
        The similarity ratio as a float.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

# One ratio per line: a near-identical pair first, then a mostly different pair.
print(
    similarity("hello world", "yellow world"),
    similarity("pear", "banana"),
    sep="\n",
)

0.8695652173913043
0.2


In [5]:
# difflib.unified_diff produces a diff in unified format, i.e. patch-style output.

import difflib

def generate_patch(old_version, new_version):
    """Build a unified-diff patch that turns ``old_version`` into ``new_version``.

    Both texts are split into lines (terminators kept) and compared with
    ``difflib.unified_diff``. When the last line of an input has no trailing
    newline, the corresponding diff line is terminated and followed by the
    conventional ``\\ No newline at end of file`` marker, so it is never fused
    with the next diff line.

    Args:
        old_version: The original text as a single string.
        new_version: The updated text as a single string.

    Returns:
        The patch as one string, using ``old_version`` and ``new_version`` as
        the file labels in the header. Empty when the two texts are equal.
    """
    diff = difflib.unified_diff(old_version.splitlines(keepends=True),
                                new_version.splitlines(keepends=True),
                                fromfile='old_version', tofile='new_version')
    patch_lines = []
    for line in diff:
        patch_lines.append(line)
        # splitlines() strips a line terminator if there is one, so a diff line
        # that comes back unchanged has none; only an input's final line can be
        # in that state. Diff lines are never empty (they carry a prefix).
        if line.splitlines()[0] == line:
            patch_lines.append('\n\\ No newline at end of file\n')
    return ''.join(patch_lines)

# Two versions of a small function; only the print line differs.
old_code = (
    "def greet(name):\n"
    "    print('Hello, ' + name)\n"
)
new_code = (
    "def greet(name):\n"
    "    print(f'Hello, {name}!')\n"
)

# Show the unified diff between the two versions.
patch = generate_patch(old_code, new_code)
print(patch)

--- old_version
+++ new_version
@@ -1,2 +1,2 @@
 def greet(name):
-    print('Hello, ' + name)
+    print(f'Hello, {name}!')

