# Diff

In [13]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
Installing collected packages: loguru
Successfully installed loguru-0.7.3


In [16]:
import difflib

In [18]:
def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str:
    """Return a character-level diff of ``a`` against ``b`` as one highlighted string.

    Unchanged text is emitted as-is. Text that only exists in ``b`` is wrapped
    in the "inserted" markers (black on green) and text that only exists in
    ``a`` is wrapped in the "deleted" markers (black on red). For a replaced
    span the new text from ``b`` is emitted first, followed by the old text
    from ``a``.

    Args:
        a: The original string.
        b: The revised string.
        use_loguru_colors: When true, highlight with loguru-style markup tags
            (``<GREEN><black>...</black></GREEN>``) instead of raw ANSI escape
            sequences.

    Returns:
        The concatenation of all equal, inserted and deleted segments. Two
        identical inputs yield the input unchanged; two empty inputs yield
        ``''``.

    Note:
        Segments are inserted verbatim. In markup mode, text in ``a``/``b``
        that itself looks like a markup tag is not escaped by this function.
    """
    if use_loguru_colors:
        ins_open, ins_close = '<GREEN><black>', '</black></GREEN>'
        del_open, del_close = '<RED><black>', '</black></RED>'
    else:
        # 256-colour SGR codes: foreground 16 (black) on background 2 / 1.
        ins_open = '\x1b[38;5;16;48;5;2m'
        del_open = '\x1b[38;5;16;48;5;1m'
        ins_close = del_close = '\x1b[0m'

    # autojunk must be off for character-level diffs: with the default
    # heuristic, any character that makes up more than ~1% of a 200+ item
    # sequence is treated as junk, which for ordinary prose is nearly every
    # character and turns a small edit into one whole-string replacement.
    matcher = difflib.SequenceMatcher(None, a, b, autojunk=False)

    pieces = []
    for tag, a0, a1, b0, b1 in matcher.get_opcodes():
        if tag == 'equal':
            pieces.append(a[a0:a1])
            continue
        # 'replace' contributes both parts: inserted text first, then deleted.
        if tag in ('insert', 'replace'):
            pieces.append(f'{ins_open}{b[b0:b1]}{ins_close}')
        if tag in ('delete', 'replace'):
            pieces.append(f'{del_open}{a[a0:a1]}{del_close}')
    return ''.join(pieces)


In [19]:
# Two sample sentences to compare; they are currently identical.
s1 = (
    "A person may serve process, pleadings, or other papers on a "
    "non-resident investment adviser, or on a non-resident general "
    "partner or non-resident managing agent of an investment adviser "
    "by serving any or all of its appointed agents."
)
s2 = (
    "A person may serve process, pleadings, or other papers on a "
    "non-resident investment adviser, or on a non-resident general "
    "partner or non-resident managing agent of an investment adviser "
    "by serving any or all of its appointed agents."
)

In [20]:
# Line-level comparison: each string is treated as a single "line" and every
# ndiff result line is printed on its own row.
ndiff_lines = difflib.ndiff([s1], [s2])
print(*ndiff_lines, sep='\n')

  A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.


In [21]:
# Character-level diff of the two samples using loguru markup for highlights.
diff_strings(a=s1, b=s2, use_loguru_colors=True)

'A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.'

## ChainMap

In [6]:
from collections import ChainMap
# Two small mappings that share the key "B"
d1 = dict(A=1, B=2)
d2 = dict(B=3, C=4)
# Layer them: lookups consult d1 first and fall back to d2
cm = ChainMap(d1, d2)
print(cm)  # shows both underlying dicts in lookup order
print(cm["B"])  # 2 -- d1 shadows d2 for the shared key

ChainMap({'A': 1, 'B': 2}, {'B': 3, 'C': 4})
2


In [10]:
from collections import ChainMap
import json

# Key of the validation entry inspected in both checkpoint files.
VALIDATION_KEY = "validation_judge_Operative_Rules|llm_validation"


def _load_checkpoint(path):
    """Parse one checkpoint JSON file and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, and it is decoded as UTF-8 explicitly instead of relying
    on the platform's default encoding.
    """
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


d1 = _load_checkpoint('../data/checkpoints/documents-2024-12-08-1.json')
d2 = _load_checkpoint('../data/checkpoints/documents-2024-12-08-2.json')

print(d1[VALIDATION_KEY])
print(d2[VALIDATION_KEY])

# Layer the two validation entries. A ChainMap does not merge values: for a key
# present in both entries, the lookup returns the value from the first mapping
# (the one taken from d1).
cm = ChainMap(d1[VALIDATION_KEY], d2[VALIDATION_KEY])
cm


{'id': 'validation_judge_Operative_Rules', 'type': 'llm_validation', 'content': [{'doc_id': '§ 275.0-2', 'statement_id': '3', 'statement': "The Secretary of the Commission (Secretary) will promptly forward a copy to each named party by registered or certified mail at that party's last address filed with the Commission.", 'sources': ['(a)(2)'], 'semscore': 0.9073101908896187, 'similarity_score': 0.9, 'similarity_score_confidence': 0.9, 'transformation_accuracy': 0.85, 'grammar_syntax_accuracy': 0.95, 'findings': ["The transformed sentence maintains the core meaning of the original statement but changes 'will promptly forward' to 'must forward', which slightly alters the sense of obligation.", "The conditional clause 'if the copy is sent by registered or certified mail' is not present in the original statement, which affects the transformation accuracy.", 'The grammar and syntax of the transformed sentence are correct, but the addition of the conditional clause introduces a slight deviat

ChainMap({'id': 'validation_judge_Operative_Rules', 'type': 'llm_validation', 'content': [{'doc_id': '§ 275.0-2', 'statement_id': '3', 'statement': "The Secretary of the Commission (Secretary) will promptly forward a copy to each named party by registered or certified mail at that party's last address filed with the Commission.", 'sources': ['(a)(2)'], 'semscore': 0.9073101908896187, 'similarity_score': 0.9, 'similarity_score_confidence': 0.9, 'transformation_accuracy': 0.85, 'grammar_syntax_accuracy': 0.95, 'findings': ["The transformed sentence maintains the core meaning of the original statement but changes 'will promptly forward' to 'must forward', which slightly alters the sense of obligation.", "The conditional clause 'if the copy is sent by registered or certified mail' is not present in the original statement, which affects the transformation accuracy.", 'The grammar and syntax of the transformed sentence are correct, but the addition of the conditional clause introduces a slig

In [14]:
# Length of the value stored under "content". The ChainMap lookup returns the
# value from the first chained mapping that has this key; values from the
# mappings behind it are not combined into the result.
len(cm["content"])

6