# Diff

In [13]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
Installing collected packages: loguru
Successfully installed loguru-0.7.3


In [16]:
import difflib

In [18]:
def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str:
    """Return a single string showing how ``a`` differs from ``b``.

    The two strings are compared character by character with
    ``difflib.SequenceMatcher``. Unchanged runs are copied verbatim, text that
    only exists in ``b`` is wrapped in the "green" markers and text that only
    exists in ``a`` is wrapped in the "red" markers. For a replaced run the
    new text (green) is emitted before the old text (red).

    Args:
        a: The original string.
        b: The modified string.
        use_loguru_colors: When true, wrap changes in loguru-style markup tags
            (``<GREEN><black>...``); otherwise use raw ANSI escape sequences.

    Returns:
        The merged string with the highlighted differences.
    """
    if use_loguru_colors:
        added_open, added_close = '<GREEN><black>', '</black></GREEN>'
        removed_open, removed_close = '<RED><black>', '</black></RED>'
    else:
        added_open, added_close = '\x1b[38;5;16;48;5;2m', '\x1b[0m'
        removed_open, removed_close = '\x1b[38;5;16;48;5;1m', '\x1b[0m'

    pieces = []
    for tag, a_lo, a_hi, b_lo, b_hi in difflib.SequenceMatcher(None, a, b).get_opcodes():
        if tag == 'equal':
            pieces.append(a[a_lo:a_hi])
            continue
        # 'replace' contributes both parts: the new text first, then the old.
        if tag in ('insert', 'replace'):
            pieces.append(f'{added_open}{b[b_lo:b_hi]}{added_close}')
        if tag in ('delete', 'replace'):
            pieces.append(f'{removed_open}{a[a_lo:a_hi]}{removed_close}')
    return ''.join(pieces)


In [19]:
# Sample inputs for the diff demos.
# NOTE(review): s1 and s2 hold exactly the same sentence, so any diff computed
# from them contains no insertions or deletions; change one of them to see
# highlighted differences.
s1 = "A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents."
s2 = "A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents."

In [20]:
# Line-oriented comparison: each string is passed as a one-element list, so
# ndiff treats it as a single line and prints it with a two-character prefix.
ndiff_lines = difflib.ndiff([s1], [s2])
for diff in ndiff_lines:
    print(diff)

  A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.


In [21]:
# The inputs are identical, so the matcher reports only 'equal' runs and the
# result is the plain sentence without any loguru color tags.
diff_strings(s1, s2, use_loguru_colors=True)

'A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.'

## ChainMap

In [6]:
from collections import ChainMap
# Two small dictionaries that share the key "B"
d1 = dict([("A", 1), ("B", 2)])
d2 = dict([("B", 3), ("C", 4)])
# ChainMap keeps both dictionaries and looks keys up in the order given
cm = ChainMap(d1, d2)
print(cm)  # Output: ChainMap({'A': 1, 'B': 2}, {'B': 3, 'C': 4})
shared_key_value = cm["B"]
print(shared_key_value)  # Output: 2 (value from the first dictionary)

ChainMap({'A': 1, 'B': 2}, {'B': 3, 'C': 4})
2


In [10]:
from collections import ChainMap
import json

# Load the two checkpoint files. The context managers close each file handle
# as soon as it has been parsed; JSON is read explicitly as UTF-8.
with open('../data/checkpoints/documents-2024-12-08-1.json', encoding='utf-8') as checkpoint_file:
    d1 = json.load(checkpoint_file)
with open('../data/checkpoints/documents-2024-12-08-2.json', encoding='utf-8') as checkpoint_file:
    d2 = json.load(checkpoint_file)

# Key of the validation entry inspected in both checkpoints.
validation_key = "validation_judge_Operative_Rules|llm_validation"

print(d1[validation_key])
print(d2[validation_key])

# Combine the two validation entries. ChainMap does not merge values: for a
# key present in both entries, the lookup returns the value from d1.
cm = ChainMap(d1[validation_key], d2[validation_key])
cm


{'id': 'validation_judge_Operative_Rules', 'type': 'llm_validation', 'content': [{'doc_id': '§ 275.0-2', 'statement_id': '3', 'statement': "The Secretary of the Commission (Secretary) will promptly forward a copy to each named party by registered or certified mail at that party's last address filed with the Commission.", 'sources': ['(a)(2)'], 'semscore': 0.9073101908896187, 'similarity_score': 0.9, 'similarity_score_confidence': 0.9, 'transformation_accuracy': 0.85, 'grammar_syntax_accuracy': 0.95, 'findings': ["The transformed sentence maintains the core meaning of the original statement but changes 'will promptly forward' to 'must forward', which slightly alters the sense of obligation.", "The conditional clause 'if the copy is sent by registered or certified mail' is not present in the original statement, which affects the transformation accuracy.", 'The grammar and syntax of the transformed sentence are correct, but the addition of the conditional clause introduces a slight deviat

ChainMap({'id': 'validation_judge_Operative_Rules', 'type': 'llm_validation', 'content': [{'doc_id': '§ 275.0-2', 'statement_id': '3', 'statement': "The Secretary of the Commission (Secretary) will promptly forward a copy to each named party by registered or certified mail at that party's last address filed with the Commission.", 'sources': ['(a)(2)'], 'semscore': 0.9073101908896187, 'similarity_score': 0.9, 'similarity_score_confidence': 0.9, 'transformation_accuracy': 0.85, 'grammar_syntax_accuracy': 0.95, 'findings': ["The transformed sentence maintains the core meaning of the original statement but changes 'will promptly forward' to 'must forward', which slightly alters the sense of obligation.", "The conditional clause 'if the copy is sent by registered or certified mail' is not present in the original statement, which affects the transformation accuracy.", 'The grammar and syntax of the transformed sentence are correct, but the addition of the conditional clause introduces a slig

In [14]:
# ChainMap returns the value from the first mapping that contains the key, so
# this is the length of one mapping's "content" list, not a merged list.
# NOTE(review): presumably both checkpoints define "content" -- confirm.
len(cm["content"])

6

## defaultdict

In [3]:
from collections import defaultdict

# Factory for the default dictionary that describes a single term
def create_term():
    """Return a new term record with every field set to its empty default."""
    field_defaults = (
        ("term", ""),
        ("classification", ""),
        ("confidence", 0.0),
        ("reason", ""),
        ("extracted_confidence", 0.0),
        ("extracted_reason", ""),
    )
    return dict(field_defaults)

# Factory for the default dictionary that describes a single element
def create_element():
    """Return a new element record with empty defaults.

    The list-valued fields are created on every call, so records returned by
    different calls never share a list.
    """
    field_defaults = (
        ("id", 0),
        ("title", ""),
        ("statement", ""),
        ("terms", []),
        ("verb_symbols", []),
        ("verb_symbols_extracted_confidence", []),
        ("verb_symbols_extracted_reason", []),
        ("classification", ""),
        ("confidence", 0.0),
        ("reason", ""),
        ("sources", []),
    )
    return dict(field_defaults)

# defaultdict whose missing keys are filled in by create_element
elements_defaultdict = defaultdict(create_element)

# Usage example: the first access to key 1 creates the element record
elements_defaultdict[1].update({
    "id": 1,
    "title": "Service of process on non-resident entities",
    "statement": (
        "A person may serve process, pleadings, or other papers on a non-resident investment adviser, "
        "or on a non-resident general partner or non-resident managing agent of an investment adviser "
        "by serving any or all of its appointed agents."
    ),
})

# Build one term from the create_term defaults and attach it to the element
term1 = create_term()
term1.update({
    "term": "Person",
    "classification": "Common Noun",
    "confidence": 0.9,
    "reason": "The term is a general reference to an individual or entity.",
    "extracted_confidence": 0.9,
    "extracted_reason": "The term is explicitly mentioned as the subject.",
})
elements_defaultdict[1]["terms"].append(term1)

# Remaining attributes. "verb_symbols_extracted_reason" and "confidence" are
# deliberately not assigned, so they keep their create_element defaults.
elements_defaultdict[1].update({
    "verb_symbols": ["serve", "by serving"],
    "verb_symbols_extracted_confidence": [0.9, 0.8],
    "classification": "Fact Type",
    "reason": "The statement describes a general procedure applicable to various entities.",
    "sources": ["(a)"],
})

# Convert the defaultdict to a plain dict, if needed
result = dict(elements_defaultdict)

# Show the result
result


{1: {'id': 1,
  'title': 'Service of process on non-resident entities',
  'statement': 'A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.',
  'terms': [{'term': 'Person',
    'classification': 'Common Noun',
    'confidence': 0.9,
    'reason': 'The term is a general reference to an individual or entity.',
    'extracted_confidence': 0.9,
    'extracted_reason': 'The term is explicitly mentioned as the subject.'}],
  'verb_symbols': ['serve', 'by serving'],
  'verb_symbols_extracted_confidence': [0.9, 0.8],
  'verb_symbols_extracted_reason': [],
  'classification': 'Fact Type',
  'confidence': 0.0,
  'reason': 'The statement describes a general procedure applicable to various entities.',
  'sources': ['(a)']}}

## pydantic

In [8]:
import pydantic

# Pydantic model for one term extracted from a statement.
# Fields declared without a default must be supplied when the model is built;
# only extracted_confidence may be omitted.
class Term(pydantic.BaseModel):
    term: str
    classification: str
    confidence: float
    reason: str
    # Optional: falls back to 0.0 when not supplied.
    extracted_confidence: float = 0.0
    extracted_reason: str

# Pydantic model for one element: a statement together with its terms, verb
# symbols, classification and sources.
# Fields with a default may be omitted when the model is built; the rest are
# required.
class Element(pydantic.BaseModel):
    id: int
    title: str
    statement: str
    # NOTE(review): pydantic is documented to copy mutable defaults such as []
    # for each instance -- confirm for the installed pydantic version.
    terms: list[Term] = []
    verb_symbols: list[str]
    verb_symbols_extracted_confidence: list[float] = []
    verb_symbols_extracted_reason: list[str] = []
    classification: str
    # Optional: falls back to 0.0 when not supplied.
    confidence: float = 0.0
    reason: str
    sources: list[str]

# Usage example: build one Element, including a nested Term, with keyword
# arguments and dump it back to plain Python data.
element = Element(
    id=1,
    title="Service of process on non-resident entities",
    statement=(
        "A person may serve process, pleadings, or other papers on a non-resident investment adviser, "
        "or on a non-resident general partner or non-resident managing agent of an investment adviser "
        "by serving any or all of its appointed agents."
    ),
    terms=[
        Term(
            term="Person",
            classification="Common Noun",
            confidence=0.9,
            reason="The term is a general reference to an individual or entity.",
            extracted_confidence=0.9,
            extracted_reason="The term is explicitly mentioned as the subject."
        )
    ],
    verb_symbols=["serve", "by serving"],
    verb_symbols_extracted_confidence=[0.9, 0.8],
    # Left out so the field falls back to its declared default ([]):
    # verb_symbols_extracted_reason=[
    #     "The verb is explicitly mentioned as the action.",
    #     "The verb phrase indicates the method of action."
    # ],
    classification="Fact Type",
    # Left out so the field falls back to its declared default (0.0):
    #confidence=0.9,
    reason="The statement describes a general procedure applicable to various entities.",
    sources=["(a)"]
)

# model_dump() returns the model, including the nested Term, as plain dicts/lists.
element.model_dump()

{'id': 1,
 'title': 'Service of process on non-resident entities',
 'statement': 'A person may serve process, pleadings, or other papers on a non-resident investment adviser, or on a non-resident general partner or non-resident managing agent of an investment adviser by serving any or all of its appointed agents.',
 'terms': [{'term': 'Person',
   'classification': 'Common Noun',
   'confidence': 0.9,
   'reason': 'The term is a general reference to an individual or entity.',
   'extracted_confidence': 0.9,
   'extracted_reason': 'The term is explicitly mentioned as the subject.'}],
 'verb_symbols': ['serve', 'by serving'],
 'verb_symbols_extracted_confidence': [0.9, 0.8],
 'verb_symbols_extracted_reason': [],
 'classification': 'Fact Type',
 'confidence': 0.0,
 'reason': 'The statement describes a general procedure applicable to various entities.',
 'sources': ['(a)']}

## latexify

Source: the examples in this section are adapted from the latexify_py examples notebook: https://github.com/google/latexify_py/blob/main/examples/latexify_examples.ipynb

In [2]:
!pip install latexify-py

Collecting latexify-py
  Downloading latexify_py-0.4.4-py3-none-any.whl.metadata (4.2 kB)
Collecting dill>=0.3.2 (from latexify-py)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading latexify_py-0.4.4-py3-none-any.whl (38 kB)
Downloading dill-0.3.9-py3-none-any.whl (119 kB)
Installing collected packages: dill, latexify-py
Successfully installed dill-0.3.9 latexify-py-0.4.4


In [16]:
import math  # Optional
import numpy as np  # Optional
import latexify

In [13]:
from IPython.display import display

# Root of a*x**2 + b*x + c = 0 taken with the "+" sign of the quadratic
# formula. The decorator keeps the function callable and also makes it
# printable as the LaTeX source of its own definition.
@latexify.function
def solve(a, b, c):
  return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)

print(solve(1, 4, 3))  # Invoking the function works as expected.
print(solve)  # Printing the function shows the underlying LaTeX source.
# Only the last expression of a cell is rendered automatically, so the
# expression is shown with an explicit display() call here.
display(solve)  # Displays the expression.

# Writes the underlying LaTeX source into a file.
with open("compiled.tex", "w") as fp:
  print(solve, file=fp)


-1.0
\mathrm{solve}(a, b, c) = \frac{-b + \sqrt{ b^{2} - 4 a c }}{2 a}


In [12]:
# latexify.expression works similarly, but does not output the signature.
# NOTE: this notebook defines `solve` in several cells; the name refers to
# whichever of those cells was executed last.
@latexify.expression
def solve(a, b, c):
  return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)

# The bare name as the last expression makes the notebook display the object.
solve

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1264a49110>

In [14]:
# latexify.get_latex obtains the underlying LaTeX expression directly.
# `solve` is left undecorated here, so it is a plain Python function; the
# LaTeX source is returned as a string.
def solve(a, b, c):
  return (-b + math.sqrt(b**2 - 4*a*c)) / (2*a)

latexify.get_latex(solve)

'\\mathrm{solve}(a, b, c) = \\frac{-b + \\sqrt{ b^{2} - 4 a c }}{2 a}'

In [3]:
# Recursive factorial: an if/else that returns in both branches, with the
# recursive call in the else branch.
@latexify.function
def fact(n):
  if n == 0:
    return 1
  else:
    return n * fact(n-1)

# The bare name as the last expression makes the notebook display the object.
fact

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1264f86490>

In [4]:
# sin(x) / x with the x == 0 case handled separately (returns 1), which
# avoids the division by zero.
@latexify.function
def sinc(x):
  if x == 0:
    return 1
  else:
    return math.sin(x) / x

# The bare name as the last expression makes the notebook display the object.
sinc

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1264f88550>

In [5]:
# Elif or nested else-if are unrolled.
# Recursive Fibonacci with two base cases (x == 0 and x == 1) expressed as an
# if / elif / else chain.
@latexify.function
def fib(x):
  if x == 0:
    return 0
  elif x == 1:
    return 1
  else:
    return fib(x-1) + fib(x-2)

# The bare name as the last expression makes the notebook display the object.
fib

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1231c0d5d0>

In [6]:
# Some math symbols are converted automatically.
# The parameters are named after Greek letters; note that `math.gamma` is a
# function call applied to the parameter that is also named `gamma`.
@latexify.function(use_math_symbols=True)
def greek(alpha, beta, gamma, Omega):
  return alpha * beta + math.gamma(gamma) + Omega

# The bare name as the last expression makes the notebook display the object.
greek

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f126406ffd0>

In [7]:
# Function names, arguments, variables can be replaced.
# Maps the Python identifiers (keys) to the names used in the LaTeX (values).
identifiers = {
    "my_function": "f",
    "my_inner_function": "g",
    "my_argument": "x",
}

# NOTE(review): `my_inner_function` is not defined in the visible part of this
# notebook, so this cell only demonstrates rendering; calling my_function
# would presumably raise NameError unless it is defined elsewhere.
@latexify.function(identifiers=identifiers)
def my_function(my_argument):
    return my_inner_function(my_argument)

# The bare name as the last expression makes the notebook display the object.
my_function

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1264f66a50>

In [8]:
# Assignments can be reduced into one expression.
# Same "+" root of the quadratic formula, but computed through three named
# intermediate values before the final division.
@latexify.function(reduce_assignments=True)
def f(a, b, c):
    discriminant = b**2 - 4 * a * c
    numerator = -b + math.sqrt(discriminant)
    denominator = 2 * a
    return numerator / denominator

# The bare name as the last expression makes the notebook display the object.
f

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1264f89110>

In [9]:
# Matrix support.
# Builds three 3x3 matrices (scale by a and b, rotate by theta, move by s and
# t) and multiplies them with the column vector [x, y, 1]. In the chained @
# product the scale is applied to the vector first, then the rotation, then
# the move.
@latexify.function(reduce_assignments=True, use_math_symbols=True)
def transform(x, y, a, b, theta, s, t):
  cos_t = math.cos(theta)
  sin_t = math.sin(theta)
  scale = np.array([[a, 0, 0], [0, b, 0], [0, 0, 1]])
  rotate = np.array([[cos_t, -sin_t, 0], [sin_t, cos_t, 0], [0, 0, 1]])
  move = np.array([[1, 0, s], [0, 1, t], [0, 0, 1]])
  return move @ rotate @ scale @ np.array([[x], [y], [1]])

# The bare name as the last expression makes the notebook display the object.
transform

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1231c1a8d0>

In [10]:
# latexify.algorithmic generates an algorithmic environment instead of an equation.
# NOTE: this notebook defines `fib` in more than one cell; the name refers to
# whichever of those cells was executed last.
@latexify.algorithmic
def fib(x):
  if x == 0:
    return 0
  elif x == 1:
    return 1
  else:
    return fib(x-1) + fib(x-2)

# The bare name as the last expression makes the notebook display the object.
fib

<latexify.ipython_wrappers.LatexifiedAlgorithm at 0x7f1264f8e410>

In [11]:
# Another example: latexify.algorithmic supports usual control flows.
# Counts the loop iterations needed until x is no longer greater than 1: an
# even x is halved (integer division), an odd x is replaced by 3 * x + 1.
@latexify.algorithmic
def collatz(x):
  n = 0
  while x > 1:
    n = n + 1
    if x % 2 == 0:
      x = x // 2
    else:
      x = 3 * x + 1
  return n

# The bare name as the last expression makes the notebook display the object.
collatz

<latexify.ipython_wrappers.LatexifiedAlgorithm at 0x7f1264a74f50>

In [15]:
# Logistic sigmoid: 1 / (1 + e^(-z)). The denominator must add the
# exponential; subtracting it gives a different function that divides by zero
# at z == 0. The identifiers mapping renames the function to "sigma" in the
# generated LaTeX.
@latexify.function(identifiers = {'sigmoid': 'sigma'}, use_math_symbols = True)
def sigmoid(z):
  return 1 / (1 + math.exp(-z))

# The bare name as the last expression makes the notebook display the object.
sigmoid

<latexify.ipython_wrappers.LatexifiedFunction at 0x7f1231c18fd0>

Alternatives: LaTeX can also be rendered directly with IPython's own display tools, without latexify.

In [17]:
from IPython.display import Math
# Render a hand-written LaTeX string directly. The raw string (r'...') keeps
# the backslashes of the LaTeX commands intact.
Math(r'\mathrm{series}(x) = 1 + x + \frac{x^{2}}{2 !} + \frac{x^{3}}{3 !} + \frac{x^{4}}{4 !}')

<IPython.core.display.Math object>

In [19]:
# Taylor polynomial of exp(x) around 0, up to the x**4 term. The identifiers
# mapping renames the function to "series" in the generated LaTeX.
@latexify.function(identifiers = {'taylor_series_exp': 'series'})
def taylor_series_exp(x):
    return 1 + x + (x**2) / math.factorial(2) + (x**3) / math.factorial(3) + (x**4) / math.factorial(4)

# NOTE(review): this bare expression is not the last statement of the cell,
# so nothing is displayed for it; only the print() below produces output.
taylor_series_exp

print(taylor_series_exp)

\mathrm{series}(x) = 1 + x + \frac{x^{2}}{2 !} + \frac{x^{3}}{3 !} + \frac{x^{4}}{4 !}


In [21]:
%%latex
Math(r'F(k) = \int_{-\infty}^{\infty} f(x) e^{2\pi i k} dx')

<IPython.core.display.Latex object>