Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f8a9d2c
intermittent none result
nichwch Jul 12, 2024
d465a8a
make everything above run_validators a generator
nichwch Jul 13, 2024
a63072f
plumbing and types
nichwch Jul 15, 2024
4e7e157
comments
nichwch Jul 15, 2024
b4d2b2d
extend generators down to run_validators, works w noop
nichwch Jul 16, 2024
1a4f9a1
some code for fix streaming logic
nichwch Jul 17, 2024
381ab07
accumulate fixed outputs for validators
nichwch Jul 19, 2024
95cb018
merging algo
nichwch Jul 19, 2024
37ceb30
format
nichwch Jul 19, 2024
a522c4c
handle last chunk, merge infra
nichwch Jul 20, 2024
f62839b
merge algo
nichwch Jul 30, 2024
8d8aadd
validator service
nichwch Jul 30, 2024
2803078
reset validationmap
nichwch Jul 31, 2024
2aa99f0
handle filter/refrain
nichwch Jul 31, 2024
056c0f8
format
nichwch Jul 31, 2024
3f933d8
fix raw_llm_output
nichwch Aug 1, 2024
43c704c
WIP test
nichwch Aug 1, 2024
19a912d
testing, fix refrain and fix behavior
nichwch Aug 1, 2024
a1b74ca
filter behavior
nichwch Aug 1, 2024
03c8a6d
remove prints
nichwch Aug 1, 2024
2070909
address changes, pt1
nichwch Aug 5, 2024
1dba10c
remove workspace
nichwch Aug 5, 2024
521ff60
format
nichwch Aug 5, 2024
5af79f1
merge
nichwch Aug 5, 2024
cc33fe9
use interface instead of tuple
nichwch Aug 5, 2024
ae774df
mock presidio behavior
nichwch Aug 6, 2024
a4ec762
format
nichwch Aug 6, 2024
74d1ee3
poetry lock
nichwch Aug 6, 2024
99bd0f4
use ids instead of rail alias
nichwch Aug 6, 2024
a0e95dc
merge test
nichwch Aug 6, 2024
aae3b20
remove tuple
nichwch Aug 7, 2024
0eb858d
failing test
nichwch Aug 7, 2024
bc26501
fix another infinite loop
nichwch Aug 8, 2024
4aad57e
unit tests for merge
nichwch Aug 8, 2024
4f02b78
broken test
nichwch Aug 9, 2024
c7ab3e0
merge
nichwch Aug 9, 2024
6a19810
broken test
nichwch Aug 9, 2024
59e569b
type fixes
nichwch Aug 9, 2024
922a2b7
typefix
nichwch Aug 12, 2024
6778ca5
use optional
nichwch Aug 12, 2024
4adad0e
address comments
nichwch Aug 12, 2024
2df3f2d
parsed chunk
nichwch Aug 12, 2024
7134773
type fix
nichwch Aug 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions guardrails/classes/validation/validation_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ErrorSpan as IErrorSpan,
)
from guardrails.classes.generic.arbitrary_model import ArbitraryModel
from pydantic import BaseModel


class ValidationResult(IValidationResult, ArbitraryModel):
Expand Down Expand Up @@ -185,3 +186,9 @@ class ErrorSpan(IErrorSpan, ArbitraryModel):
end: int
# reason validation failed, specific to this chunk
reason: str


class StreamValidationResult(BaseModel):
    """Pydantic model bundling one result emitted during streaming validation.

    NOTE(review): the field descriptions below are inferred from the field
    names only; confirm them against the code that constructs this model.
    """

    # presumably the (possibly fixed) chunk produced by validation — TODO confirm
    chunk: Any
    # presumably the unmodified text the chunk was derived from — TODO confirm
    original_text: str
    # string-keyed metadata; contents are not specified here
    metadata: Dict[str, Any]
291 changes: 291 additions & 0 deletions guardrails/merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
# SOURCE: https://github.com/spyder-ide/three-merge/blob/master/three_merge/merge.py
from diff_match_patch import diff_match_patch

# Constants
# Single shared differ instance used for every diff computed in this module.
DIFFER = diff_match_patch()
# diff-match-patch tuning knobs. Per the library documentation, Diff_Timeout is
# the number of seconds a diff may run before falling back to a coarser result,
# and Diff_EditCost is the cost of an empty edit operation used by
# diff_cleanupEfficiency — confirm against the installed library version.
DIFFER.Diff_Timeout = 0.1
DIFFER.Diff_EditCost = 4
# Operation codes compared against the first element of each diff tuple:
# text kept from the base, text removed from the base, text added to the base.
PRESERVED = 0
DELETION = -1
ADDITION = 1


def _unmatched_tail(target_text: str, source_text: str) -> str:
    """Return the text of the second diff op between two preserved runs.

    Both call sites pass two preserved runs of different length; the diff is
    then normally ``[(PRESERVED, shorter), (op, tail)]`` and ``tail`` is the
    part of the longer run the other side has not consumed yet.

    When the shorter run is empty the diff collapses to a single non-preserved
    op holding the whole longer run. The previous code indexed ``[1]``
    unconditionally and raised ``IndexError`` in that case; here the single
    op's text is returned instead.
    """
    tempdiff = DIFFER.diff_main(target_text, source_text)
    if len(tempdiff) > 1:
        return tempdiff[1][1]
    if tempdiff and tempdiff[0][0] != PRESERVED:
        return tempdiff[0][1]
    return ""


def merge(source: str, target: str, base: str) -> str:
    """Three-way merge of ``source`` and ``target`` against a common ``base``.

    Each side is diffed against ``base`` and the two op streams are walked in
    lockstep, emitting text both sides preserve plus the additions of either
    side, and dropping text deleted by either side.

    Conflict handling (no conflict markers are ever emitted):

    * deletion vs. addition: nothing is added (errs on the side of deletion).
    * addition vs. addition: if the shorter addition is a prefix of the
      longer one, the longer one is kept; otherwise the addition with the
      greater Levenshtein distance from the previous op text on its own side
      is kept, and ties go to ``target``.
    * deletion vs. deletion that do not line up: both ops are skipped.

    Args:
        source: First modified version of ``base``.
        target: Second modified version of ``base``.
        base: The common ancestor text.

    Returns:
        The merged text.
    """
    diff1_l = DIFFER.diff_main(base, source)
    diff2_l = DIFFER.diff_main(base, target)

    DIFFER.diff_cleanupEfficiency(diff1_l)
    DIFFER.diff_cleanupEfficiency(diff2_l)

    diff1 = iter(diff1_l)
    diff2 = iter(diff2_l)

    composed_text = []

    # From here on ``source``/``target`` hold the current (status, text) op of
    # each diff stream (or None once exhausted), not the input strings.
    source = next(diff1, None)  # type: ignore
    target = next(diff2, None)  # type: ignore

    # Text of the most recently consumed op on each side; used as the
    # "original" when scoring addition/addition conflicts.
    prev_source_text = ""
    prev_target_text = ""

    while source is not None and target is not None:
        source_status, source_text = source
        target_status, target_text = target
        if source_status == PRESERVED and target_status == PRESERVED:
            # Base is preserved for both source and target
            if len(source_text) > len(target_text):
                # Source preserves a longer run: target changes fall inside it
                advance = True
                composed_text.append(target_text)
                invariant = _unmatched_tail(target_text, source_text)
                prev_target_text = target[1]
                target = next(diff2, None)  # type: ignore
                while invariant != "" and target is not None:
                    # Apply target changes until invariant is consumed
                    target_status, target_text = target
                    if target_status == DELETION:
                        if len(target_text) > len(invariant):
                            # Deletion extends past the invariant: keep the
                            # remainder as the current target op.
                            target_text = target_text[len(invariant) :]
                            invariant = ""
                            target = (target_status, target_text)  # type: ignore
                        else:
                            invariant = invariant[len(target_text) :]
                            prev_target_text = target[1]
                            target = next(diff2, None)  # type: ignore
                    elif target_status == ADDITION:
                        composed_text.append(target_text)
                        prev_target_text = target[1]
                        target = next(diff2, None)  # type: ignore
                    else:
                        # Target preserves text again: recompute invariant
                        if len(invariant) > len(target_text):
                            assert invariant[: len(target_text)] == target_text
                            # Leftover invariant becomes the current source
                            # op, so source must not advance below.
                            source = (source_status, invariant[len(target_text) :])  # type: ignore
                            composed_text.append(target_text)
                            invariant = ""
                            advance = False
                            prev_target_text = target[1]
                            target = next(diff2, None)  # type: ignore
                        else:
                            target_text = target_text[len(invariant) :]
                            composed_text.append(invariant)
                            invariant = ""
                            target = (target_status, target_text)  # type: ignore
                if advance:
                    prev_source_text = source[1]
                    source = next(diff1, None)  # type: ignore
            elif len(source_text) < len(target_text):
                # Target preserves a longer run: source changes fall inside it
                advance = True
                composed_text.append(source_text)
                invariant = _unmatched_tail(target_text, source_text)
                prev_source_text = source[1]
                source = next(diff1, None)  # type: ignore
                while invariant != "" and target is not None and source is not None:
                    # Apply source changes until invariant is consumed
                    source_status, source_text = source
                    if source_status == DELETION:
                        if len(source_text) > len(invariant):
                            source_text = source_text[len(invariant) :]
                            invariant = ""
                            source = (source_status, source_text)  # type: ignore
                        else:
                            invariant = invariant[len(source_text) :]
                            prev_source_text = source[1]
                            source = next(diff1, None)  # type: ignore
                    elif source_status == ADDITION:
                        composed_text.append(source_text)
                        prev_source_text = source[1]
                        source = next(diff1, None)  # type: ignore
                    else:
                        # Source preserves text again: recompute invariant
                        if len(invariant) > len(source_text):
                            assert invariant[: len(source_text)] == source_text
                            # Leftover invariant becomes the current target
                            # op, so target must not advance below.
                            target = (target_status, invariant[len(source_text) :])  # type: ignore
                            composed_text.append(source_text)
                            invariant = ""
                            advance = False
                            prev_source_text = source[1]
                            source = next(diff1, None)  # type: ignore
                        else:
                            source_text = source_text[len(invariant) :]
                            composed_text.append(invariant)
                            invariant = ""
                            source = (source_status, source_text)  # type: ignore
                if advance:
                    prev_target_text = target[1]
                    target = next(diff2, None)  # type: ignore
            else:
                # Source and target preserve runs of equal length
                composed_text.append(source_text)
                prev_source_text = source[1]
                prev_target_text = target[1]
                source = next(diff1, None)  # type: ignore
                target = next(diff2, None)  # type: ignore
        elif source_status == ADDITION and target_status == PRESERVED:
            # Source is adding text
            composed_text.append(source_text)
            prev_source_text = source[1]
            source = next(diff1, None)  # type: ignore
        elif source_status == PRESERVED and target_status == ADDITION:
            # Target is adding text
            composed_text.append(target_text)
            prev_target_text = target[1]
            target = next(diff2, None)  # type: ignore
        elif source_status == DELETION and target_status == PRESERVED:
            if len(target_text) > len(source_text):
                # Drop the deleted part from target's preserved run
                target_text = target_text[len(source_text) :]
                target = (target_status, target_text)  # type: ignore
                prev_source_text = source[1]
                source = next(diff1, None)  # type: ignore
            elif len(target_text) <= len(source_text):
                # Deletion covers the whole preserved run: shrink the deletion
                source_text = source_text[len(target_text) :]
                source = (source_status, source_text)  # type: ignore
                prev_target_text = target[1]
                target = next(diff2, None)  # type: ignore
        elif source_status == PRESERVED and target_status == DELETION:
            if len(source_text) > len(target_text):
                # Drop the deleted part from source's preserved run
                source_text = source_text[len(target_text) :]
                source = (source_status, source_text)  # type: ignore
                prev_target_text = target[1]
                target = next(diff2, None)  # type: ignore
            elif len(source_text) <= len(target_text):
                # Deletion covers the whole preserved run: shrink the deletion
                target_text = target_text[len(source_text) :]
                target = (target_status, target_text)  # type: ignore
                prev_source_text = source[1]
                source = next(diff1, None)  # type: ignore
        elif source_status == DELETION and target_status == ADDITION:
            # Merge conflict
            # Err on the side of deletion. Do not add anything
            prev_source_text = source[1]
            prev_target_text = target[1]
            source = next(diff1, None)  # type: ignore
            target = next(diff2, None)  # type: ignore
            if target is not None:
                # Strip the text source deleted from the head of target's
                # next op so it is not processed a second time.
                target_status, target_text = target
                if target_text.startswith(source_text):
                    target_text = target_text[len(source_text) :]
                    target = (target_status, target_text)  # type: ignore
        elif source_status == ADDITION and target_status == DELETION:
            # Merge conflict
            # Err on the side of deletion. Do not add anything
            prev_source_text = source[1]
            prev_target_text = target[1]
            source = next(diff1, None)  # type: ignore
            target = next(diff2, None)  # type: ignore
            if source is not None:
                # Strip the text target deleted from the head of source's
                # next op so it is not processed a second time.
                source_status, source_text = source
                if source_text.startswith(target_text):
                    source_text = source_text[len(target_text) :]
                    source = (source_status, source_text)  # type: ignore
        elif source_status == ADDITION and target_status == ADDITION:
            # Possible merge conflict
            if len(source_text) >= len(target_text):
                if source_text.startswith(target_text):
                    composed_text.append(source_text)
                else:
                    # Merge conflict
                    # Insert text that has highest distance from original
                    # we assume original is last operation
                    source_dist = DIFFER.diff_levenshtein(
                        DIFFER.diff_main(source_text, prev_source_text)
                    )
                    target_dist = DIFFER.diff_levenshtein(
                        DIFFER.diff_main(target_text, prev_target_text)
                    )
                    if source_dist > target_dist:
                        composed_text.append(source_text)
                    else:
                        composed_text.append(target_text)
            else:
                if target_text.startswith(source_text):
                    composed_text.append(target_text)
                else:
                    # Merge conflict
                    # Insert text that has highest distance from original
                    source_dist = DIFFER.diff_levenshtein(
                        DIFFER.diff_main(source_text, prev_source_text)
                    )
                    target_dist = DIFFER.diff_levenshtein(
                        DIFFER.diff_main(target_text, prev_target_text)
                    )
                    if source_dist > target_dist:
                        composed_text.append(source_text)
                    else:
                        composed_text.append(target_text)
            prev_source_text = source[1]
            prev_target_text = target[1]
            source = next(diff1, None)  # type: ignore
            target = next(diff2, None)  # type: ignore
        elif source_status == DELETION and target_status == DELETION:
            # Possible merge conflict
            merge_conflict = False
            if len(source_text) > len(target_text):
                if source_text.startswith(target_text):
                    # Target's deletion is covered; keep source's remainder
                    source_text = source_text[len(target_text) :]
                    source = (source_status, source_text)  # type: ignore
                    prev_target_text = target[1]
                    target = next(diff2, None)  # type: ignore
                else:
                    merge_conflict = True
            elif len(target_text) > len(source_text):
                if target_text.startswith(source_text):
                    # Source's deletion is covered; keep target's remainder
                    target_text = target_text[len(source_text) :]
                    target = (target_status, target_text)  # type: ignore
                    prev_source_text = source[1]
                    source = next(diff1, None)  # type: ignore
                else:
                    merge_conflict = True
            else:
                if target_text == source_text:
                    # Both source and target remove the same text
                    prev_source_text = source[1]
                    prev_target_text = target[1]
                    source = next(diff1, None)  # type: ignore
                    target = next(diff2, None)  # type: ignore
                else:
                    merge_conflict = True

            # Don't handle double deletion scenario: skip both ops so the
            # loop always makes progress.
            if merge_conflict:
                source = next(diff1, None)  # type: ignore
                target = next(diff2, None)  # type: ignore

    # One stream is exhausted: from the other, keep only its additions.
    while source is not None:
        source_status, source_text = source
        if source_status == ADDITION:
            composed_text.append(source_text)
        source = next(diff1, None)  # type: ignore

    while target is not None:
        target_status, target_text = target
        if target_status == ADDITION:
            composed_text.append(target_text)
        target = next(diff2, None)  # type: ignore

    return "".join(composed_text)
Loading