Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update comment intervals on edit #3

Closed
dmadisetti opened this issue May 10, 2023 · 2 comments
Closed

Update comment intervals on edit #3

dmadisetti opened this issue May 10, 2023 · 2 comments

Comments

@dmadisetti
Copy link
Owner

Using interval tree. Currently not updating interval, so highlights will get out of sync with buffer if too much editing occurs.

@dmadisetti
Copy link
Owner Author

dmadisetti commented May 10, 2023

Some interval code that is almost there

import unittest
from intervaltree import Interval, IntervalTree

# The update_intervals and compute_difference functions from the previous answer

from intervaltree import Interval, IntervalTree

def compute_difference(old_text, new_text):
    import difflib
    matcher = difflib.SequenceMatcher(None, old_text, new_text)
    return [diff for diff in matcher.get_opcodes()]

def update_intervals(tree, old_text, new_text):
    diffs = compute_difference(old_text, new_text)
    shift = 0
    print(diffs)

    for tag, i1, i2, j1, j2 in diffs:
        if tag == 'delete':
            overlap = set({})
            for interval in tree[i1 - shift:i2 - shift]:
                tree.remove(interval)
                start = interval.begin + min(i1 - interval.begin, 0)
                end = interval.end - (min(i2 - shift, interval.end) - max(i1 - shift, interval.begin)) + min(i1 - interval.begin, 0)
                interval = Interval(start, end, interval.data)
                overlap.add(interval)
            for interval in tree[i2 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin - (i2 - i1), interval.end - (i2 - i1), interval.data))
            for o in overlap:
                tree.add(o)
            shift += i2 - i1

        if tag == 'insert':
            overlap = set({})
            delta = (j2 - j1)
            for interval in tree[i1 - shift]:
                tree.remove(interval)
                end = interval.end + delta - shift
                interval = Interval(interval.begin, end, interval.data)
                overlap.add(interval)
            for interval in tree[i1 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin + delta - shift, interval.end + delta - shift, interval.data))

            for o in overlap:
                tree.add(o)
            shift -= delta

import unittest
from intervaltree import Interval, IntervalTree

# The compute_difference and update_intervals functions from the previous answer

class TestIntervalTreeUpdate(unittest.TestCase):

    def assert_intervals_equal(self, tree, new_text, expected_substrings):
        self.assertEqual(len(tree), len(expected_substrings))
        for interval, expected_substring in zip(sorted(tree), expected_substrings):
            actual_substring = new_text[interval.begin:interval.end]
            self.assertEqual(actual_substring, expected_substring)

    def test_assert_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        text = "hello my name is george"
        self.assert_intervals_equal(tree, text, ["o my name is g"])

    def test_double_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name g"])

    def test_pre_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "o my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_post_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is g"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_single_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name is g"])

    def test_pre_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hename is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["name is g"])

    def test_post_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name e"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name "])

    def test_single_interval_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])


    def test_multiple_intervals_no_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2")) #or
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "or"])

    def test_multiple_intervals_with_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2")) # name is george
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "name is not really george"])

    def test_insertion_within_interval(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george, but I am called george actually"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])

    def test_single_char_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello, my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o, my name is g"])

    def test_single_char_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hell my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, [" my name is g"])

    def test_insertion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my full name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my full name is g", "name is george"])

    def test_insertion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my name is really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is really g", "or"])

    def test_deletion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my ne is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my ne is g", "ne is george"])

    def test_deletion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my nameis george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my nameis g", "or"])

    def test_complex_case_diff_based(self):
        old_text = "abcdefghijklmno"

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(1, 3, "Highlight1"))
        tree.add(Interval(4, 7, "Highlight2"))
        tree.add(Interval(8, 11, "Highlight3"))
        tree.add(Interval(10, 12, "Highlight4"))
        tree.add(Interval(12, 15, "Highlight5"))

        # Modify the text with multiple insertions and deletions
        # Deletion:  remove "cd"
        # Insertion: add "x" between "e" and "f"
        # Deletion:  remove "ij"
        # Insertion: add "yz" between "m" and "n"
        new_text = "abefxghklyzmno"

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "b",  # Highlight1
            "efxg",  # Highlight2
            "h",  # Highlight3
            "k",  # Highlight4
            "lyzmno"  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)

    def test_complex_case(self):
        # Generate the test string dynamically
        part1 = "This is an example "
        part2 = "where we will insert, delete, and modify "
        part3 = "text within multiple overlapping intervals. "
        part4 = "The goal is to cover a variety of cases. "
        part5 = "Let's see how well the algorithm handles this."

        old_text = part1 + part2 + part3 + part4 + part5

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(len(part1), len(part1) + len("insert"), "Highlight1"))
        tree.add(Interval(len(part1 + part2), len(part1 + part2) + len("text"), "Highlight2"))
        tree.add(Interval(len(part1 + part2 + part3) - len("cases. "), len(part1 + part2 + part3), "Highlight3"))
        tree.add(Interval(len(part1 + part2 + part3 + part4) - len("the "), len(part1 + part2 + part3 + part4), "Highlight4"))
        tree.add(Interval(len(part1 + part2 + part3 + part4 + part5) - len("handles this."), len(part1 + part2 + part3 + part4 + part5), "Highlight5"))

        # Modify the text with multiple insertions and deletions
        part2_modified = "where we will insert and delete some "
        part3_modified = "text within multiple overlapping intervals and add more content. "
        part4_modified = "The goal is to cover a variety of cases and situations. "
        new_text = part1 + part2_modified + part3_modified + part4_modified + part5

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "insert and delete",  # Highlight1
            "text within multiple overlapping",  # Highlight2
            "situations.",  # Highlight3
            "to cover a variety of cases and",  # Highlight4
            "handles this."  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)


suite = unittest.TestLoader().loadTestsFromTestCase(TestIntervalTreeUpdate)
unittest.TextTestRunner(verbosity=2).run(suite)

@dmadisetti
Copy link
Owner Author

Works?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant