-
Notifications
You must be signed in to change notification settings - Fork 0
/
splitter.rb
105 lines (95 loc) · 2.31 KB
/
splitter.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
require 'nokogiri'
# Splits an HTML fragment in two around the first occurrence of a piece of
# text that lies inside a single text node.
#
# Building the results is a destructive process, so two independently parsed
# trees are kept: one is trimmed down to everything before the text, the
# other to everything after it. Parsing twice is easier than copying a tree
# because the nodes hold pointers to each other everywhere.
class Splitter
  # @param html [String, nil] markup to split; may instead be given to
  #   #parse or #split later
  def initialize(html = nil)
    parse(html)
  end

  # Parses +html+ into the two working trees and remembers the markup so the
  # trees can be rebuilt before each split. Does nothing when +html+ is nil.
  #
  # @param html [String, nil]
  def parse(html)
    return if html.nil?

    @html = html
    @fragment = Nokogiri::HTML.fragment(html)
    @fragment_copy = Nokogiri::HTML.fragment(html)
  end

  # Splits the markup around the first text node containing +text+.
  #
  # @param text [String] text to split on
  # @param html [String, nil] markup to split; defaults to the markup most
  #   recently handed to #initialize, #parse or #split
  # @return [Array<String>] the HTML before the text and the HTML after it
  # @raise [ArgumentError] if no markup has ever been supplied
  def split(text, html = nil)
    # Always start from freshly parsed trees: the previous split (if any)
    # has already pruned the old ones.
    parse(html || @html)
    raise ArgumentError, 'no HTML has been supplied to split' if @fragment.nil?

    first_part(text)
    second_part(text)
    [@fragment.to_html, @fragment_copy.to_html]
  end

  # Prints the HTML of every node of the first tree, in pre-order.
  def print_tree
    preorder_traverse(@fragment) do |node|
      puts node.to_html
    end
  end

  # Replaces the node's content with the part of its text on one side of the
  # first occurrence of +delimiter+. The content becomes empty when the
  # delimiter does not occur in the text.
  #
  # @param node [#text, #content=] the text node to rewrite
  # @param delimiter [String] text to cut at
  # @param side [Symbol] :keep_left keeps what precedes the delimiter; any
  #   other value keeps what follows it, with leading whitespace stripped
  def update_content(node, delimiter, side)
    source = node.text
    if source.index(delimiter).nil?
      node.content = ''
    else
      # partition cuts at the literal first occurrence; String#split would
      # treat a " " delimiter as "any run of whitespace".
      before, _match, after = source.partition(delimiter)
      node.content = side == :keep_left ? before : after.lstrip
    end
  end

  # Trims the first tree down to what precedes +text+: the text node that
  # contains it is cut at the match and every node visited after it (in
  # pre-order) is removed.
  def first_part(text)
    found = false
    preorder_traverse(@fragment) do |node|
      if found
        node.remove
      elsif node.text? && node.text.index(text)
        found = true
        update_content(node, text, :keep_left)
      end
    end
  end

  # Trims the second tree down to what follows +text+: walks the tree in
  # post-order removing nodes until the text node containing +text+ is
  # reached, cuts that node at the match and stops.
  #
  # @return [Object, nil] the node that contained the text, or nil when no
  #   text node contains it (in which case every node has been removed)
  def second_part(text)
    postorder_traverse(@fragment_copy) do |node|
      if node.text? && node.text.index(text)
        update_content(node, text, :keep_right)
        return node
      end
      node.remove
    end
    nil
  end

  # Finds the first text node (in pre-order) of the first tree whose text
  # contains +text+.
  #
  # @return [Object, nil] the matching node, or nil when there is none
  def find(text)
    preorder_traverse(@fragment) do |node|
      return node if node.text? && node.text.index(text)
    end
    nil
  end

  # Calls the block with every node of the subtree rooted at +node+,
  # children before their parent.
  def postorder_traverse(node, &block)
    node.children.each { |child| postorder_traverse(child, &block) }
    block.call(node)
  end

  # Calls the block with every node of the subtree rooted at +node+,
  # parent before its children.
  def preorder_traverse(node, &block)
    block.call(node)
    node.children.each { |child| preorder_traverse(child, &block) }
  end
end