-
Notifications
You must be signed in to change notification settings - Fork 0
/
xmlparser.py
99 lines (83 loc) · 3.05 KB
/
xmlparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
class Node:
    """One element of the parsed document tree.

    Instances carry the tag name, a reference to the enclosing node
    (``None`` unless one is supplied), a list of child nodes that starts
    empty, and the element's text, which starts as an empty string.
    """

    def __init__(self, tag_name, parent=None):
        """Create a node called *tag_name*, optionally linked to *parent*."""
        self.tag_name = tag_name
        self.parent = parent
        self.text = ""
        self.children = []
        # Each node receives its own FirstTag instance; nothing inside this
        # class reads the attribute afterwards.
        self.state = FirstTag()

    def __str__(self):
        """Return ``tag_name``, or ``tag_name:text`` when the text is non-empty."""
        if not self.text:
            return self.tag_name
        return ":".join((self.tag_name, self.text))
class Parser:
    """Run a small state machine over a markup string to build a Node tree.

    The parser stores the input (``parse_string``), the tree root
    (``root``), the node currently being filled (``current_node``) and the
    active state object (``state``).  A state object exposes
    ``process(remaining_string, parser)``; it returns the input that is
    still unconsumed and may replace ``parser.state`` with the next state.
    """

    # Number of consecutive steps that may leave the input unshortened
    # before the run is declared stuck.  With the states in this file a
    # single such step is normal (a state may only select its successor);
    # two in a row mean the same input would be handed around forever.
    _MAX_STALLED_STEPS = 2

    def __init__(self, parse_string):
        """Store *parse_string* and start in the ``FirstTag`` state."""
        self.parse_string = parse_string
        self.root = None
        self.current_node = None
        self.state = FirstTag()

    def process(self, remaining_string):
        """Feed *remaining_string* through the states until nothing is left.

        The active state is always invoked at least once, even for empty
        input.  The loop is iterative so that long documents do not
        exhaust the interpreter's recursion limit (one stack frame used to
        be consumed per state transition).

        Raises:
            ValueError: if the states stop consuming input, which would
                otherwise never terminate.
        """
        remaining = remaining_string
        stalled = 0
        while True:
            length_before = len(remaining)
            remaining = self.state.process(remaining, self)
            if not remaining:
                return
            if len(remaining) < length_before:
                stalled = 0
                continue
            stalled += 1
            if stalled >= self._MAX_STALLED_STEPS:
                raise ValueError(
                    "parser made no progress near %r" % (remaining[:40],)
                )

    def start(self):
        """Parse the string that was passed to the constructor."""
        self.process(self.parse_string)
class FirstTag:
    """Initial parser state: read the first tag and make it the tree root."""

    def process(self, remaining_string, parser):
        """Create the root node from the first ``<...>`` in the input.

        Anything before the first ``<`` is skipped.  The text between the
        angle brackets becomes the root's tag name, the root is stored as
        both ``parser.root`` and ``parser.current_node``, and the parser
        moves on to the ``ChildNode`` state.

        Args:
            remaining_string: the unparsed input.
            parser: the parser whose root, current node and state are set.

        Returns:
            The input following the closing ``>`` of the first tag.

        Raises:
            ValueError: if the input holds no complete ``<...>`` tag.
        """
        i_start_tag = remaining_string.find('<')
        if i_start_tag == -1:
            raise ValueError("no opening tag found in input")
        # Search for '>' only after the '<': a stray '>' earlier in the
        # input would otherwise yield an inverted (empty) slice.
        i_end_tag = remaining_string.find('>', i_start_tag + 1)
        if i_end_tag == -1:
            raise ValueError(
                "unterminated opening tag near %r"
                % (remaining_string[i_start_tag:i_start_tag + 40],)
            )
        tag_name = remaining_string[i_start_tag + 1:i_end_tag]
        root = Node(tag_name)
        parser.root = parser.current_node = root
        parser.state = ChildNode()
        return remaining_string[i_end_tag + 1:]
class ChildNode:
    """Dispatch state: look at the upcoming input and choose the next state."""

    def process(self, remaining_string, parser):
        """Select the state that should handle the next piece of input.

        Surrounding whitespace is removed first.  Input beginning with
        ``</`` goes to ``CloseTag``, input beginning with ``<`` goes to
        ``OpenTag``, and anything else goes to ``TextNode``.

        Returns:
            The whitespace-stripped input; nothing else is consumed here.
        """
        trimmed = remaining_string.strip()
        if not trimmed.startswith("<"):
            next_state_cls = TextNode
        elif trimmed.startswith("</"):
            next_state_cls = CloseTag
        else:
            next_state_cls = OpenTag
        parser.state = next_state_cls()
        return trimmed
class OpenTag:
    """State that consumes an opening tag and descends into the new node."""

    def process(self, remaining_string, parser):
        """Append a child node for the next ``<...>`` tag and enter it.

        The text between the angle brackets becomes the child's tag name.
        The child is appended to ``parser.current_node.children``, becomes
        the new ``parser.current_node``, and the parser returns to the
        ``ChildNode`` state.

        Args:
            remaining_string: the unparsed input, expected to hold a tag.
            parser: the parser whose current node and state are updated.

        Returns:
            The input following the tag's closing ``>``.

        Raises:
            ValueError: if the tag is not terminated by ``>``, or if there
                is no open element left to attach the new node to.
        """
        i_start_tag = remaining_string.find('<')
        # Without this check a missing '>' gives find() == -1, the slice
        # below returns the whole input again and no progress is made.
        i_end_tag = remaining_string.find('>', i_start_tag + 1)
        if i_start_tag == -1 or i_end_tag == -1:
            raise ValueError(
                "unterminated opening tag near %r" % (remaining_string[:40],)
            )
        tag_name = remaining_string[i_start_tag + 1:i_end_tag]
        if parser.current_node is None:
            # current_node is None once the root element has been closed.
            raise ValueError(
                "opening tag <%s> has no enclosing element" % (tag_name,)
            )
        node = Node(tag_name, parser.current_node)
        parser.current_node.children.append(node)
        parser.current_node = node
        parser.state = ChildNode()
        return remaining_string[i_end_tag + 1:]
class CloseTag:
    """State that consumes a closing tag and climbs back to the parent node."""

    def process(self, remaining_string, parser):
        """Match ``</name>`` against the current node and step to its parent.

        Args:
            remaining_string: the unparsed input, expected to start with a
                closing tag.
            parser: the parser whose current node and state are updated.

        Returns:
            The input following the closing tag, stripped of surrounding
            whitespace.

        Raises:
            AssertionError: if the input does not hold a terminated closing
                tag, if no element is open, or if the tag name differs from
                the current node's.  The exception is raised explicitly
                instead of through ``assert`` so the checks still run under
                ``python -O``, while code catching ``AssertionError`` keeps
                working.
        """
        i_start_tag = remaining_string.find('<')
        if (i_start_tag == -1
                or remaining_string[i_start_tag + 1:i_start_tag + 2] != "/"):
            raise AssertionError(
                "expected a closing tag near %r" % (remaining_string[:40],)
            )
        i_end_tag = remaining_string.find('>', i_start_tag + 2)
        if i_end_tag == -1:
            raise AssertionError(
                "unterminated closing tag near %r" % (remaining_string[:40],)
            )
        tag_name = remaining_string[i_start_tag + 2:i_end_tag]
        if parser.current_node is None:
            # Every element has already been closed.
            raise AssertionError(
                "closing tag </%s> has no matching open element" % (tag_name,)
            )
        if tag_name != parser.current_node.tag_name:
            raise AssertionError(
                "closing tag </%s> does not match open element <%s>"
                % (tag_name, parser.current_node.tag_name)
            )
        parser.current_node = parser.current_node.parent
        parser.state = ChildNode()
        return remaining_string[i_end_tag + 1:].strip()
class TextNode:
    """State that consumes character data up to the next tag."""

    def process(self, remaining_string, parser):
        """Store the text preceding the next ``<`` on the current node.

        The text replaces ``parser.current_node.text`` and the parser
        returns to the ``ChildNode`` state.

        Args:
            remaining_string: the unparsed input, starting with text.
            parser: the parser whose current node and state are updated.

        Returns:
            The input from the next ``<`` onwards, or an empty string when
            no further tag exists.

        Raises:
            ValueError: if there is no open element to hold the text.
        """
        i_start_tag = remaining_string.find('<')
        if i_start_tag == -1:
            # No tag follows, so everything left is text.  Slicing with -1
            # would drop the last character from the text and hand that
            # same character back on every subsequent step.
            i_start_tag = len(remaining_string)
        if parser.current_node is None:
            raise ValueError(
                "text outside of any element near %r"
                % (remaining_string[:40],)
            )
        parser.current_node.text = remaining_string[:i_start_tag]
        parser.state = ChildNode()
        return remaining_string[i_start_tag:]
if __name__ == "__main__":
    # Command-line entry point: parse the file named by the first argument
    # and print every node, one per line, in document (pre-order) sequence.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s FILE" % (sys.argv[0],))

    with open(sys.argv[1]) as source_file:
        contents = source_file.read()

    p = Parser(contents)
    p.start()

    # Depth-first walk with an explicit stack.  Children are pushed in
    # reverse so they pop in their original order; this avoids the
    # quadratic cost of pop(0) plus list concatenation on every node.
    pending = [p.root]
    while pending:
        node = pending.pop()
        print(node)
        pending.extend(reversed(node.children))