/
xml2dict.py
117 lines (99 loc) · 4.06 KB
/
xml2dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
"""Thunder Chen<nkchenz@gmail.com> 2007.9.1"""
from __future__ import with_statement
import re
try:
import xml.etree.ElementTree as ET
except: # pragma: no cover
# For Python 2.4
import cElementTree as ET
from object_dict import object_dict
class XML2Dict(object):
    """Convert XML documents into nested ``object_dict`` structures.

    Conversion rules:

    * An element holding only text becomes that text (a string).
    * An empty element (no text, attributes or children) becomes ``None``.
    * Any other element becomes an ``object_dict`` keyed by attribute and
      child names; a leading ``{namespace}`` part is dropped from names.
    * Text that sits next to attributes or child elements is stored under
      the element's own tag name.
    * When the same key occurs more than once (repeated children, or a
      child named like an attribute) the values are collected in a list.
    """

    # ElementTree spells qualified names as '{namespace-uri}local-name'.
    _NAMESPACE_RE = re.compile(r"\{(.*)\}(.*)")

    def _parse_node(self, node):
        """Recursively convert one element into a string, mapping or None.

        The element itself is left unmodified.

        Raises:
            ValueError: if the element has text and an attribute whose name
                equals the element's tag, since both would need the same key.
        """
        # Whitespace-only text (typical for pretty-printed XML) is ignored.
        text = node.text
        if not (text and text.strip()):
            text = None
        children = list(node)

        # Plain text leaf: the text itself is the value.
        if text is not None and not node.attrib and not children:
            return text

        node_tree = object_dict()
        for name, value in node.attrib.items():
            name, value = self._namespace_split(name, value)
            node_tree[name] = value

        if text is not None:
            # Text cannot be the whole value here, so file it under the
            # element's own (namespace-free) tag name.
            key, text = self._namespace_split(node.tag, text)
            if key in node_tree:
                raise ValueError("Name conflict: Attribute name conflicts with "
                                 "tag name. Check the documentation.")
            node_tree[key] = text

        # Fair warning: a child sharing its name with an attribute (or with
        # the text entry above) turns that entry into a list.
        for child in children:
            tag, tree = self._namespace_split(child.tag,
                                              self._parse_node(child))
            if tag not in node_tree:
                # First encounter, store the value directly.
                node_tree[tag] = tree
                continue
            existing = node_tree[tag]
            if not isinstance(existing, list):
                # Second encounter, promote the single value to a list.
                node_tree[tag] = [existing]
            node_tree[tag].append(tree)

        if not node_tree:
            return None
        return node_tree

    def _namespace_split(self, tag, value):
        """Strip the namespace from a qualified name.

        '{http://cs.sfsu.edu/csc867/myscheduler}patients' is split into
            ns   = http://cs.sfsu.edu/csc867/myscheduler
            name = patients
        and only the name is kept. Names without a namespace are returned
        unchanged.

        Returns:
            A ``(name, value)`` tuple; ``value`` is passed through untouched.
        """
        match = self._NAMESPACE_RE.search(tag)
        if match:
            tag = match.group(2)
        return (tag, value)

    def parse(self, file):
        """Parse the XML file at path ``file`` into a mapping.

        The file is read as bytes so that the parser, not the platform's
        default text encoding, decides how to decode it.
        """
        with open(file, 'rb') as f:
            return self.fromstring(f.read())

    def fromstring(self, s):
        """Parse XML held in the string ``s``.

        Returns:
            An ``object_dict`` with a single entry: the root element's
            (namespace-free) tag mapped to its converted content.
        """
        root = ET.fromstring(s)
        root_tag, root_tree = self._namespace_split(root.tag,
                                                    self._parse_node(root))
        return object_dict({root_tag: root_tree})
class Dict2XML(object):
    """Turn a dictionary into an XML string.

    Supported values are ``None`` (empty element), integers, strings
    (emitted as CDATA), dictionaries (nested elements) and lists (the
    enclosing tag is repeated once per item). Tag names are written as
    given, without validation or escaping.
    """

    # ``basestring`` only exists on Python 2; use ``str`` where it is missing.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def tostring(self, d):
        """Convert dictionary to an XML string.

        Args:
            d: a dictionary with exactly one key, the root element name.

        Returns:
            The XML declaration followed by the serialised root element.

        Raises:
            TypeError: if ``d`` is not a dictionary.
            ValueError: if ``d`` does not have exactly one key, if the root
                maps to a list, or if a value of an unsupported type is
                found anywhere in the structure.
        """
        if not isinstance(d, dict):
            # (d,) keeps the formatting correct when d is itself a tuple.
            raise TypeError('tostring must receive a dictionary: %r' % (d,))
        if len(d) != 1:
            raise ValueError('Dictionary must have exactly one root element')
        root_content = list(d.values())[0]
        if isinstance(root_content, list):
            # A list would expand into several root elements.
            raise ValueError('Dictionary must not be a map to list: %r' % (d,))
        xml_list = ['<?xml version="1.0" encoding="UTF-8" ?>\n']
        xml_list.append(self.__tostring_helper(d))
        return ''.join(xml_list)

    def __tostring_helper(self, d):
        """Serialise one value (int, string or dict) to an XML fragment.

        Raises:
            ValueError: if ``d`` is of an unsupported type.
        """
        if isinstance(d, int):
            return str(d)
        if isinstance(d, self._string_types):
            # ']]>' would end the CDATA section early; split it across two
            # consecutive sections so the text survives intact.
            return '<![CDATA[%s]]>' % d.replace(']]>', ']]]]><![CDATA[>')
        if isinstance(d, dict):
            parts = []
            for tag, content in d.items():
                # A list repeats the tag per item; anything else is one item.
                items = content if isinstance(content, list) else [content]
                for item in items:
                    if item is None:
                        parts.append('<%s />' % tag)
                    else:
                        parts.append('<%s>%s</%s>' % (
                            tag, self.__tostring_helper(item), tag))
            return ''.join(parts)
        raise ValueError('Cannot convert %r to an XML string' % (d,))