-
Notifications
You must be signed in to change notification settings - Fork 0
/
format.py
34 lines (31 loc) · 958 Bytes
/
format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import xml.etree.ElementTree
import re
import pickle
# Matches one "<name>" token inside a row's Tags attribute and captures the
# text between the angle brackets.
_TAG_PATTERN = re.compile(r'<([^>]*)>')


def _load_documents(path):
    """Return the object unpickled from *path*.

    The file is opened in binary mode: pickle data is bytes, and a text-mode
    handle makes ``pickle.load`` fail on Python 3.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in the
    file. Only use this with a file produced by a trusted step of the
    pipeline.
    """
    with open(path, 'rb') as doc:
        return pickle.load(doc)


def _build_questions(rows, documents):
    """Map the Id of every PostTypeId == '1' row to its tab-prefixed fields.

    Each value is a string in which every field is preceded by a tab: first
    the names extracted from the row's ``Tags`` attribute, then -- when the
    row's Id is a key of *documents* -- ``str(entry[0])`` for each entry
    stored under that Id.
    # NOTE(review): entries in `documents` are presumably per-post topic
    # results from an earlier step; only entry[0] is used here -- confirm.

    A row without a ``Tags`` attribute contributes no tag fields. If the same
    Id occurs more than once, the new fields are appended to the existing
    string.
    """
    questions = {}
    for row in rows:
        if row.get('PostTypeId') != '1':
            continue
        post_id = row.get('Id')
        # Element.get returns None for a missing attribute; treat as no tags.
        fields = _TAG_PATTERN.findall(row.get('Tags') or "")
        if post_id in documents:
            fields.extend(str(entry[0]) for entry in documents[post_id])
        suffix = "".join("\t" + field for field in fields)
        questions[post_id] = questions.get(post_id, "") + suffix
    return questions


def _iter_answer_lines(rows, questions):
    """Yield one output line per PostTypeId == '2' row that has an owner.

    A line is the row's ``OwnerUserId`` followed by the field string that
    *questions* holds for the row's ``ParentId``. Rows without an
    ``OwnerUserId`` are skipped. A row whose parent is not in *questions*
    yields the user id alone (the same shape as a parent with no fields)
    instead of raising ``KeyError``.
    """
    for row in rows:
        if row.get('PostTypeId') != '2':
            continue
        user = row.get('OwnerUserId')
        if user is None:
            continue
        yield user + questions.get(row.get('ParentId'), "")


def main():
    """Print, for each answering user, the fields of the question answered.

    Reads ``Posts.xml`` (its root's ``row`` children carry the ``Id``,
    ``PostTypeId``, ``Tags``, ``ParentId`` and ``OwnerUserId`` attributes)
    and the pickled mapping in ``tm_out`` from the current directory, then
    writes one tab-separated line per qualifying row to standard output.
    """
    root = xml.etree.ElementTree.parse('Posts.xml').getroot()
    documents = _load_documents('tm_out')
    rows = root.findall('row')
    questions = _build_questions(rows, documents)
    for line in _iter_answer_lines(rows, questions):
        print(line)


if __name__ == "__main__":
    main()