/
noteParse.py
87 lines (70 loc) · 2.93 KB
/
noteParse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# noteParse contains functions for breaking apart Evernote .enex
# export files and converting them into a parsed, more
# accessible format
import xml.etree.ElementTree as ET
import note
import book
import os
def parseNote(fileName):
    """Parse the first note of an Evernote .enex export into a note.Note.

    Only the first child of the export's root element is read; any
    further notes in the file are ignored.

    Args:
        fileName: Source of the .enex export, handed directly to ET.parse.

    Returns:
        A note.Note whose title, author, created, updated, contentTree
        (the parsed note body) and resources (list of <resource>
        elements) are filled in, with addTag() called once per <tag>
        element. title, created and updated are None when the export
        omits the corresponding element; author falls back to the same
        default parseNotebook uses.

    Raises:
        IndexError: If the root element has no children or the note has
            no <content> element.
        AttributeError: If the <content> element is empty.
        xml.etree.ElementTree.ParseError: If the file or the embedded
            note body is not well-formed XML.
    """
    noteTree = ET.parse(fileName).getroot()[0]

    def childText(parent, tag, default=None):
        # Text of the first matching child; `default` when the child (or
        # the parent itself) is absent. Elements are compared against
        # None explicitly because a childless Element is falsy.
        child = None if parent is None else parent.find(tag)
        return default if child is None else child.text

    # <created>, <updated>, <note-attributes> and <author> are optional in
    # the export format, so a missing one must not abort the parse.
    title = childText(noteTree, 'title')
    created = childText(noteTree, 'created')
    updated = childText(noteTree, 'updated')
    author = childText(noteTree.find('note-attributes'), 'author', "Joey Kilgore")

    contentStr = noteTree.findall('content')[0].text
    # Drop the embedded XML prolog so the body parses as a bare document.
    for header in (
        """<?xml version="1.0" encoding="UTF-8" standalone="no"?>""",
        """<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">""",
    ):
        contentStr = contentStr.replace(header, "")
    # ElementTree does not know the HTML entity &nbsp; and raises
    # ParseError on it; the numeric reference is the same character and
    # is always valid XML.
    contentStr = contentStr.replace("&nbsp;", "&#160;")

    noteObj = note.Note()
    noteObj.author = author
    noteObj.title = title
    noteObj.created = created
    noteObj.updated = updated
    noteObj.contentTree = ET.fromstring(contentStr)
    noteObj.resources = noteTree.findall('resource')
    for tagNode in noteTree.findall('tag'):
        noteObj.addTag(tagNode.text)
    return noteObj
def parseNotebook(fileName):
    """Parse every note of an Evernote .enex export into a book.Book.

    Args:
        fileName: Path of the .enex export. It is handed to ET.parse and
            its base name, without extension, becomes the book title.

    Returns:
        A book.Book whose notes list holds one note.Note per <note>
        element, in document order. Each note carries title, author,
        created, updated, contentTree, resources and its tags. The book's
        author is set to the author of the last note; it is left
        untouched when the export contains no notes.

    Raises:
        IndexError: If a note has no <content> element.
        AttributeError: If a note's <content> element is empty.
        xml.etree.ElementTree.ParseError: If the file or an embedded
            note body is not well-formed XML.
    """
    enExport = ET.parse(fileName).getroot()

    bookObj = book.Book()
    bookObj.title = os.path.splitext(os.path.basename(fileName))[0]

    def childText(parent, tag, default=None):
        # Text of the first matching child; `default` when the child (or
        # the parent itself) is absent. Elements are compared against
        # None explicitly because a childless Element is falsy.
        child = None if parent is None else parent.find(tag)
        return default if child is None else child.text

    lastNote = None
    for noteTree in enExport.findall('note'):
        # <created>, <updated>, <note-attributes> and <author> are
        # optional in the export format; only a missing author gets a
        # non-None default.
        title = childText(noteTree, 'title')
        author = childText(noteTree.find('note-attributes'), 'author', "Joey Kilgore")
        created = childText(noteTree, 'created')
        updated = childText(noteTree, 'updated')

        contentStr = noteTree.findall('content')[0].text
        # Drop the embedded XML prolog so the body parses as a bare document.
        for header in (
            """<?xml version="1.0" encoding="UTF-8" standalone="no"?>""",
            """<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">""",
        ):
            contentStr = contentStr.replace(header, "")
        # Remove the &nbsp; entity, which ElementTree rejects as
        # undefined. Literal non-breaking spaces are removed by the ASCII
        # filter below; ordinary spaces must stay, or tag names would
        # fuse with their attributes.
        contentStr = contentStr.replace("&nbsp;", "")
        # remove non ascii
        contentStr = contentStr.encode('ascii', 'ignore').decode('ascii')

        noteObj = note.Note()
        noteObj.author = author
        noteObj.title = title
        noteObj.created = created
        noteObj.updated = updated
        noteObj.contentTree = ET.fromstring(contentStr.strip())
        noteObj.resources = noteTree.findall('resource')
        # Carry tags over the same way parseNote does.
        for tagNode in noteTree.findall('tag'):
            noteObj.addTag(tagNode.text)

        bookObj.notes.append(noteObj)
        lastNote = noteObj

    # An export without notes has no author to inherit.
    if lastNote is not None:
        bookObj.author = lastNote.author
    return bookObj
def recursivePrintTree(prefix, node):
    """Print node's tag and, beneath it, the tags of all its descendants.

    Each line is the given prefix followed by "|-->" and the tag; every
    level of nesting extends the prefix by one character.

    Args:
        prefix: String printed in front of this node's marker.
        node: Element-like object with a string .tag that yields its
            children when iterated.
    """
    line = "".join((prefix, "|-->", node.tag))
    print(line)
    deeper = prefix + " "
    for sub in node:
        recursivePrintTree(deeper, sub)
def printTree(fileName):
    """Print the name of an XML file followed by its element tag tree.

    The file is parsed before anything is printed, so a parse failure
    produces no output.

    Args:
        fileName: Source of the XML document, handed directly to ET.parse.
    """
    rootNode = ET.parse(fileName).getroot()
    print(fileName)
    recursivePrintTree("", rootNode)