-
Notifications
You must be signed in to change notification settings - Fork 14
/
tk_data.py
521 lines (464 loc) · 18.5 KB
/
tk_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
#!/usr/bin/python
#
# ThotKeeper -- a personal daily journal application.
#
# Copyright (c) 2004-2018 C. Michael Pilato. All rights reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE file which can be found at the top level of the ThotKeeper
# distribution.
#
# Website: http://www.thotkeeper.org/
import os
import shutil
import tempfile
import xml.sax
# Newest diary data format version this module understands.  It is
# written into the <diary version="..."> attribute when saving, and files
# that declare a larger version are rejected when loading.
TK_DATA_VERSION = 1
# The built-in sorted() first appeared in Python 2.4.  On interpreters
# that lack it, install a small stand-in that covers this module's only
# need: producing a sorted copy of a list.
try:
    sorted
except NameError:
    def sorted(list):
        """Return a sorted shallow copy of LIST, or None if LIST is
        None.  LIST itself is left untouched."""
        if list is not None:
            ordered = list[:]
            ordered.sort()
            return ordered
        return None
# Sets (and the set() function) are new to Python 2.4, but an
# implementation that covers our needs (add, remove, iteration,
# membership and emptiness tests) is easy enough to patch in for older
# versions.
class MySet:
    """Minimal dictionary-backed stand-in for the built-in set type.

    Members are stored as the keys of a dictionary, so they must be
    hashable."""

    def __init__(self):
        self.items = {}

    def add(self, thing):
        """Add THING to the set; adding an existing member is a no-op."""
        self.items[thing] = None

    def remove(self, thing):
        """Remove THING from the set.  Raises KeyError if it is absent."""
        del self.items[thing]

    def __iter__(self):
        return iter(self.items)

    def __len__(self):
        # Also drives truth testing, so that "if not some_set:" is true
        # for an empty set exactly as it is for the built-in type.
        return len(self.items)

    def __contains__(self, thing):
        return thing in self.items


try:
    set
except NameError:
    def set():
        """Return a new, empty MySet."""
        return MySet()
class TKEntry:
    """A single diary entry: its author, subject, body text, date,
    intra-day id, and list of tags."""

    def __init__(self, author='', subject='', text='',
                 year=None, month=None, day=None, id=None, tags=None):
        """Store the given fields on the entry.

        TAGS, when supplied, is stored by reference (not copied).  When
        omitted, each entry gets its own new empty list, so that
        appending to one entry's tags never affects another entry."""
        if tags is None:
            tags = []
        self.author = author
        self.subject = subject
        self.text = text
        self.year = year
        self.month = month
        self.day = day
        self.id = id
        self.tags = tags

    def get_author(self):
        """Return the entry's author."""
        return self.author

    def get_subject(self):
        """Return the entry's subject."""
        return self.subject

    def get_text(self):
        """Return the entry's body text."""
        return self.text

    def get_date(self):
        """Return the entry's date as a (year, month, day) tuple."""
        return self.year, self.month, self.day

    def get_id(self):
        """Return the entry's intra-day id."""
        return self.id

    def get_tags(self):
        """Return the entry's list of tags (the stored list itself)."""
        return self.tags
class TKEntries:
    """In-memory collection of diary entries, indexed by date and by tag.

    Entries are kept in ENTRY_TREE, a nested dictionary keyed by year,
    then month, then day, then entry id.  TAG_TREE maps each tag to the
    set of (year, month, day, id) keys of the entries carrying it.
    Registered listener callbacks are invoked as entries are stored or
    removed and as their tag associations are updated."""

    def __init__(self):
        self.entry_tree = {}       # year -> month -> day -> id -> entry
        self.tag_tree = {}         # tag -> set of (year, month, day, id)
        self.listeners = []        # entry-change callbacks
        self.tag_listeners = []    # tag-change callbacks
        self.author_name = None
        self.author_global = True

    def register_listener(self, func):
        """Append FUNC to the list of functions called whenever one of
        the diary entries changes.  FUNC is called as
        FUNC(entry, year, month, day, id), where ENTRY is the entry just
        stored, or None if the entry at that position was removed."""
        self.listeners.append(func)

    def register_tag_listener(self, func):
        """Append FUNC to the list of functions called whenever an
        entry's tag associations are updated.  FUNC is called as
        FUNC(tag, entry, flag), where FLAG is True for each tag the
        entry now carries and False for each tag it no longer carries."""
        self.tag_listeners.append(func)

    def enumerate_entries(self, func):
        """Call FUNC for each diary entry, ordered by time and
        intra-day index.  FUNC is a callback which accepts a TKEntry
        parameter."""
        for year in sorted(self.get_years()):
            for month in sorted(self.get_months(year)):
                for day in sorted(self.get_days(year, month)):
                    for id in sorted(self.get_ids(year, month, day)):
                        func(self.get_entry(year, month, day, id))

    def enumerate_tag_entries(self, func):
        """Call FUNC(entry, tag) for every tag, in sorted tag order,
        and for every entry carrying that tag, ordered by date and
        intra-day id."""
        for tag in sorted(self.get_tags()):
            # get_entries_by_tag() already returns entries in key order;
            # the entries themselves define no ordering to sort by.
            for entry in self.get_entries_by_tag(tag):
                func(entry, tag)

    def _update_tags(self, oldtags, newtags, entry):
        """Update the tag set association for ENTRY.  OLDTAGS are the
        tags it used to carry; NEWTAGS are the tags it now carries.
        Notify the tag listeners of relevant changes.  If this change
        removes the last association of an entry with a given tag,
        prune the tag."""
        addtags = [tag for tag in newtags if tag not in oldtags]
        removetags = [tag for tag in oldtags if tag not in newtags]
        entry_key = (entry.year, entry.month, entry.day, entry.id)
        # Listeners hear about every tag the entry now carries, not just
        # the newly added ones, and before the tag tree is updated.
        for tag in newtags:
            for func in self.tag_listeners:
                func(tag, entry, True)
        for tag in addtags:
            if tag not in self.tag_tree:
                self.tag_tree[tag] = set()
            self.tag_tree[tag].add(entry_key)
        for tag in removetags:
            if tag not in self.tag_tree:
                continue
            if entry_key in self.tag_tree[tag]:
                self.tag_tree[tag].remove(entry_key)
            for func in self.tag_listeners:
                func(tag, entry, False)
            if not self.tag_tree[tag]:
                del self.tag_tree[tag]

    def store_entry(self, entry):
        """Store ENTRY under its date and id, replacing any entry
        already stored there, then update the tag index and notify the
        entry listeners."""
        year, month, day = entry.get_date()
        months = self.entry_tree.setdefault(year, {})
        days = months.setdefault(month, {})
        day_entries = days.setdefault(day, {})
        id = entry.get_id()
        oldtags = []
        if id in day_entries:
            oldtags = sorted(day_entries[id].tags)
        day_entries[id] = entry
        newtags = sorted(entry.tags)
        self._update_tags(oldtags, newtags, entry)
        for func in self.listeners:
            func(entry, year, month, day, id)

    def remove_entry(self, year, month, day, id):
        """Remove the entry stored for YEAR, MONTH, DAY and ID, drop
        its tag associations, prune any day, month or year left empty,
        and notify the entry listeners (passing None as the entry).
        Raises KeyError if no such entry exists."""
        entry = self.entry_tree[year][month][day][id]
        self._update_tags(entry.tags, [], entry)
        del self.entry_tree[year][month][day][id]
        if not self.entry_tree[year][month][day]:
            del self.entry_tree[year][month][day]
        if not self.entry_tree[year][month]:
            del self.entry_tree[year][month]
        if not self.entry_tree[year]:
            del self.entry_tree[year]
        for func in self.listeners:
            func(None, year, month, day, id)

    def get_years(self):
        """Return a list of the years which have days with associated
        TKEntry objects."""
        return list(self.entry_tree.keys())

    def get_months(self, year):
        """Return a list of the months in YEAR which have days with
        associated TKEntry objects.  Raises KeyError if YEAR has none."""
        return list(self.entry_tree[year].keys())

    def get_days(self, year, month):
        """Return a list of the days in YEAR and MONTH which have
        associated TKEntry objects.  Raises KeyError if there are none."""
        return list(self.entry_tree[year][month].keys())

    def get_ids(self, year, month, day):
        """Return a list of the ids in YEAR, MONTH, and DAY which have
        associated TKEntry objects.  Raises KeyError if there are none."""
        return list(self.entry_tree[year][month][day].keys())

    def get_tags(self):
        """Return a list of all tags carried by at least one entry."""
        return list(self.tag_tree.keys())

    def get_entries_by_tag(self, tag):
        """Return a list of the entries carrying exactly TAG, ordered
        by date and intra-day id.  Raises KeyError if TAG is unknown."""
        entry_keys = list(self.tag_tree[tag])
        entry_keys.sort()
        return [self.entry_tree[year][month][day][id]
                for (year, month, day, id) in entry_keys]

    def get_entries_by_partial_tag(self, tagstart):
        """Return a list of the entries carrying TAGSTART itself or any
        tag beneath it (that is, any tag beginning with TAGSTART plus
        '/').  Matching tags are visited in sorted order; an entry that
        carries several matching tags appears once per tag.  Return an
        empty list if no tag matches."""
        tagstartsep = tagstart + '/'
        matching = [tag for tag in self.tag_tree.keys()
                    if tag == tagstart or tag.startswith(tagstartsep)]
        matching.sort()
        found = []
        for tag in matching:
            found.extend(self.get_entries_by_tag(tag))
        return found

    def get_entry(self, year, month, day, id):
        """Return the TKEntry associated with YEAR, MONTH, DAY and ID,
        or None if no such entry exists."""
        try:
            return self.entry_tree[year][month][day][id]
        except (KeyError, TypeError):
            return None

    def _sorted_ids(self, year, month, day):
        """Return a sorted list of the entry ids stored for YEAR,
        MONTH, DAY.  Raises KeyError if that day has no entries."""
        day_keys = list(self.entry_tree[year][month][day].keys())
        day_keys.sort()
        return day_keys

    def get_first_id(self, year, month, day):
        """Return the id of the first entry for that day, or None if
        the day has no entries."""
        try:
            return self._sorted_ids(year, month, day)[0]
        except (KeyError, IndexError, TypeError):
            return None

    def get_last_id(self, year, month, day):
        """Return the id of the last entry for that day, or None if
        the day has no entries."""
        try:
            return self._sorted_ids(year, month, day)[-1]
        except (KeyError, IndexError, TypeError):
            return None

    def get_new_id(self, year, month, day):
        """Return the first unused id for a given day: one more than
        the day's last id.  Return None if the day has no entries yet;
        choosing the first id for an empty day is left to the caller."""
        id = self.get_last_id(year, month, day)
        if id is None:
            return None
        return id + 1

    def get_id_pos(self, year, month, day, id):
        """Return 1-based position of ID in the ordered list of
        entries for YEAR, MONTH, DAY.  If ID is not found, return the
        position in that list it would hold if appended to the list (1
        if the list is empty; number_of_entries + 1 otherwise)."""
        try:
            day_keys = self._sorted_ids(year, month, day)
        except (KeyError, TypeError):
            day_keys = []
        try:
            return day_keys.index(id) + 1
        except ValueError:
            return len(day_keys) + 1

    def get_next_id(self, year, month, day, id):
        """Return the id of the entry (in the set of entries for YEAR,
        MONTH, DAY) which follows the entry for ID, or None if no
        entries follow the one for ID."""
        try:
            day_keys = self._sorted_ids(year, month, day)
            return day_keys[day_keys.index(id) + 1]
        except (KeyError, ValueError, IndexError, TypeError):
            return None

    def get_prev_id(self, year, month, day, id):
        """Return the id of the entry (in the set of entries for YEAR,
        MONTH, DAY) which precedes the entry for ID, or the last entry
        for that day if no entry for ID can be found."""
        try:
            day_keys = self._sorted_ids(year, month, day)
            idx = day_keys.index(id)
        except (KeyError, ValueError, TypeError):
            return self.get_last_id(year, month, day)
        # NOTE(review): when ID is the day's first entry, idx - 1 is -1,
        # which wraps around to the day's last id rather than yielding
        # None.  Long-standing behaviour, preserved here; confirm
        # whether callers rely on it.
        return day_keys[idx - 1]

    def get_author_name(self):
        """Return the diary-wide author name (None if never set)."""
        return self.author_name

    def get_author_global(self):
        """Return the diary-wide "global author" flag."""
        return self.author_global

    def set_author_name(self, name):
        """Set the diary-wide author name to NAME."""
        self.author_name = name

    def set_author_global(self, enable):
        """Set the diary-wide "global author" flag to ENABLE."""
        self.author_global = enable
class TKDataVersionException(Exception):
    """Raised when a diary data file declares a format version newer
    than the TK_DATA_VERSION this module supports."""
class TKDataParser(xml.sax.handler.ContentHandler):
    """SAX content handler for reading diary data files.

    As parsing proceeds, each <entry> element is turned into a TKEntry
    and stored in the TKEntries object given to the constructor.

    The diary data files currently use a single XML container tag,
    <diary>, which carries a 'version' attribute to indicate the
    format of the data it contains.  A missing version attribute
    indicates version 0 of the format.  Here are the supported
    versions and their formats:

    Version 0 (ThotKeeper 0.1): The original format.

       <diary [version="0"]>
         <entries>
           <entry year="YYYY" month="M" day="D">
             <author>CDATA</author>
             <subject>CDATA</subject>
             <text>CDATA</text>
           </entry>
           ...
         </entries>
       </diary>

    Version 1 (ThotKeeper 0.2): Adds an "id" attribute to entries for the
    purposes of distinguishing multiple entries for a given day.  Adds
    an optional <tags> tag to entries, which contains 1 or more <tag>
    tags.

       <diary version="1">
         <author global="True/False">CDATA</author>
         <entries>
           <entry year="YYYY" month="M" day="D" id="N">
             <author>CDATA</author>
             <subject>CDATA</subject>
             <tags>
               <tag>CDATA</tag>
               ...
             </tags>
             <text>CDATA</text>
           </entry>
           ...
         </entries>
       </diary>
    """

    TKJ_TAG_AUTHOR = 'author'
    TKJ_TAG_DIARY = 'diary'
    TKJ_TAG_ENTRIES = 'entries'
    TKJ_TAG_ENTRY = 'entry'
    TKJ_TAG_SUBJECT = 'subject'
    TKJ_TAG_TAG = 'tag'
    TKJ_TAG_TAGS = 'tags'
    TKJ_TAG_TEXT = 'text'

    # For each known element, the elements it may appear directly
    # inside.  An empty list marks an element that must be the root.
    _valid_parents = {
        TKJ_TAG_AUTHOR: [TKJ_TAG_DIARY, TKJ_TAG_ENTRY],
        TKJ_TAG_DIARY: [],
        TKJ_TAG_ENTRIES: [TKJ_TAG_DIARY],
        TKJ_TAG_ENTRY: [TKJ_TAG_ENTRIES],
        TKJ_TAG_SUBJECT: [TKJ_TAG_ENTRY],
        TKJ_TAG_TAG: [TKJ_TAG_TAGS],
        TKJ_TAG_TAGS: [TKJ_TAG_ENTRY],
        TKJ_TAG_TEXT: [TKJ_TAG_ENTRY],
    }

    def __init__(self, entries):
        """Prepare to parse a diary file into ENTRIES, an object
        offering store_entry(), set_author_name() and
        set_author_global()."""
        xml.sax.handler.ContentHandler.__init__(self)
        self.cur_entry = None    # attributes/fields of the open <entry>
        self.buffer = None       # text being collected, or None
        self.entries = entries
        self.tag_stack = []      # names of the currently open elements
        # If we are loading a file, we want there to be no global
        # author *unless* one is actually found in the file (but the
        # default should still be True for new files).
        self.entries.set_author_global(False)

    def _validate_tag(self, name, parent_tag):
        """Raise an Exception unless element NAME is known and allowed
        directly inside PARENT_TAG (None meaning NAME is the root)."""
        if name in self._valid_parents:
            valid_parents = self._valid_parents[name]
            if parent_tag is None and not valid_parents:
                return
            if parent_tag and parent_tag in valid_parents:
                return
        raise Exception("Unexpected tag (%s) in parent (%s)"
                        % (name, parent_tag or ""))

    def startElement(self, name, attrs):
        """Handle the opening of element NAME with attributes ATTRS."""
        # Validate ...
        if self.tag_stack:
            parent_tag = self.tag_stack[-1]
        else:
            parent_tag = None
        self._validate_tag(name, parent_tag)
        self.tag_stack.append(name)

        # ... and operate.
        if name == self.TKJ_TAG_DIARY:
            # A missing or non-numeric version means format version 0.
            try:
                version = int(attrs['version'])
            except (KeyError, ValueError, TypeError):
                version = 0
            if version > TK_DATA_VERSION:
                raise TKDataVersionException("Data version newer than program "
                                             "version; please upgrade.")
        elif name == self.TKJ_TAG_ENTRY:
            attr_names = attrs.keys()
            if not (('month' in attr_names)
                    and ('year' in attr_names)
                    and ('day' in attr_names)):
                raise Exception("Invalid XML file.")
            self.cur_entry = dict(attrs)
            # Version 0 files carry no entry ids; default to the first.
            if 'id' not in attr_names:
                self.cur_entry['id'] = '1'
        elif name == self.TKJ_TAG_AUTHOR:
            # Outside of an entry this is the diary-wide author, which
            # must say whether it applies globally.
            if not self.cur_entry:
                if 'global' not in attrs.keys():
                    raise Exception("Invalid XML file.")
                if attrs['global'].lower() == 'false':
                    self.entries.set_author_global(False)
                else:
                    self.entries.set_author_global(True)
            self.buffer = ''
        elif name == self.TKJ_TAG_TAGS:
            self.cur_entry['tags'] = []
        elif name == self.TKJ_TAG_SUBJECT \
                or name == self.TKJ_TAG_TAG \
                or name == self.TKJ_TAG_TEXT:
            self.buffer = ''

    def characters(self, ch):
        """Collect character data CH while a text-bearing element is
        open; ignore it otherwise."""
        if self.buffer is not None:
            self.buffer = self.buffer + ch

    def endElement(self, name):
        """Handle the closing of element NAME."""
        # Pop from the tag stack ...
        self.tag_stack.pop()

        # ... and operate.
        if name == self.TKJ_TAG_ENTRY:
            self.entries.store_entry(TKEntry(self.cur_entry.get('author', ''),
                                             self.cur_entry.get('subject', ''),
                                             self.cur_entry.get('text', ''),
                                             int(self.cur_entry['year']),
                                             int(self.cur_entry['month']),
                                             int(self.cur_entry['day']),
                                             int(self.cur_entry['id']),
                                             self.cur_entry.get('tags', [])))
            self.cur_entry = None
        elif name == self.TKJ_TAG_AUTHOR:
            if self.cur_entry:
                self.cur_entry['author'] = self.buffer
            else:
                self.entries.set_author_name(self.buffer)
            self.buffer = None
        elif name == self.TKJ_TAG_SUBJECT \
                or name == self.TKJ_TAG_TEXT:
            self.cur_entry[name] = self.buffer
            self.buffer = None
        elif name == self.TKJ_TAG_TAG:
            self.cur_entry['tags'].append(self.buffer)
            # Stop collecting, as for every other text-bearing element,
            # so whitespace between tags is not accumulated.
            self.buffer = None
def parse_data(datafile):
    """Build and return a TKEntries object from the XML in DATAFILE.

    When DATAFILE is empty or None nothing is parsed and the returned
    TKEntries object is simply empty."""
    parsed = TKEntries()
    if not datafile:
        return parsed
    xml.sax.parse(datafile, TKDataParser(parsed))
    return parsed
def unparse_data(datafile, entries):
    """Unparse a TKEntries object into an XML file, using an
    intermediate tempfile to try to reduce the chances of clobbering a
    previously-good datafile with a half-baked one.

    DATAFILE is the path to write.  ENTRIES is the TKEntries object to
    serialize; if it is None, an empty diary is written.  The output is
    UTF-8 encoded, with "\\n" line endings on every platform.  If
    anything fails before the finished tempfile has been moved into
    place, the tempfile is removed and the exception propagates."""
    # Imported explicitly: "import xml.sax" by itself does not guarantee
    # that the saxutils submodule has been loaded.
    from xml.sax.saxutils import escape

    if not entries:
        entries = TKEntries()

    fdesc, fname = tempfile.mkstemp()
    fp = os.fdopen(fdesc, 'wb')

    def _write(text):
        # Encode exactly once, at the I/O boundary.
        fp.write(text.encode('utf8'))

    def _write_entry(entry):
        year, month, day = entry.get_date()
        id = entry.get_id()
        tags = entry.get_tags()
        _write(' <entry year="%s" month="%s" day="%s" id="%s">\n'
               % (year, month, day, id))
        author = escape(entry.get_author())
        if author:
            _write(' <author>%s</author>\n' % (author))
        subject = escape(entry.get_subject())
        if subject:
            _write(' <subject>%s</subject>\n' % (subject))
        if len(tags):
            _write(' <tags>\n')
            for tag in tags:
                _write(' <tag>%s</tag>\n' % (escape(tag)))
            _write(' </tags>\n')
        _write(' <text>%s</text>\n' % (escape(entry.get_text())))
        _write(' </entry>\n')

    try:
        try:
            _write('<?xml version="1.0"?>\n'
                   '<diary version="%d">\n' % (TK_DATA_VERSION))
            if entries.get_author_name() is not None:
                if entries.get_author_global():
                    is_global = "true"
                else:
                    is_global = "false"
                # The name is escaped like every other text field so
                # that '&' or '<' in it cannot corrupt the XML.
                _write(' <author global="%s">%s</author>\n'
                       % (is_global, escape(entries.get_author_name())))
            _write(' <entries>\n')
            entries.enumerate_entries(_write_entry)
            _write(' </entries>\n</diary>\n')
        finally:
            # Always release the handle, so that the cleanup below can
            # remove the tempfile even after a failed write.
            fp.close()

        # We use shutil.move() instead of os.rename() because the former
        # can deal with moves across volumes while the latter cannot.
        shutil.move(fname, datafile)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)