diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e20471c --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +My Clippings.txt +*.pyc \ No newline at end of file diff --git a/kindleclippingsparser.py b/kindleclippingsparser.py index 0130727..19f9124 100755 --- a/kindleclippingsparser.py +++ b/kindleclippingsparser.py @@ -25,11 +25,11 @@ def parse_note(self, note): def collect_title(n, i): title = unicode() + iterm = n[i:].split('\r\n')[0].rfind('(') for index, c in enumerate(n[i:]): if c == ' ': # if the next character's an '(', we've found our terminator. - # TODO edge case: there's a sub-title starting with '('. Catch later. - if note[index + 1] == '(': + if index + 1 == iterm: #print "got end of title." return (title, index + 2 + i) if note[index + 1] == '\n': @@ -79,24 +79,28 @@ def collect_note_highlight(n, i): exit def collect_location(n, i): - if n[i:][:6] == " Loc. ": - i += 6 - loc = unicode() + loc = unicode() + if n[i:][:4] == ' on ': + i += 4 for index, c in enumerate(n[i:]): if c == '|': #print "got end-of-location" - return loc.strip(), i + 1 + index + loc = loc.strip() + ',' + i += 1 + index + break else: loc += c - elif n[i:][:4] == ' on ': - i += 4 - loc = unicode() + + if n[i:][:6] == " Loc. ": + i += 6 for index, c in enumerate(n[i:]): if c == '|': #print "got end-of-location" return loc.strip(), i + 1 + index else: - loc += c + if c != ' ': + loc += c + else: raise self.ParseError("parse error at %d. Expected ' Loc.'" % i)