Fixed collect_title() to handle a subtitle in parentheses; rearranged the conditions in collect_location() to handle clippings with both Page AND Location data
amandasystems · Aug 28, 2011 · 5bf4ca6 · 5bf4ca6
1 parent 2cf767f
commit 5bf4ca6
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 10 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+My Clippings.txt
+*.pyc
diff --git a/kindleclippingsparser.py b/kindleclippingsparser.py
@@ -25,11 +25,11 @@ def parse_note(self, note):
 
         def collect_title(n, i):
             title = unicode()
+            iterm = n[i:].split('\r\n')[0].rfind('(')
             for index, c in enumerate(n[i:]):
                 if c == ' ':
                     # if the next character's an '(', we've found our terminator.
-                    # TODO edge case: there's a sub-title starting with '('. Catch later.
-                    if note[index + 1] == '(':
+                    if index + 1 == iterm:
                         #print "got end of title."
                         return (title, index + 2 + i)
                 if note[index + 1] == '\n':
@@ -79,24 +79,28 @@ def collect_note_highlight(n, i):
                 exit
 
         def collect_location(n, i):
-            if n[i:][:6] == " Loc. ":
-                i += 6
-                loc = unicode()
+            loc = unicode()
+            if n[i:][:4] == ' on ':
+                i += 4
                 for index, c in enumerate(n[i:]):
                     if c == '|':
                         #print "got end-of-location"
-                        return loc.strip(), i + 1 + index
+                        loc = loc.strip() + ','
+                        i += 1 + index
+                        break
                     else:
                         loc += c
-            elif n[i:][:4] == ' on ':
-                i += 4
-                loc = unicode()
+                        
+            if n[i:][:6] == " Loc. ":
+                i += 6
                 for index, c in enumerate(n[i:]):
                     if c == '|':
                         #print "got end-of-location"
                         return loc.strip(), i + 1 + index
                     else:
-                        loc += c
+                        if c != ' ':
+                            loc += c
+
             else:
                 raise self.ParseError("parse error at %d. Expected ' Loc.'" % i)