/
pdf-expert-to-readwise.py
executable file
·94 lines (74 loc) · 2.73 KB
/
pdf-expert-to-readwise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
import utils
import fileinput
import json
def finalize_article(result, article):
    """Trim a finished highlight record and append it to ``result``.

    Args:
        result: List collecting the finished records; mutated in place.
        article: Highlight dict holding a "text" entry and optionally a
            "note" entry, or ``None`` when there is nothing to flush.

    Returns:
        None. ``article`` is modified in place before being appended.
    """
    # Nothing pending (e.g. before the first highlight): leave result alone.
    if article is None:
        return

    trimmed_text = article["text"].strip()
    article["text"] = trimmed_text

    # Only headings and annotated highlights carry a note.
    if "note" in article:
        trimmed_note = article["note"].strip()
        article["note"] = trimmed_note

    result.append(article)
def collect_highlights(lines):
    """Parse a Markdown annotation summary into a list of highlight dicts.

    Recognised input lines (each line is whitespace-stripped first):

    * line 1: ``# Annotation Summary of <author> - <title>.pdf``; when the
      ``" - "`` separator is missing the whole remainder is used as the
      title and the author stays ``None``
    * line 2, if it starts with ``<``: the source URL in angle brackets
    * ``#### <heading>``: emitted immediately as a record with note ``.h1``
    * ``*Highlight [<page>]:* <text>``: starts a new record whose text is
      ``"<text> (Page <page>)"``
    * ``*and Note [...]:* <note>``: attaches a note to the current record;
      a note of ``.h1``/``.h2``/``.h3`` also drops the page suffix
    * any other line: appended to the current highlight or note text

    Args:
        lines: Iterable of text lines, such as an open file or a
            ``fileinput`` stream.

    Returns:
        A list of dicts in input order. Each dict carries "title", "author",
        "source_url", "source_type", "category" and "text"; "note" is
        present on headings and on highlights that have a note.

    Raises:
        ValueError: If a note line appears with no highlight to attach to.
    """
    header_prefix = "# Annotation Summary of "
    heading_markers = (".h1", ".h2", ".h3")

    # Template copied for every record; the header lines fill in the blanks.
    article = {
        "title": None,
        "author": None,
        "source_url": None,
        "source_type": "Weread",
        "category": "books",
    }
    result = []
    pending_article = None
    state = "body"

    for lineno, line in enumerate(lines, start=1):
        line = line.strip()
        if lineno == 1:
            summary = line[len(header_prefix):]
            author, separator, title = summary.partition(" - ")
            if not separator:
                # No "<author> - <title>" split available: keep everything
                # as the title instead of failing on tuple unpacking.
                author, title = None, summary
            article["author"] = author
            article["title"] = title.rsplit(".pdf", maxsplit=1)[0]
        elif lineno == 2 and line.startswith("<"):
            # Drop the surrounding angle brackets.
            article["source_url"] = line[1:-1]
        elif line.startswith("#### "):
            state = "body"
            finalize_article(result, pending_article)
            heading = article.copy()
            heading["text"] = line[5:]
            heading["note"] = ".h1"
            finalize_article(result, heading)
            pending_article = None
        elif line.startswith("*Highlight ["):
            state = "highlight"
            finalize_article(result, pending_article)
            pending_article = article.copy()
            page = line.split(" [", 1)[1].split("]", 1)[0]
            # The line was stripped above, so an empty highlight ends in
            # "]:*" with no trailing space; partition copes with that where
            # split("]:* ")[1] raised IndexError.
            text = line.partition("]:*")[2].lstrip()
            pending_article["text"] = f"{text} (Page {page})"
        elif line.startswith("*and Note ["):
            if pending_article is None:
                raise ValueError(
                    f"line {lineno}: note without a preceding highlight"
                )
            state = "note"
            text = line.partition("]:*")[2].lstrip()
            pending_article["note"] = text
            if text in heading_markers:
                # Heading-style notes do not keep the page reference.
                pending_article["text"] = pending_article["text"].rsplit(
                    " (Page", maxsplit=1
                )[0]
        elif state == "highlight":
            pending_article["text"] = pending_article["text"] + "\n" + line
        elif state == "note":
            pending_article["note"] = pending_article["note"] + "\n" + line

    # Flush whatever record was still open when the input ended.
    finalize_article(result, pending_article)
    return result
def main(args):
    """Convert annotation summaries to highlights and print or upload them.

    Args:
        args: argv-style list whose first element is the program name. An
            optional ``-n`` as the second element selects a dry run; the
            remaining elements are input paths handed to
            ``fileinput.input`` (which reads stdin when the list is empty).

    Returns:
        None. In a dry run the parsed highlights are printed as JSON;
        otherwise they are passed to ``utils.create_highlights``
        (project helper, presumably the Readwise upload; confirm in utils).
    """
    # Look at the ``args`` we were given, not the global ``sys.argv``:
    # ``sys`` is only imported under the ``__main__`` guard, so the old check
    # broke when main() was called from another module.
    dry_run = len(args) > 1 and args[1] == "-n"
    input_args = args[2:] if dry_run else args[1:]

    # Context manager closes the underlying file once parsing is done.
    with fileinput.input(input_args) as source:
        highlights = collect_highlights(source)

    if dry_run:
        print(json.dumps(highlights, indent=2, ensure_ascii=False))
        return
    utils.create_highlights(highlights)
# Script entry point: hand the full command line (program name included)
# to main().
if __name__ == "__main__":
    import sys

    main(sys.argv)