/
weread-to-readwise.py
executable file
·124 lines (106 loc) · 3.93 KB
/
weread-to-readwise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
import utils
import fileinput
import json
def finalize_article(result, article):
    """Trim a finished entry and store it in ``result``.

    Args:
        result: List of entry dicts collected so far; modified in place.
        article: Entry dict with at least a ``"text"`` key and optionally a
            ``"note"`` key, or ``None`` when there is nothing pending (in
            which case the call is a no-op).

    The entry's ``"text"`` (and ``"note"``, if present) are stripped of
    surrounding whitespace in place. If the most recent entry in ``result``
    has exactly the same text, it is replaced by ``article`` instead of
    appending a second entry.
    """
    if article is None:
        return

    article["text"] = article["text"].strip()
    if "note" in article:
        article["note"] = article["note"].strip()

    # Only the immediately preceding entry is considered for de-duplication.
    repeats_last = bool(result) and result[-1]["text"] == article["text"]
    if repeats_last:
        result[-1] = article
        return
    result.append(article)
def collect_highlights(lines):
    """Turn the lines of an exported notes file into a list of entry dicts.

    Args:
        lines: Iterable of text lines; each one is stripped before use.

    Returns:
        A list of dicts. Every dict carries the book-level keys ``title``,
        ``author``, ``source_url``, ``source_type`` (``"Weread"``) and
        ``category`` (``"books"``) plus ``text`` and, for headings and
        notes, ``note``.

    The parser is a small state machine with these modes:

    * ``head`` - reads the ``《title》`` line, then the author line, then an
      optional ``https://`` / ``<https://...>`` URL; a blank line after the
      author has been read switches to ``body``.
    * ``body`` - a ``◆ `` line starts a highlight (or a note, when the line
      ends with ``发表想法``); any other non-blank line becomes a heading
      entry whose ``note`` is the ``.h1``/``.h2``/``.h3`` marker (``.h1``
      when the line has no marker).
    * ``note`` - accumulates note lines until a line starting with ``原文:``
      supplies the highlighted text.
    * ``highlight`` / ``highlight-ending`` - accumulate text lines; one
      blank line followed by more text continues the highlight, two blank
      lines in a row end it.

    Finished entries are stored through ``finalize_article``.
    """
    bullet = "◆ "
    heading_marks = (".h1 ", ".h2 ", ".h3 ")

    # Book-level fields, filled in while in "head" mode and copied into
    # every entry created afterwards.
    book = {
        "title": None,
        "author": None,
        "source_url": None,
        "source_type": "Weread",
        "category": "books",
    }
    result = []
    current = None
    mode = "head"

    for raw in lines:
        line = raw.strip()
        is_bullet = line.startswith(bullet)
        is_heading = line.startswith(heading_marks)

        # A bullet or heading marker forces "body" mode no matter which
        # mode was active; the body section below then finalizes any entry
        # that was still pending.
        if is_bullet or is_heading:
            mode = "body"

        if mode == "head":
            if line.startswith("《") and line.endswith("》"):
                book["title"] = line[1:-1]
            elif book["author"] is None:
                # The first line that is not a title is taken as the author.
                book["author"] = line
            elif book["source_url"] is None and line.startswith(
                ("https://", "<https://")
            ):
                # Drop the surrounding angle brackets when present.
                book["source_url"] = line[1:-1] if line.startswith("<") else line
            elif not line:
                mode = "body"
            continue

        if mode == "body":
            if is_bullet:
                finalize_article(result, current)
                if line.endswith("发表想法"):
                    # Note entry: the text arrives later on an "原文:" line.
                    current = dict(book, text="", note="")
                    mode = "note"
                else:
                    current = dict(book, text=line[1:].lstrip())
                    mode = "highlight"
            elif line:
                finalize_article(result, current)
                if is_heading:
                    heading = dict(book, text=line[4:], note=line[:3])
                else:
                    heading = dict(book, text=line, note=".h1")
                # Headings are single-line, so they are stored immediately.
                finalize_article(result, heading)
                current = None
            continue

        if mode == "note":
            if line.startswith("原文:"):
                current["text"] = line[3:]
                mode = "highlight"
            else:
                current["note"] += "\n" + line
            continue

        if mode == "highlight":
            if line:
                current["text"] += "\n" + line
            else:
                mode = "highlight-ending"
            continue

        if mode == "highlight-ending":
            if line:
                # NOTE(review): the leading "\n" is removed again by the
                # strip() in finalize_article, so the blank line that was
                # just skipped is not kept in the text - confirm whether a
                # paragraph break was intended here.
                current["text"] = "\n" + current["text"] + "\n" + line
                mode = "highlight"
            else:
                finalize_article(result, current)
                current = None
                mode = "body"

    # Flush whatever entry was still open when the input ended.
    finalize_article(result, current)
    return result
def main(args):
    """Parse the exported notes named in ``args`` and hand them on.

    Args:
        args: Argument vector shaped like ``sys.argv``: index 0 is the
            program name. If the first real argument is ``-n`` the run is a
            dry run. All remaining entries are passed to
            ``fileinput.input`` as the files to read (per the ``fileinput``
            documentation an empty list means standard input).

    In a dry run the parsed entries are printed as indented JSON and nothing
    else happens. Otherwise they are passed to ``utils.create_highlights``
    (defined outside this file).
    """
    # Look only at the vector we were given. Reading the global sys.argv
    # here would fail when main() is called from another module, because
    # ``sys`` is imported only inside the ``__main__`` guard.
    dry_run = len(args) > 1 and args[1] == "-n"
    input_args = args[2:] if dry_run else args[1:]

    # The context manager closes the input stream once parsing is done.
    with fileinput.input(input_args) as lines:
        highlights = collect_highlights(lines)

    if dry_run:
        print(json.dumps(highlights, indent=2, ensure_ascii=False))
        return
    utils.create_highlights(highlights)
# Script entry point: run main() with the process's full argument vector.
if __name__ == "__main__":
    # Imported here rather than with the other imports at the top of the
    # file, so the module-level name ``sys`` exists only when the file is
    # executed as a script.
    import sys
    main(sys.argv)