/
make-text-cards.py
149 lines (124 loc) · 5.25 KB
/
make-text-cards.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
#
# Usage:
# python make-text-cards.py <deck> <source-name> <input-text-file> <output-csv-file>
import csv
import json
from typing import Dict, List, Optional
from dotenv import load_dotenv
from markdown import markdown
from openai import OpenAI
# Load environment variables. Create a file named `.env` in the same directory as this file
# and add the following line to it:
#
# OPENAI_API_KEY="your-api-key"
load_dotenv()
def generate_cards(input_texts: List[str], *, source: Optional[str] = None) -> List[Dict[str, str]]:
    """Translate Spanish text snippets and convert them into card rows.

    Each snippet is sent to the OpenAI chat completions API (one request per
    snippet) together with a short few-shot dialog, and the model is forced
    to answer through the ``add_data_to_card`` tool.

    Args:
        input_texts: The Spanish snippets, one per card. Phrases wrapped in
            ``[[...]]`` are the ones the model is asked to explain.
        source: Optional source name copied verbatim into every card.

    Returns:
        One dict per input snippet, in input order, with the fields:

        - Front: The original text rendered to HTML, with ``[[...]]``
          converted to bold.
        - Back: The translation rendered to HTML.
        - Notes: The model's explanations rendered to HTML, or ``None`` when
          the model returned no explanations.
        - Source: The ``source`` argument (may be ``None``).

    Raises:
        RuntimeError: If the model's reply does not contain exactly one tool
            call.
    """
    # Nothing to translate: return before constructing an API client, so an
    # empty input needs neither an API key nor network access.
    if not input_texts:
        return []

    # We need to build up a sample dialog between the "user" and the
    # "assistant", before asking our actual question. This "teaches" the model
    # how to respond, essentially by putting words into its mouth.
    system_message = """
You are a translator helping prepare Anki cards. You will be given short text in
Spanish, which will put onto the front of cards. Your job is to translate the
short text to English. Following the translation, you should briefly break break
down any phrases surrounded by [[ ]] and explain how they work. Do not include
any explanations if there are no [[ ]].
"""
    prompt_1 = "Tenía un alma de tigre."
    response_1 = {
        "translation": "He had a tiger's soul."
    }
    prompt_2 = "Ni [[siquiera]] hay una gramola."
    response_2 = {
        "translation": "There isn't even a jukebox.",
        "explanations": "- **siquiera:** The word \"siquiera\" in Spanish is used to add emphasis, typically in negative contexts, similar to the English word \"even.\" In this sentence, \"Ni siquiera\" translates directly to \"not even,\" emphasizing that there isn’t a jukebox at all.",
    }

    # Declare the function that the model should call.
    tools = [{
        "type": "function",
        "function": {
            "name": "add_data_to_card",
            "description": "Add the translation (and optionally explanations) to the current card.",
            "parameters": {
                "type": "object",
                "properties": {
                    "translation": {"type": "string"},
                    "explanations": {"type": "string"},
                },
                "required": ["translation"]
            }
        }
    }]

    # The few-shot prefix is identical for every request, so build it (and
    # serialize the sample responses) once instead of on every iteration.
    few_shot_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt_1},
        {"role": "function", "name": "add_data_to_card", "content": json.dumps(response_1)},
        {"role": "user", "content": prompt_2},
        {"role": "function", "name": "add_data_to_card", "content": json.dumps(response_2)},
    ]

    # Generate the translations using GPT-3.5.
    client = OpenAI()
    result = []
    for input_text in input_texts:
        print(f"Input: {input_text}")
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[*few_shot_messages, {"role": "user", "content": input_text}],
            tools=tools,
            tool_choice={"type": "function", "function": {"name": "add_data_to_card"}},
        )

        # Extract the tool call from the response. Validate explicitly rather
        # than with `assert`, which is stripped under `python -O` and would
        # fail with an unhelpful TypeError if `tool_calls` were None.
        tool_calls = response.choices[0].message.tool_calls
        if not tool_calls or len(tool_calls) != 1:
            found = len(tool_calls) if tool_calls else 0
            raise RuntimeError(
                f"Expected exactly one tool call for input {input_text!r}, got {found}"
            )
        args = json.loads(tool_calls[0].function.arguments)
        print(f"{json.dumps(args, indent=4)}")

        # Convert [[ and ]] to ** and ** so the marked phrases render as bold.
        front = input_text.replace("[[", "**").replace("]]", "**")

        # Render the explanations from Markdown to HTML, if there are any.
        if args.get("explanations"):
            explanations = markdown(args["explanations"])
        else:
            explanations = None

        result.append({
            "Front": markdown(front),
            "Back": markdown(args["translation"]),
            "Notes": explanations,
            "Source": source,
        })
    return result
def texts_to_csv(in_texts_path: str, out_csv_path: str, *, deck: str, source: Optional[str] = None) -> None:
    """Read text snippets separated by "\\n--\\n" and write the generated
    cards to a CSV file.

    Args:
        in_texts_path: Path of the UTF-8 text file holding the snippets.
        out_csv_path: Path of the CSV file to create (overwritten if present).
        deck: Deck name written into the ``#deck:`` file header.
        source: Optional source name passed through to ``generate_cards``.
    """
    # Read explicitly as UTF-8 instead of the platform default encoding, so
    # accented Spanish text decodes the same way everywhere. "utf-8-sig" also
    # tolerates a leading byte-order mark.
    with open(in_texts_path, "r", encoding="utf-8-sig") as f:
        raw_text = f.read()

    # Drop empty snippets (empty file, doubled or trailing separators) so that
    # no blank card is sent to the model.
    snippets = (snippet.strip() for snippet in raw_text.strip().split("\n--\n"))
    input_texts = [snippet for snippet in snippets if snippet]

    cards = generate_cards(input_texts, source=source)

    # Write CSV correctly using a library. Note that Anki imports work much
    # better if we provide a header.
    with open(out_csv_path, "w", newline="", encoding="utf-8") as f:
        # The header deliberately ends with "#columns:" and no newline: the
        # CSV header row written next completes that line with the field names.
        f.write(f"""#separator:Semicolon
#html:true
#notetype:Text Snippet
#deck:{deck}
#columns:""")
        writer = csv.DictWriter(f, fieldnames=["Front", "Back", "Notes", "Source"], delimiter=";")
        writer.writeheader()
        writer.writerows(cards)
# Script entry point: expects exactly four positional arguments
# (<deck> <source-name> <input-text-file> <output-csv-file>).
if __name__ == "__main__":
    import sys

    if len(sys.argv) == 5:
        # Unpack the positional arguments in the order given in the usage line.
        deck, source, in_texts_path, out_csv_path = sys.argv[1:]
        texts_to_csv(in_texts_path, out_csv_path, deck=deck, source=source)
    else:
        # Wrong argument count: show the usage line and exit with status 1.
        print(f"Usage: {sys.argv[0]} <deck> <source-name> <input-text-file> <output-csv-file>")
        sys.exit(1)