In [128]:
# Load the raw glossary export, keeping one list entry per line of the file
# (each entry retains its trailing newline).
with open('./data/glossary_gei.txt', 'r') as glossary_file:
    lines = list(glossary_file)

In [129]:
# Clean up unnecessary formatting.
import re

# Non-greedy match for a single HTML tag, e.g. '<font size="3">'.
RE_TAG = re.compile('<.*?>')

# Flat list of cleaned entries, appended in file order. Blank entries can
# end up in here; this cell does not remove them.
glossary = []

for line in lines:
    if line.startswith(':'):
        # A term line is marked by a leading ':'. Drop only that marker:
        # replacing every ':' would also delete colons that belong to the
        # term itself.
        term = line[1:].replace('\n', '')
        glossary.append(term)
    elif line.strip() == '{<font size="3">':
        # A line holding nothing but this opening wrapper only declares
        # formatting and some HTML attributes that we don't need.
        continue
    else:
        # Every other line is treated as definition text. It may contain
        # HTML tags and wrapper residue ('{', ' } text/html',
        # '&nbsp;} text/html'), so remove those and trim the whitespace.
        definition = (
            re.sub(RE_TAG, '', line)
            .replace('{', '')
            .replace(' } text/html', '')
            .replace('&nbsp;} text/html', '')
            .strip()
        )
        glossary.append(definition)

In [130]:
# Inconsistent formatting in the source leaves some entries blank.
# Discard them to avoid errors when pairing terms with their definitions.
# Every entry is a string, so filtering on truthiness removes exactly the
# empty ones.
glossary = list(filter(None, glossary))

In [131]:
# Group each term with its definition.
# The flat list is expected to alternate term, definition, term, ...
# An odd length means some entry is missing, and slicing would then emit a
# trailing one-item "pair"; stop here with a clear message instead.
if len(glossary) % 2 != 0:
    raise ValueError(
        'Expected an even number of glossary items (term/definition pairs), '
        'got {}.'.format(len(glossary))
    )

# Each element is a two-item list: [term, definition].
mapped = [glossary[i:i + 2] for i in range(0, len(glossary), 2)]

In [132]:
# Create Anki card's model.
import random
import genanki

# Fixed model ID, picked once from the range genanki recommends
# (random.randrange(1 << 30, 1 << 31)) and hard-coded as its documentation
# advises, so that every run of the notebook exports the same model ID.
# Generating it with random.randrange(1 << 30 << 31) instead would draw from
# [0, 2**61) and change the ID on each execution.
model_id = 1483920571

# Two-field note type: the term is shown as the question and the definition
# is revealed on the back, below the repeated front side.
model = genanki.Model(
    model_id,
    "Glossary Card Model",
    fields=[
        {"name": "Question"},
        {"name": "Answer"},
    ],
    templates=[
        {
            "name": "Card 1",
            "qfmt": "{{Question}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Answer}}',
        },
    ],
)

In [133]:
# Build one note per glossary entry; the grouped items of each entry are
# passed as the note's field values, in order.
notes = [
    genanki.Note(model=model, fields=entry)
    for entry in mapped
]

In [134]:
# Create Anki deck and add notes.
# Fixed deck ID, picked once from the range genanki recommends
# (random.randrange(1 << 30, 1 << 31)) and hard-coded as its documentation
# advises, so that every run of the notebook exports the same deck ID.
# Generating it with random.randrange(1 << 30 << 31) instead would draw from
# [0, 2**61) and change the ID on each execution.
deck_id = 1736205948

deck = genanki.Deck(
    deck_id,
    'GEI',
)

for note in notes:
    deck.add_note(note)

In [135]:
# Wrap the deck in a package and save it as an Anki .apkg file in the
# current working directory.
package = genanki.Package(deck)
package.write_to_file('output.apkg')

In [136]:
# Generate .csv file just in case.
import csv

# Header row for the export.
fields = ['Term', 'Definition']

# newline='' is what the csv module requires for files handed to csv.writer:
# the writer emits its own line terminators, and without it platforms that
# translate '\n' on write end up with an extra '\r' on every row.
with open('output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(fields)
    # One CSV row per grouped glossary entry.
    writer.writerows(mapped)