In [15]:
import pandas as pd
import json
import re
import random

In [30]:
def generate_multiple_options(options: list, answer, amount=4):
    """Build a shuffled multiple-choice option list that contains ``answer``.

    Args:
        options: Pool of candidate values to draw distractors from. It may or
            may not contain ``answer`` and may contain duplicates.
        answer: The correct value; it always appears exactly once in the result.
        amount: Total number of options wanted, counting the answer.

    Returns:
        A list of ``amount`` distinct values in random order. The list is
        shorter only when the pool does not hold ``amount - 1`` distinct values
        other than ``answer``.
    """
    wanted = max(amount - 1, 0)
    # random.sample needs an indexable sequence; accept any iterable.
    pool = options if isinstance(options, (list, tuple)) else list(options)

    distractors = []
    seen = {answer}  # never offer the answer, or the same distractor, twice

    def take(candidates):
        for item in candidates:
            if len(distractors) >= wanted:
                return
            if item not in seen:
                seen.add(item)
                distractors.append(item)

    # Fast path: draw without replacement. One extra draw covers the case where
    # the answer itself is picked, which suffices whenever the pool is unique.
    take(random.sample(pool, min(len(pool), wanted + 1)))
    if len(distractors) < wanted:
        # Duplicates in the pool used up the draws: scan the whole pool in
        # random order so we only come up short if it is truly exhausted.
        take(random.sample(pool, len(pool)))

    result = [answer, *distractors]
    random.shuffle(result)  # do not rely on set ordering to hide the answer
    return result

In [22]:
def generate_quiz_records(file_path):
    """Turn a vocabulary CSV into a list of multiple-choice quiz records.

    The CSV is read with pandas and must provide the columns ``Word``,
    ``WordDef`` and ``WordPhonetic``. A leading ``【...】`` tag is stripped from
    every definition before use.

    For each row a "pick the definition" record is produced. When the phonetic
    reading differs from the word itself, a "pick the reading" record for the
    same word follows immediately.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        A list of dicts with the keys ``question``, ``options`` and ``answer``.
    """
    leading_tag = re.compile(r"^【[^】]+】")
    table = pd.read_csv(file_path)
    table.WordDef = table.WordDef.apply(lambda text: leading_tag.sub("", text))

    # Readings that contain Latin letters are kept out of the distractor pool.
    has_latin = table.WordPhonetic.str.contains(r"[A-Za-z]")
    reading_pool = list(table[~has_latin]["WordPhonetic"].unique())
    definition_pool = list(table["WordDef"].unique())

    def build_record(word, pool, correct):
        # One quiz card: the word, its candidate options and the right one.
        return {
            "question": word,
            "options": generate_multiple_options(pool, correct),
            "answer": correct,
        }

    quiz = []
    for row_label in table.index:
        row = table.loc[row_label].to_dict()
        word = row["Word"]
        quiz.append(build_record(word, definition_pool, row["WordDef"]))
        # A reading card is pointless when the reading is the word itself.
        if row["WordPhonetic"] != word:
            quiz.append(build_record(word, reading_pool, row["WordPhonetic"]))
    return quiz

In [None]:
# For levels 1 through 5: build the quiz from "N<level>词汇.csv" and store it
# as UTF-8 JSON in "N<level>_words.json" (non-ASCII characters kept as-is).
for level in range(1, 6):
    records = generate_quiz_records(f"N{level}词汇.csv")

    with open(f"N{level}_words.json", "w", encoding="utf-8") as out_file:
        serialized = json.dumps(records, ensure_ascii=False)
        out_file.write(serialized)

In [31]:
# Build the quiz for the 10000-word list, preview the first ten records, then
# store everything as UTF-8 JSON (non-ASCII characters kept as-is).
records = generate_quiz_records("日语能力考试10000词高效速记.csv")
print(records[:10])
with open("words_10000.json", "w", encoding="utf-8") as out_file:
    serialized = json.dumps(records, ensure_ascii=False)
    out_file.write(serialized)

[{'question': '間', 'options': ['间，间隔', '抗体', '（食欲等）不振；（成绩）不好；商业萧条', '能干的人，干将；赠送方'], 'answer': '间，间隔'}, {'question': '間', 'options': ['あいだ', 'でんたつ', 'ちょうりし', 'なつかしむ'], 'answer': 'あいだ'}, {'question': '青', 'options': ['收成', '（豆子、果实等）连续长开状', '蓝色；青色；绿色；不成熟', '市议会'], 'answer': '蓝色；青色；绿色；不成熟'}, {'question': '青', 'options': ['にち', 'あお', 'きず', 'さわがしい'], 'answer': 'あお'}, {'question': '赤', 'options': ['红色；完全', '搬运；前往', '坚实', '使流动，冲；传播'], 'answer': '红色；完全'}, {'question': '赤', 'options': ['おかず', 'そそくさ', 'あか', 'まいにち'], 'answer': 'あか'}, {'question': '赤ちゃん', 'options': ['婴儿', '时髦，时尚', '用手指', '归还（大的东西）；（香港、澳门的）回归'], 'answer': '婴儿'}, {'question': '赤ちゃん', 'options': ['みおさめ', 'ぬける', 'あかちゃん', 'ちぢまる'], 'answer': 'あかちゃん'}, {'question': '赤ん坊', 'options': ['婴儿', '斗笠', '外面', '燃烧'], 'answer': '婴儿'}, {'question': '赤ん坊', 'options': ['つまらない', 'はいご', 'あかんぼう', 'ようせき'], 'answer': 'あかんぼう'}]


In [40]:
import pandas as pd

# Turn the in-memory `records` list into a DataFrame and save it as CSV
# (without the index column).
# NOTE(review): the following cell reads `df.hiragana` / `df.katakana`, but the
# only `records` built in the visible cells are quiz dicts with the keys
# question/options/answer. This cell presumably ran against a different
# `records` (the execution counts are out of order) — confirm before
# re-running, because it overwrites japanese_kana.csv.
df = pd.DataFrame(records)
df.to_csv("japanese_kana.csv", index=False)

In [43]:
# NOTE(review): this cell needs `df` to expose `hiragana` and `katakana`
# columns; confirm where that frame is loaded, since the preceding cell builds
# `df` from `records`.

# Option pools: every distinct value found in each kana column.
all_hiragana = list(df.hiragana.unique())
all_katakana = list(df.katakana.unique())

# (question column, answer column, pool the options are drawn from).
# For every row the hiragana -> katakana card is emitted before its reverse.
directions = (
    ("hiragana", "katakana", all_katakana),
    ("katakana", "hiragana", all_hiragana),
)

records = []
for row_label in df.index:
    detail = df.loc[row_label].to_dict()
    for question_col, answer_col, pool in directions:
        answer = detail[answer_col]
        records.append(
            {
                "question": detail[question_col],
                "options": generate_multiple_options(pool, answer),
                "answer": answer,
            }
        )

In [44]:
# Preview the first ten kana quiz records, then store the full list as UTF-8
# JSON (non-ASCII characters kept as-is).
print(records[:10])
with open("kana_quiz.json", "w", encoding="utf-8") as out_file:
    serialized = json.dumps(records, ensure_ascii=False)
    out_file.write(serialized)

[{'question': 'あ', 'options': ['ミュ', 'ア', 'レ', 'ソ'], 'answer': 'ア'}, {'question': 'ア', 'options': ['を', 'あ', 'びゃ', 'みゅ'], 'answer': 'あ'}, {'question': 'い', 'options': ['ホ', 'ゲ', 'シャ', 'イ'], 'answer': 'イ'}, {'question': 'イ', 'options': ['り', 'しょ', 'む', 'い'], 'answer': 'い'}, {'question': 'う', 'options': ['ウ', 'ヂュ', 'ミョ', 'ビュ'], 'answer': 'ウ'}, {'question': 'ウ', 'options': ['う', 'ひゅ', 'せ', 'や'], 'answer': 'う'}, {'question': 'え', 'options': ['ム', 'エ', 'ピャ', 'ヒョ'], 'answer': 'エ'}, {'question': 'エ', 'options': ['ぱ', 'ちょ', 'え', 'じょ'], 'answer': 'え'}, {'question': 'お', 'options': ['ネ', 'ミョ', 'オ', 'シャ'], 'answer': 'オ'}, {'question': 'オ', 'options': ['ぜ', 'お', 'わ', 'す'], 'answer': 'お'}]
