In [1]:
import os
import re

import pandas as pd

from anki.collection import Collection
from jinja2 import Environment, FileSystemLoader
from IPython.display import display, HTML
from pprint import pprint

In [2]:
# Open the Anki collection whose path is supplied through the
# PATH_TO_COLLECTION environment variable.
path_to_collection = os.environ.get("PATH_TO_COLLECTION")
if not path_to_collection:
    # Fail early with an actionable message instead of handing None (or an
    # empty string) to Collection.
    raise RuntimeError("Environment variable PATH_TO_COLLECTION is not set")
col = Collection(path_to_collection)

In [3]:
# Jinja setup: templates are loaded from the '../templates' directory, and the
# furigana template is fetched once so later cells can reuse it.
loader = FileSystemLoader(searchpath='../templates')
env = Environment(loader=loader)
furigana_template = env.get_template(name='html-furigana.jinja')

In [4]:
def make_note_from_row(row, note_model, note_array):
    """Build an Anki note from one data row and collect it when it is new.

    Every entry of ``row`` except ``'is_new'`` is written, as stripped text,
    to the note field with the same name. Missing values (NaN / None / NA)
    are written as an empty string rather than the literal text "nan" or
    "None".

    Args:
        row: Mapping-like row (for example a ``pandas.Series``) keyed by note
            field name. It is modified in place: ``row['is_new']`` is set.
        note_model: Note type handed to ``col.new_note``.
        note_array: List that receives the note when it is new (appended in
            place).

    Returns:
        The same ``row`` object, with ``row['is_new']`` set to ``True`` when
        ``note.duplicate_or_empty()`` is falsy and ``False`` otherwise.
    """
    note = col.new_note(note_model)

    for field_name, value in row.items():
        if field_name == 'is_new':
            continue

        # str(nan) would store the text "nan" in the note, so missing scalars
        # become an empty field instead.
        is_missing = pd.api.types.is_scalar(value) and pd.isna(value)
        note[field_name] = '' if is_missing else str(value).strip()

    row['is_new'] = not bool(note.duplicate_or_empty())
    if row['is_new']:
        note_array.append(note)

    return row

In [5]:
def plain_text_to_html_kanji(match, target_word_pattern, template):
    """Render one regex match through ``template``.

    Args:
        match: ``re.Match`` object; ``match.groups()`` is passed to the
            template as ``group``.
        target_word_pattern: Regular expression tested against the start of
            the matched text to decide whether it belongs to the target word.
            An empty (or ``None``) pattern means "there is no target word".
        template: Object exposing ``render(group=..., is_target_word=...)``.

    Returns:
        Whatever ``template.render`` returns for this match.
    """
    # An empty pattern matches every string, which would flag every match as
    # the target word; treat a missing pattern as "never the target" instead.
    is_target_word = (
        bool(target_word_pattern)
        and re.match(target_word_pattern, match.group(0)) is not None
    )
    return template.render(group=match.groups(), is_target_word=is_target_word)

In [6]:
def model_furigana_series_from_template(
        row,
        template,
        kanji_text_pattern,
        sentence_col_name,
        base_col_name,
        output_col_name):
    """Fill ``row[output_col_name]`` with the templated version of a sentence.

    Every match of ``kanji_text_pattern`` in the sample sentence is replaced
    by the output of ``plain_text_to_html_kanji``; line breaks and tabs are
    then removed from the result.

    Args:
        row: Mapping-like row (for example a ``pandas.Series``); modified in
            place.
        template: Template object forwarded to ``plain_text_to_html_kanji``.
        kanji_text_pattern: Regular expression whose matches are replaced.
        sentence_col_name: Key of the sample sentence in ``row``.
        base_col_name: Key of the target word in ``row``.
        output_col_name: Key under which the generated string is stored.

    Returns:
        The same ``row``. It is returned untouched (no output key written)
        when the sample sentence is missing.
    """
    # If the row does not have a sample sentence yet, skip it.
    if pd.isna(row[sentence_col_name]):
        return row

    # Strip blank space from target word and sample sentence. A missing
    # target word is treated as empty instead of failing on ``.strip()``.
    sentence_string = row[sentence_col_name].strip()
    raw_target_word = row[base_col_name]
    target_word = '' if pd.isna(raw_target_word) else str(raw_target_word).strip()

    # Collect the kanji runs of the target word. The iteration mark 々 is
    # included so words such as 色々 can still be recognised as the target.
    kanji_pattern = r'[一-龯々]+'
    target_kanjis = re.findall(kanji_pattern, target_word)

    if target_kanjis:
        # Joined outside the f-string: nesting the same quote character inside
        # an f-string only parses on Python 3.12+.
        target_chars = ''.join(target_kanjis)
        target_word_pattern = rf'([{target_chars}]+)[（\(]([ぁ-ん]+?)[）\)]([ぁ-ん]*)'
    else:
        # Kana-only target word: use a pattern that can never match. An empty
        # pattern would match everything and mark every kanji as the target.
        target_word_pattern = r'(?!)'

    # Replace all kanji runs using the html template.
    sentence_string = re.sub(
        kanji_text_pattern,
        lambda match: plain_text_to_html_kanji(match, target_word_pattern, template),
        sentence_string)

    row[output_col_name] = re.sub(r'[\n\t]', '', sentence_string)
    return row

In [7]:
# Get kotoba deck references.
# os.environ[...] raises a KeyError naming the missing variable, instead of
# the opaque failure produced by int(None).
kotoba_deck_id = int(os.environ["KOTOBA_DECK_ID"])
kotoba_model_id = os.environ["KOTOBA_MODEL_ID"]
kotoba_model = col.models.get(kotoba_model_id)
if kotoba_model is None:
    # Guard against an unknown id so the error names the id instead of
    # surfacing later as "'NoneType' object is not subscriptable".
    raise LookupError(f"No note type found for KOTOBA_MODEL_ID={kotoba_model_id!r}")

# Fields that are generated by this notebook and therefore not expected in
# the clipboard data.
kotoba_extra_fields = ['generated-furigana']
kotoba_cols = [
    field['name']
    for field in kotoba_model['flds']
    if field['name'] not in kotoba_extra_fields]

base_col = 'word'
sentence_col = 'in-context'
output_col = 'generated-furigana'

# Read clipboard data: a '|'-separated table whose first two rows are skipped
# and whose column 0 is ignored.
# NOTE(review): presumably a markdown table (header + separator rows, empty
# column before the leading '|') -- confirm against the clipboard source.
kotoba_data = pd.read_clipboard(
    sep='|',
    usecols=list(range(1, len(kotoba_cols) + 1)),
    skiprows=2,
    names=kotoba_cols)

# Hard fix to match the prompt output with the desired input
kotoba_data[['word','reading']] = kotoba_data[['reading','word']]

# Setting up words that use kana alone: when 'word' is blank, move 'reading'
# into 'word' and clear 'reading'.
# str.isspace() is False for '' and NaN for missing cells (a NaN mask cannot be
# used with .loc), so blank is detected as "empty after stripping" instead.
simple_word_locator = (
    kotoba_data.loc[:, 'word']
        .fillna('')
        .astype(str)
        .str.strip()
        .eq(''))
kotoba_data.loc[simple_word_locator, 'word'] = kotoba_data.loc[simple_word_locator, 'reading']
kotoba_data.loc[simple_word_locator, 'reading'] = ''

# Creating and formatting new anki notes
new_notes_in_clipboard_data = []
kanji_text_pattern = r'([一-龯々]+)[（\(]([ぁ-ん]+)[）\)]*([ぁ-ん]*)'

kotoba_data = (
    kotoba_data
        .apply(
            model_furigana_series_from_template,
            axis='columns',
            template=furigana_template,
            kanji_text_pattern= kanji_text_pattern,
            sentence_col_name=sentence_col,
            base_col_name=base_col,
            output_col_name=output_col)
        .apply(
            make_note_from_row,
            axis='columns',
            note_model=kotoba_model,
            note_array=new_notes_in_clipboard_data)
        )


if not kotoba_data.empty:

    # Force a real boolean mask: `~` on an object-dtype column of Python bools
    # is a bitwise NOT on ints and would not give a usable mask.
    is_new_mask = kotoba_data['is_new'].astype(bool)

    print('\nNew notes')
    display(kotoba_data.loc[is_new_mask])

    print('\nDuplicate or empty notes')
    display(kotoba_data.loc[~is_new_mask])

    # Preview the first row: raw sentence, rendered furigana, translation.
    template = env.get_template('anki-css-styles.jinja')

    display(kotoba_data[sentence_col].iloc[0])

    sentence_output = template.render(
        input=kotoba_data[output_col].iloc[0]
    )

    display(HTML(sentence_output))

    display(kotoba_data['context-translation'].iloc[0])
else:

    print('No dataframe found in clipboard')



New notes


Unnamed: 0,word,reading,grammatical-function,functional,notes,in-context,context-translation,generated-furigana,is_new



Duplicate or empty notes


Unnamed: 0,word,reading,grammatical-function,functional,notes,in-context,context-translation,generated-furigana,is_new
0,遊びます,あそびます,Verb,"jugar, divertirse ...",,週末(しゅうまつ)には友達(ともだち)と公園(こうえん)でフリスビーをして遊(あそ)びます。,"On weekends, I play frisbee with my friends a...","<ruby class=""kanji"">週末 <rp>(</r...",False
1,泳ぎます,およぎます,Verb,nadar ...,,夏休(なつやす)みの間(あいだ)、毎日(まいにち)プールで泳(およ)いで、体力(たいりょく...,"During summer vacation, I swam every day in t...","<ruby class=""kanji"">夏休 <rp>(</r...",False
2,迎えます,むかえます,Verb,"acoger, recibir (a gente) ...",,空港(くうこう)で出張(しゅっちょう)から帰(かえ)ってきた父(ちち)を迎(むか)えに行(...,I went to the airport to pick up my father re...,"<ruby class=""kanji"">空港 <rp>(</r...",False
3,疲れます,つかれます,Verb,cansarse ...,,長時間(ちょうじかん)の会議(かいぎ)が終(お)わって、皆(みんな)とても疲(つか)れまし...,"After the long meeting, everyone was very tir...","<ruby class=""kanji"">長時間 <rp>(</...",False
4,出します,だします,Verb,enviar [una carta] ...,[手紙(てがみ)を～],昨日(きのう)、先生(せんせい)に感謝(かんしゃ)の気持(きも)ちを込(こ)めて手紙(てが...,"Yesterday, I sent a letter to my teacher expr...","<ruby class=""kanji"">昨日 <rp>(</r...",False
5,入ります,はいります,Verb,entrar [en una cafetería] ...,[喫茶店(きっさてん)に～],雨(あめ)が降(ふ)り始(はじ)めたので、急(きゅう)いで喫茶店(きっさてん)に入(はい)...,"When it started raining, I quickly went into ...","<ruby class=""kanji"">雨 <rp>(</rp...",False
6,出ます,でます,Verb,salir [de una cafetería] ...,[喫茶店(きっさてん)を～],用事(ようじ)があって、コーヒーを飲(の)み終(お)わった後(あと)すぐに喫茶店(きっさて...,"I had an errand, so I left the coffee shop ri...","<ruby class=""kanji"">用事 <rp>(</r...",False
7,結婚します,けっこんします,Verb,casarse ...,,彼女(かのじょ)とは来年(らいねん)の春(はる)に結婚(けっこん)する予定(よてい)です。...,I plan to marry her next spring. ...,"<ruby class=""kanji"">彼女 <rp>(</r...",False
8,買い物します,かいものします,Verb,hacer compras ...,,新年(しんねん)のセールで色々(いろいろ)な服(ふく)を買(か)い物(もの)しました。 ...,I did a lot of shopping for clothes at the Ne...,"<ruby class=""kanji"">新年 <rp>(</r...",False
9,食事します,しょくじします,Verb,comer ...,,昨夜(さくや)、高級(こうきゅう)なレストランで友人(ゆうじん)と食事(しょくじ)をしました。,"Last night, I had dinner with a friend at a f...","<ruby class=""kanji"">昨夜 <rp>(</r...",False


' 週末(しゅうまつ)には友達(ともだち)と公園(こうえん)でフリスビーをして遊(あそ)びます。  '

' On weekends, I play frisbee with my friends at the park. '

In [8]:
# Ask for confirmation before writing to the collection. Nothing is added when
# the clipboard produced no rows or the answer is not one of y / Y / yes / Yes / 1.
if not kotoba_data.empty:

    response = input("Are new inputs ok?")
    user_confirmed = re.match(r'^([Yy]|Yes|yes|1)$', response) is not None

    if user_confirmed:
        for note in new_notes_in_clipboard_data:
            col.add_note(note, kotoba_deck_id)