In [2]:
import pandas as pd
import spacy
from lemminflect import getAllInflections
import random
import pattern.en
import pattern.es
import mlconjug3
from collections import OrderedDict


conjugator = mlconjug3.Conjugator(language='en')

nlp_en = spacy.load("en_core_web_sm")
nlp_ja = spacy.load("ja_core_news_sm")

In [3]:
japonaisCorpus = pd.read_csv("../ja/japonaisExercices.tsv", sep="\t", encoding="utf-8").dropna()
japonaisCorpus

Unnamed: 0,sourceWord,targetWord,minSource,maxSource,minTarget,maxTarget
1,because of,のため,tom died because of me.,she was born just a generation past slavery; a...,嵐で停電した。,彼女は奴隷制終了のちょうど一世代後に生まれました。時は道に車もなく、そらに飛行機もなく時代で...
2,be out of,から出る,the lock must be out of order.,because the japanese morning train is very cro...,数日町を離れます。,日本の朝の電車はとても混んでいるので、朝の８時に急行電車に乗ろうものなら、息が出来ませんよ。
3,on an empty stomach,空腹時に,drinking on an empty stomach is bad for your h...,drinking on an empty stomach is bad for your h...,空きっ腹にお酒を飲むのは体に良くない。,空きっ腹にお酒を飲むのは体に良くない。
4,?,？,i wonder if i should tell him … ?,is it ok if i ask a stupid question? can you r...,彼に話すのかな。,「おバカチックな質問していい？これなんて読むの？コロナ・・・？うず？なべ？違うよね」「コロナ...
7,on the contrary,それどころか,"i thought he was busy, but on the contrary he ...","i thought he was busy, but on the contrary he ...",彼は忙しいと思ったが、それどころか暇だった。,彼は忙しいと思ったが、それどころか暇だった。
...,...,...,...,...,...,...
2921,maiden name,旧姓,mary's maiden name is jackson.,mary's maiden name is jackson.,メアリーの旧姓は、ジャクソンだよ。,メアリーの旧姓は、ジャクソンだよ。
2926,tell the truth,実を言うと,he would not tell the truth after all.,he didn't tell the truth for fear she should g...,彼はついに真実を明かそうとはしなかった。,彼は彼女が怒らないように本当のことを言わなかった。
2929,cap,キャップ,which cap is yours?,"remove the cap from the ink refill bottle, fil...",君の帽子は、どれ？,インキ補充瓶のキャップを外し、スポイトでインキを吸い上げ、マーカーの吸収体へ適量、滴下してく...
2930,several,several,several years went by.,although most of the content and thought has n...,電話が数度鳴った。,ドイツでは今日、いくつかの都市で襲撃に反対する抗議行動が催されましたが、その都市の中には、月...


In [7]:
# Test without conjugator using verb inflections

for sentence in japonaisCorpus["minSource"][:10]:
    doc = nlp_en(sentence)
    for i, token in enumerate(doc):
        if token.pos_ == "VERB":
            inflections = [inflection[0] for inflection in getAllInflections(token.lemma_, upos="VERB").values()]
            s = sentence.replace(token.text, random.choice(inflections))
    print([sentence, s])

['tom died because of me.', 'tom die because of me.']
['the lock must be out of order.', 'tom die because of me.']
['drinking on an empty stomach is bad for your health.', 'drunk on an empty stomach is bad for your health.']
['i wonder if i should tell him … ?', 'i wonder if i should told him … ?']
['i thought he was busy, but on the contrary he was idle.', 'i think he was busy, but on the contrary he was idle.']
["let's go out unless it rains.", "let's go out unless it rained."]
['come again tomorrow.', 'come again tomorrow.']
['tom is at the end of the line.', 'come again tomorrow.']
['all will go except you.', 'all will went except you.']
["it's starting now.", "it's starts now."]


In [9]:
# Test using conjugator from pattern

for sentence in japonaisCorpus["minSource"][:10]:
    doc = nlp_en(sentence)
    s = ""
    for i, token in enumerate(doc):
        if token.pos_ == "VERB":
            tense, person, number, mood, aspect = random.choice(pattern.en.tenses(token.lemma_))
            newVerb = pattern.en.conjugate(token.lemma_, tense=tense, person=person, number=number, mood=mood, aspect=aspect)
            s = sentence.replace(token.text, newVerb)
    print([sentence, s])

['tom died because of me.', 'tom die because of me.']
['the lock must be out of order.', '']
['drinking on an empty stomach is bad for your health.', 'drinks on an empty stomach is bad for your health.']
['i wonder if i should tell him … ?', 'i wonder if i should tell him … ?']
['i thought he was busy, but on the contrary he was idle.', 'i think he was busy, but on the contrary he was idle.']
["let's go out unless it rains.", "let's go out unless it rain."]
['come again tomorrow.', 'come again tomorrow.']
['tom is at the end of the line.', '']
['all will go except you.', 'all will goes except you.']
["it's starting now.", "it's start now."]


In [13]:
# Making exercises using orthographic distractors (making orthographic errors)

def scramblefewLetterInWord(word):
    return word[0] + "".join(random.sample(word[1:-1], len(word[1:-1]))) + word[-1]

for sentence in japonaisCorpus["minSource"][:10]:
    doc = nlp_en(sentence)
    s = ""
    # choose index of one of the three biggest words in sentence
    indexWrongWord = random.choice([i for i, token in enumerate(doc) if token.pos_ == "NOUN" or token.pos_ == "VERB"])
    for i, token in enumerate(doc):
        if i == indexWrongWord:
            print(token.text)
            s = sentence.replace(token.text, scramblefewLetterInWord(token.text))
    print([sentence, s])

died
['tom died because of me.', 'tom died because of me.']
order
['the lock must be out of order.', 'the lock must be out of oderr.']
drinking
['drinking on an empty stomach is bad for your health.', 'dikinnrg on an empty stomach is bad for your health.']
wonder
['i wonder if i should tell him … ?', 'i wednor if i should tell him … ?']
contrary
['i thought he was busy, but on the contrary he was idle.', 'i thought he was busy, but on the corrtany he was idle.']
rains
["let's go out unless it rains.", "let's go out unless it rnais."]
come
['come again tomorrow.', 'come again tomorrow.']
end
['tom is at the end of the line.', 'tom is at the end of the line.']
go
['all will go except you.', 'all will go except you.']
starting
["it's starting now.", "it's stntarig now."]


In [6]:
# Segmentation Japanese Sentence

for sentence in japonaisCorpus["minTarget"][:10]:
    doc = nlp_ja(sentence)
    print(sentence)
    tokens = [token.text for token in doc]
    if tokens[-1] == "。":
        tokens = tokens[:-1]
    print(tokens)


嵐で停電した。
['嵐', 'で', '停電', 'し', 'た']
数日町を離れます。
['数', '日', '町', 'を', '離れ', 'ます']
空きっ腹にお酒を飲むのは体に良くない。
['空きっ腹', 'に', 'お', '酒', 'を', '飲む', 'の', 'は', '体', 'に', '良く', 'ない']
彼に話すのかな。
['彼', 'に', '話す', 'の', 'か', 'な']
彼は忙しいと思ったが、それどころか暇だった。
['彼', 'は', '忙しい', 'と', '思っ', 'た', 'が', '、', 'それ', 'どころ', 'か', '暇', 'だっ', 'た']
急がないと学校遅れるよ。
['急が', 'ない', 'と', '学校', '遅れる', 'よ']
ではまた明日。
['で', 'は', 'また', '明日']
父は週末に帰宅します。
['父', 'は', '週末', 'に', '帰宅', 'し', 'ます']
無用の立ち入り禁止。
['無用', 'の', '立ち入り', '禁止']
ほら、始まるわよ。
['ほら', '、', '始まる', 'わ', 'よ']


In [29]:
conjugator.conjugate("go").conjug_info

OrderedDict([('indicative',
              OrderedDict([('indicative present',
                            OrderedDict([('1s', 'go'),
                                         ('2s', 'go'),
                                         ('3s', 'goes'),
                                         ('1p', 'go'),
                                         ('2p', 'go'),
                                         ('3p', 'go')])),
                           ('indicative past tense',
                            OrderedDict([('1s', 'went'),
                                         ('2s', 'went'),
                                         ('3s', 'went'),
                                         ('1p', 'went'),
                                         ('2p', 'went'),
                                         ('3p', 'went')])),
                           ('indicative present continuous',
                            OrderedDict([('1s', 'going'),
                                         ('2s', 'going'),
            

In [32]:
def flattenConjugInfo(conjugInfo):
    conjugInfo = conjugInfo.conjug_info
    conjugInfo = OrderedDict(conjugInfo)
    conjugInfo = OrderedDict([(key, value) for key, value in conjugInfo.items() if value is not None])
    return conjugInfo

conjugator.conjugate("go").conjug_info

'indicative'