# Import required libraries


In [4]:
!pip install lemminflect

Collecting lemminflect
  Using cached lemminflect-0.2.3-py3-none-any.whl (769 kB)




Installing collected packages: lemminflect
Successfully installed lemminflect-0.2.3



You should consider upgrading via the 'C:\ProgramData\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
import random

import pandas as pd
import stanza
from lemminflect import getAllInflections

random.seed(12345)

# Import all 3 sheets of the 700 Sentences dataset


In [3]:
df_7000_sents_1 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="3000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_2 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="6000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_3 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="1000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

In [4]:
df_7000_sents_1 = df_7000_sents_1.dropna().reset_index(drop = True)
df_7000_sents_2 = df_7000_sents_2.dropna().reset_index(drop = True)
df_7000_sents_3 = df_7000_sents_3.dropna().reset_index(drop = True)

In [5]:
df_7000_sents_1 = df_7000_sents_1.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()
df_7000_sents_2 = df_7000_sents_2.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()
df_7000_sents_3 = df_7000_sents_3.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()

# Import the 3000 Words dataset


In [6]:
df_3000_words = pd.read_excel(
    "../data/corpus/3000 Hindi Words Corpus With IDs.xlsx",
    sheet_name="Feuille1",
    usecols=["ID", "French Word", "English Word"],
    na_values=["NA"],
)

In [7]:
df_3000_words = df_3000_words.dropna().reset_index(drop = True)

In [8]:
df_3000_words = df_3000_words.set_index(["ID", "French Word"]).apply(lambda x: x.str.split('/').explode()).reset_index()

# Download a Stanza Language Model for English and French into the directory "../stanza_models"


In [9]:
stanza.download(lang='en')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.3.0.json:   0%|   …

2023-01-13 15:07:29 INFO: Downloading default packages for language: en (English)...


Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.3.0/models/default.zip:   0%|          | 0…

2023-01-13 15:08:24 INFO: Finished downloading models and saved to C:\Users\Administrator\stanza_resources.


# Initialize a Stanza pipeline with a language model for English and French

## Which is assigned to the variable 'nlp_en' and 'nlp_fr' using the Pipeline() class


In [10]:
nlp_en_stanza = stanza.Pipeline(lang='en')

2023-01-13 15:08:46 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2023-01-13 15:08:46 INFO: Use device: cpu
2023-01-13 15:08:46 INFO: Loading: tokenize
2023-01-13 15:08:47 INFO: Loading: pos
2023-01-13 15:08:48 INFO: Loading: lemma
2023-01-13 15:08:48 INFO: Loading: depparse
2023-01-13 15:08:48 INFO: Loading: sentiment
2023-01-13 15:08:49 INFO: Loading: constituency
2023-01-13 15:08:50 INFO: Loading: ner
2023-01-13 15:08:55 INFO: Done loading processors!


# Set Exercise Dataset Columns

In [11]:
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Dist_1",
    "Dist_2",
    "Dist_3",
    "Propositions",
    "Right_answer_id",
    "Explanation",
    "Difficulty",
    "Remediation",
]

# Distractor Creation Functions


## Creating Spelling Distractors


In [12]:
# exchanges two letters in a given word
def replace_letter(a, b, text, list_of_distractors):
    if (a in text):
        list_of_distractors.append(text.replace(a, b))

    if (b in text):
        list_of_distractors.append(text.replace(b, a))

    return list_of_distractors

In [13]:
def distractor_generator(word, list_of_distractors):
    if "ie" in word.text:
        list_of_distractors = replace_letter(
            "ie", "ei", word.text, list_of_distractors)

    if "ei" in word.text:
        list_of_distractors = replace_letter(
            "ei", "ie", word.text, list_of_distractors)

    consonats = [
        'b', 'c', 'd', 'f',
        'g', 'h', 'j', 'k',
        'l', 'm', 'n', 'p',
        'q', 'r', 's', 't',
        'v', 'w', 'x', 'y',
        'z'
    ]
    for i in consonats:
        txt = i + i
        if txt in word.text:
            list_of_distractors = replace_letter(
                txt, i, word.text, list_of_distractors)

    if 'ant' in word.text:
        list_of_distractors = replace_letter(
            'ant', 'ent', word.text, list_of_distractors)

    if 'ent' in word.text:
        list_of_distractors = replace_letter(
            'ent', 'ant', word.text, list_of_distractors)

    if 'ance' in word.text:
        list_of_distractors = replace_letter(
            'ance', 'ence', word.text, list_of_distractors)

    if 'ence' in word.text:
        list_of_distractors = replace_letter(
            'ence', 'ance', word.text, list_of_distractors)

    if 'ar' in word.text:
        list_of_distractors = replace_letter(
            'ar', 'er', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'ar', word.text, list_of_distractors)

    if 'ary' in word.text:
        list_of_distractors = replace_letter(
            'ary', 'ery', word.text, list_of_distractors)

    if 'ery' in word.text:
        list_of_distractors = replace_letter(
            'ery', 'ary', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'eur', word.text, list_of_distractors)

    if 'eur' in word.text:
        list_of_distractors = replace_letter(
            'eur', 'er', word.text, list_of_distractors)

    if 'ea' in word.text:
        list_of_distractors = replace_letter(
            'ea', 'e', word.text, list_of_distractors)

    if 'ly' in word.text:
        list_of_distractors = replace_letter(
            'ly', 'ely', word.text, list_of_distractors)

    if 'ely' in word.text:
        list_of_distractors = replace_letter(
            'ely', 'ly', word.text, list_of_distractors)

    if 'ies' in word.text:
        list_of_distractors = replace_letter(
            'ies', 'ys', word.text, list_of_distractors)

    if 'ys' in word.text:
        list_of_distractors = replace_letter(
            'ys', 'ies', word.text, list_of_distractors)

    return list_of_distractors

In [14]:
# Create 3 spelling distractors for a given word
def spelling_distractors(word, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = distractor_generator(word, list_of_distractors)

    if len(list_of_distractors) >= 3:
        list_of_distractors = list_of_distractors[:3]
    else:
        list_of_distractors = []

    return list_of_distractors

## Creating Verb Distractors


In [15]:
def verb_distractors(word, pos, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = set(sum(getAllInflections(word, upos=pos).values(),()))
    list_of_distractors.discard(word)
    if len(list_of_distractors) >= 3:
        list_of_distractors = list(random.sample(list(list_of_distractors), 3))
    else:
        list_of_distractors = []
    return list_of_distractors

# Creating Vocabulary Flashcards


In [16]:
def word_flashcards(df_source):
    data = []
    exo_id = 0
    for i in range(len(df_source)):
        if df_source["English Word"][i] != "NaN":
            exo_id += 1
            data.append([
                "10",
                "Flashcards",
                "Learning_Vocabulary",
                "",
                str(exo_id),
                "text",
                "text",
                "",
                str(df_source["ID"][i]),
                "French",
                "English",
                str(df_source["English Word"][i]),
                "",
                "",
                str(df_source["French Word"][i]),
                "",
                "",
                "",
                str(df_source["French Word"][i]),
                "0",
                "",
                "",
                "",
            ])
    return data

In [17]:
data_w_fc_1 = word_flashcards(df_3000_words)
df_w_fc_1 = pd.DataFrame(data_w_fc_1, columns=cols)

frames_w_fc = [df_w_fc_1]
df_w_fc = pd.concat(frames_w_fc)
df_w_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning_Vocabulary,,1,text,text,,1,French,...,,Beauté,,,,Beauté,0,,,
1,10,Flashcards,Learning_Vocabulary,,2,text,text,,2,French,...,,Verité,,,,Verité,0,,,
2,10,Flashcards,Learning_Vocabulary,,3,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
3,10,Flashcards,Learning_Vocabulary,,4,text,text,,4,French,...,,Fraternité,,,,Fraternité,0,,,
4,10,Flashcards,Learning_Vocabulary,,5,text,text,,5,French,...,,Bonté,,,,Bonté,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3012,10,Flashcards,Learning_Vocabulary,,3013,text,text,,2996,French,...,,Plombier,,,,Plombier,0,,,
3013,10,Flashcards,Learning_Vocabulary,,3014,text,text,,2997,French,...,,Garagiste,,,,Garagiste,0,,,
3014,10,Flashcards,Learning_Vocabulary,,3015,text,text,,2998,French,...,,Démissionner,,,,Démissionner,0,,,
3015,10,Flashcards,Learning_Vocabulary,,3016,text,text,,2999,French,...,,Informaticien,,,,Informaticien,0,,,


# Create Spelling MCQ Exercises


In [18]:
def spelling_mcq(df_source, exo_id, cols):
    data = []

    spellings = {col: [] for col in cols}

    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English Word"][i]))

        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                list_of_distractors = []

                if word.text not in spellings["Right_answer"]:
                    list_of_distractors = spelling_distractors(word, list_of_distractors)

                    if list_of_distractors != []:
                        list_of_distractors += [word.text]
                        random.shuffle(list_of_distractors)
                        right_answer_id = list_of_distractors.index(word.text)
                        options = "".join(f"-{dist}" for dist in list_of_distractors)
                        
                        exo_id += 1

                        spellings["Exo_type_id"].append("14")
                        spellings["Exo_type"].append("MCQ")
                        spellings["Exo_objective"].append("Learning vocabulary")
                        spellings["Exo_focus"].append("Spellings")
                        spellings["Exo_id"].append(str(exo_id))
                        spellings["Source_format"].append("text")
                        spellings["Target_format"].append("text")
                        spellings["Source_sentence_id"].append("")
                        spellings["Source_word_id"].append(str(df_source["ID"][i]))
                        spellings["Source_lang"].append("French")
                        spellings["Target_lang"].append("English")
                        spellings["Full_sentence"].append(str(word.text))
                        spellings["Instruction"].append("Select the correct spelling (for " + str(df_source["French Word"][i]) + "): ")
                        spellings["Sentence_w_blank"].append("")
                        spellings["Right_answer"].append(str(word.text))
                        spellings["Dist_1"].append(list_of_distractors[0]),
                        spellings["Dist_2"].append(list_of_distractors[1]),
                        spellings["Dist_3"].append(list_of_distractors[2]),
                        spellings["Propositions"].append(options),
                        spellings["Right_answer_id"].append(str(right_answer_id)),
                        spellings["Explanation"].append("")
                        spellings["Difficulty"].append("")
                        spellings["Remediation"].append("")

    return exo_id, spellings

In [19]:
exo_id = 0
exo_id, data_spelling_mcq_1 = spelling_mcq(df_3000_words, exo_id, cols)

df_spelling_mcq_1 = pd.DataFrame(data_spelling_mcq_1, columns=cols)

frames_spelling_mcq = [df_spelling_mcq_1]
df_spelling_mcq = pd.concat(frames_spelling_mcq)
df_spelling_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,14,MCQ,Learning vocabulary,Spellings,1,text,text,,8,French,...,,Difference,Difffference,Diference,Differance,-Difffference-Diference-Differance-Difference,3,,,
1,14,MCQ,Learning vocabulary,Spellings,2,text,text,,13,French,...,,Cleanliness,Cleanliness,Cleanlines,Cleanlinessss,-Cleanliness-Cleanlines-Cleanlinessss-Clenliness,0,,,
2,14,MCQ,Learning vocabulary,Spellings,3,text,text,,44,French,...,,Narrow,Narow,Narrrrow,Narrow,-Narow-Narrrrow-Narrow-Nerrow,2,,,
3,14,MCQ,Learning vocabulary,Spellings,4,text,text,,61,French,...,,Excellent,Excellant,Excelent,Excellent,-Excellant-Excelent-Excellent-Excellllent,2,,,
4,14,MCQ,Learning vocabulary,Spellings,5,text,text,,80,French,...,,Pleasant,Pleaasant,Pleasent,Plesant,-Pleaasant-Pleasent-Plesant-Pleasant,3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,14,MCQ,Learning vocabulary,Spellings,212,text,text,,2975,French,...,,Cashier,Cashieur,Casheir,Cashier,-Cashieur-Casheir-Cashier-Cashiar,2,,,
212,14,MCQ,Learning vocabulary,Spellings,213,text,text,,2982,French,...,,Shareholder,Shareholdar,Shereholder,Shareholdar,-Shareholdar-Shereholder-Shareholdar-Shareholder,3,,,
213,14,MCQ,Learning vocabulary,Spellings,214,text,text,,2991,French,...,,Harassment,Harassmant,Harasment,Harassssment,-Harassmant-Harasment-Harassssment-Harassment,3,,,
214,14,MCQ,Learning vocabulary,Spellings,215,text,text,,2993,French,...,,Veterinary,Vetarinary,Veterinary,Veterinery,-Vetarinary-Veterinary-Veterinery-Vetarinary,1,,,


# Creating Useful Sentences Flashcards


In [20]:
def sentence_flashcards(df_source):
    return [
        [
            "24", 
            "Flashcards", 
            "Useful_Sentences", 
            "", 
            str(exo_id), 
            "text", 
            "text", 
            str(df_source["ID"][i]), 
            "",
            "French", 
            "English", 
            str(df_source["English"][i]), 
            "", 
            "", 
            str(df_source["French"][i]), 
            "", 
            "", 
            "", 
            str(df_source["French"][i]),
            "0", 
            "", 
            "", 
            "",
        ] 
        for exo_id, i in enumerate(range(len(df_source)))
    ]

In [21]:
data_s_fc = sentence_flashcards(df_7000_sents_3)
df_s_fc = pd.DataFrame(data_s_fc, columns=cols)

df_s_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,24,Flashcards,Useful_Sentences,,0,text,text,6001,,French,...,,Oui,,,,Oui,0,,,
1,24,Flashcards,Useful_Sentences,,1,text,text,6002,,French,...,,Non,,,,Non,0,,,
2,24,Flashcards,Useful_Sentences,,2,text,text,6003,,French,...,,S'il-vous-plaît,,,,S'il-vous-plaît,0,,,
3,24,Flashcards,Useful_Sentences,,3,text,text,6004,,French,...,,Merci,,,,Merci,0,,,
4,24,Flashcards,Useful_Sentences,,4,text,text,6005,,French,...,,Je vous/t' en prie,,,,Je vous/t' en prie,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,24,Flashcards,Useful_Sentences,,1014,text,text,6996,,French,...,,Qu'est-ce-qu'il fait chaud aujourd'hui!,,,,Qu'est-ce-qu'il fait chaud aujourd'hui!,0,,,
1015,24,Flashcards,Useful_Sentences,,1015,text,text,6997,,French,...,,Il y aura du brouillard demain,,,,Il y aura du brouillard demain,0,,,
1016,24,Flashcards,Useful_Sentences,,1016,text,text,6998,,French,...,,Au voleur !,,,,Au voleur !,0,,,
1017,24,Flashcards,Useful_Sentences,,1017,text,text,6999,,French,...,,On m'a volé,,,,On m'a volé,0,,,


# Creating Verb Conjugation MCQ Exercises


In [22]:
def verb_conjug_mcq(df_source, exo_id, cols):

    verb_conjug = {col: [] for col in cols}

    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):

                question.append(
                    " ".join(                              
                        [
                            word.text if word.id != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences
                        ]
                    )
                )

                list_of_distractors = []
                list_of_distractors = verb_distractors(answer[b], nlp_en_stanza(answer[b]).sentences[0].words[0].upos)

                if list_of_distractors != []:
                    list_of_distractors += [word.text]
                    random.shuffle(list_of_distractors)
                    right_answer_id = list_of_distractors.index(word.text)
                    options = "".join(f"-{dist}" for dist in list_of_distractors)
                    
                    exo_id += 1

                    verb_conjug["Exo_type_id"].append("35")
                    verb_conjug["Exo_type"].append("MCQ")
                    verb_conjug["Exo_objective"].append("Grammar")
                    verb_conjug["Exo_focus"].append("Verb_Conjugation")
                    verb_conjug["Exo_id"].append(str(exo_id))
                    verb_conjug["Source_format"].append("text")
                    verb_conjug["Target_format"].append("text")
                    verb_conjug["Source_sentence_id"].append(str(df_source["ID"][i]))
                    verb_conjug["Source_word_id"].append("")
                    verb_conjug["Source_lang"].append("French")
                    verb_conjug["Target_lang"].append("English")
                    verb_conjug["Full_sentence"].append(str(df_source["English"][i]))
                    verb_conjug["Instruction"].append("Conjugate the verb correctly:")
                    verb_conjug["Sentence_w_blank"].append(str(f"{question[b]} ({hint[b]})"))
                    verb_conjug["Right_answer"].append(str(answer[b]))
                    verb_conjug["Dist_1"].append(list_of_distractors[0]),
                    verb_conjug["Dist_2"].append(list_of_distractors[1]),
                    verb_conjug["Dist_3"].append(list_of_distractors[2]),
                    verb_conjug["Propositions"].append(options),
                    verb_conjug["Right_answer_id"].append(str(right_answer_id)),
                    verb_conjug["Explanation"].append("")
                    verb_conjug["Difficulty"].append("")
                    verb_conjug["Remediation"].append("")

    return exo_id, verb_conjug

In [23]:
exo_id = 0

exo_id, data_v_conjug_mcq_1 = verb_conjug_mcq(df_7000_sents_1, exo_id, cols)
df_v_conjug_mcq_1 = pd.DataFrame(data_v_conjug_mcq_1, columns=cols)

exo_id, data_v_conjug_mcq_2 = verb_conjug_mcq(df_7000_sents_2, exo_id, cols)
df_v_conjug_mcq_2 = pd.DataFrame(data_v_conjug_mcq_2, columns=cols)

exo_id, data_v_conjug_mcq_3 = verb_conjug_mcq(df_7000_sents_3, exo_id, cols)
df_v_conjug_mcq_3 = pd.DataFrame(data_v_conjug_mcq_3, columns=cols)

frames_v_conjug_mcq = [df_v_conjug_mcq_1, df_v_conjug_mcq_2, df_v_conjug_mcq_3]
df_v_conjug_mcq = pd.concat(frames_v_conjug_mcq)
df_v_conjug_mcq

Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,35,MCQ,Grammar,Verb_Conjugation,1,text,text,40,,French,...,"... careful , this dog is nasty . (be)",Be,Being,Been,Were,-Being-Been-Were-.,3,,,
1,35,MCQ,Grammar,Verb_Conjugation,2,text,text,103,,French,...,"... careful , this dish is piping hot . (be)",Be,Been,Being,Am,-Been-Being-Am-.,3,,,
2,35,MCQ,Grammar,Verb_Conjugation,3,text,text,104,,French,...,... you prefer onions raw or cooked ? (do),Do,?,Done,Does,-?-Done-Does-Doing,0,,,
3,35,MCQ,Grammar,Verb_Conjugation,4,text,text,144,,French,...,... you eaten enough ? (have),Have,Having,?,Has,-Having-?-Has-Had,1,,,
4,35,MCQ,Grammar,Verb_Conjugation,5,text,text,167,,French,...,"... read somewhere else , the girl needs to sl...",Go,Goes,Gone,Going,-Goes-Gone-Going-.,3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,35,MCQ,Grammar,Verb_Conjugation,371,text,text,6968,,French,...,... to me ( playing soccer ) (pass),Pass,Passes,),Passing,-Passes-)-Passing-Passed,1,,,
92,35,MCQ,Grammar,Verb_Conjugation,372,text,text,6982,,French,...,... I have to pay a fine ? (do),Do,Doing,?,Does,-Doing-?-Does-Did,1,,,
93,35,MCQ,Grammar,Verb_Conjugation,373,text,text,6984,,French,...,... you had unprotected sex recently ? (have),Have,Had,Has,?,-Had-Has-?-Having,2,,,
94,35,MCQ,Grammar,Verb_Conjugation,374,text,text,6993,,French,...,... you use contraceptives ? (do),Do,Did,Done,Doing,-Did-Done-Doing-?,3,,,


# Creating Verb Conjugation Cloze Test Exercises


In [24]:
def verbs_cloze_test(df_source):

    data = []
    exo_id = 0
    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):
                exo_id += 1
                question.append(" ".join(
                    [word.text if word.id != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences]))
                data.append([
                    "38", 
                    "Cloze_Test", 
                    "Grammar", 
                    "Verb_Conjugation", 
                    str(exo_id), 
                    "text", 
                    "text", 
                    str(df_source["ID"][i]), 
                    "", 
                    "French", 
                    "English", 
                    str(df_source["English"][i]), 
                    "Conjugate the verb correctly:", 
                    f"{question[b]} ({hint[b]})", 
                    str(answer[b]), 
                    "", 
                    "", 
                    "", 
                    str(answer[b]), 
                    "0", 
                    "",
                    "",
                    "",
                ])

    return data

In [25]:
data_v_ct_1 = verbs_cloze_test(df_7000_sents_1)
df_v_ct_1 = pd.DataFrame(data_v_ct_1, columns=cols)

data_v_ct_2 = verbs_cloze_test(df_7000_sents_2)
df_v_ct_2 = pd.DataFrame(data_v_ct_2, columns=cols)

data_v_ct_3 = verbs_cloze_test(df_7000_sents_3)
df_v_ct_3 = pd.DataFrame(data_v_ct_3, columns=cols)

frames_v_ct = [df_v_ct_1, df_v_ct_2, df_v_ct_3]
df_v_ct = pd.concat(frames_v_ct)
df_v_ct

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,38,Cloze_Test,Grammar,Verb_Conjugation,1,text,text,1,,French,...,The beauty of the landscape ... the travellers...,struck,,,,struck,0,,,
1,38,Cloze_Test,Grammar,Verb_Conjugation,2,text,text,2,,French,...,Nobody ... the truth about this affair . (know),knows,,,,knows,0,,,
2,38,Cloze_Test,Grammar,Verb_Conjugation,3,text,text,3,,French,...,"In a dictatorship , freedom of expression ... ...",is,,,,is,0,,,
3,38,Cloze_Test,Grammar,Verb_Conjugation,4,text,text,3,,French,...,"In a dictatorship , freedom of expression is ....",limited,,,,limited,0,,,
4,38,Cloze_Test,Grammar,Verb_Conjugation,5,text,text,4,,French,...,"Liberty , equality , fraternity ... the motto ...",is,,,,is,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917,38,Cloze_Test,Grammar,Verb_Conjugation,918,text,text,6998,,French,...,... thief ! (stop),Stop,,,,Stop,0,,,
918,38,Cloze_Test,Grammar,Verb_Conjugation,919,text,text,6999,,French,...,I ... stolen (be),was,,,,was,0,,,
919,38,Cloze_Test,Grammar,Verb_Conjugation,920,text,text,6999,,French,...,I was ... (steal),stolen,,,,stolen,0,,,
920,38,Cloze_Test,Grammar,Verb_Conjugation,921,text,text,7000,,French,...,You ... breaking up (be),are,,,,are,0,,,


# Merge All Exercise Dataframes


In [26]:
frames_en_exercises = [df_w_fc, df_spelling_mcq, df_s_fc, df_v_conjug_mcq, df_v_ct]
df_en_exercises = pd.concat(frames_en_exercises)
df_en_exercises

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning_Vocabulary,,1,text,text,,1,French,...,,Beauté,,,,Beauté,0,,,
1,10,Flashcards,Learning_Vocabulary,,2,text,text,,2,French,...,,Verité,,,,Verité,0,,,
2,10,Flashcards,Learning_Vocabulary,,3,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
3,10,Flashcards,Learning_Vocabulary,,4,text,text,,4,French,...,,Fraternité,,,,Fraternité,0,,,
4,10,Flashcards,Learning_Vocabulary,,5,text,text,,5,French,...,,Bonté,,,,Bonté,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917,38,Cloze_Test,Grammar,Verb_Conjugation,918,text,text,6998,,French,...,... thief ! (stop),Stop,,,,Stop,0,,,
918,38,Cloze_Test,Grammar,Verb_Conjugation,919,text,text,6999,,French,...,I ... stolen (be),was,,,,was,0,,,
919,38,Cloze_Test,Grammar,Verb_Conjugation,920,text,text,6999,,French,...,I was ... (steal),stolen,,,,stolen,0,,,
920,38,Cloze_Test,Grammar,Verb_Conjugation,921,text,text,7000,,French,...,You ... breaking up (be),are,,,,are,0,,,


# Extract Exercise Dataframe to an Excel File


In [27]:
df_en_exercises.to_excel("English_Exercises.xlsx", index=False)