# Import required libraries


In [1]:
import random
from pprint import pprint

import pandas as pd
import stanza
from lemminflect import getAllInflections


# Import all 3 sheets of the 700 Sentences dataset


In [2]:
df_7000_sents_1 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="3000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_2 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="6000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_3 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="1000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

# Import the 3000 Words dataset


In [3]:
df_3000_words = pd.read_excel(
    "../3000 Hindi Words Corpus With IDs.xlsx",
    sheet_name="Feuille1",
    usecols=["ID", "French Word", "English Word"],
    na_values=["NA"],
)

# df_6000_words = pd.read_excel(
#     "../6000 Hindi Words Corpus With IDs.xlsx",
#     usecols=["ID", "French Word", "English Word"],
#     na_values=["NA"],
# )

# Download a Stanza Language Model for English and French into the directory "../stanza_models"


In [4]:
stanza.download(lang='en', model_dir='../stanza_models')
# stanza.download(lang='fr', model_dir='../stanza_models')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-09-28 17:57:09 INFO: Downloading default packages for language: en (English) ...


Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.1/models/default.zip:   0%|          | 0…

2022-09-28 17:57:32 INFO: Finished downloading models and saved to ../stanza_models.


# Initialize a Stanza pipeline with a language model for English and French

## Which is assigned to the variable 'nlp_en' and 'nlp_fr' using the Pipeline() class


In [5]:
nlp_en_stanza = stanza.Pipeline(lang='en', dir='../stanza_models')
# nlp_fr_stanza = stanza.Pipeline(lang='fr', dir='../stanza_models')

2022-09-28 17:57:33 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.4.1/models/pretrain/fasttextcrawl.pt:   0%…

2022-09-28 17:57:38 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-09-28 17:57:38 INFO: Use device: cpu
2022-09-28 17:57:38 INFO: Loading: tokenize
2022-09-28 17:57:38 INFO: Loading: pos
2022-09-28 17:57:39 INFO: Loading: lemma
2022-09-28 17:57:39 INFO: Loading: depparse
2022-09-28 17:57:39 INFO: Loading: sentiment
2022-09-28 17:57:40 INFO: Loading: constituency
2022-09-28 17:57:41 INFO: Loading: ner
2022-09-28 17:57:42 INFO: Done loading processors!
2022-09-28 17:57:42 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/tokenize/gsd.pt:   0%|         …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/mwt/gsd.pt:   0%|          | 0.…

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/pos/gsd.pt:   0%|          | 0.…

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/lemma/gsd.pt:   0%|          | …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/depparse/gsd.pt:   0%|         …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/ner/wikiner.pt:   0%|          …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/pretrain/gsd.pt:   0%|         …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/forward_charlm/newswiki.pt:   0…

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/backward_charlm/newswiki.pt:   …

Downloading https://huggingface.co/stanfordnlp/stanza-fr/resolve/v1.4.1/models/pretrain/fasttextwiki.pt:   0%|…

2022-09-28 17:58:09 INFO: Loading these models for language: fr (French):
| Processor | Package |
-----------------------
| tokenize  | gsd     |
| mwt       | gsd     |
| pos       | gsd     |
| lemma     | gsd     |
| depparse  | gsd     |
| ner       | wikiner |

2022-09-28 17:58:09 INFO: Use device: cpu
2022-09-28 17:58:09 INFO: Loading: tokenize
2022-09-28 17:58:09 INFO: Loading: mwt
2022-09-28 17:58:09 INFO: Loading: pos
2022-09-28 17:58:10 INFO: Loading: lemma
2022-09-28 17:58:10 INFO: Loading: depparse
2022-09-28 17:58:11 INFO: Loading: ner
2022-09-28 17:58:15 INFO: Done loading processors!


# Distractor Creation Functions


## Creating Spelling Distractors


In [6]:
# exchanges two letters in a given word
def replace_letter(a, b, text, list_of_distractors):
    if (a in text):
        list_of_distractors.append(text.replace(a, b))

    if (b in text):
        list_of_distractors.append(text.replace(b, a))

    return list_of_distractors

In [None]:
def distractor_generator(word, list_of_distractors):
    if "ie" in word.text:
        list_of_distractors = replace_letter(
            "ie", "ei", word.text, list_of_distractors)

    if "ei" in word.text:
        list_of_distractors = replace_letter(
            "ei", "ie", word.text, list_of_distractors)

    consonats = [
        'b', 'c', 'd', 'f',
        'g', 'h', 'j', 'k',
        'l', 'm', 'n', 'p',
        'q', 'r', 's', 't',
        'v', 'w', 'x', 'y',
        'z'
    ]
    for i in consonats:
        txt = i + i
        if txt in word.text:
            list_of_distractors = replace_letter(
                txt, i, word.text, list_of_distractors)

    if 'ant' in word.text:
        list_of_distractors = replace_letter(
            'ant', 'ent', word.text, list_of_distractors)

    if 'ent' in word.text:
        list_of_distractors = replace_letter(
            'ent', 'ant', word.text, list_of_distractors)

    if 'ance' in word.text:
        list_of_distractors = replace_letter(
            'ance', 'ence', word.text, list_of_distractors)

    if 'ence' in word.text:
        list_of_distractors = replace_letter(
            'ence', 'ance', word.text, list_of_distractors)

    if 'ar' in word.text:
        list_of_distractors = replace_letter(
            'ar', 'er', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'ar', word.text, list_of_distractors)

    if 'ary' in word.text:
        list_of_distractors = replace_letter(
            'ary', 'ery', word.text, list_of_distractors)

    if 'ery' in word.text:
        list_of_distractors = replace_letter(
            'ery', 'ary', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'eur', word.text, list_of_distractors)

    if 'eur' in word.text:
        list_of_distractors = replace_letter(
            'eur', 'er', word.text, list_of_distractors)

    if 'ea' in word.text:
        list_of_distractors = replace_letter(
            'ea', 'e', word.text, list_of_distractors)

    if 'ly' in word.text:
        list_of_distractors = replace_letter(
            'ly', 'ely', word.text, list_of_distractors)

    if 'ely' in word.text:
        list_of_distractors = replace_letter(
            'ely', 'ly', word.text, list_of_distractors)

    if 'ies' in word.text:
        list_of_distractors = replace_letter(
            'ies', 'ys', word.text, list_of_distractors)

    if 'ys' in word.text:
        list_of_distractors = replace_letter(
            'ys', 'ies', word.text, list_of_distractors)

    return list_of_distractors

In [None]:
# Create 3 spelling distractors for a given word
def spelling_distractors(word, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = distractor_generator(word, list_of_distractors)

    if len(list_of_distractors) >= 3:
        list_of_distractors = list_of_distractors[:3]
    else:
        list_of_distractors = []

    return list_of_distractors


## Creating Verb Distractors


In [7]:
def verb_distractors(word, pos, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = set(sum(getAllInflections(word, upos=pos).values(),()))
    list_of_distractors.discard(word)
    if len(list_of_distractors) >= 3:
        list_of_distractors = list(random.sample(list(list_of_distractors), 3))
    else:
        list_of_distractors = []
    return list_of_distractors

# Creating Vocabulary Flashcards


In [None]:
def word_flashcards(df_source):
    data = []
    index = 0
    for i in range(len(df_source)):
        if df_source["English Word"][i] != "NaN":
            index += 1
            data.append([
                "10",
                "Flashcards",
                "Learning vocabulary",
                "",
                str(index),
                "text",
                "text",
                "",
                df_source["ID"][i],
                "French",
                "English",
                str(df_source["French Word"][i]),
                "",
                str(df_source["French Word"][i]),
                str(df_source["English Word"][i]),
                "",
                "",
                "",
                ""
            ])
    return data

In [8]:
# add exercise to dataset
cols = [
    "Exo_type_index",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence"
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

data_w_fc_1 = word_flashcards(df_3000_words)
df_w_fc_1 = pd.DataFrame(data_w_fc_1, columns=cols)

frames_w_fc = [df_w_fc_1]
df_w_fc = pd.concat(frames_w_fc)
df_w_fc


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,English,Beauté,,Beauté,Beauty,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,English,Verité,,Verité,Truth,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,English,Liberté,,Liberté,Freedom,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,English,Fraternité,,Fraternité,Brotherhood,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,English,Bonté,,Bonté,Kindness,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,10,Flashcards,Learning vocabulary,,2996,text,text,,2996,French,English,Plombier,,Plombier,Plumber,,,,
2996,10,Flashcards,Learning vocabulary,,2997,text,text,,2997,French,English,Garagiste,,Garagiste,Mechanic,,,,
2997,10,Flashcards,Learning vocabulary,,2998,text,text,,2998,French,English,Démissionner,,Démissionner,To quit,,,,
2998,10,Flashcards,Learning vocabulary,,2999,text,text,,2999,French,English,Informaticien,,Informaticien,Computer scientist,,,,


# Create Spelling MCQ Exercises


In [9]:
def spelling_mcq(df_source, index):
    data = []

    spellings = {
        "Exo_type_id": [],
        "Exo_type": [],
        "Exo_objective": [],
        "Exo_focus": [],
        "Exo_id": [],
        "Source_format": [],
        "Target_format": [],
        "Source_sentence_id": [],
        "Source_word_id": [],
        "Source_lang": [],
        "Target_lang": [],
        "Full_sentence": [],
        "Instruction": [],
        "Sentence_w_blank": [],
        "Right_answer": [],
        "Options": [],
        "Explanation": [],
        "Difficulty": [],
        "Remediation": [],
    }


    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English Word"][i]))

        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                list_of_distractors = []

                if word.text not in spellings["Right_answer"]:
                    list_of_distractors = spelling_distractors(word, list_of_distractors)

                    if list_of_distractors != []:
                        list_of_distractors += [word.text]
                        random.shuffle(list_of_distractors)

                        options = "".join(f"-{dist}" for dist in list_of_distractors)

                        index+=1

                        spellings["Exo_type_id"].append("14")
                        spellings["Exo_type"].append("MCQ")
                        spellings["Exo_objective"].append("Learning vocabulary")
                        spellings["Exo_focus"].append("Spellings")
                        spellings["Exo_id"].append(index)
                        spellings["Source_format"].append("text")
                        spellings["Target_format"].append("text")
                        spellings["Source_sentence_id"].append("")
                        spellings["Source_word_id"].append(str(df_source["ID"][i]))
                        spellings["Source_lang"].append("French")
                        spellings["Target_lang"].append("English")
                        spellings["Full_sentence"].append(str(df_source["English Word"][i]))
                        spellings["Instruction"].append("Select the correct spelling (for " + str(df_source["French Word"][i]) + "): ")
                        spellings["Sentence_w_blank"].append("")
                        spellings["Right_answer"].append(str(word.text))
                        spellings["Options"].append(options),
                        spellings["Explanation"].append((""))
                        spellings["Difficulty"].append((""))
                        spellings["Remediation"].append((""))

    return index, spellings


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

index = 0
index, data_spelling_mcq_1 = spelling_mcq(df_3000_words, index)
df_spelling_mcq_1 = pd.DataFrame(data_spelling_mcq_1, columns=cols)

frames_spelling_mcq = [df_spelling_mcq_1]
df_spelling_mcq = pd.concat(frames_spelling_mcq)
df_spelling_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,14,MCQ,Learning vocabulary,Spellings,1,text,text,,8,French,English,Différence,Select the correct spelling (for Différence):,,Difference,-Difffference-Differance-Diference-Difference,,,
1,14,MCQ,Learning vocabulary,Spellings,2,text,text,,13,French,English,Propreté,Select the correct spelling (for Propreté):,,Cleanliness,-Cleanlinessss-Cleanlines-Cleanliness-Clenliness,,,
2,14,MCQ,Learning vocabulary,Spellings,3,text,text,,44,French,English,étroit,Select the correct spelling (for étroit):,,Narrow,-Narow-Nerrow-Narrow-Narrrrow,,,
3,14,MCQ,Learning vocabulary,Spellings,4,text,text,,61,French,English,Excellent,Select the correct spelling (for Excellent):,,Excellent,-Excelent-Excellllent-Excellent-Excellant,,,
4,14,MCQ,Learning vocabulary,Spellings,5,text,text,,80,French,English,Agréable,Select the correct spelling (for Agréable):,,Pleasant,-Pleasent-Pleaasant-Pleasant-Plesant,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,14,MCQ,Learning vocabulary,Spellings,212,text,text,,2975,French,English,Caissier,Select the correct spelling (for Caissier):,,Cashier,-Cashier-Cashieur-Casheir-Cashiar,,,
212,14,MCQ,Learning vocabulary,Spellings,213,text,text,,2982,French,English,Actionnaire,Select the correct spelling (for Actionnaire):,,Shareholder,-Shareholder-Shareholdar-Shareholdar-Shereholder,,,
213,14,MCQ,Learning vocabulary,Spellings,214,text,text,,2991,French,English,Harcèlement,Select the correct spelling (for Harcèlement):,,Harassment,-Harassmant-Harasment-Harassssment-Harassment,,,
214,14,MCQ,Learning vocabulary,Spellings,215,text,text,,2993,French,English,Vétérinaire,Select the correct spelling (for Vétérinaire):,,Veterinary,-Veterinary-Veterinery-Vetarinary-Vetarinary,,,


# Creating Useful Sentences Flashcards


In [10]:
def sentence_flashcards(df_source):
    return [
        [
            "24", 
            "Flashcards", 
            "", 
            "", 
            str(index), 
            "text", 
            "text", 
            df_source["ID"][i], 
            "", 
            "French", 
            "English", 
            str(df_source["English"][i]), 
            "", 
            str(df_source["French"][i]), 
            str(df_source["English"][i]), 
            "", 
            "", 
            "", 
            "",
        ] 
        for index, i in enumerate(range(len(df_source)))
    ]

# add exercise to dataset
cols = [
    "Exo_type_id", 
    "Exo_type", 
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

data_s_fc_1 = sentence_flashcards(df_7000_sents_1)
df_s_fc_1 = pd.DataFrame(data_s_fc_1, columns=cols)

data_s_fc_2 = sentence_flashcards(df_7000_sents_1)
df_s_fc_2 = pd.DataFrame(data_s_fc_2, columns=cols)

data_s_fc_3 = sentence_flashcards(df_7000_sents_1)
df_s_fc_3 = pd.DataFrame(data_s_fc_3, columns=cols)

frames_s_fc = [df_s_fc_1, df_s_fc_2, df_s_fc_3]
df_s_fc = pd.concat(frames_s_fc)
df_s_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,24,Flashcards,,,0,text,text,1,,French,English,La beauté du paysage frappa les voyageurs,,La beauté du paysage frappa les voyageurs,The beauty of the landscape struck the travell...,,,,
1,24,Flashcards,,,1,text,text,2,,French,English,Personne ne connaît la vérité sur cette affaire,,Personne ne connaît la vérité sur cette affaire,Nobody knows the truth about this affair.,,,,
2,24,Flashcards,,,2,text,text,3,,French,English,"Dans une dictature, la liberté d'expression es...",,"Dans une dictature, la liberté d'expression es...","In a dictatorship, freedom of expression is li...",,,,
3,24,Flashcards,,,3,text,text,4,,French,English,"Liberté, égalité, fraternité est la devise de ...",,"Liberté, égalité, fraternité est la devise de ...","Liberty, equality, fraternity is the motto of ...",,,,
4,24,Flashcards,,,4,text,text,5,,French,English,Il ne t'a pas aidé par bonté,,Il ne t'a pas aidé par bonté,He did not help you out of kindness.,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,24,Flashcards,,,2995,text,text,2996,,French,English,La facture du garagiste est très élevée,,La facture du garagiste est très élevée,The mechanic's bill is very high.,,,,
2996,24,Flashcards,,,2996,text,text,2997,,French,English,Il a démissionné de son travail car son salair...,,Il a démissionné de son travail car son salair...,He quit his job because his salary was too low.,,,,
2997,24,Flashcards,,,2997,text,text,2998,,French,English,Les informaticiens trouvent rapidement du travail,,Les informaticiens trouvent rapidement du travail,Computer scientists find a job quickly enough.,,,,
2998,24,Flashcards,,,2998,text,text,2999,,French,English,Les cordonniers réparent les chaussures,,Les cordonniers réparent les chaussures,Shoemakers rapair shoes.,,,,


# Creating Verb Conjugation MCQ Exercises


In [11]:
def verb_conjug_mcq(df_source, index):
    verb_conjug = {
        "Exo_type_id": [],
        "Exo_type": [],
        "Exo_objective": [],
        "Exo_focus": [],
        "Exo_id": [],
        "Source_format": [],
        "Target_format": [],
        "Source_sentence_id": [],
        "Source_word_id": [],
        "Source_lang": [],
        "Target_lang": [],
        "Full_sentence": [],
        "Instruction": [],
        "Sentence_w_blank": [],
        "Right_answer": [],
        "Options": [],
        "Explanation": [],
        "Difficulty": [],
        "Remediation": [],
    }
    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):

                question.append(
                    " ".join(                              
                        [
                            word.text if word.index != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences
                        ]
                    )
                )

                list_of_distractors = []
                list_of_distractors = verb_distractors(answer[b], nlp_en_stanza(answer[b]).sentences[0].words[0].upos)

                if list_of_distractors != []:
                    list_of_distractors += [word.text]
                    random.shuffle(list_of_distractors)

                    options = "".join(f"-{dist}" for dist in list_of_distractors)
                    index+=1

                    verb_conjug["Exo_type_id"].append("35")
                    verb_conjug["Exo_type"].append("MCQ")
                    verb_conjug["Exo_objective"].append("Grammar")
                    verb_conjug["Exo_focus"].append("Verb Conjugation")
                    verb_conjug["Exo_id"].append(str(index))
                    verb_conjug["Source_format"].append("text")
                    verb_conjug["Target_format"].append("text")
                    verb_conjug["Source_sentence_id"].append(str(df_source["ID"][i]))
                    verb_conjug["Source_word_id"].append("")
                    verb_conjug["Source_lang"].append("French")
                    verb_conjug["Target_lang"].append("English")
                    verb_conjug["Full_sentence"].append(str(df_source["English"][i]))
                    verb_conjug["Instruction"].append("Conjugate the verb correctly:")
                    verb_conjug["Sentence_w_blank"].append(str(f"{question[b]} ({hint[b]})"))
                    verb_conjug["Right_answer"].append(str(answer[b]))
                    verb_conjug["Options"].append(options),
                    verb_conjug["Explanation"].append("")
                    verb_conjug["Difficulty"].append("")
                    verb_conjug["Remediation"].append("")

    return index, verb_conjug


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

index = 0

index, data_v_conjug_mcq_1 = verb_conjug_mcq(df_7000_sents_1, index)
df_v_conjug_mcq_1 = pd.DataFrame(data_v_conjug_mcq_1, columns=cols)

index, data_v_conjug_mcq_2 = verb_conjug_mcq(df_7000_sents_2, index)
df_v_conjug_mcq_2 = pd.DataFrame(data_v_conjug_mcq_2, columns=cols)

index, data_v_conjug_mcq_3 = verb_conjug_mcq(df_7000_sents_3, index)
df_v_conjug_mcq_3 = pd.DataFrame(data_v_conjug_mcq_3, columns=cols)

frames_v_conjug_mcq = [df_v_conjug_mcq_1, df_v_conjug_mcq_2, df_v_conjug_mcq_3]
df_v_conjug_mcq = pd.concat(frames_v_conjug_mcq)
df_v_conjug_mcq


Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,35,MCQ,Grammar,Verb Conjugation,1,text,text,40,,French,English,"Attention, ce chien est méchant",Conjugate the verb correctly:,"... careful , this dog is nasty . (be)",Be,-Been-Was-Is-.,,,
1,35,MCQ,Grammar,Verb Conjugation,2,text,text,103,,French,English,"Attention, le plat est brûlant",Conjugate the verb correctly:,"... careful , this dish is piping hot . (be)",Be,-Is-.-Being-Been,,,
2,35,MCQ,Grammar,Verb Conjugation,3,text,text,104,,French,English,Tu préfères les oignons crus ou cuits?,Conjugate the verb correctly:,... you prefer onions raw or cooked ? (do),Do,-Did-Doing-?-Does,,,
3,35,MCQ,Grammar,Verb Conjugation,4,text,text,144,,French,English,Tu as assez mangé?,Conjugate the verb correctly:,... you eaten enough ? (have),Have,-?-Having-Has-Had,,,
4,35,MCQ,Grammar,Verb Conjugation,5,text,text,167,,French,English,"Va lire ailleurs, la petite doit dormir",Conjugate the verb correctly:,"... read somewhere else , the girl needs to sl...",Go,-Went-.-Gone-Going,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,35,MCQ,Grammar,Verb Conjugation,364,text,text,6967,,French,English,Est-ce que tu aimes le football?,Conjugate the verb correctly:,... you like soccer ? (do),Do,-?-Does-Doing-Did,,,
90,35,MCQ,Grammar,Verb Conjugation,365,text,text,6982,,French,English,Est-ce que je dois payer une amende?,Conjugate the verb correctly:,... I have to pay a fine ? (do),Do,-Doing-Done-?-Does,,,
91,35,MCQ,Grammar,Verb Conjugation,366,text,text,6984,,French,English,Avez-vous eu des rapports sexuels non protégés...,Conjugate the verb correctly:,... you had unprotected sex recently ? (have),Have,-Having-?-Had-Has,,,
92,35,MCQ,Grammar,Verb Conjugation,367,text,text,6993,,French,English,Est-ce que vous utilisez des moyens contracept...,Conjugate the verb correctly:,... you use contraceptives ? (do),Do,-Does-Done-Did-?,,,


# Creating Verb Conjugation Cloze Test Exercises


In [12]:
def verbs_cloze_test(df_source):

    data = []
    index = 0
    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):
                index += 1
                question.append(" ".join(
                    [word.text if word.index != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences]))
                data.append(["38", "Cloze_Test", "Verb_Conjugation", "Grammar", str(index), "text", "text", str(df_source["ID"][i]), "", "French", "English", str(df_source["English"][i]), "Conjugate the verb correctly:", f"{question[b]} ({hint[b]})", str(answer[b]), "", "", "", "", "", ""])

    return data


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "dist_1",
    "dist_2",
    "dist_3",
    "Explanation",
    "Difficulty",
    "Remediation"
]
data_v_ct_1 = verbs_cloze_test(df_7000_sents_1)
df_v_ct_1 = pd.DataFrame(data_v_ct_1, columns=cols)

data_v_ct_2 = verbs_cloze_test(df_7000_sents_2)
df_v_ct_2 = pd.DataFrame(data_v_ct_2, columns=cols)

data_v_ct_3 = verbs_cloze_test(df_7000_sents_3)
df_v_ct_3 = pd.DataFrame(data_v_ct_3, columns=cols)

frames_v_ct = [df_v_ct_1, df_v_ct_2, df_v_ct_3]
df_v_ct = pd.concat(frames_v_ct)
df_v_ct

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,dist_1,dist_2,dist_3,Explanation,Difficulty,Remediation
0,38,Cloze_Test,Verb_Conjugation,Grammar,1,text,text,1,,French,...,The beauty of the landscape struck the travell...,Conjugate the verb correctly:,The beauty of the landscape ... the travellers...,struck,,,,,,
1,38,Cloze_Test,Verb_Conjugation,Grammar,2,text,text,2,,French,...,Nobody knows the truth about this affair.,Conjugate the verb correctly:,Nobody ... the truth about this affair . (know),knows,,,,,,
2,38,Cloze_Test,Verb_Conjugation,Grammar,3,text,text,3,,French,...,"In a dictatorship, freedom of expression is li...",Conjugate the verb correctly:,"In a dictatorship , freedom of expression ... ...",is,,,,,,
3,38,Cloze_Test,Verb_Conjugation,Grammar,4,text,text,3,,French,...,"In a dictatorship, freedom of expression is li...",Conjugate the verb correctly:,"In a dictatorship , freedom of expression is ....",limited,,,,,,
4,38,Cloze_Test,Verb_Conjugation,Grammar,5,text,text,4,,French,...,"Liberty, equality, fraternity is the motto of ...",Conjugate the verb correctly:,"Liberty , equality , fraternity ... the motto ...",is,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,38,Cloze_Test,Verb_Conjugation,Grammar,904,text,text,6998,,French,...,Stop thief!,Conjugate the verb correctly:,... thief ! (stop),Stop,,,,,,
904,38,Cloze_Test,Verb_Conjugation,Grammar,905,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I ... stolen (be),was,,,,,,
905,38,Cloze_Test,Verb_Conjugation,Grammar,906,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I was ... (steal),stolen,,,,,,
906,38,Cloze_Test,Verb_Conjugation,Grammar,907,text,text,7000,,French,...,You are breaking up,Conjugate the verb correctly:,You ... breaking up (be),are,,,,,,


# Merge All Exercise Dataframes


In [13]:
frames_en_exercises = [df_w_fc, df_spelling_mcq, df_s_fc, df_v_conjug_mcq, df_v_ct]
df_en_exercises = pd.concat(frames_en_exercises)
df_en_exercises

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation,dist_1,dist_2,dist_3
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,...,,Beauté,Beauty,,,,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,...,,Verité,Truth,,,,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,,Liberté,Freedom,,,,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,,Fraternité,Brotherhood,,,,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,,Bonté,Kindness,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,38,Cloze_Test,Verb_Conjugation,Grammar,904,text,text,6998,,French,...,Conjugate the verb correctly:,... thief ! (stop),Stop,,,,,,,
904,38,Cloze_Test,Verb_Conjugation,Grammar,905,text,text,6999,,French,...,Conjugate the verb correctly:,I ... stolen (be),was,,,,,,,
905,38,Cloze_Test,Verb_Conjugation,Grammar,906,text,text,6999,,French,...,Conjugate the verb correctly:,I was ... (steal),stolen,,,,,,,
906,38,Cloze_Test,Verb_Conjugation,Grammar,907,text,text,7000,,French,...,Conjugate the verb correctly:,You ... breaking up (be),are,,,,,,,


# Extract Exercise Dataframe to an Excel File


In [14]:
df_en_exercises.to_excel("English_Exercises.xlsx", index=False)