# Import required libraries


In [1]:
import random
from pprint import pprint

import pandas as pd
import stanza
from lemminflect import getAllInflections

# Import all 3 sheets of the 700 Sentences dataset


In [2]:
df_7000_sents_1 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="3000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_2 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="6000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

df_7000_sents_3 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="1000",
    usecols=["ID", "French", "English"],
    na_values=['NA'],
)

# Import the 3000 Words dataset


In [3]:
df_3000_words = pd.read_excel(
    "../data/corpus/3000 Hindi Words Corpus With IDs.xlsx",
    sheet_name="Feuille1",
    usecols=["ID", "French Word", "English Word"],
    na_values=["NA"],
)

# Download a Stanza Language Model for English and French into the directory "../stanza_models"


In [4]:
stanza.download(lang='en')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-10-13 16:21:09 INFO: Downloading default packages for language: en (English) ...
2022-10-13 16:21:10 INFO: File exists: C:\Users\Khushi\stanza_resources\en\default.zip
2022-10-13 16:21:15 INFO: Finished downloading models and saved to C:\Users\Khushi\stanza_resources.


# Initialize a Stanza pipeline with a language model for English and French

## Which is assigned to the variable 'nlp_en' and 'nlp_fr' using the Pipeline() class


In [5]:
nlp_en_stanza = stanza.Pipeline(lang='en')

2022-10-13 16:21:15 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-10-13 16:21:17 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-10-13 16:21:17 INFO: Use device: cpu
2022-10-13 16:21:17 INFO: Loading: tokenize
2022-10-13 16:21:17 INFO: Loading: pos
2022-10-13 16:21:18 INFO: Loading: lemma
2022-10-13 16:21:18 INFO: Loading: depparse
2022-10-13 16:21:18 INFO: Loading: sentiment
2022-10-13 16:21:19 INFO: Loading: constituency
2022-10-13 16:21:20 INFO: Loading: ner
2022-10-13 16:21:21 INFO: Done loading processors!


# Set Exercise Dataset Columns

In [6]:
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Dist_1",
    "Dist_2",
    "Dist_3",
    "Explanation",
    "Difficulty",
    "Remediation",
]

# Distractor Creation Functions


## Creating Spelling Distractors


In [7]:
# exchanges two letters in a given word
def replace_letter(a, b, text, list_of_distractors):
    if (a in text):
        list_of_distractors.append(text.replace(a, b))

    if (b in text):
        list_of_distractors.append(text.replace(b, a))

    return list_of_distractors

In [8]:
def distractor_generator(word, list_of_distractors):
    if "ie" in word.text:
        list_of_distractors = replace_letter(
            "ie", "ei", word.text, list_of_distractors)

    if "ei" in word.text:
        list_of_distractors = replace_letter(
            "ei", "ie", word.text, list_of_distractors)

    consonats = [
        'b', 'c', 'd', 'f',
        'g', 'h', 'j', 'k',
        'l', 'm', 'n', 'p',
        'q', 'r', 's', 't',
        'v', 'w', 'x', 'y',
        'z'
    ]
    for i in consonats:
        txt = i + i
        if txt in word.text:
            list_of_distractors = replace_letter(
                txt, i, word.text, list_of_distractors)

    if 'ant' in word.text:
        list_of_distractors = replace_letter(
            'ant', 'ent', word.text, list_of_distractors)

    if 'ent' in word.text:
        list_of_distractors = replace_letter(
            'ent', 'ant', word.text, list_of_distractors)

    if 'ance' in word.text:
        list_of_distractors = replace_letter(
            'ance', 'ence', word.text, list_of_distractors)

    if 'ence' in word.text:
        list_of_distractors = replace_letter(
            'ence', 'ance', word.text, list_of_distractors)

    if 'ar' in word.text:
        list_of_distractors = replace_letter(
            'ar', 'er', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'ar', word.text, list_of_distractors)

    if 'ary' in word.text:
        list_of_distractors = replace_letter(
            'ary', 'ery', word.text, list_of_distractors)

    if 'ery' in word.text:
        list_of_distractors = replace_letter(
            'ery', 'ary', word.text, list_of_distractors)

    if 'er' in word.text:
        list_of_distractors = replace_letter(
            'er', 'eur', word.text, list_of_distractors)

    if 'eur' in word.text:
        list_of_distractors = replace_letter(
            'eur', 'er', word.text, list_of_distractors)

    if 'ea' in word.text:
        list_of_distractors = replace_letter(
            'ea', 'e', word.text, list_of_distractors)

    if 'ly' in word.text:
        list_of_distractors = replace_letter(
            'ly', 'ely', word.text, list_of_distractors)

    if 'ely' in word.text:
        list_of_distractors = replace_letter(
            'ely', 'ly', word.text, list_of_distractors)

    if 'ies' in word.text:
        list_of_distractors = replace_letter(
            'ies', 'ys', word.text, list_of_distractors)

    if 'ys' in word.text:
        list_of_distractors = replace_letter(
            'ys', 'ies', word.text, list_of_distractors)

    return list_of_distractors

In [9]:
# Create 3 spelling distractors for a given word
def spelling_distractors(word, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = distractor_generator(word, list_of_distractors)

    if len(list_of_distractors) >= 3:
        list_of_distractors = list_of_distractors[:3]
    else:
        list_of_distractors = []

    return list_of_distractors

## Creating Verb Distractors


In [10]:
def verb_distractors(word, pos, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []
        
    list_of_distractors = set(sum(getAllInflections(word, upos=pos).values(),()))
    list_of_distractors.discard(word)
    if len(list_of_distractors) >= 3:
        list_of_distractors = list(random.sample(list(list_of_distractors), 3))
    else:
        list_of_distractors = []
    return list_of_distractors

# Creating Vocabulary Flashcards


In [11]:
def word_flashcards(df_source):
    data = []
    exo_id = 0
    for i in range(len(df_source)):
        if df_source["English Word"][i] != "NaN":
            exo_id += 1
            data.append([
                "10",
                "Flashcards",
                "Learning_Vocabulary",
                "",
                str(exo_id),
                "text",
                "text",
                "",
                df_source["ID"][i],
                "French",
                "English",
                str(df_source["English Word"][i]),
                "",
                "",
                str(df_source["French Word"][i]),
                "",
                "",
                "",
                "",
                "",
                "",
            ])
    return data

In [12]:
data_w_fc_1 = word_flashcards(df_3000_words)
df_w_fc_1 = pd.DataFrame(data_w_fc_1, columns=cols)

frames_w_fc = [df_w_fc_1]
df_w_fc = pd.concat(frames_w_fc)
df_w_fc


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,...,Beauty,,,Beauté,,,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,...,Truth,,,Verité,,,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,Freedom,,,Liberté,,,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,Brotherhood,,,Fraternité,,,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,Kindness,,,Bonté,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,10,Flashcards,Learning vocabulary,,2996,text,text,,2996,French,...,Plumber,,,Plombier,,,,,,
2996,10,Flashcards,Learning vocabulary,,2997,text,text,,2997,French,...,Mechanic,,,Garagiste,,,,,,
2997,10,Flashcards,Learning vocabulary,,2998,text,text,,2998,French,...,To quit,,,Démissionner,,,,,,
2998,10,Flashcards,Learning vocabulary,,2999,text,text,,2999,French,...,Computer scientist,,,Informaticien,,,,,,


# Create Spelling MCQ Exercises


In [13]:
def spelling_mcq(df_source, exo_id, cols):
    data = []

    spellings = {col: [] for col in cols}

    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English Word"][i]))

        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                list_of_distractors = []

                if word.text not in spellings["Right_answer"]:
                    list_of_distractors = spelling_distractors(word, list_of_distractors)

                    if list_of_distractors != []:

                        exo_id += 1

                        spellings["Exo_type_id"].append("14")
                        spellings["Exo_type"].append("MCQ")
                        spellings["Exo_objective"].append("Learning vocabulary")
                        spellings["Exo_focus"].append("Spellings")
                        spellings["Exo_id"].append(exo_id)
                        spellings["Source_format"].append("text")
                        spellings["Target_format"].append("text")
                        spellings["Source_sentence_id"].append("")
                        spellings["Source_word_id"].append(str(df_source["ID"][i]))
                        spellings["Source_lang"].append("French")
                        spellings["Target_lang"].append("English")
                        spellings["Full_sentence"].append(str(word.text))
                        spellings["Instruction"].append("Select the correct spelling (for " + str(df_source["French Word"][i]) + "): ")
                        spellings["Sentence_w_blank"].append("")
                        spellings["Right_answer"].append(str(word.text))
                        spellings["Dist_1"].append(list_of_distractors[0]),
                        spellings["Dist_2"].append(list_of_distractors[1]),
                        spellings["Dist_3"].append(list_of_distractors[2]),
                        spellings["Explanation"].append((""))
                        spellings["Difficulty"].append((""))
                        spellings["Remediation"].append((""))

    return exo_id, spellings

exo_id = 0
exo_id, data_spelling_mcq_1 = spelling_mcq(df_3000_words, exo_id, cols)

df_spelling_mcq_1 = pd.DataFrame(data_spelling_mcq_1, columns=cols)

frames_spelling_mcq = [df_spelling_mcq_1]
df_spelling_mcq = pd.concat(frames_spelling_mcq)
df_spelling_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,14,MCQ,Learning vocabulary,Spellings,1,text,text,,8,French,...,Difference,Select the correct spelling (for Différence):,,Difference,Diference,Difffference,Differance,,,
1,14,MCQ,Learning vocabulary,Spellings,2,text,text,,13,French,...,Cleanliness,Select the correct spelling (for Propreté):,,Cleanliness,Cleanlines,Cleanlinessss,Clenliness,,,
2,14,MCQ,Learning vocabulary,Spellings,3,text,text,,44,French,...,Narrow,Select the correct spelling (for étroit):,,Narrow,Narow,Narrrrow,Nerrow,,,
3,14,MCQ,Learning vocabulary,Spellings,4,text,text,,61,French,...,Excellent,Select the correct spelling (for Excellent):,,Excellent,Excelent,Excellllent,Excellant,,,
4,14,MCQ,Learning vocabulary,Spellings,5,text,text,,80,French,...,Pleasant,Select the correct spelling (for Agréable):,,Pleasant,Pleasent,Plesant,Pleaasant,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,14,MCQ,Learning vocabulary,Spellings,212,text,text,,2975,French,...,Cashier,Select the correct spelling (for Caissier):,,Cashier,Casheir,Cashiar,Cashieur,,,
212,14,MCQ,Learning vocabulary,Spellings,213,text,text,,2982,French,...,Shareholder,Select the correct spelling (for Actionnaire):,,Shareholder,Shereholder,Shareholdar,Shareholdar,,,
213,14,MCQ,Learning vocabulary,Spellings,214,text,text,,2991,French,...,Harassment,Select the correct spelling (for Harcèlement):,,Harassment,Harasment,Harassssment,Harassmant,,,
214,14,MCQ,Learning vocabulary,Spellings,215,text,text,,2993,French,...,Veterinary,Select the correct spelling (for Vétérinaire):,,Veterinary,Veterinery,Vetarinary,Vetarinary,,,


# Creating Useful Sentences Flashcards


In [14]:
def sentence_flashcards(df_source):
    return [
        [
            "24", 
            "Flashcards", 
            "Useful_Sentences", 
            "", 
            str(exo_id), 
            "text", 
            "text", 
            df_source["ID"][i], 
            "",
            "French", 
            "English", 
            str(df_source["English"][i]), 
            "", 
            "", 
            str(df_source["French"][i]), 
            "", 
            "", 
            "", 
            "", 
            "", 
            "",
        ] 
        for exo_id, i in enumerate(range(len(df_source)))
    ]

data_s_fc_1 = sentence_flashcards(df_7000_sents_1)
df_s_fc_1 = pd.DataFrame(data_s_fc_1, columns=cols)

data_s_fc_2 = sentence_flashcards(df_7000_sents_1)
df_s_fc_2 = pd.DataFrame(data_s_fc_2, columns=cols)

data_s_fc_3 = sentence_flashcards(df_7000_sents_1)
df_s_fc_3 = pd.DataFrame(data_s_fc_3, columns=cols)

frames_s_fc = [df_s_fc_1, df_s_fc_2, df_s_fc_3]
df_s_fc = pd.concat(frames_s_fc)
df_s_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,24,Flashcards,Useful Sentences,,0,text,text,1,,French,...,The beauty of the landscape struck the travell...,,,The beauty of the landscape struck the travell...,,,,,,
1,24,Flashcards,Useful Sentences,,1,text,text,2,,French,...,Nobody knows the truth about this affair.,,,Nobody knows the truth about this affair.,,,,,,
2,24,Flashcards,Useful Sentences,,2,text,text,3,,French,...,"In a dictatorship, freedom of expression is li...",,,"In a dictatorship, freedom of expression is li...",,,,,,
3,24,Flashcards,Useful Sentences,,3,text,text,4,,French,...,"Liberty, equality, fraternity is the motto of ...",,,"Liberty, equality, fraternity is the motto of ...",,,,,,
4,24,Flashcards,Useful Sentences,,4,text,text,5,,French,...,He did not help you out of kindness.,,,He did not help you out of kindness.,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,24,Flashcards,Useful Sentences,,2995,text,text,2996,,French,...,The mechanic's bill is very high.,,,The mechanic's bill is very high.,,,,,,
2996,24,Flashcards,Useful Sentences,,2996,text,text,2997,,French,...,He quit his job because his salary was too low.,,,He quit his job because his salary was too low.,,,,,,
2997,24,Flashcards,Useful Sentences,,2997,text,text,2998,,French,...,Computer scientists find a job quickly enough.,,,Computer scientists find a job quickly enough.,,,,,,
2998,24,Flashcards,Useful Sentences,,2998,text,text,2999,,French,...,Shoemakers rapair shoes.,,,Shoemakers rapair shoes.,,,,,,


# Creating Verb Conjugation MCQ Exercises


In [15]:
def verb_conjug_mcq(df_source, exo_id, cols):

    verb_conjug = {col: [] for col in cols}

    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):

                question.append(
                    " ".join(                              
                        [
                            word.text if word.id != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences
                        ]
                    )
                )

                list_of_distractors = []
                list_of_distractors = verb_distractors(answer[b], nlp_en_stanza(answer[b]).sentences[0].words[0].upos)

                if list_of_distractors != []:
                    
                    exo_id+=1

                    verb_conjug["Exo_type_id"].append("35")
                    verb_conjug["Exo_type"].append("MCQ")
                    verb_conjug["Exo_objective"].append("Grammar")
                    verb_conjug["Exo_focus"].append("Verb_Conjugation")
                    verb_conjug["Exo_id"].append(str(exo_id))
                    verb_conjug["Source_format"].append("text")
                    verb_conjug["Target_format"].append("text")
                    verb_conjug["Source_sentence_id"].append(str(df_source["ID"][i]))
                    verb_conjug["Source_word_id"].append("")
                    verb_conjug["Source_lang"].append("French")
                    verb_conjug["Target_lang"].append("English")
                    verb_conjug["Full_sentence"].append(str(df_source["English"][i]))
                    verb_conjug["Instruction"].append("Conjugate the verb correctly:")
                    verb_conjug["Sentence_w_blank"].append(str(f"{question[b]} ({hint[b]})"))
                    verb_conjug["Right_answer"].append(str(answer[b]))
                    verb_conjug["Dist_1"].append(list_of_distractors[0]),
                    verb_conjug["Dist_2"].append(list_of_distractors[1]),
                    verb_conjug["Dist_3"].append(list_of_distractors[2]),
                    verb_conjug["Explanation"].append("")
                    verb_conjug["Difficulty"].append("")
                    verb_conjug["Remediation"].append("")

    return exo_id, verb_conjug

exo_id = 0

exo_id, data_v_conjug_mcq_1 = verb_conjug_mcq(df_7000_sents_1, exo_id, cols)
df_v_conjug_mcq_1 = pd.DataFrame(data_v_conjug_mcq_1, columns=cols)

exo_id, data_v_conjug_mcq_2 = verb_conjug_mcq(df_7000_sents_2, exo_id, cols)
df_v_conjug_mcq_2 = pd.DataFrame(data_v_conjug_mcq_2, columns=cols)

exo_id, data_v_conjug_mcq_3 = verb_conjug_mcq(df_7000_sents_3, exo_id, cols)
df_v_conjug_mcq_3 = pd.DataFrame(data_v_conjug_mcq_3, columns=cols)

frames_v_conjug_mcq = [df_v_conjug_mcq_1, df_v_conjug_mcq_2, df_v_conjug_mcq_3]
df_v_conjug_mcq = pd.concat(frames_v_conjug_mcq)
df_v_conjug_mcq


Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos type = PUNCT
Invalid upos

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,35,MCQ,Grammar,Verb Conjugation,1,text,text,40,,French,...,"Be careful, this dog is nasty.",Conjugate the verb correctly:,"... careful , this dog is nasty . (be)",Be,Am,Is,Are,,,
1,35,MCQ,Grammar,Verb Conjugation,2,text,text,103,,French,...,"Be careful, this dish is piping hot.",Conjugate the verb correctly:,"... careful , this dish is piping hot . (be)",Be,Am,Being,Were,,,
2,35,MCQ,Grammar,Verb Conjugation,3,text,text,104,,French,...,Do you prefer onions raw or cooked?,Conjugate the verb correctly:,... you prefer onions raw or cooked ? (do),Do,Done,Does,Doing,,,
3,35,MCQ,Grammar,Verb Conjugation,4,text,text,144,,French,...,Have you eaten enough?,Conjugate the verb correctly:,... you eaten enough ? (have),Have,Had,Having,Has,,,
4,35,MCQ,Grammar,Verb Conjugation,5,text,text,167,,French,...,"Go read somewhere else, the girl needs to sleep.",Conjugate the verb correctly:,"... read somewhere else , the girl needs to sl...",Go,Going,Goes,Gone,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,35,MCQ,Grammar,Verb Conjugation,364,text,text,6967,,French,...,Do you like soccer?,Conjugate the verb correctly:,... you like soccer ? (do),Do,Does,Did,Doing,,,
90,35,MCQ,Grammar,Verb Conjugation,365,text,text,6982,,French,...,Do I have to pay a fine?,Conjugate the verb correctly:,... I have to pay a fine ? (do),Do,Does,Doing,Done,,,
91,35,MCQ,Grammar,Verb Conjugation,366,text,text,6984,,French,...,Have you had unprotected sex recently?,Conjugate the verb correctly:,... you had unprotected sex recently ? (have),Have,Had,Has,Having,,,
92,35,MCQ,Grammar,Verb Conjugation,367,text,text,6993,,French,...,Do you use contraceptives?,Conjugate the verb correctly:,... you use contraceptives ? (do),Do,Did,Done,Does,,,


# Creating Verb Conjugation Cloze Test Exercises


In [16]:
def verbs_cloze_test(df_source):

    data = []
    exo_id = 0
    for i in range(len(df_source)):
        doc_en_phrase = nlp_en_stanza(str(df_source["English"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_en_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):
                exo_id += 1
                question.append(" ".join(
                    [word.text if word.id != blank[b] + 1 else "..." for word in sent.words for sent in doc_en_phrase.sentences]))
                data.append([
                    "38", 
                    "Cloze_Test", 
                    "Grammar", 
                    "Verb_Conjugation", 
                    str(exo_id), 
                    "text", 
                    "text", 
                    str(df_source["ID"][i]), 
                    "", 
                    "French", 
                    "English", 
                    str(df_source["English"][i]), 
                    "Conjugate the verb correctly:", 
                    f"{question[b]} ({hint[b]})", 
                    str(answer[b]), 
                    "", 
                    "", 
                    "", 
                    "", 
                    "", 
                    "",
                ])

    return data

data_v_ct_1 = verbs_cloze_test(df_7000_sents_1)
df_v_ct_1 = pd.DataFrame(data_v_ct_1, columns=cols)

data_v_ct_2 = verbs_cloze_test(df_7000_sents_2)
df_v_ct_2 = pd.DataFrame(data_v_ct_2, columns=cols)

data_v_ct_3 = verbs_cloze_test(df_7000_sents_3)
df_v_ct_3 = pd.DataFrame(data_v_ct_3, columns=cols)

frames_v_ct = [df_v_ct_1, df_v_ct_2, df_v_ct_3]
df_v_ct = pd.concat(frames_v_ct)
df_v_ct

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,38,Cloze_Test,Verb_Conjugation,Grammar,1,text,text,1,,French,...,The beauty of the landscape struck the travell...,Conjugate the verb correctly:,The beauty of the landscape ... the travellers...,struck,,,,,,
1,38,Cloze_Test,Verb_Conjugation,Grammar,2,text,text,2,,French,...,Nobody knows the truth about this affair.,Conjugate the verb correctly:,Nobody ... the truth about this affair . (know),knows,,,,,,
2,38,Cloze_Test,Verb_Conjugation,Grammar,3,text,text,3,,French,...,"In a dictatorship, freedom of expression is li...",Conjugate the verb correctly:,"In a dictatorship , freedom of expression ... ...",is,,,,,,
3,38,Cloze_Test,Verb_Conjugation,Grammar,4,text,text,3,,French,...,"In a dictatorship, freedom of expression is li...",Conjugate the verb correctly:,"In a dictatorship , freedom of expression is ....",limited,,,,,,
4,38,Cloze_Test,Verb_Conjugation,Grammar,5,text,text,4,,French,...,"Liberty, equality, fraternity is the motto of ...",Conjugate the verb correctly:,"Liberty , equality , fraternity ... the motto ...",is,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,38,Cloze_Test,Verb_Conjugation,Grammar,904,text,text,6998,,French,...,Stop thief!,Conjugate the verb correctly:,... thief ! (stop),Stop,,,,,,
904,38,Cloze_Test,Verb_Conjugation,Grammar,905,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I ... stolen (be),was,,,,,,
905,38,Cloze_Test,Verb_Conjugation,Grammar,906,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I was ... (steal),stolen,,,,,,
906,38,Cloze_Test,Verb_Conjugation,Grammar,907,text,text,7000,,French,...,You are breaking up,Conjugate the verb correctly:,You ... breaking up (be),are,,,,,,


# Merge All Exercise Dataframes


In [17]:
frames_en_exercises = [df_w_fc, df_spelling_mcq, df_s_fc, df_v_conjug_mcq, df_v_ct]
df_en_exercises = pd.concat(frames_en_exercises)
df_en_exercises

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,...,Beauty,,,Beauté,,,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,...,Truth,,,Verité,,,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,Freedom,,,Liberté,,,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,Brotherhood,,,Fraternité,,,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,Kindness,,,Bonté,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,38,Cloze_Test,Verb_Conjugation,Grammar,904,text,text,6998,,French,...,Stop thief!,Conjugate the verb correctly:,... thief ! (stop),Stop,,,,,,
904,38,Cloze_Test,Verb_Conjugation,Grammar,905,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I ... stolen (be),was,,,,,,
905,38,Cloze_Test,Verb_Conjugation,Grammar,906,text,text,6999,,French,...,I was stolen,Conjugate the verb correctly:,I was ... (steal),stolen,,,,,,
906,38,Cloze_Test,Verb_Conjugation,Grammar,907,text,text,7000,,French,...,You are breaking up,Conjugate the verb correctly:,You ... breaking up (be),are,,,,,,


# Extract Exercise Dataframe to an Excel File


In [18]:
df_en_exercises.to_excel("English_Exercises.xlsx", index=False)