# Import required libraries


In [1]:
import random

import pandas as pd
import stanza

random.seed(12345)

# Import all sheets from the 7000 Sentences dataset


In [2]:
df_7000_sents_1 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="3000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

df_7000_sents_2 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="6000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

df_7000_sents_3 = pd.read_excel(
    "../data/corpus/7000 Sentences Corpus With IDs.xlsx",
    sheet_name="1000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

In [3]:
df_7000_sents_1 = df_7000_sents_1.dropna().reset_index(drop = True)
df_7000_sents_2 = df_7000_sents_2.dropna().reset_index(drop = True)
df_7000_sents_3 = df_7000_sents_3.dropna().reset_index(drop = True)

In [4]:
df_7000_sents_1 = df_7000_sents_1.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()
df_7000_sents_2 = df_7000_sents_2.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()
df_7000_sents_3 = df_7000_sents_3.set_index(["ID", "French"]).apply(lambda x: x.str.split('/').explode()).reset_index()

# Import the 3000 Words dataset


In [5]:
df_3000_words = pd.read_excel(
    "../data/corpus/3000 Hindi Words Corpus With IDs.xlsx",
    sheet_name="Feuille1",
    usecols=["ID", "French Word", "Hindi Word"],
    na_values=["NA"],
)

In [6]:
df_3000_words = df_3000_words.dropna().reset_index(drop = True)

In [7]:
df_3000_words = df_3000_words.set_index(["ID", "French Word"]).apply(lambda x: x.str.split('/').explode()).reset_index()

# Download a Stanza Language Model for Hindi and French into the directory "../stanza_models"


In [8]:
stanza.download(lang='hi')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-11-22 14:17:35 INFO: Downloading default packages for language: hi (Hindi) ...
2022-11-22 14:17:38 INFO: File exists: C:\Users\Administrator\stanza_resources\hi\default.zip
2022-11-22 14:17:52 INFO: Finished downloading models and saved to C:\Users\Administrator\stanza_resources.


# Initialize a Stanza pipeline with a language model for Hindi and French

## Which is assigned to the variable 'nlp_hi' and 'nlp_fr' using the Pipeline() class


In [9]:
nlp_hi_stanza = stanza.Pipeline(lang='hi')

2022-11-22 14:17:54 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-11-22 14:18:13 INFO: Loading these models for language: hi (Hindi):
| Processor | Package |
-----------------------
| tokenize  | hdtb    |
| pos       | hdtb    |
| lemma     | hdtb    |
| depparse  | hdtb    |

2022-11-22 14:18:13 INFO: Use device: cpu
2022-11-22 14:18:13 INFO: Loading: tokenize
2022-11-22 14:18:14 INFO: Loading: pos
2022-11-22 14:18:15 INFO: Loading: lemma
2022-11-22 14:18:15 INFO: Loading: depparse
2022-11-22 14:18:16 INFO: Done loading processors!


# Set Exercise Dataset Columns

In [10]:
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Dist_1",
    "Dist_2",
    "Dist_3",
    "Propositions",
    "Right_answer_id",
    "Explanation",
    "Difficulty",
    "Remediation",
]

# Distractor Creation Functions


## Spelling Distractors


In [11]:
# exchanges two letters in a given word
def exchange_letter(a, b, text, list_of_distractors):
    if (a in text):
        list_of_distractors.append(text.replace(a, b))

    if (b in text):
        list_of_distractors.append(text.replace(b, a))

    return (list_of_distractors)

# Mistakes with similar sounding and looking vowels
def vowel_changer(word, list_of_distractors):
    if ('इ' in word.text or 'ई' in word.text):
        list_of_distractors = exchange_letter(
            'इ', 'ई', word.text, list_of_distractors)

    if ('ि' in word.text or 'ी' in word.text):
        list_of_distractors = exchange_letter(
            'ि', 'ी', word.text, list_of_distractors)

    if ('उ' in word.text or 'ऊ' in word.text):
        list_of_distractors = exchange_letter(
            'उ', 'ऊ', word.text, list_of_distractors)

    if ('ु' in word.text or 'ू' in word.text):
        list_of_distractors = exchange_letter(
            'ु', 'ू', word.text, list_of_distractors)

    if ('ए' in word.text or 'ऐ' in word.text):
        list_of_distractors = exchange_letter(
            'ए', 'ऐ', word.text, list_of_distractors)

    if ('े' in word.text or 'ै' in word.text):
        list_of_distractors = exchange_letter(
            'े', 'ै', word.text, list_of_distractors)

    if ('ओ' in word.text or 'औ' in word.text):
        list_of_distractors = exchange_letter(
            'ओ', 'औ', word.text, list_of_distractors)

    if ('ो' in word.text or 'ौ' in word.text):
        list_of_distractors = exchange_letter(
            'ो', 'ौ', word.text, list_of_distractors)

    if ('अं' in word.text or 'अँ' in word.text):
        list_of_distractors = exchange_letter(
            'अं', 'अँ', word.text, list_of_distractors)

    if ('ं' in word.text or 'ँ' in word.text):
        list_of_distractors = exchange_letter(
            'ं', 'ँ', word.text, list_of_distractors)

    if ('ॉ' in word.text or 'ाँ' in word.text):
        list_of_distractors = exchange_letter(
            'ॉ', 'ाँ', word.text, list_of_distractors)

    if ('ॉ' in word.text or 'ां' in word.text):
        list_of_distractors = exchange_letter(
            'ॉ', ' ां', word.text, list_of_distractors)

    if ('ाँ' in word.text or 'ां' in word.text):
        list_of_distractors = exchange_letter(
            'ाँ', 'ां', word.text, list_of_distractors)

    return list_of_distractors


# Mistakes with similar sounding consonents
def consonent_changer(word, list_of_distractors):
    if ('ट' in word.text or 'त' in word.text):
        list_of_distractors = exchange_letter(
            'ट', 'त', word.text, list_of_distractors)

    if ('ठ' in word.text or 'थ' in word.text):
        list_of_distractors = exchange_letter(
            'ठ', 'थ', word.text, list_of_distractors)

    if ('ड' in word.text or 'द' in word.text):
        list_of_distractors = exchange_letter(
            'ड', 'द', word.text, list_of_distractors)

    if ('ढ' in word.text or 'ध' in word.text):
        list_of_distractors = exchange_letter(
            'ढ', 'ध', word.text, list_of_distractors)

    if ('न' in word.text or 'ण' in word.text):
        list_of_distractors = exchange_letter(
            'न', 'ण', word.text, list_of_distractors)

    if ('श' in word.text or 'ष' in word.text):
        list_of_distractors = exchange_letter(
            'श', 'ष', word.text, list_of_distractors)

    if ('श' in word.text or 'स' in word.text):
        list_of_distractors = exchange_letter(
            'श', 'स', word.text, list_of_distractors)

    if ('स' in word.text or 'ष' in word.text):
        list_of_distractors = exchange_letter(
            'स', 'ष', word.text, list_of_distractors)

    return list_of_distractors

# Mistakes with similar looking letters
def letter_changer(word, list_of_distractors):
    if ('ज' in word.text or 'ज्ञ' in word.text):
        list_of_distractors = exchange_letter(
            'ज', 'ज्ञ', word.text, list_of_distractors)

    if ('क्ष' in word.text or 'श्र' in word.text):
        list_of_distractors = exchange_letter(
            'क्ष', 'श्र', word.text, list_of_distractors)

    return list_of_distractors

# Create 3 spelling distractors for a given word
def spelling_distractors(word, list_of_distractors=None):

    if list_of_distractors is None:
        list_of_distractors = []

    list_of_distractors = vowel_changer(word, list_of_distractors)
    if len(list_of_distractors) < 3:
        list_of_distractors = consonent_changer(word, list_of_distractors)
    
    if len(list_of_distractors) < 3:
        list_of_distractors = letter_changer(word, list_of_distractors)

    if len(list_of_distractors) >= 3:
        list_of_distractors = list_of_distractors[:3]
    else:
        list_of_distractors = []

    return list_of_distractors

# Creating Vocabulary Flashcards


In [12]:
def word_flashcards(df_source, exo_id = 0):
    data = []
    for i in range(len(df_source)):
        exo_id += 1
        data.append([
            "10",
            "Flashcards",
            "Learning vocabulary",
            "",
            str(exo_id),
            "text",
            "text",
            "",
            str(df_source["ID"][i]),
            "French",
            "Hindi",
            str(df_source["Hindi Word"][i]),
            "",
            "",
            str(df_source["French Word"][i]),
            "",
            "",
            "",
            str(df_source["French Word"][i]),
            "0",
            "",
            "",
            "",
        ])
    return exo_id, data

In [13]:
exo_id = 0

exo_id, data_w_fc_1 = word_flashcards(df_3000_words, exo_id)
df_w_fc_1 = pd.DataFrame(data_w_fc_1, columns=cols)

frames_w_fc = [df_w_fc_1]
df_w_fc = pd.concat(frames_w_fc)
df_w_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,2,French,...,,Verité,,,,Verité,0,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,,Fraternité,,,,Fraternité,0,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,,Bonté,,,,Bonté,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1122,10,Flashcards,Learning vocabulary,,1123,text,text,,995,French,...,,Intoxication alimentaire,,,,Intoxication alimentaire,0,,,
1123,10,Flashcards,Learning vocabulary,,1124,text,text,,996,French,...,,Crise de foie,,,,Crise de foie,0,,,
1124,10,Flashcards,Learning vocabulary,,1125,text,text,,997,French,...,,Entorse,,,,Entorse,0,,,
1125,10,Flashcards,Learning vocabulary,,1126,text,text,,998,French,...,,Bandage,,,,Bandage,0,,,


# Create Spelling MCQ Exercises


In [14]:
def spelling_mcq(df_source, exo_id, cols):
    data = []

    spellings = {col: [] for col in cols}


    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi Word"][i]))

        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                list_of_distractors = []

                if word.text not in spellings["Right_answer"]:
                    list_of_distractors = spelling_distractors(word, list_of_distractors)

                    if list_of_distractors != []:
                        list_of_distractors += [word.text]
                        random.shuffle(list_of_distractors)
                        right_answer_id = list_of_distractors.index(word.text)
                        options = "".join(f"-{dist}" for dist in list_of_distractors)
                        
                        exo_id += 1

                        spellings["Exo_type_id"].append("14")
                        spellings["Exo_type"].append("MCQ")
                        spellings["Exo_objective"].append("Learning vocabulary")
                        spellings["Exo_focus"].append("Spellings")
                        spellings["Exo_id"].append(str(exo_id))
                        spellings["Source_format"].append("text")
                        spellings["Target_format"].append("text")
                        spellings["Source_sentence_id"].append("")
                        spellings["Source_word_id"].append(str(df_source["ID"][i]))
                        spellings["Source_lang"].append("French")
                        spellings["Target_lang"].append("Hindi")
                        spellings["Full_sentence"].append(str(df_source["Hindi Word"][i]))
                        spellings["Instruction"].append("Select the correct spelling:")
                        spellings["Sentence_w_blank"].append("")
                        spellings["Right_answer"].append(str(word.text))
                        spellings["Dist_1"].append(list_of_distractors[0]),
                        spellings["Dist_2"].append(list_of_distractors[1]),
                        spellings["Dist_3"].append(list_of_distractors[2]),
                        spellings["Propositions"].append(options),
                        spellings["Right_answer_id"].append(str(right_answer_id)),
                        spellings["Explanation"].append("")
                        spellings["Difficulty"].append("")
                        spellings["Remediation"].append("")

    return exo_id, spellings

In [15]:
exo_id = 0

exo_id, data_spelling_mcq_1 = spelling_mcq(df_3000_words, exo_id, cols)
df_spelling_mcq_1 = pd.DataFrame(data_spelling_mcq_1, columns=cols)

frames_spelling_mcq = [df_spelling_mcq_1]
df_spelling_mcq = pd.concat(frames_spelling_mcq)
df_spelling_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,14,MCQ,Learning vocabulary,Spellings,1,text,text,,3,French,...,,स्वतंत्रता,स्वटंट्रटा,स्वतँत्रता,श्वतंत्रता,-स्वटंट्रटा-स्वतँत्रता-श्वतंत्रता-स्वतंत्रता,3,,,
1,14,MCQ,Learning vocabulary,Spellings,2,text,text,,3,French,...,,स्वाधीनता,स्वाधीनता,स्वाधिनता,स्वाधीनटा,-स्वाधीनता-स्वाधिनता-स्वाधीनटा-स्वाढीनता,0,,,
2,14,MCQ,Learning vocabulary,Spellings,3,text,text,,6,French,...,,दुष्टता,दूष्टता,दुष्तता,दुष्टता,-दूष्टता-दुष्तता-दुष्टता-दुष्टटा,2,,,
3,14,MCQ,Learning vocabulary,Spellings,4,text,text,,7,French,...,,सौष्ठव,सौश्ठव,सोष्ठव,सौष्ठव,-सौश्ठव-सोष्ठव-सौष्ठव-सौष्थव,2,,,
4,14,MCQ,Learning vocabulary,Spellings,5,text,text,,9,French,...,,आदर्श,आदर्स,आडर्श,आदर्ष,-आदर्स-आडर्श-आदर्ष-आदर्श,3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,14,MCQ,Learning vocabulary,Spellings,603,text,text,,992,French,...,,संक्रमण,शंक्रमण,सँक्रमण,संक्रमण,-शंक्रमण-सँक्रमण-संक्रमण-संक्रमन,2,,,
603,14,MCQ,Learning vocabulary,Spellings,604,text,text,,993,French,...,,बेहोश,बेहोष,बेहौश,बैहोश,-बेहोष-बेहौश-बैहोश-बेहोश,3,,,
604,14,MCQ,Learning vocabulary,Spellings,605,text,text,,994,French,...,,बदहज़मी,बडहज़मी,बदहज़मी,बदहज्ञ़मी,-बडहज़मी-बदहज़मी-बदहज्ञ़मी-बदहज़मि,1,,,
605,14,MCQ,Learning vocabulary,Spellings,606,text,text,,995,French,...,,फुड-पॉईज्निंग,फुड-पॉइज्निंग,फूड-पॉईज्निंग,फुड-पॉईज्नींग,-फुड-पॉइज्निंग-फूड-पॉईज्निंग-फुड-पॉईज्नींग-फुड...,3,,,


# Creating Useful Sentences Flashcards


In [16]:
def sentence_flashcards(df_source):
    data = []
    exo_id = 0
    for i in range(len(df_source)):
        if df_source["Hindi"][i] != "NaN":
            exo_id += 1
            data.append([
                "24",
                "Flashcards",
                "Useful Sentences",
                "",
                str(exo_id),
                "text",
                "text",
                str(df_source["ID"][i]),
                "",
                "French",
                "Hindi",
                str(df_source["Hindi"][i]),
                "",
                "",
                str(df_source["French"][i]),
                "",
                "",
                "",
                str(df_source["French"][i]),
                "0",
                "",
                "",
                "",
            ])
    return data

In [17]:
data_s_fc = sentence_flashcards(df_7000_sents_3)
df_s_fc = pd.DataFrame(data_s_fc, columns=cols)

df_s_fc

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,24,Flashcards,Useful Sentences,,1,text,text,6001,,French,...,,Oui,,,,Oui,0,,,
1,24,Flashcards,Useful Sentences,,2,text,text,6002,,French,...,,Non,,,,Non,0,,,
2,24,Flashcards,Useful Sentences,,3,text,text,6003,,French,...,,S'il-vous-plaît,,,,S'il-vous-plaît,0,,,
3,24,Flashcards,Useful Sentences,,4,text,text,6004,,French,...,,Merci,,,,Merci,0,,,
4,24,Flashcards,Useful Sentences,,5,text,text,6005,,French,...,,Je vous/t' en prie,,,,Je vous/t' en prie,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,24,Flashcards,Useful Sentences,,1015,text,text,6996,,French,...,,Qu'est-ce-qu'il fait chaud aujourd'hui!,,,,Qu'est-ce-qu'il fait chaud aujourd'hui!,0,,,
1015,24,Flashcards,Useful Sentences,,1016,text,text,6997,,French,...,,Il y aura du brouillard demain,,,,Il y aura du brouillard demain,0,,,
1016,24,Flashcards,Useful Sentences,,1017,text,text,6998,,French,...,,Au voleur !,,,,Au voleur !,0,,,
1017,24,Flashcards,Useful Sentences,,1018,text,text,6999,,French,...,,On m'a volé,,,,On m'a volé,0,,,


# Creating Verb Conjugation MCQ Exercises


In [18]:
def verb_conjug_mcq(df_source, exo_id, cols):
    
    verb_conjug = {col: [] for col in cols}

    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi"][i]))

        count = 0
        blank = []
        answer = []
        hint = []

        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):

                question.append(
                    " ".join([
                        word.text 
                        if word.id != blank[b] + 1
                        else "..." 
                        for word in sent.words 
                        for sent in doc_hi_phrase.sentences
                    ])
                )

                list_of_distractors = []
                list_of_distractors = spelling_distractors(nlp_hi_stanza(answer[b]), list_of_distractors)

                if list_of_distractors != []:
                    list_of_distractors += [word.text]
                    random.shuffle(list_of_distractors)
                    right_answer_id = list_of_distractors.index(word.text)
                    options = "".join(f"-{dist}" for dist in list_of_distractors)
                    
                    exo_id += 1

                    verb_conjug["Exo_type_id"].append("35")
                    verb_conjug["Exo_type"].append("MCQ")
                    verb_conjug["Exo_objective"].append("Grammar")
                    verb_conjug["Exo_focus"].append("Verb_Conjugation")
                    verb_conjug["Exo_id"].append(str(exo_id))
                    verb_conjug["Source_format"].append("text")
                    verb_conjug["Target_format"].append("text")
                    verb_conjug["Source_sentence_id"].append(
                        str(df_source["ID"][i]))
                    verb_conjug["Source_word_id"].append("")
                    verb_conjug["Source_lang"].append("French")
                    verb_conjug["Target_lang"].append("Hindi")
                    verb_conjug["Full_sentence"].append(
                        str(df_source["Hindi"][i]))
                    verb_conjug["Instruction"].append(
                        "Conjugate the verb correctly:")
                    verb_conjug["Sentence_w_blank"].append(str(f"{question[b]} ({hint[b]})"))
                    verb_conjug["Right_answer"].append(str(answer[b]))
                    verb_conjug["Dist_1"].append(list_of_distractors[0]),
                    verb_conjug["Dist_2"].append(list_of_distractors[1]),
                    verb_conjug["Dist_3"].append(list_of_distractors[2]),
                    verb_conjug["Propositions"].append(options),
                    verb_conjug["Right_answer_id"].append(str(right_answer_id)),
                    verb_conjug["Explanation"].append("")
                    verb_conjug["Difficulty"].append("")
                    verb_conjug["Remediation"].append("")

    return exo_id, verb_conjug

In [19]:
exo_id = 0

exo_id, data_v_conjug_mcq_1 = verb_conjug_mcq(df_7000_sents_1, exo_id, cols)
df_v_conjug_mcq_1 = pd.DataFrame(data_v_conjug_mcq_1, columns=cols)

exo_id, data_v_conjug_mcq_2 = verb_conjug_mcq(df_7000_sents_2, exo_id, cols)
df_v_conjug_mcq_2 = pd.DataFrame(data_v_conjug_mcq_2, columns=cols)

exo_id, data_v_conjug_mcq_3 = verb_conjug_mcq(df_7000_sents_3, exo_id, cols)
df_v_conjug_mcq_3 = pd.DataFrame(data_v_conjug_mcq_3, columns=cols)

frames_v_conjug_mcq = [df_v_conjug_mcq_1, df_v_conjug_mcq_2, df_v_conjug_mcq_3]
df_v_conjug_mcq = pd.concat(frames_v_conjug_mcq)
df_v_conjug_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,35,MCQ,Grammar,Verb_Conjugation,1,text,text,3,,French,...,स्वाधीनता मर्यादित ... है (हो),होती,होटी,है,होति,-होटी-है-होति-हौती,1,,,
1,35,MCQ,Grammar,Verb_Conjugation,2,text,text,15,,French,...,उसकी मा बहोत स्वादिष्ट पाय ... है (बना),बनाती,बनाटी,बणाती,है,-बनाटी-बणाती-है-बनाति,2,,,
2,35,MCQ,Grammar,Verb_Conjugation,3,text,text,26,,French,...,"बिल्ली बहोत उँचाई पर है , मैं वहाँतक पहोच नही ...",सकता,सकटा,षकता,शकता,-सकटा-षकता-शकता-सकता,3,,,
3,35,MCQ,Grammar,Verb_Conjugation,4,text,text,32,,French,...,वो हमेशा काले कपड़े ... है (पहेन),पहेनता,पहेणता,पहेनटा,है,-पहेणता-पहेनटा-है-पहैनता,2,,,
4,35,MCQ,Grammar,Verb_Conjugation,5,text,text,41,,French,...,दयालु होता है जब उसे तुम्हारी ज़रूरत ... है (हो),होती,होति,होटी,हौती,-होति-होटी-हौती-है,3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,35,MCQ,Grammar,Verb_Conjugation,2106,text,text,6957,,French,...,मुझे लगता है हम यहाँ बंद कर ... चाहिए लगता है ...,देना,देणा,दैना,डेना,-देणा-दैना-डेना-है,3,,,
219,35,MCQ,Grammar,Verb_Conjugation,2107,text,text,6966,,French,...,एक फुटबॉल मैच के लिए ... के बारे में क्या ? (जा),जाने,?,ज्ञाने,जाणे,-?-ज्ञाने-जाणे-जानै,0,,,
220,35,MCQ,Grammar,Verb_Conjugation,2108,text,text,6970,,French,...,आप मेरा एक काम कर ... हैं ? (सक),सकते,शकते,सकटे,?,-शकते-सकटे-?-सकतै,2,,,
221,35,MCQ,Grammar,Verb_Conjugation,2109,text,text,6981,,French,...,मैं अपने वकील से बात कर ... हैं ? (सक),सकते,सकटे,शकते,सकतै,-सकटे-शकते-सकतै-?,3,,,


# Creating Verb Conjugation Cloze Test Exercises


In [20]:
def verb_conjug_cloze_test(df_source, exo_id):
    data = []
    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                if word.upos in ["VERB", "AUX"] and word.text != word.lemma:
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            question = []

            for b in range(len(blank)):
                exo_id += 1
                question.append(
                    " ".join([
                        word.text 
                        if word.id != blank[b] + 1 
                        else "..." 
                        for word in sent.words 
                        for sent in doc_hi_phrase.sentences
                    ])
                )
                data.append([
                    "38", 
                    "Cloze_Test", 
                    "Grammar", 
                    "Verb_Conjugation", 
                    str(exo_id), 
                    "text", 
                    "text", 
                    str(df_source["ID"][i]), 
                    "", 
                    "French", 
                    "Hindi", 
                    str(df_source["Hindi"][i]), 
                    "Conjugate the verb correctly:", 
                    f"{question[b]} ({hint[b]})", 
                    str(answer[b]), 
                    "", 
                    "", 
                    "", 
                    str(answer[b]),
                    "0", 
                    "", 
                    "", 
                    "",
                ])

    return exo_id, data

In [21]:
exo_id = 0

exo_id, data_v_conjug_ct_1 = verb_conjug_cloze_test(df_7000_sents_1, exo_id)
df_v_conjug_ct_1 = pd.DataFrame(data_v_conjug_ct_1, columns=cols)

exo_id, data_v_conjug_ct_2 = verb_conjug_cloze_test(df_7000_sents_2, exo_id)
df_v_conjug_ct_2 = pd.DataFrame(data_v_conjug_ct_2, columns=cols)

exo_id, data_v_conjug_ct_3 = verb_conjug_cloze_test(df_7000_sents_3, exo_id)
df_v_conjug_ct_3 = pd.DataFrame(data_v_conjug_ct_3, columns=cols)

frames_v_conjug_ct = [df_v_conjug_ct_1, df_v_conjug_ct_2, df_v_conjug_ct_3]
df_v_conjug_ct = pd.concat(frames_v_conjug_ct)
df_v_conjug_ct

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,38,Cloze_Test,Grammar,Verb_Conjugation,1,text,text,3,,French,...,स्वाधीनता मर्यादित ... है (हो),होती,,,,होती,0,,,
1,38,Cloze_Test,Grammar,Verb_Conjugation,2,text,text,4,,French,...,"स्वतंत्रता , समता और बंधुभाव ये फ्रेंच रिपूब्ल...",थे,,,,थे,0,,,
2,38,Cloze_Test,Grammar,Verb_Conjugation,3,text,text,5,,French,...,उसने आपको दया की भावना से मदद नही ... (कर),की,,,,की,0,,,
3,38,Cloze_Test,Grammar,Verb_Conjugation,4,text,text,6,,French,...,उसकी दुष्टता की कोई सीमा नही ... (था),थी,,,,थी,0,,,
4,38,Cloze_Test,Grammar,Verb_Conjugation,5,text,text,7,,French,...,उसके लालित्य और सौष्ठव से सभा प्रभावित हो ... ...,गयी,,,,गयी,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,38,Cloze_Test,Grammar,Verb_Conjugation,9571,text,text,6995,,French,...,इंटरनेट का काम नहीं ... (कर),करता,,,,करता,0,,,
1072,38,Cloze_Test,Grammar,Verb_Conjugation,9572,text,text,6997,,French,...,यह धूमिल कल ... (हो),होगा,,,,होगा,0,,,
1073,38,Cloze_Test,Grammar,Verb_Conjugation,9573,text,text,6999,,French,...,मैं चोरी हो ... था (जा),गया,,,,गया,0,,,
1074,38,Cloze_Test,Grammar,Verb_Conjugation,9574,text,text,7000,,French,...,आप तोड़ ... हैं (रह),रहे,,,,रहे,0,,,


# Merge All Exercise Dataframes


In [22]:
frames_hi_exercises = [df_w_fc, df_spelling_mcq, df_s_fc, df_v_conjug_mcq, df_v_conjug_ct]
df_hi_exercises = pd.concat(frames_hi_exercises)
df_hi_exercises

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Sentence_w_blank,Right_answer,Dist_1,Dist_2,Dist_3,Propositions,Right_answer_id,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,2,French,...,,Verité,,,,Verité,0,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,,Liberté,,,,Liberté,0,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,,Fraternité,,,,Fraternité,0,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,,Bonté,,,,Bonté,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,38,Cloze_Test,Grammar,Verb_Conjugation,9571,text,text,6995,,French,...,इंटरनेट का काम नहीं ... (कर),करता,,,,करता,0,,,
1072,38,Cloze_Test,Grammar,Verb_Conjugation,9572,text,text,6997,,French,...,यह धूमिल कल ... (हो),होगा,,,,होगा,0,,,
1073,38,Cloze_Test,Grammar,Verb_Conjugation,9573,text,text,6999,,French,...,मैं चोरी हो ... था (जा),गया,,,,गया,0,,,
1074,38,Cloze_Test,Grammar,Verb_Conjugation,9574,text,text,7000,,French,...,आप तोड़ ... हैं (रह),रहे,,,,रहे,0,,,


# Extract Exercise Dataframe to an Excel File


In [23]:
df_hi_exercises.to_excel("Hindi_Exercises.xlsx", index=False)