# Import required libraries


In [1]:
import random
import re
from pprint import pprint

import pandas as pd
import requests
import stanza


# Import all sheets from the 7000 Sentences dataset


In [2]:
df_7000_sents_1 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="3000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

df_7000_sents_2 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="6000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

df_7000_sents_3 = pd.read_excel(
    "../7000 Sentences Corpus With IDs.xlsx",
    sheet_name="1000",
    usecols=["ID", "French", "Hindi"],
    na_values=['NA'],
)

# Import the 3000 Words dataset


In [3]:
df_3000_words = pd.read_excel(
    "../3000 Hindi Words Corpus With IDs.xlsx",
    sheet_name="Feuille1",
    usecols=["ID", "French Word", "Hindi Word"],
    na_values=["NA"],
)

# df_6000_words = pd.read_excel(
#     "../6000 Hindi Words Corpus With IDs.xlsx",
#     usecols=["ID", "French Word", "Hindi Word"],
#     na_values=["NA"],
# )

# Download a Stanza Language Model for Hindi and French into the directory "../stanza_models"


In [4]:
stanza.download(lang='hi', model_dir='../stanza_models')
# stanza.download(lang='fr', model_dir='../stanza_models')

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-09-28 17:57:12 INFO: Downloading default packages for language: hi (Hindi) ...


Downloading https://huggingface.co/stanfordnlp/stanza-hi/resolve/v1.4.1/models/default.zip:   0%|          | 0…

2022-09-28 17:57:29 INFO: Finished downloading models and saved to ../stanza_models.


# Initialize a Stanza pipeline with a language model for Hindi and French

## Which is assigned to the variable 'nlp_hi' and 'nlp_fr' using the Pipeline() class


In [5]:
nlp_hi_stanza = stanza.Pipeline(lang='hi', dir='../stanza_models')
# nlp_fr_stanza = stanza.Pipeline(lang='fr', dir='../stanza_models')

2022-09-28 17:57:30 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-09-28 17:57:30 INFO: Loading these models for language: hi (Hindi):
| Processor | Package |
-----------------------
| tokenize  | hdtb    |
| pos       | hdtb    |
| lemma     | hdtb    |
| depparse  | hdtb    |

2022-09-28 17:57:30 INFO: Use device: cpu
2022-09-28 17:57:30 INFO: Loading: tokenize
2022-09-28 17:57:30 INFO: Loading: pos
2022-09-28 17:57:31 INFO: Loading: lemma
2022-09-28 17:57:31 INFO: Loading: depparse
2022-09-28 17:57:31 INFO: Done loading processors!


# Distractor Creation Functions


## Spelling Distractors


In [6]:
# exchanges two letters in a given word
def exchange_letter(a, b, text, list_of_distractors):
    if (a in text):
        list_of_distractors.append(text.replace(a, b))

    if (b in text):
        list_of_distractors.append(text.replace(b, a))

    return (list_of_distractors)

# Mistakes with similar sounding and looking vowels
def vowel_changer(word, list_of_distractors):
    if ('इ' in word.text or 'ई' in word.text):
        list_of_distractors = exchange_letter(
            'इ', 'ई', word.text, list_of_distractors)

    if ('ि' in word.text or 'ी' in word.text):
        list_of_distractors = exchange_letter(
            'ि', 'ी', word.text, list_of_distractors)

    if ('उ' in word.text or 'ऊ' in word.text):
        list_of_distractors = exchange_letter(
            'उ', 'ऊ', word.text, list_of_distractors)

    if ('ु' in word.text or 'ू' in word.text):
        list_of_distractors = exchange_letter(
            'ु', 'ू', word.text, list_of_distractors)

    if ('ए' in word.text or 'ऐ' in word.text):
        list_of_distractors = exchange_letter(
            'ए', 'ऐ', word.text, list_of_distractors)

    if ('े' in word.text or 'ै' in word.text):
        list_of_distractors = exchange_letter(
            'े', 'ै', word.text, list_of_distractors)

    if ('ओ' in word.text or 'औ' in word.text):
        list_of_distractors = exchange_letter(
            'ओ', 'औ', word.text, list_of_distractors)

    if ('ो' in word.text or 'ौ' in word.text):
        list_of_distractors = exchange_letter(
            'ो', 'ौ', word.text, list_of_distractors)

    if ('अं' in word.text or 'अँ' in word.text):
        list_of_distractors = exchange_letter(
            'अं', 'अँ', word.text, list_of_distractors)

    if ('ं' in word.text or 'ँ' in word.text):
        list_of_distractors = exchange_letter(
            'ं', 'ँ', word.text, list_of_distractors)

    if ('ॉ' in word.text or 'ाँ' in word.text):
        list_of_distractors = exchange_letter(
            'ॉ', 'ाँ', word.text, list_of_distractors)

    if ('ॉ' in word.text or 'ां' in word.text):
        list_of_distractors = exchange_letter(
            'ॉ', ' ां', word.text, list_of_distractors)

    if ('ाँ' in word.text or 'ां' in word.text):
        list_of_distractors = exchange_letter(
            'ाँ', 'ां', word.text, list_of_distractors)

    return list_of_distractors


# Mistakes with similar sounding consonents
def consonent_changer(word, list_of_distractors):
    if ('ट' in word.text or 'त' in word.text):
        list_of_distractors = exchange_letter(
            'ट', 'त', word.text, list_of_distractors)

    if ('ठ' in word.text or 'थ' in word.text):
        list_of_distractors = exchange_letter(
            'ठ', 'थ', word.text, list_of_distractors)

    if ('ड' in word.text or 'द' in word.text):
        list_of_distractors = exchange_letter(
            'ड', 'द', word.text, list_of_distractors)

    if ('ढ' in word.text or 'ध' in word.text):
        list_of_distractors = exchange_letter(
            'ढ', 'ध', word.text, list_of_distractors)

    if ('न' in word.text or 'ण' in word.text):
        list_of_distractors = exchange_letter(
            'न', 'ण', word.text, list_of_distractors)

    if ('श' in word.text or 'ष' in word.text):
        list_of_distractors = exchange_letter(
            'श', 'ष', word.text, list_of_distractors)

    if ('श' in word.text or 'स' in word.text):
        list_of_distractors = exchange_letter(
            'श', 'स', word.text, list_of_distractors)

    if ('स' in word.text or 'ष' in word.text):
        list_of_distractors = exchange_letter(
            'स', 'ष', word.text, list_of_distractors)

    return list_of_distractors

# Mistakes with similar looking letters
def letter_changer(word, list_of_distractors):
    if ('ज' in word.text or 'ज्ञ' in word.text):
        list_of_distractors = exchange_letter(
            'ज', 'ज्ञ', word.text, list_of_distractors)

    if ('क्ष' in word.text or 'श्र' in word.text):
        list_of_distractors = exchange_letter(
            'क्ष', 'श्र', word.text, list_of_distractors)

    return list_of_distractors

# Create 3 spelling distractors for a given word
def spelling_distractors(word, list_of_distractors = []):
    list_of_distractors = vowel_changer(word, list_of_distractors)
    if len(list_of_distractors) < 3:
        list_of_distractors = consonent_changer(word, list_of_distractors)
        if len(list_of_distractors) < 3:
            list_of_distractors = letter_changer(word, list_of_distractors)

    if len(list_of_distractors) >= 3:
        list_of_distractors = list_of_distractors[:3]
    else:
        list_of_distractors = []

    return list_of_distractors


# Creating Vocabulary Flashcards


In [7]:
def word_flashcards(df_source, id = 0):
    data = []
    for i in range(len(df_source)):
        if df_source["Hindi Word"][i] != "NA":
            id += 1
            data.append([
                "10",
                "Flashcards",
                "Learning vocabulary",
                "",
                str(id),
                "text",
                "text",
                "",
                df_source["ID"][i],
                "French",
                "Hindi",
                str(df_source["French Word"][i]),
                "",
                "",
                str(df_source["Hindi Word"][i]),
                "",
                "",
                "",
                "",
            ])
    return id, data


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation",
]

id = 0

id, data_w_fc_1 = word_flashcards(df_3000_words, id)
df_w_fc_1 = pd.DataFrame(data_w_fc_1, columns=cols)

frames_w_fc = [df_w_fc_1]
df_w_fc = pd.concat(frames_w_fc)
df_w_fc


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,Hindi,Beauté,,,,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,Hindi,Verité,,,सच,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,Hindi,Liberté,,,स्वतंत्रता/ स्वाधीनता,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,Hindi,Fraternité,,,भाईचारा,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,Hindi,Bonté,,,दया,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,10,Flashcards,Learning vocabulary,,2996,text,text,,2996,French,Hindi,Plombier,,,,,,,
2996,10,Flashcards,Learning vocabulary,,2997,text,text,,2997,French,Hindi,Garagiste,,,,,,,
2997,10,Flashcards,Learning vocabulary,,2998,text,text,,2998,French,Hindi,Démissionner,,,,,,,
2998,10,Flashcards,Learning vocabulary,,2999,text,text,,2999,French,Hindi,Informaticien,,,,,,,


# Create Spelling MCQ Exercises


In [8]:
def spelling_mcq(df_source, id):
    data = []

    spellings = {
        "Exo_type_id": [],
        "Exo_type": [],
        "Exo_objective": [],
        "Exo_focus": [],
        "Exo_id": [],
        "Source_format": [],
        "Target_format": [],
        "Source_sentence_id": [],
        "Source_word_id": [],
        "Source_lang": [],
        "Target_lang": [],
        "Full_sentence": [],
        "Instruction": [],
        "Sentence_w_blank": [],
        "Right_answer": [],
        "Options": [],
        "Explanation": [],
        "Difficulty": [],
        "Remediation": [],
    }


    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi Word"][i]))

        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                list_of_distractors = []

                if word.text not in spellings["Right_answer"]:
                    list_of_distractors = spelling_distractors(word, list_of_distractors)

                    if list_of_distractors != []:
                        list_of_distractors += [word.text]
                        random.shuffle(list_of_distractors)

                        options = ""
                        for dist in list_of_distractors:
                            options +="-" + dist
                        
                        id+=1
                        spellings["Exo_type_id"].append(str("14"))
                        spellings["Exo_type"].append(str("MCQ"))
                        spellings["Exo_objective"].append(str("Learning vocabulary"))
                        spellings["Exo_focus"].append(str("Spellings"))
                        spellings["Exo_id"].append( str(id))
                        spellings["Source_format"].append(str("text"))
                        spellings["Target_format"].append(str("text"))
                        spellings["Source_sentence_id"].append(str(""))
                        spellings["Source_word_id"].append(str(df_source["ID"][i]))
                        spellings["Source_lang"].append(str("French"))
                        spellings["Target_lang"].append(str("Hindi"))
                        spellings["Full_sentence"].append(str(df_source["French Word"][i]))
                        spellings["Instruction"].append(str("Select the correct spelling:"))
                        spellings["Sentence_w_blank"].append(str(""))
                        spellings["Right_answer"].append(str(word.text))
                        spellings["Options"].append(options),
                        spellings["Explanation"].append(str(""))
                        spellings["Difficulty"].append(str(""))
                        spellings["Remediation"].append(str(""))

    return id, spellings


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

id = 0
id, data_spelling_mcq_1 = spelling_mcq(df_3000_words, id)
df_spelling_mcq_1 = pd.DataFrame(data_spelling_mcq_1, columns=cols)

frames_spelling_mcq = [df_spelling_mcq_1]
df_spelling_mcq = pd.concat(frames_spelling_mcq)
df_spelling_mcq

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,14,MCQ,Learning vocabulary,Spellings,1,text,text,,3,French,Hindi,Liberté,Select the correct spelling:,,स्वतंत्रता/,-स्वतंत्रता/-श्वतंत्रता/-स्वतँत्रता/-स्वटंट्रटा/,,,
1,14,MCQ,Learning vocabulary,Spellings,2,text,text,,3,French,Hindi,Liberté,Select the correct spelling:,,स्वाधीनता,-स्वाधीनता-स्वाधीनटा-स्वाढीनता-स्वाधिनता,,,
2,14,MCQ,Learning vocabulary,Spellings,3,text,text,,6,French,Hindi,Méchanceté,Select the correct spelling:,,दुष्टता,-दुष्टटा-दूष्टता-दुष्टता-दुष्तता,,,
3,14,MCQ,Learning vocabulary,Spellings,4,text,text,,7,French,Hindi,Elégance,Select the correct spelling:,,लालित्य/सौष्ठव,-लालीत्य/सौष्ठव-लालिट्य/सौष्ठव-लालित्य/सौष्ठव-...,,,
4,14,MCQ,Learning vocabulary,Spellings,5,text,text,,9,French,Hindi,Idéal,Select the correct spelling:,,आदर्श,-आडर्श-आदर्श-आदर्स-आदर्ष,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,14,MCQ,Learning vocabulary,Spellings,603,text,text,,992,French,Hindi,Infection,Select the correct spelling:,,संक्रमण,-सँक्रमण-संक्रमण-शंक्रमण-संक्रमन,,,
603,14,MCQ,Learning vocabulary,Spellings,604,text,text,,993,French,Hindi,S'évanouir,Select the correct spelling:,,बेहोश,-बेहोष-बेहोश-बैहोश-बेहौश,,,
604,14,MCQ,Learning vocabulary,Spellings,605,text,text,,994,French,Hindi,Indigestion,Select the correct spelling:,,बदहज़मी,-बदहज्ञ़मी-बदहज़मी-बडहज़मी-बदहज़मि,,,
605,14,MCQ,Learning vocabulary,Spellings,606,text,text,,995,French,Hindi,Intoxication alimentaire,Select the correct spelling:,,फुड-पॉईज्निंग,-फूड-पॉईज्निंग-फुड-पॉईज्निंग-फुड-पॉईज्नींग-फुड...,,,


# Creating Useful Sentences Flashcards


In [9]:
def sentence_flashcards(df_source):
    data = []
    id = 0
    for i in range(len(df_source)):
        if df_source["Hindi"][i] != "NaN":
            id += 1
            data.append([
                "24",
                "Flashcards",
                "Useful sentence",
                "",
                str(id),
                "text",
                "text",
                df_source["ID"][i],
                "",
                "French",
                "Hindi",
                str(df_source["French"][i]),
                "",
                str(df_source["French"][i]),
                str(df_source["Hindi"][i]),
                "",
                "",
                "",
                ""
            ])
    return data


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

data_s_fc_3 = sentence_flashcards(df_7000_sents_3)
df_s_fc_3 = pd.DataFrame(data_s_fc_3, columns=cols)

frames_s_fc = [df_s_fc_3]
df_s_fc = pd.concat(frames_s_fc)
df_s_fc


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,24,Flashcards,Useful sentence,,1,text,text,6001,,French,Hindi,Oui,,Oui,हां,,,,
1,24,Flashcards,Useful sentence,,2,text,text,6002,,French,Hindi,Non,,Non,नहीं,,,,
2,24,Flashcards,Useful sentence,,3,text,text,6003,,French,Hindi,S'il-vous-plaît,,S'il-vous-plaît,कृपया,,,,
3,24,Flashcards,Useful sentence,,4,text,text,6004,,French,Hindi,Merci,,Merci,"धन्यवाद, धन्यवाद",,,,
4,24,Flashcards,Useful sentence,,5,text,text,6005,,French,Hindi,Je vous/t' en prie,,Je vous/t' en prie,कोई बात नहीं.,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,24,Flashcards,Useful sentence,,996,text,text,6996,,French,Hindi,Qu'est-ce-qu'il fait chaud aujourd'hui!,,Qu'est-ce-qu'il fait chaud aujourd'hui!,यह गर्म आज नहीं है!,,,,
996,24,Flashcards,Useful sentence,,997,text,text,6997,,French,Hindi,Il y aura du brouillard demain,,Il y aura du brouillard demain,यह धूमिल कल होगा,,,,
997,24,Flashcards,Useful sentence,,998,text,text,6998,,French,Hindi,Au voleur !,,Au voleur !,चोर बंद करो!,,,,
998,24,Flashcards,Useful sentence,,999,text,text,6999,,French,Hindi,On m'a volé,,On m'a volé,मैं चोरी हो गया था,,,,


# Creating Verb Conjugation MCQ Exercises


In [10]:
def verb_conjug_mcq(df_source, id):
    verb_conjug = {
        "Exo_type_id": [],
        "Exo_type": [],
        "Exo_objective": [],
        "Exo_focus": [],
        "Exo_id": [],
        "Source_format": [],
        "Target_format": [],
        "Source_sentence_id": [],
        "Source_word_id": [],
        "Source_lang": [],
        "Target_lang": [],
        "Full_sentence": [],
        "Instruction": [],
        "Sentence_w_blank": [],
        "Right_answer": [],
        "Options": [],
        "Explanation": [],
        "Difficulty": [],
        "Remediation": [],
    }
    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        question = []

        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                if ((word.upos == "VERB" or word.upos == "AUX") and word.text != word.lemma):
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            for b in range(len(blank)):

                question.append(
                    " ".join(
                        [word.text if word.id != blank[b] +
                         1 else "..." for word in sent.words for sent in doc_hi_phrase.sentences]
                    )
                )

                list_of_distractors = []
                list_of_distractors = spelling_distractors(
                    nlp_hi_stanza(answer[b]), list_of_distractors)

                if list_of_distractors != []:
                    list_of_distractors += [word.text]
                    random.shuffle(list_of_distractors)

                    options = ""
                    for dist in list_of_distractors:
                        options +="-" + dist

                    id+=1

                    verb_conjug["Exo_type_id"].append(str("35"))
                    verb_conjug["Exo_type"].append(str("MCQ"))
                    verb_conjug["Exo_objective"].append(str("Grammar"))
                    verb_conjug["Exo_focus"].append(
                        str("Verb Conjugation"))
                    verb_conjug["Exo_id"].append(str(id))
                    verb_conjug["Source_format"].append(str("text"))
                    verb_conjug["Target_format"].append(str("text"))
                    verb_conjug["Source_sentence_id"].append(
                        str(df_source["ID"][i]))
                    verb_conjug["Source_word_id"].append(str(""))
                    verb_conjug["Source_lang"].append(str("French"))
                    verb_conjug["Target_lang"].append(str("Hindi"))
                    verb_conjug["Full_sentence"].append(
                        str(df_source["French"][i]))
                    verb_conjug["Instruction"].append(
                        str("Conjugate the verb correctly:"))
                    verb_conjug["Sentence_w_blank"].append(
                        str(question[b] + " (" + hint[b] + ")"))
                    verb_conjug["Right_answer"].append(str(answer[b]))
                    verb_conjug["Options"].append(options),
                    verb_conjug["Explanation"].append(str(""))
                    verb_conjug["Difficulty"].append(str(""))
                    verb_conjug["Remediation"].append(str(""))

    return id, verb_conjug


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "Options",
    "Explanation",
    "Difficulty",
    "Remediation"
]

id = 0

id, data_v_conjug_mcq_1 = verb_conjug_mcq(df_7000_sents_1, id)
df_v_conjug_mcq_1 = pd.DataFrame(data_v_conjug_mcq_1, columns=cols)

id, data_v_conjug_mcq_2 = verb_conjug_mcq(df_7000_sents_2, id)
df_v_conjug_mcq_2 = pd.DataFrame(data_v_conjug_mcq_2, columns=cols)

id, data_v_conjug_mcq_3 = verb_conjug_mcq(df_7000_sents_3, id)
df_v_conjug_mcq_3 = pd.DataFrame(data_v_conjug_mcq_3, columns=cols)

frames_v_conjug_mcq = [df_v_conjug_mcq_1, df_v_conjug_mcq_2, df_v_conjug_mcq_3]
df_v_conjug_mcq = pd.concat(frames_v_conjug_mcq)
df_v_conjug_mcq


Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,Target_lang,Full_sentence,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation
0,35,MCQ,Grammar,Verb Conjugation,1,text,text,3,,French,Hindi,"Dans une dictature, la liberté d'expression es...",Conjugate the verb correctly:,तानाशाही मे अभिव्यक्ति की स्वतंत्रता/ स्वाधीनत...,होती,-होति-है-होटी-हौती,,,
1,35,MCQ,Grammar,Verb Conjugation,2,text,text,15,,French,Hindi,Sa mère fait de très bonnes tartes,Conjugate the verb correctly:,उसकी मा बहोत स्वादिष्ट पाय ... है (बना),बनाती,-बनाति-बनाटी-बणाती-है,,,
2,35,MCQ,Grammar,Verb Conjugation,3,text,text,26,,French,Hindi,"Le chat est trop haut, je n'arrive pas à l'att...",Conjugate the verb correctly:,"बिल्ली बहोत उँचाई पर है , मैं वहाँतक पहोच नही ...",सकता,-सकटा-षकता-सकता-शकता,,,
3,35,MCQ,Grammar,Verb Conjugation,4,text,text,30,,French,Hindi,"C'est un petit pas pour l'homme, mais un grand...",Conjugate the verb correctly:,एक मनुष्य के लिए ये भलेहि एक छोटा कदम हो पर मा...,जाती,-ज्ञाती-जाति-जाटी-है,,,
4,35,MCQ,Grammar,Verb Conjugation,5,text,text,32,,French,Hindi,Il s'habille toujours en noir,Conjugate the verb correctly:,वो हमेशा काले कपड़े ... है (पहेन),पहेनता,-पहेनटा-है-पहैनता-पहेणता,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,35,MCQ,Grammar,Verb Conjugation,2063,text,text,6957,,French,Hindi,Je pense qu'on devrait en rester là,Conjugate the verb correctly:,मुझे लगता है हम यहाँ बंद कर ... चाहिए लगता है ...,देना,-है-दैना-डेना-देणा,,,
220,35,MCQ,Grammar,Verb Conjugation,2064,text,text,6966,,French,Hindi,ça te dirait d'aller à un match de football?,Conjugate the verb correctly:,एक फुटबॉल मैच के लिए ... के बारे में क्या ? (जा),जाने,-जानै-जाणे-?-ज्ञाने,,,
221,35,MCQ,Grammar,Verb Conjugation,2065,text,text,6970,,French,Hindi,Est-ce que tu pourrais me rendre un service?,Conjugate the verb correctly:,आप मेरा एक काम कर ... हैं ? (सक),सकते,-?-शकते-सकटे-सकतै,,,
222,35,MCQ,Grammar,Verb Conjugation,2066,text,text,6981,,French,Hindi,Est-ce que je peux parler à mon avocat?,Conjugate the verb correctly:,मैं अपने वकील से बात कर ... हैं ? (सक),सकते,-?-सकटे-शकते-सकतै,,,


# Creating Verb Conjugation Cloze Test Exercises


In [11]:
def verb_conjug_cloze_test(df_source, id):
    data = []
    for i in range(len(df_source)):
        doc_hi_phrase = nlp_hi_stanza(str(df_source["Hindi"][i]))

        count = 0
        blank = []
        answer = []
        hint = []
        question = []

        for sent in doc_hi_phrase.sentences:
            for word in sent.words:
                if ((word.upos == "VERB" or word.upos == "AUX") and word.text != word.lemma):
                    blank.append(count)
                    answer.append(word.text)
                    hint.append(word.lemma)

                count += 1

        if len(blank):
            for b in range(0, len(blank)):
                id += 1
                question.append(" ".join(
                    [word.text if word.id != blank[b] + 1 else "..." for word in sent.words for sent in doc_hi_phrase.sentences]))
                data.append([
                    "38",
                    "Cloze_Test",
                    "Verb_Conjugation",
                    "Grammar",
                    str(id),
                    "text",
                    "text",
                    str(df_source["ID"][i]),
                    "",
                    "French",
                    "Hindi",
                    str(df_source["Hindi"][i]),
                    "Conjugate the verb correctly:",
                    question[b] + " (" + hint[b] + ")",
                    str(answer[b]),
                    "",
                    "",
                    "",
                    "",
                    "",
                    ""
                ])
    return id, data


# add exercise to dataset
cols = [
    "Exo_type_id",
    "Exo_type",
    "Exo_objective",
    "Exo_focus",
    "Exo_id",
    "Source_format",
    "Target_format",
    "Source_sentence_id",
    "Source_word_id",
    "Source_lang",
    "Target_lang",
    "Full_sentence",
    "Instruction",
    "Sentence_w_blank",
    "Right_answer",
    "dist_1",
    "dist_2",
    "dist_3",
    "Explanation",
    "Difficulty",
    "Remediation"
]

id = 0

id, data_v_conjug_ct_1 = verb_conjug_cloze_test(df_7000_sents_1, id)
df_v_conjug_ct_1 = pd.DataFrame(data_v_conjug_ct_1, columns=cols)

id, data_v_conjug_ct_2 = verb_conjug_cloze_test(df_7000_sents_2, id)
df_v_conjug_ct_2 = pd.DataFrame(data_v_conjug_ct_2, columns=cols)

id, data_v_conjug_ct_3 = verb_conjug_cloze_test(df_7000_sents_3, id)
df_v_conjug_ct_3 = pd.DataFrame(data_v_conjug_ct_3, columns=cols)

frames_v_conjug_ct = [df_v_conjug_ct_1, df_v_conjug_ct_2, df_v_conjug_ct_3]
df_v_conjug_ct = pd.concat(frames_v_conjug_ct)
df_v_conjug_ct

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Full_sentence,Instruction,Sentence_w_blank,Right_answer,dist_1,dist_2,dist_3,Explanation,Difficulty,Remediation
0,38,Cloze_Test,Verb_Conjugation,Grammar,1,text,text,3,,French,...,तानाशाही मे अभिव्यक्ति की स्वतंत्रता/ स्वाधीनत...,Conjugate the verb correctly:,तानाशाही मे अभिव्यक्ति की स्वतंत्रता/ स्वाधीनत...,होती,,,,,,
1,38,Cloze_Test,Verb_Conjugation,Grammar,2,text,text,4,,French,...,"स्वतंत्रता, समता और बंधुभाव ये फ्रेंच रिपूब्ली...",Conjugate the verb correctly:,"स्वतंत्रता , समता और बंधुभाव ये फ्रेंच रिपूब्ल...",थे,,,,,,
2,38,Cloze_Test,Verb_Conjugation,Grammar,3,text,text,5,,French,...,उसने आपको दया की भावना से मदद नही की,Conjugate the verb correctly:,उसने आपको दया की भावना से मदद नही ... (कर),की,,,,,,
3,38,Cloze_Test,Verb_Conjugation,Grammar,4,text,text,6,,French,...,उसकी दुष्टता की कोई सीमा नही थी,Conjugate the verb correctly:,उसकी दुष्टता की कोई सीमा नही ... (था),थी,,,,,,
4,38,Cloze_Test,Verb_Conjugation,Grammar,5,text,text,7,,French,...,उसके लालित्य और सौष्ठव से सभा प्रभावित हो गयी,Conjugate the verb correctly:,उसके लालित्य और सौष्ठव से सभा प्रभावित हो ... ...,गयी,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083,38,Cloze_Test,Verb_Conjugation,Grammar,9596,text,text,6995,,French,...,इंटरनेट का काम नहीं करता,Conjugate the verb correctly:,इंटरनेट का काम नहीं ... (कर),करता,,,,,,
1084,38,Cloze_Test,Verb_Conjugation,Grammar,9597,text,text,6997,,French,...,यह धूमिल कल होगा,Conjugate the verb correctly:,यह धूमिल कल ... (हो),होगा,,,,,,
1085,38,Cloze_Test,Verb_Conjugation,Grammar,9598,text,text,6999,,French,...,मैं चोरी हो गया था,Conjugate the verb correctly:,मैं चोरी हो ... था (जा),गया,,,,,,
1086,38,Cloze_Test,Verb_Conjugation,Grammar,9599,text,text,7000,,French,...,आप तोड़ रहे हैं,Conjugate the verb correctly:,आप तोड़ ... हैं (रह),रहे,,,,,,


# Merge All Exercise Dataframes


In [12]:
frames_hi_exercises = [df_w_fc, df_spelling_mcq, df_s_fc, df_v_conjug_mcq, df_v_conjug_ct]
df_hi_exercises = pd.concat(frames_hi_exercises)
df_hi_exercises

Unnamed: 0,Exo_type_id,Exo_type,Exo_objective,Exo_focus,Exo_id,Source_format,Target_format,Source_sentence_id,Source_word_id,Source_lang,...,Instruction,Sentence_w_blank,Right_answer,Options,Explanation,Difficulty,Remediation,dist_1,dist_2,dist_3
0,10,Flashcards,Learning vocabulary,,1,text,text,,1,French,...,,,,,,,,,,
1,10,Flashcards,Learning vocabulary,,2,text,text,,2,French,...,,,सच,,,,,,,
2,10,Flashcards,Learning vocabulary,,3,text,text,,3,French,...,,,स्वतंत्रता/ स्वाधीनता,,,,,,,
3,10,Flashcards,Learning vocabulary,,4,text,text,,4,French,...,,,भाईचारा,,,,,,,
4,10,Flashcards,Learning vocabulary,,5,text,text,,5,French,...,,,दया,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083,38,Cloze_Test,Verb_Conjugation,Grammar,9596,text,text,6995,,French,...,Conjugate the verb correctly:,इंटरनेट का काम नहीं ... (कर),करता,,,,,,,
1084,38,Cloze_Test,Verb_Conjugation,Grammar,9597,text,text,6997,,French,...,Conjugate the verb correctly:,यह धूमिल कल ... (हो),होगा,,,,,,,
1085,38,Cloze_Test,Verb_Conjugation,Grammar,9598,text,text,6999,,French,...,Conjugate the verb correctly:,मैं चोरी हो ... था (जा),गया,,,,,,,
1086,38,Cloze_Test,Verb_Conjugation,Grammar,9599,text,text,7000,,French,...,Conjugate the verb correctly:,आप तोड़ ... हैं (रह),रहे,,,,,,,


# Extract Exercise Dataframe to an Excel File


In [None]:
df_hi_exercises.to_excel("Hindi_Exercises.xlsx", index=False)