In [1]:
"""
Used for making a quiz on https://quizizz.com/admin

To identify a word:
    • First use pattern.en to get all variations
    • If no match, use fuzzywuzzy to match a word. Note that for "meaning" there may not be a match

May need manual inspection:
    • See if the wrong word is marked
    • (optional) Suppose "in the mood" is desired instead of "mood" as the blank, then we need to manually change that.
    • What if we don't have a sent? For now we can just ignore such cases?

Input:
    • a list of PIE roots
    • percentage of old questions
    • time limit of each question
    • learned_roots.txt
Output:
    • fill-in-the-blank questions in .csv
    • updated learned_roots.txt
"""
import os
import random
import yaml
import xlsxwriter
from tqdm import tqdm
from pattern import en
from argparse import ArgumentParser

In [2]:
# TODO: add param "freq_threshold" so that any words above it are filtered out
# TODO: add iteration over related words
# TODO: add cmd_args

roots = ['*bheidh-', '*bheid-']

# Constants
HEADERS = [
    'Question Text',
    'Question Type',
    'Option 1',
    'Option 2',
    'Time in seconds']
SEP = ' ' + '|' * 3 + ' '
BLANK = '_' * 5

In [3]:
class Workbook:
    def __init__(self, file, time=300):
        self.file = file
        self.time = time

        self._row = 0

    def write(self, question, answer):
        assert isinstance(question, str) and isinstance(answer, str)
        self.worksheet.write(self._row, 0, question)
        self.worksheet.write(self._row, 1, 'Fill-in-the-Blank')
        self.worksheet.write(self._row, 2, answer)
        self.worksheet.write(self._row, 4, self.time)
        self._row += 1

    def close(self):
        self.workbook.close()

    @property
    def worksheet(self):
        if not hasattr(self, '_worksheet'):
            self._worksheet = self.workbook.add_worksheet()
            # Write headers
            for col, header in enumerate(HEADERS):
                self._worksheet.write(0, col, header)
            self._row += 1
        return self._worksheet

    @property
    def workbook(self):
        if not hasattr(self, '_workbook'):
            self._workbook = xlsxwriter.Workbook(self.file)
        return self._workbook

In [4]:
class Question:
    def __init__(self, word, meaning, subword2root, exp, sent=None):
        """
        :param word:
        :param exp:
        :param sent: there may not be a sent
        :param subword2root: each root_dict is a dict of root --> root_exp
        """
        self.word = word.strip()
        self.meaning = meaning.strip()
        if sent is None:
            self.sent = sent
        else:
            self.sent = sent.strip()
        self.subword2root = subword2root
        for subword, root_str in self.subword2root.items():
            subword2root[subword] = root_str.strip()
        self.exp = exp.strip()

        self.answer_strs = []

    def get_question_and_answer(self):
        question = self._get_question()
        answer = self._get_answer()
        return question, answer

    def _get_question(self):
        """
        Example: word (answer) = blaze
        Question:
            1. Meaning: When a fire ____, it burns strongly and brightly.
            2. Sentence: Three people died as wreckage ____, and rescuers fought to release trapped drivers.
            3. Roots: __ = *bhel- | to shine, flash, burn
        Answers:
            1. blazes
            2. blazed
            3. bl (<'exp' field which explains the roots, etc...>)
        """
        # question = 'Instruction: please format your answer like "<blank_1>, <blank_2>, ..., <blank_n>" with NO extra spaces. Note that when filling the blanks, you need to get the inflections (e.g., tense) right. \n\n'
        question = ''
        if self.sent_with_blank is not None:
            question += f'{self.sent_with_blank}{SEP}'
        question += f'{self.meaning_with_blank}{SEP}'
        question += f'Roots:'
        for root_with_blank in self.roots_with_blank:
            question += f' [ {root_with_blank} ]'
        return question

    def _get_answer(self):
        answer = ', '.join(self.answer_strs)
        # answer += SEP
        # answer += f'Explanation: {self.exp}'
        return answer

    def _get_sent_with_blank(self, sent):
        """
        :param word: any inflection of it could be matched
        :param sent: could be either exp or sent
        :return:
        """
        variations = []
        variations.extend(en.lexeme(self.word)) # include all inflections
        variations.append(en.pluralize(self.word))
        variations.append(en.singularize(self.word))
        variations.sort(key=len, reverse=True) # match with maximal munch
        cap_variations = [variation.capitalize() for variation in variations]
        variations.extend(cap_variations)
        for variation in variations:
            if variation in sent:
                # 5 b/c the avg. number of characters in an English word is about 4.9
                self.answer_strs.append(variation)
                sent = sent.replace(variation, BLANK)
                return sent
        else:
            return sent # return sent untouched when there is no match

    @property
    def roots_with_blank(self):
        if not hasattr(self, '_roots_with_blank'):
            self._roots_with_blank = [
                f'{BLANK}: {root_str}'
                for subword, root_str
                in self.subword2root.items()
            ]
            self.answer_strs.extend(list(self.subword2root.keys()))
        return self._roots_with_blank

    @property
    def meaning_with_blank(self):
        if not hasattr(self, '_meaning_with_blank'):
            self._meaning_with_blank = self._get_sent_with_blank(self.meaning)
        return self._meaning_with_blank

    @property
    def sent_with_blank(self):
        if not hasattr(self, '_sent_with_blank'):
            if self.sent is None:
                self._sent_with_blank = None
            else:
                self._sent_with_blank = self._get_sent_with_blank(self.sent)
        return self._sent_with_blank

In [5]:
def parse_args():
    parser = ArgumentParser()
    parser.add_argument('--')

In [6]:
if __name__ == '__main__':
    with open('book.yml') as fp:
        book = yaml.safe_load(fp)
        print('Loaded book.')
    # Filter entries with the desired PIE roots
    entries = [
        entry for entry in book
        if 'roots' in entry and len(set(entry['roots'].keys()) & set(roots)) != 0]
    random.shuffle(entries)

    path = 'quiz'
    if not os.path.exists(path):
        os.makedirs(path)
    filename = '_'.join(roots).replace('*', '').replace('-', '') + '.xlsx'
    file = os.path.join(path, filename)
    workbook = Workbook(file)
    for entry in tqdm(book):
        for root_str in entry['roots'].values():
            root = root_str.split('|')[0].strip()
            if root in roots:
                break
        else:
            continue
        kwargs = {}
        if 'sent' in entry:
            kwargs['sent'] = entry['sent']
        question_obj = Question(
            entry['word'], entry['meaning'], entry['roots'],
            entry['exp'], **kwargs)
        question, answer = question_obj.get_question_and_answer()
        workbook.write(question, answer)

    workbook.close()

100%|██████████| 356/356 [00:00<00:00, 10926.18it/s]


Loaded book.
