# 0. Import Dependencies, Load Questions and Answers, Create folder for storing augmented FAQ

In [None]:
from Bani.Bani import Bani
from Bani.core.FAQ import FAQ
from Bani.core.generation import GenerateManager
from Bani.generation.t5_paraphrase_gen.t5_paraphrase import T5Generator

import pickle
def load_dict(path):
    """Return the object deserialized from the pickle file at ``path``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(path, mode="rb") as handle:
        loaded = pickle.load(handle)
    return loaded

def extractQA(orignalDct):
    """Flatten a label-indexed FAQ dictionary into parallel question/answer lists.

    ``orignalDct`` must have the form::

        {"question_to_label": {<question>: <label>, ...},
         "answer_to_label": {<answer>: <label>, ...}}

    Each question is paired with the answer that shares its label, so both
    one-to-one and many-to-one question/answer mappings are supported.

    Returns a ``(questions, answers)`` tuple of equal-length lists, ordered
    like ``question_to_label``. Raises ``KeyError`` if either top-level key
    is missing or a question's label has no matching answer.
    """
    label_by_question = orignalDct["question_to_label"]
    label_by_answer = orignalDct["answer_to_label"]

    # Invert answer -> label so that answers can be looked up by label.
    answer_by_label = {label: answer for answer, label in label_by_answer.items()}

    questions = list(label_by_question)
    answers = [answer_by_label[label] for label in label_by_question.values()]
    return questions, answers

In [None]:
# Load the pickled FAQ dictionary, then split it into parallel lists of
# questions and answers (one answer entry per question).
original_dict = load_dict("./tutorialFAQs/baby_bonus_orignal.pkl")
babybonus_questions, babybonus_answers = extractQA(original_dict)


In [None]:
import os

# Directory that the augmented FAQ is saved into later in this notebook.
faq_store = './t5_qqp_tutorial'

# exist_ok=True makes the creation idempotent and removes the
# check-then-create race of `if not os.path.exists(...)`. It still raises
# FileExistsError if the path exists but is not a directory, so a bad path
# is reported here rather than when the FAQ is saved.
os.makedirs(faq_store, exist_ok=True)

# 1. Instantiate generatorManager

## 1a. Either use T5Generator on its own

In [None]:
# generate_n_paraphrases is passed to T5Generator as num_return;
# keep_top_k_paraphrases is passed to GenerateManager as the T5 entry of nums.
generate_n_paraphrases = 30
keep_top_k_paraphrases = 10

qqp_producer = T5Generator(num_return=generate_n_paraphrases, model_path="t5_qqp")

# One name and one quantity per producer, in the same order as `producers`.
names = ["T5"]
quantity = [keep_top_k_paraphrases]

generatorManager = GenerateManager(producers=[qqp_producer], names=names, nums=quantity)

## 1b. Or use T5Generator with the other existing producers

In [None]:
# NOTE(review): the import cell at the top of this notebook does not import
# SymSubGenerator, dummyEN, FPMGenerator or EDAGenerator, so this cell raised
# NameError as written. The module paths below follow the layout used by the
# Bani tutorial; confirm them against the installed Bani version.
from Bani.generation.rajat_work.qgen.encoder.dummy import dummyEN
from Bani.generation.rajat_work.qgen.generator.eda import EDAGenerator
from Bani.generation.rajat_work.qgen.generator.fpm.fpm import FPMGenerator
from Bani.generation.rajat_work.qgen.generator.symsub import SymSubGenerator

# generate_n_paraphrases is passed to T5Generator as num_return;
# keep_top_k_paraphrases is passed to GenerateManager as the T5 entry of nums.
generate_n_paraphrases = 30
keep_top_k_paraphrases = 10
qqp_producer = T5Generator(
    model_path="t5_qqp",
    top_p=0.98,
    num_return=generate_n_paraphrases,
    max_len=128,
    top_k=120,
    is_early_stopping=True,
)

symsub_producer = SymSubGenerator(dummyEN("lite"))
fpm_producer = FPMGenerator()
eda_producer = EDAGenerator()

# names and quantity are positional partners of `producers`: entry i of each
# list belongs to producer i.
names = ["T5", "SymSub", "FPM", "EDA"]
quantity = [keep_top_k_paraphrases, 3, 3, 3]

generatorManager = GenerateManager(
    producers=[
        qqp_producer,
        symsub_producer,
        fpm_producer,
        eda_producer,
    ],
    names=names,
    nums=quantity,
)


# 2. FAQ Augmentation!

In [None]:
# Wrap the extracted questions and answers in a FAQ object named "babyBonus".
babybonusFAQ = FAQ(name="babyBonus", questions=babybonus_questions, answers=babybonus_answers)

# Build the FAQ using the generator manager configured in section 1.
# NOTE(review): presumably this is the step that generates the paraphrases —
# confirm against the Bani documentation.
babybonusFAQ.buildFAQ(generatorManager)

# Save the built FAQ, passing the faq_store directory as the destination.
babybonusFAQ.save(faq_store)

Once this cell finishes, the augmented FAQ is saved in the `faq_store` directory.

NOTE: Refer to `Tutorial.ipynb` in the root directory for how to load the saved FAQ object and instantiate the chatbot from it. You may either train the chatbot or use it directly, as shown in `Tutorial.ipynb`.

# 3. Other Features

## 3a. Generate for a single question

In [None]:
# Generate paraphrases for one ad-hoc question. n and k are passed to
# adhoc_generate as generate_n_paraphrases and keep_top_k_paraphrases, and the
# FAQ's existing questions are passed as original_questions.
target = "How can I save in the Child Development Account (CDA)?"
n = 40
k = 15
candidate_paraphrases = qqp_producer.adhoc_generate(
    input_question=target,
    generate_n_paraphrases=n,
    keep_top_k_paraphrases=k,
    original_questions=babybonus_questions,
)

# Print the question that was actually submitted. The original referenced
# `input_question`, which is only a keyword-argument name above and is not a
# defined variable, so this line raised NameError.
print(f"Input Question: {target}\n")
print("Candidate Paraphrases: \n")

# Number the candidates from 1, with a blank gap after each.
for count, paraphrase in enumerate(candidate_paraphrases, start=1):
    print(f"{count}. {paraphrase}")
    print("\n")

## 3b. Find Similar Questions within FAQ dataset

In [None]:
# Run the producer's similarity check over the FAQ's own questions.
# NOTE(review): presumably this reports questions in the list that are similar
# to one another — confirm the return value/output against the T5Generator source.
qqp_producer.find_similar_questions_within_faq(babybonus_questions)