# In [194]:
import os.path
from bs4 import BeautifulSoup as bs
import re

def get_pairs(file) -> list:
    """Extract question / correct-answer pairs from quiz HTML.

    Args:
        file: HTML markup (a string or an open file handle) that is handed
            straight to BeautifulSoup.

    Returns:
        A list of ``[question_text, answer_texts]`` entries, one for each
        ``div.text`` element that contains a question-text node.
        ``answer_texts`` holds the cleaned ``title`` attribute of every
        answer whose class attribute marks it as selected and correct.
    """
    # Name the parser explicitly: without it BeautifulSoup emits a warning
    # and picks whichever parser happens to be installed, so the same file
    # could be parsed differently on another machine.
    scrape = bs(file, "html.parser")

    pairs = []
    for q in scrape.select('div.text'):
        question_nodes = q.select("div[class='question_text user_content']")
        if not question_nodes:
            # A div.text without a question node is skipped rather than
            # aborting the whole file with an IndexError.
            continue
        question_text = clean_up(question_nodes[0].text)

        answers = q.select('div[class="answer answer_for_ selected_answer correct_answer"]')
        # Answers lacking a title attribute carry no text to export.
        answer_texts = [
            clean_up(a['title'], 'answer') for a in answers if a.has_attr('title')
        ]
        pairs.append([question_text, answer_texts])
    return pairs

def clean_up(txt, object_type=''):
    """Normalise scraped text so it is safe for the delimited output format.

    The value is converted to ``str``, stripped of surrounding whitespace,
    then newlines are removed, semicolons become periods and tabs become
    single spaces. When ``object_type`` is ``'answer'`` the fixed status
    phrases appended to answer titles are removed as well.

    Args:
        txt: Value to clean; anything accepted by ``str()``.
        object_type: Pass ``'answer'`` to also drop the answer status phrases.

    Returns:
        The cleaned string.
    """
    substitutions = str.maketrans({'\n': None, ';': '.', '\t': ' '})
    cleaned = str(txt).strip().translate(substitutions)

    if object_type != 'answer':
        return cleaned

    # Order matters: the "correct answer" phrase is removed first, which can
    # expose a trailing "selected" phrase for the second pass.
    for boilerplate in (". This was the correct answer.", ". You selected this answer"):
        cleaned = cleaned.replace(boilerplate, '')
    return cleaned

def write_pairs(pairs: list, location: str):
    """Append question/answer pairs to a text file.

    Each pair is written as ``<question>\\t<answers>;\\n`` where multiple
    answers are joined with newlines. The file is opened in append mode, so
    repeated calls accumulate output; it is created if it does not exist.

    Args:
        pairs: Sequence of ``[question_text, answer_texts]`` entries, where
            ``answer_texts`` is a list of strings.
        location: Path of the UTF-8 text file to append to.
    """
    lines = []
    for pair in pairs:
        answer_as_text = '\n'.join(pair[1])
        lines.append(f"{pair[0]}\t{answer_as_text};\n")

    with open(location, 'a', encoding="utf8") as f:
        # One batched write instead of one write call per pair.
        f.writelines(lines)

def write_html_file(file, path):
    """Write text to ``path``, replacing any existing file.

    Args:
        file: The text content to write (despite the name, not a file object).
        path: Destination file path.
    """
    # The context manager guarantees the handle is flushed and closed, and
    # the explicit encoding matches the utf8 used by the other file
    # operations in this module instead of depending on the platform default.
    with open(path, 'w', encoding="utf8") as f:
        f.write(file)

def handle_input_output(input_path, output_path=None):
    """Convert one HTML file into question/answer text appended to a file.

    Args:
        input_path: Path of the HTML file to read (decoded as UTF-8).
        output_path: File the pairs are appended to. When omitted or empty,
            ``<input dir>/output/<input name without extension>-output.txt``
            is used instead.

    Prints "Write Failed!" and writes nothing when no pairs are found;
    otherwise prints the path that was written.
    """
    if not output_path:
        # Derive a per-input destination only when the caller gave none;
        # previously the argument was always overwritten and thus ignored.
        directory, filename = os.path.split(input_path)
        stem = os.path.splitext(filename)[0]
        output_path = os.path.join(directory, "output", stem + "-output.txt")

    # Close the input handle deterministically instead of leaking it.
    with open(input_path, "r", encoding="utf8") as f:
        file = f.read().strip()
    pairs = get_pairs(file)

    if not pairs:
        print("Write Failed!")
        return

    # The destination is opened in append mode by write_pairs, which fails
    # if the containing folder does not exist yet.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    write_pairs(pairs, output_path)

    print(f'Wrote to {output_path}.')


def get_HTML_paths_from_directory(dir_path):
    """List the entries of ``dir_path`` whose names end in ``.html``.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        A list of paths, each ``dir_path`` joined with a matching entry
        name, in the order ``os.listdir`` yields them. The suffix check is
        case-sensitive.
    """
    return [
        os.path.join(dir_path, entry)
        for entry in os.listdir(dir_path)
        if entry.endswith(".html")
    ]

def main(
    examples_dir="/Users/zak/dev/canvas-to-quizlet/examples",
    output_path="/Users/zak/dev/canvas-to-quizlet/examples/full-size-output.txt",
):
    """Process every ``.html`` file found in a directory.

    Args:
        examples_dir: Directory scanned for ``.html`` inputs. Defaults to
            the path that was previously hard-coded here.
        output_path: Output path passed to ``handle_input_output`` for each
            input. Defaults to the path that was previously hard-coded here.

    Each input path is printed before it is processed, followed by a blank
    separator after it.
    """
    for html_path in get_HTML_paths_from_directory(examples_dir):
        print(html_path)
        handle_input_output(html_path, output_path)
        print("\n")
    

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()