In [88]:
import openai
from dotenv import load_dotenv
import os
import re
import random

load_dotenv()

class OpenAIGenerator():
    '''Generate one multiple-choice question (question, answer, three distractors)
    about a text passage by prompting the OpenAI Completion API and parsing the
    free-text reply with regular expressions.
    '''

    def __init__(self) -> None:
        '''Read OPENAI_API_KEY from the environment and hand it to the openai module.

        Also stores the results of openai.Model.list() and openai.Engine.list()
        on the instance as `models` and `engines`.
        '''
        # os.environ.get returns None when the variable is missing; the key is
        # passed on unchanged, so a missing key surfaces on the first API call.
        self.API_KEY = os.environ.get("OPENAI_API_KEY")
        openai.api_key = self.API_KEY
        self.models = openai.Model.list()
        self.engines = openai.Engine.list()

    def generate_random_answer_option(self):
        '''Return one of the option labels 'A', 'B', 'C' or 'D', chosen at random.'''
        potential_answers = ['A', 'B', 'C', 'D']
        return random.choice(potential_answers)

    def format_text_regex(self, text, ans):
        '''Split a completion into its question, correct answer and distractors.

        Args:
            text: Raw completion text, expected to hold one item per line
                (a question line followed by labelled option lines).
            ans: Label of the correct option (e.g. 'A').

        Returns:
            A tuple (question, answer, distractors) where `distractors` is the
            list of option texts whose label is not `ans`. `answer` is '' when
            the answer line carries a label but no text.

        Raises:
            ValueError: if no question line or no answer line was found.
        '''
        # question_pattern = re.compile(r"\s*(?P<question>.+\?)")
        question_pattern = re.compile(r"\s*(?P<question>.+\?)|(Q:|Question:)\s*.+")
        # The word boundary keeps a label from matching the tail of a word inside
        # another option (e.g. "USA." must not be read as option "A.");
        # re.escape keeps a label with regex metacharacters literal.
        answer_pattern = re.compile(
            r"\b{0}(?::|\.|\))\s*(?P<answer>.+)?".format(re.escape(ans))
        )
        choice_pattern = re.compile(r"[a-zA-Z]+(:|\.|\))\s*(?P<choice>.+)")

        question = None
        answer = None
        distractors = []

        # Each line is classified once; precedence is question > answer > choice.
        for line in text.strip().split("\n"):
            question_match = question_pattern.search(line)
            if question_match:
                # The "Q:" / "Question:" alternative does not fill the named
                # group, so fall back to the whole matched text in that case.
                question = question_match.group('question') or question_match.group(0).strip()
                continue

            answer_match = answer_pattern.search(line)
            if answer_match:
                # The answer text is optional in the pattern; normalise None to ''.
                answer = answer_match.group('answer') or ''
                continue

            choice_match = choice_pattern.search(line)
            if choice_match:
                distractors.append(choice_match.group('choice'))

        if question is None or answer is None:
            raise ValueError(
                "could not find both a question and an answer for option {!r}".format(ans)
            )

        return question, answer, distractors

    def select_language(self, lang):
        '''Return the prompt fragment for `lang`: 'in french ' for 'fr', otherwise ''.'''
        if lang == 'fr':
            return 'in french '
        else:
            return ''

    def generate(self, context: str, desired_count: int = 1, lang='en') -> tuple:
        '''Ask the completion endpoint for a multiple-choice question on `context`.

        Args:
            context: Passage the question should be about.
            desired_count: Number interpolated into the prompt. Only the first
                successfully parsed question is returned regardless of its value.
            lang: 'fr' requests a French question; any other value adds nothing
                to the prompt.

        Returns:
            A 5-tuple (answer, question, distractor_1, distractor_2, distractor_3).
            Every field that could not be parsed is ''.
        '''
        answer_option = self.generate_random_answer_option()
        language = self.select_language(lang)

        # Build the prompt once so the text printed below is exactly what is sent.
        prompt = f'''generate {desired_count} question with 4 options {language}on the following text: "{context}". the true answer is option {answer_option}'''

        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            max_tokens=256,
            n=1,
            stop=None,
            temperature=0.2,
            top_p=1,
        )

        print(prompt)

        parsed = []
        for qad_pair in response["choices"]:
            print(qad_pair)
            try:
                parsed.append(self.format_text_regex(qad_pair['text'], answer_option))
            except Exception:
                # Best effort: an unparseable completion yields empty fields
                # instead of an exception for the caller.
                break

        if parsed:
            question, answer, distractors = parsed[0]
        else:
            question, answer, distractors = '', '', []

        # Pad so that three distractor slots always exist.
        distractors = list(distractors) + [''] * max(0, 3 - len(distractors))

        return answer, question, distractors[0], distractors[1], distractors[2]
    


In [89]:
# Build the generator. Its __init__ reads OPENAI_API_KEY from the environment
# and calls openai.Model.list() / openai.Engine.list().
generator = OpenAIGenerator()

In [90]:
# Passage the generated question should be about.
context = '''Program execution might be likened to reading a book. While a person will normally read each word and line in sequence, they may at times jump back to an earlier place in the text or skip sections that are not of interest. Similarly, a computer may sometimes go back and repeat the instructions in some section of the program over and over again until some internal condition is met. This is called the flow of control within the program and it is what allows the computer to perform tasks repeatedly without human intervention.'''

In [91]:
# Request one question with the defaults (desired_count=1, lang='en').
# generate() also prints the prompt and each raw completion choice.
output = generator.generate(context)

generate 1 question with 4 options on the following text: "Program execution might be likened to reading a book. While a person will normally read each word and line in sequence, they may at times jump back to an earlier place in the text or skip sections that are not of interest. Similarly, a computer may sometimes go back and repeat the instructions in some section of the program over and over again until some internal condition is met. This is called the flow of control within the program and it is what allows the computer to perform tasks repeatedly without human intervention.". the true answer is option A
{
  "finish_reason": "stop",
  "index": 0,
  "logprobs": null,
  "text": "\n\nQ: What is the flow of control within a program?\nA. The instructions that are repeated until an internal condition is met\nB. The sequence of instructions that are read by the computer\nC. The ability of a computer to perform tasks without human intervention\nD. The ability of a person to jump back to 

In [92]:
# Display the returned tuple: (answer, question, distractor 1, distractor 2, distractor 3).
output

('The instructions that are repeated until an internal condition is met',
 'Q: What is the flow of control within a program?',
 'The sequence of instructions that are read by the computer',
 'The ability of a computer to perform tasks without human intervention',
 'The ability of a person to jump back to an earlier place in the text')

In [62]:
# NOTE(review): `answers` is not assigned in any cell visible here, and this
# cell's execution count (62) is lower than the cells above. It looks like a
# leftover from an earlier session; confirm it survives Restart & Run All or remove it.
answers

["Setting the map's agenda and selecting traits of the object to be mapped"]

In [23]:
# NOTE(review): `distractors` is not assigned in any cell visible here, and this
# cell's execution count (23) is lower than the cells above. It looks like a
# leftover from an earlier session; confirm it survives Restart & Run All or remove it.
distractors

[['Representing the terrain of the mapped object on flat media',
  'Eliminating characteristics of the mapped object that are not relevant',
  'Reducing the complexity of the characteristics that will be mapped',
  'Orchestrating the elements of the map to best convey its message']]