In [6]:

import os
import openai

from openai import OpenAI
from langchain.prompts import HumanMessagePromptTemplate
from llama_index import download_loader, SimpleDirectoryReader


client = OpenAI()

In [7]:
from ragas.testset.prompts import SEED_QUESTION, EVOLUTION_ELIMINATION, REWRITE_QUESTION, FILTER_QUESTION

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
def llm2(prompt, **kwargs):
    response = client.chat.completions.create(
        model=kwargs.get("model", "gpt-3.5-turbo-16k"),
        messages=[{"role": "system", "content": prompt}],
        temperature=kwargs.get("temperature", 0),
        top_p=kwargs.get("top_p", 1),
        frequency_penalty=kwargs.get("frequency_penalty", 0.0),
        presence_penalty=kwargs.get("presence_penalty", 0.0),
        max_tokens=kwargs.get("max_tokens", 500),
        n=kwargs.get("n", 1),
    )
    return response

In [6]:
reader = SimpleDirectoryReader("./arxiv-papers/",num_files_limit=5)
documents = reader.load_data()

In [129]:
seed_question = """
Generate two questions from given context satisfying the rules given below:
    2.The question should be framed such that it must be clearly understood without providing context.
    3.The question should be fully answerable from information present in given context.
    
{demonstration}


Context:
{context}
Questions: """

In [18]:
demonstrations = [
    {"context":"The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.",
     "questions":[
     {"question_Why": "Why was the Eiffel Tower originally planned to be a temporary structure?"},
     {"question_Was":"Was the Eiffel Tower originally designed to be a permanent structure?"},
     {"question_What":"What was the original purpose of the Eiffel Tower when it was built for the 1889 World's Fair?"},
     {"question_How":"How did the Eiffel Tower avoid being dismantled in 1909?"},
     {"question_Where":"Where is the Eiffel Tower?"}]     
    },
    
    {
      "context": "Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.",
      "questions": [
        {"question_Why": "Why do plants perform photosynthesis?"},
        {"question_Was": "Was photosynthesis discovered in plants, algae, or bacteria first?"},
        {"question_What": "What converts light energy into chemical energy in photosynthesis?"},
        {"question_How": "How do plants capture light energy for photosynthesis?"},
        {"question_Where": "Where in plants does photosynthesis primarily occur?"},
        {"question_Can": "Can photosynthesis occur in the absence of light?"}
      ]
    },

]

In [19]:
import numpy as np
sample = np.random.choice(demonstrations,1)[0]
questions = np.random.choice(sample['questions'],2,replace=False)

In [20]:
questions = "{"+str({k:v for dic in questions.tolist() for k,v in dic.items()}).replace("'",'"')+"}"


In [21]:
questions

'{{"question_Why": "Why do plants perform photosynthesis?", "question_Was": "Was photosynthesis discovered in plants, algae, or bacteria first?"}}'

In [167]:
print(f'Context:{sample["context"]}\nQuestions:{questions}')

Context:The Eiffel Tower in Paris was originally intended as a temporary structure, built for the 1889 World's Fair. It was almost dismantled in 1909 but was saved because it was repurposed as a giant radio antenna.
Questions:{{'question_How': 'How did the Eiffel Tower avoid being dismantled in 1909?', 'question_Where': 'Where is the Eiffel Tower?'}}


In [124]:
context = documents[10].get_content()

In [125]:
output = llm2(seed_question.format(context=context))

In [126]:
output

ChatCompletion(id='chatcmpl-8Nbs8F3zJTbjhfxfcjtrYnUUMUHcY', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='{"question_what": "What are the two closed-set tasks evaluated in the experiments?",\n"question_how": "How does SELF-RAG tailor its behavior during the inference phase?"}', role='assistant', function_call=None, tool_calls=None))], created=1700637816, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=1148, total_tokens=1186))

In [31]:
print(context)

RETA-LLM: A Retrieval-Augmented Large Language Model Toolkit
Jiongnan Liu1, Jiajie Jin2, Zihan Wang1, Jiehan Cheng1, Zhicheng Dou1∗,andJi-Rong Wen1
1Gaoling School of Artificial Intelligence, Renmin University of China
2University of Science and Technology of China
1{liujn, wangzihan0527, jiehan_cheng, dou, jrwen}@ruc.edu.cn
2jinjiajie@mail.ustc.edu.cn
Abstract
Although Large Language Models (LLMs)
have demonstrated extraordinary capabilities
in many domains, they still have a tendency
to hallucinate and generate fictitious responses
to user requests. This problem can be allevi-
ated by augmenting LLMs with information
retrieval (IR) systems (also known as retrieval-
augmented LLMs). Applying this strategy,
LLMs can generate more factual texts in re-
sponse to user input according to the relevant
content retrieved by IR systems from external
corpora as references. In addition, by incorpo-
rating external knowledge, retrieval-augmented
LLMs can answer in-domain questions that
cannot be 

In [9]:
prompt = "\nGenerate two questions from given context satisfying the rules given below:\n    2.The question should be framed such that it must be clearly understood without providing context.\n    3.The question should be fully answerable from information present in given context.\n    \nContext:Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.\nQuestions:{{'question_How': 'How do plants capture light energy for photosynthesis?', 'question_Where': 'Where in plants does photosynthesis primarily occur?'}}\n\n\nContext:\nBased on manual examinations we further ﬁlter out examples where\nevidence is too long, where history is not well-formed (in so me of the examples there are missing turns) or where the golde n\nanswer is a single word. See Appendix for the full impact of ﬁl tering on the dataset size.\nFor our experiments we randomly select 100 examples from the remaining after ﬁltering 324 examples in the dev split. We al so\nmanually verify these 100 examples to ensure dialog quality .\n4 Human Evals\n4.1 Pilot\nMeena paper [ 2] introduces a proxy for ﬂuency in the form of Sensibleness an d Speciﬁcity Average metric (SSA), while\npaper [ 10] presents an evaluation framework called Attributable to I dentiﬁed Sources (AIS) for assessing the output of natural\nlanguage generation models for attributability.\nBoth SSA a nd AIS assume the use of human raters and in the ideal world wit h\ninﬁnite resources all our experiments would be evaluated th is way.\nTo get a sense of the problem, we started with conducting smal l pilot human eval, 400 examples in total. We sampled 100\ndialogs from QReCC, as described above, and generated respo nses for them using PaLM 540B model [ 16] with 4 different\nsetups (see next section for the speciﬁcs about how we use PaL M’s native dialog prompt):\n1. temperature = 0.0 and “no evidence”\n2. temperature = 0.7 and “no evidence”\n3. temperature = 0.0 and “golden evidence”\n4. temperature = 0.7 and “golden evidence”\nWe have run human SSA eval for all 4 pilot setups and human AIS e val for setups #3 and #4.\nQuestions:"

In [10]:
output = llm2(prompt)

In [14]:
import json

In [15]:
json.loads(output.choices[0].message.content)

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

In [16]:
output.choices[0].message.content

"{'question_What': 'What is the purpose of the Sensibleness and Specificity Average metric (SSA)?', 'question_How': 'How many examples were sampled for the pilot human evaluation?'}"

In [17]:
print(prompt)


Generate two questions from given context satisfying the rules given below:
    2.The question should be framed such that it must be clearly understood without providing context.
    3.The question should be fully answerable from information present in given context.
    
Context:Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.
Questions:{{'question_How': 'How do plants capture light energy for photosynthesis?', 'question_Where': 'Where in plants does photosynthesis primarily occur?'}}


Context:
Based on manual examinations we further ﬁlter out examples where
evidence is too long, where history is not well-formed (in so me of the examples there are missing turns) or where the golde n
answer is a single word. See Appendix for the full impact of ﬁl tering on the dataset size.
For our experiments we randomly select 100 examples from the remaining after ﬁltering 324 examples in the dev split. We al so
manually verify th