In [12]:
# Sample question passed to the retrieval QA chain for a single trial query.
question = "How did New York City get its name?"

In [10]:
# building a dataset from llamaindex

from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document

# Read the entire text file, then wrap its contents in a single Document.
with open("./nyc_text.txt") as nyc_file:
    nyc_text = nyc_file.read()
docs = [Document(text=nyc_text)]

In [14]:
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chat_models import ChatOpenAI

# Chat model used by the QA chain.
llm = ChatOpenAI()

# Build a vector-store index over the same text file via a LangChain loader.
loader = TextLoader("nyc_text.txt")
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders([loader])

In [15]:
from langchain.chains import RetrievalQA

# Retrieval QA chain that also returns the documents it retrieved.
retriever = index.vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

# Run the sample question and report how many source documents came back.
result = qa_chain({"query": question})
source_documents = result["source_documents"]
print(len(source_documents))

# Show which keys the chain output contains.
list(result)

4


['query', 'result', 'source_documents']

In [9]:
from llama_index.evaluation import DatasetGenerator

# Generate a small set of evaluation questions from the loaded documents.
num_eval_questions = 5
question_generator = DatasetGenerator.from_documents(docs)
eval_questions = question_generator.generate_questions_from_nodes(
    num_eval_questions
)

eval_questions

['What is the population of New York City as of 2020?',
 'Which borough of New York City has the highest population?',
 'What is the economic significance of New York City?',
 'How did New York City get its name?',
 'What is the significance of the Statue of Liberty in New York City?']

In [None]:
# (Stray incomplete statement removed: the bare name `im` was undefined and
# raised NameError when the notebook was run top to bottom.)

In [17]:
from tqdm import tqdm 

# Run the QA chain once per generated question, keeping each full output dict.
results = [
    qa_chain({"query": eval_question}) for eval_question in tqdm(eval_questions)
]

100%|████████████████████████████████████████████████████████████| 5/5 [00:14<00:00,  2.95s/it]


In [22]:
from langsmith import Client

# Name of the LangSmith dataset to read.
dataset_name = "NYC Wiki"

# Connect to LangSmith and fetch the dataset by name.
client = Client()
dataset = client.read_dataset(dataset_name=dataset_name)

# Display the dataset's identifier.
dataset.id

UUID('2294b0a7-0a89-4530-ba2b-e5b1ced7f23f')

In [18]:
# Display the collected QA chain outputs (one dict per evaluation question).
# NOTE(review): this renders every retrieved source document in full, which
# makes the output very long; consider previewing a single entry instead.
results

[{'query': 'What is the population of New York City as of 2020?',
  'result': 'The population of New York City as of 2020 is 8,804,190.',
  'source_documents': [Document(page_content="New York City is the most populous city in the United States, with 8,804,190 residents incorporating more immigration into the city than outmigration since the 2010 United States census. More than twice as many people live in New York City as compared to Los Angeles, the second-most populous U.S. city; and New York has more than three times the population of Chicago, the third-most populous U.S. city. New York City gained more residents between 2010 and 2020 (629,000) than any other U.S. city, and a greater amount than the total sum of the gains over the same decade of the next four largest U.S. cities, Los Angeles, Chicago, Houston, and Phoenix, Arizona combined. New York City's population is about 44% of New York State's population, and about 39% of the population of the New York metropolitan area. The 

In [23]:
# Upload each QA result to the LangSmith dataset as one example.
for qa_result in results:
    example_inputs = {
        "question": qa_result["query"],
        "answer": qa_result["result"],
        # Keep only the text of each retrieved source document.
        "contexts": [doc.page_content for doc in qa_result["source_documents"]],
    }
    client.create_example(inputs=example_inputs, dataset_id=dataset.id)

In [None]:
from typing import Optional

from evaluate import load
from langsmith.evaluation import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run


class Ragas(RunEvaluator):
    """Run evaluator that scores a run's predicted text by its perplexity.

    The score is computed with the ``perplexity`` metric loaded through
    ``evaluate.load`` and reported under the key ``"Perplexity"``.

    Args:
        prediction_key: Key in ``run.outputs`` holding the text to score.
            When ``None``, the run must have exactly one output, which is used.
        model_id: Model identifier forwarded to the metric's ``compute`` call.
    """

    def __init__(self, prediction_key: Optional[str] = None, model_id: str = "gpt-2"):
        self.prediction_key = prediction_key
        self.model_id = model_id
        self.metric_fn = load("perplexity", module_type="metric")

    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> EvaluationResult:
        """Compute the perplexity of the run's prediction.

        Args:
            run: The run whose outputs contain the prediction to score.
            example: Unused; accepted for interface compatibility.

        Returns:
            An ``EvaluationResult`` with key ``"Perplexity"`` and the score.

        Raises:
            ValueError: If the run has no outputs, or if no ``prediction_key``
                was configured and the run does not have exactly one output.
            KeyError: If the configured ``prediction_key`` is not in the outputs.
        """
        outputs = run.outputs
        if outputs is None:
            raise ValueError("Run outputs cannot be None")

        key = self.prediction_key
        if key is None:
            # No key configured: indexing with None would always fail, so fall
            # back to the sole output when that choice is unambiguous.
            if len(outputs) != 1:
                raise ValueError(
                    "prediction_key must be set when the run does not have "
                    f"exactly one output; got keys: {list(outputs)}"
                )
            key = next(iter(outputs))

        prediction = outputs[key]
        results = self.metric_fn.compute(
            predictions=[prediction], model_id=self.model_id
        )
        ppl = results["perplexities"][0]
        return EvaluationResult(key="Perplexity", score=ppl)