In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator

loader = TextLoader("nyc_text.txt")
index = VectorstoreIndexCreator().from_loaders([loader])

In [11]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=index.vectorstore.as_retriever(),
    return_source_documents=True
)

question = "How did New York City get its name?"
result = qa_chain({"query": question})
result["result"]

'New York City got its name when it was renamed by the British in 1664 after King Charles II of England granted the lands to his brother, the Duke of York. The city was originally founded as a trading post called New Amsterdam by Dutch colonists in approximately 1624.'

In [60]:
def create_qa_chain():
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=index.vectorstore.as_retriever(),
        return_source_documents=True
    )
    return qa_chain

In [62]:
qa_chain = create_qa_chain()

result = qa_chain({"query": question})
result.keys()

dict_keys(['query', 'result', 'source_documents'])

In [36]:
from langchain.callbacks.tracers import LangChainTracer
from langchain.callbacks.manager import CallbackManagerForChainRun

ragas_callback = LangChainTracer()

In [73]:
from langchain.schema import RUN_KEY
from ragas.metrics.base import Metric
import typing as t

class RagasEvalutorChain(Chain):
    metric: t.Any
        
    @property
    def input_keys(self):
        return ["query", "result", "source_documents"]
    
    @property
    def output_keys(self):
        return [f"{self.metric.name}_score"]

    def _call(
        self,
        inputs,
        run_manager = None,
    ):
        """Call the evaluation chain."""
        answer = inputs["result"]
        question = inputs["query"]
        contexts = []
        if "source_documents" in inputs:
            for d in inputs["source_documents"]:
                if isinstance(d, dict):
                    contexts.append(d["page_content"])
                else:
                    contexts.append(d.page_content)        
        
        # set callback
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()
        score = self.metric.score_single({
            "question": question,
            "answer": answer,
            "contexts": contexts,
        }, callbacks=callbacks)
        print(score)
        return {f"{self.metric.name}_score": score}

In [44]:
type(answer_relevancy)

ragas.metrics.answer_relevance.AnswerRelevancy

In [74]:
from ragas.metrics import answer_relevancy

answer_rel = RagasEvalutorChain(metric=answer_relevancy)

In [46]:
answer_rel(result, include_run_info=True)

0.9031754441691303


{'query': 'How did New York City get its name?',
 'result': 'New York City got its name when it was renamed by the British in 1664 after King Charles II of England granted the lands to his brother, the Duke of York. The city was originally founded as a trading post called New Amsterdam by Dutch colonists in approximately 1624.',
 'source_documents': [Document(page_content="== Etymology ==\n\nIn 1664, New York was named in honor of the Duke of York, who would become King James II of England. James's elder brother, King Charles II, appointed the Duke as proprietor of the former territory of New Netherland, including the city of New Amsterdam, when England seized it from Dutch control.\n\n\n== History ==", metadata={'source': 'nyc_text.txt'}),
  Document(page_content="During the Wisconsin glaciation, 75,000 to 11,000 years ago, the New York City area was situated at the edge of a large ice sheet over 2,000 feet (610 m) in depth. The erosive forward movement of the ice (and its subsequent 

In [75]:
from typing import Optional

from ragas import evaluate
from ragas.metrics import faithfulness
from langchain.chains.base import Chain

from langsmith.evaluation import EvaluationResult, RunEvaluator
from langsmith.schemas import Example, Run

dataset_name = "NYC test"

class RagasEvaluator(RunEvaluator):
    def __init__(self, metric):
        self.metric = metric
        
        self.metric.init_model()
    
    
    def _prepare_evaluator_output(self, output):
        print(output)
        

    
    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> EvaluationResult:
        if run.outputs is None:
            raise ValueError("Run outputs cannot be None")
        eval_input = run.outputs["query"] = run.inputs["query"]
        eval_output = answer_rel(run.outputs, include_run_info=True)

        
        score_name = f"{self.metric.name}_score"
        evaluation_result = EvaluationResult(
            key=self.metric.name, 
            score=eval_output[score_name]
        )
        if RUN_KEY in eval_output:
            evaluation_result.evaluator_info[RUN_KEY] = eval_output[RUN_KEY]
        return evaluation_result

In [76]:
from langsmith import Client
from langchain.smith import RunEvalConfig, run_on_dataset

from ragas.metrics.critique import harmfulness, coherence
from ragas.metrics import faithfulness, context_relevancy, answer_relevancy

client = Client()

evaluation_config = RunEvalConfig(
    custom_evaluators = [
        RagasEvaluator(metric=answer_relevancy)
    ],
    prediction_key="result"
)

run_on_dataset(
    client,
    dataset_name,
    create_qa_chain,
    evaluation=evaluation_config,
    input_mapper=lambda x: x
)

View the evaluation results for project '2023-08-10-19-05-35-RetrievalQA' at:
https://smith.langchain.com/projects/p/874669fc-9241-4d27-a35e-20c9132ffe24?eval=true
0.9686248361386427
0.9239109844858733
0.934866442971953
0.9985288482453205
1.0000000000000004


{'project_name': '2023-08-10-19-05-35-RetrievalQA',
 'results': {'6ddbfe30-309c-401e-8a6b-e17f091cfa1b': [{'query': 'What is the significance of the Statue of Liberty in New York City?',
    'result': 'The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It served as a welcoming sight to millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries. The statue represents freedom and cultural diversity and has become an iconic landmark in New York City.',
    'source_documents': [Document(page_content='from 1785 until 1790, and has been the largest U.S. city since 1790. The Statue of Liberty greeted millions of immigrants as they came to the U.S. by ship in the late 19th and early 20th centuries, and is a symbol of the U.S. and its ideals of liberty and peace. In the 21st century, New York City has emerged as a global node of creativity, entrepreneurship, and as a symbol o

In [6]:
from langchain.smith import RunEvalConfig, run_on_dataset
from langsmith import Client

client = Client()
eval_config = RunEvalConfig(
    evaluators=[
      "qa",
  ],
    reference_key = "answer"
)

In [7]:
run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=create_qa_chain,
    evaluation=eval_config,
    verbose=True,
    input_mapper=lambda x: x
)

View the evaluation results for project '2023-08-03-12-40-25-RetrievalQA' at:
https://smith.langchain.com/projects/p/ff921a02-7dcc-453b-9500-4ee160fda9d7?eval=true
> [0;32m/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py[0m(1375)[0;36mevaluate_run[0;34m()[0m
[0;32m   1374 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m [0;31m# fmt: skip[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1375 [0;31m        return self.create_feedback(
[0m[0;32m   1376 [0;31m            [0mrun_[0m[0;34m.[0m[0mid[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> source_info
{'__run': RunInfo(run_id=UUID('a7327fbf-bc3b-45d2-9fb3-44c117903a4a'))}
ipdb> qssed


Error evaluating run ee863bac-6c20-4474-82eb-7a7ebca0c93e with StringRunEvaluatorChain: 
Traceback (most recent call last):
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langchain/callbacks/tracers/evaluation.py", line 109, in _evaluate_in_project
    self.client.evaluate_run(run, evaluator)
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 90, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 115, in dispatch_line
    if self.quitting: raise BdbQuit
bdb.BdbQuit


> [0;32m/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py[0m(1375)[0;36mevaluate_run[0;34m()[0m
[0;32m   1374 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m [0;31m# fmt: skip[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1375 [0;31m        return self.create_feedback(
[0m[0;32m   1376 [0;31m            [0mrun_[0m[0;34m.[0m[0mid[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> q


Error evaluating run 502cda78-6d4b-47e1-a445-435ac1e1b017 with StringRunEvaluatorChain: 
Traceback (most recent call last):
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langchain/callbacks/tracers/evaluation.py", line 109, in _evaluate_in_project
    self.client.evaluate_run(run, evaluator)
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 90, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 115, in dispatch_line
    if self.quitting: raise BdbQuit
bdb.BdbQuit


> [0;32m/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py[0m(1375)[0;36mevaluate_run[0;34m()[0m
[0;32m   1374 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m [0;31m# fmt: skip[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1375 [0;31m        return self.create_feedback(
[0m[0;32m   1376 [0;31m            [0mrun_[0m[0;34m.[0m[0mid[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> q


Error evaluating run c27d2ebe-e103-419e-8fdd-988c6489ba95 with StringRunEvaluatorChain: 
Traceback (most recent call last):
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langchain/callbacks/tracers/evaluation.py", line 109, in _evaluate_in_project
    self.client.evaluate_run(run, evaluator)
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 90, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 115, in dispatch_line
    if self.quitting: raise BdbQuit
bdb.BdbQuit


> [0;32m/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py[0m(1375)[0;36mevaluate_run[0;34m()[0m
[0;32m   1374 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m [0;31m# fmt: skip[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1375 [0;31m        return self.create_feedback(
[0m[0;32m   1376 [0;31m            [0mrun_[0m[0;34m.[0m[0mid[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> q


Error evaluating run 34d8c89d-bf71-4c6d-b589-1d2cd2e37ac1 with StringRunEvaluatorChain: 
Traceback (most recent call last):
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langchain/callbacks/tracers/evaluation.py", line 109, in _evaluate_in_project
    self.client.evaluate_run(run, evaluator)
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 90, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 115, in dispatch_line
    if self.quitting: raise BdbQuit
bdb.BdbQuit


> [0;32m/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py[0m(1375)[0;36mevaluate_run[0;34m()[0m
[0;32m   1374 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m [0;31m# fmt: skip[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1375 [0;31m        return self.create_feedback(
[0m[0;32m   1376 [0;31m            [0mrun_[0m[0;34m.[0m[0mid[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> q


Error evaluating run 20d970a1-77e5-481b-8ada-e89b5cc02e1d with StringRunEvaluatorChain: 
Traceback (most recent call last):
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langchain/callbacks/tracers/evaluation.py", line 109, in _evaluate_in_project
    self.client.evaluate_run(run, evaluator)
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/envs/notes/lib/python3.10/site-packages/langsmith/client.py", line 1375, in evaluate_run
    return self.create_feedback(
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 90, in trace_dispatch
    return self.dispatch_line(frame)
  File "/home/jjmachan/.pyenv/versions/3.10.12/lib/python3.10/bdb.py", line 115, in dispatch_line
    if self.quitting: raise BdbQuit
bdb.BdbQuit


{'project_name': '2023-08-03-12-40-25-RetrievalQA',
 'results': {'6ddbfe30-309c-401e-8a6b-e17f091cfa1b': [{'query': 'What is the significance of the Statue of Liberty in New York City?',
    'result': 'The Statue of Liberty in New York City holds great significance. It served as a symbol of freedom and peace, welcoming millions of immigrants as they arrived in the U.S. in the late 19th and early 20th centuries. It represents the ideals of liberty and is recognized globally as a symbol of the United States. The Statue of Liberty is managed by the National Park Service and is a major landmark in New York City.'}],
  '8d0bf6f3-096b-4092-b2e9-cacc0b5f21de': [{'query': 'How did New York City get its name?',
    'result': 'New York City got its name in 1664 when it was renamed after the Duke of York, who later became King James II of England. The city was originally called New Amsterdam when it was founded by Dutch colonists in 1624, but it was renamed New York after the English took control

In [None]:
cli