In [94]:
import json
import os
from getpass import getpass
from urllib.request import urlopen

import nest_asyncio
import numpy as np
import pandas as pd
import phoenix as px
from langchain.chains import RetrievalQA
from langchain.retrievers import KNNRetriever
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations
from phoenix.trace.langchain import LangChainInstrumentor
from tqdm import tqdm
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Must run before any evals are launched from this notebook.
nest_asyncio.apply()  # needed for concurrent evals in notebook environments

In [106]:
# Close any Phoenix app left over from an earlier run so the launch below starts clean.
px.close_app()

In [110]:
# Start the local Phoenix app; `session.url` is printed at the end of the notebook.
session = px.launch_app()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [111]:
# Prompt for the OpenAI key only when the environment does not already provide one.
# An empty value (e.g. `export OPENAI_API_KEY=`) is treated as missing too, so the
# notebook asks up front instead of failing later with an authentication error.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("🔑 Enter your OpenAI API key: ")

In [97]:
def retrieve_info(query, k=3):
    """Return the text of the stored documents most similar to ``query``.

    Args:
        query: Text used to search the notebook-level vector store ``db``.
        k: Number of matches to request from the store. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        A list with the ``page_content`` of each match, in the order
        ``db.similarity_search`` returned them.
    """
    return [doc.page_content for doc in db.similarity_search(query, k=k)]

def generate_response(message):
    """Answer ``message`` with the notebook-level ``chain`` and return the answer text.

    The ``chain`` built in this notebook is a ``RetrievalQA`` chain: it takes a
    single ``query`` input, fetches context through its own retriever, and
    returns a dict whose ``result`` entry holds the answer. The previous call,
    ``chain.run(message=..., best_practice=...)``, supplied input keys this
    chain does not declare and would fail input validation.

    Args:
        message: The user question to answer.

    Returns:
        The value stored under ``"result"`` in the chain's output.
    """
    # NOTE(review): retrieval now happens inside the chain (db.as_retriever()),
    # not via retrieve_info(); confirm that is the intended source of context.
    output = chain.invoke({"query": message})
    return output["result"]

In [115]:
# --- Knowledge base: read the CSV (ISO-8859-1 encoded) into LangChain documents.
loader = CSVLoader(
    file_path="response.csv",
    encoding="iso-8859-1",
)
documents = loader.load()

# --- Vector store: embed the documents with OpenAI and index them in FAISS.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(documents, embeddings)

# --- QA chain: the chat model answers using documents fetched by the FAISS retriever.
llm = ChatOpenAI(model_name="gpt-4-turbo-2024-04-09")
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
    # Other chain types: "refine", "map_reduce", "map_rerank".
    chain_type="stuff",
    metadata={"application_type": "question_answering"},
)

In [113]:
# Turn on Phoenix's LangChain instrumentation so chain runs are traced.
# Running this cell again in the same kernel only logs an
# "already instrumented" warning.
LangChainInstrumentor().instrument()

WARNI [opentelemetry.instrumentation.instrumentor] Attempting to instrument while already instrumented


In [100]:
# Load the test questions; `json` is already imported in the notebook's import cell.
json_file_path = "test_queries.json"
# Open through the path variable (it was previously assigned but never used) and
# decode explicitly as UTF-8 so non-ASCII characters in the queries do not depend
# on the platform's default encoding.
with open(json_file_path, "r", encoding="utf-8") as file:
    test_queries = json.load(file)
# Each entry is read through its "query" key; the other fields are ignored here.
queries = [item["query"] for item in test_queries]

In [116]:
# Send only the query at index 5 through the chain; widen the slice to trace more.
# `res` keeps the output of the last query that was run.
selected_queries = queries[5:6]
for query in tqdm(selected_queries):
    res = chain.invoke(query)

  0%|          | 0/1 [00:00<?, ?it/s]ERROR [asyncio] Task was destroyed but it is pending!
task: <Task pending name='Task-5' coro=<BulkInserter._bulk_insert() running at /Users/baichuan/.pyenv/versions/3.11.9/envs/finalenv/lib/python3.11/site-packages/phoenix/db/bulk_inserter.py:103> wait_for=<Future pending cb=[Task.__wakeup()]>>
ERROR [asyncio] Task was destroyed but it is pending!
task: <Task pending name='Task-2683' coro=<BulkInserter._bulk_insert() running at /Users/baichuan/.pyenv/versions/3.11.9/envs/finalenv/lib/python3.11/site-packages/phoenix/db/bulk_inserter.py:103> wait_for=<Future pending cb=[Task.__wakeup()]>>
ERROR [asyncio] Task was destroyed but it is pending!
task: <Task pending name='Task-2690' coro=<BulkInserter._bulk_insert() running at /Users/baichuan/.pyenv/versions/3.11.9/envs/finalenv/lib/python3.11/site-packages/phoenix/db/bulk_inserter.py:103> wait_for=<Future finished result=None>>
ERROR [asyncio] Task was destroyed but it is pending!
task: <Task pending nam

{'query': 'How do I get started with Roboflow’s Outsource Labeling service?', 'result': "I don't know the specific steps to get started with Roboflow’s Outsource Labeling service. You might want to visit the Roboflow website or contact their customer support for detailed guidance on how to access and use the Outsource Labeling service."}





In [103]:
# Export the traced Q&A pairs and the retrieved documents from Phoenix.
# A single client serves both exports; a second one is unnecessary.
# NOTE: queries_df can come back as None when no Q&A spans have been recorded yet.
phoenix_client = px.Client()
queries_df = get_qa_with_reference(phoenix_client)
retrieved_documents_df = get_retrieved_documents(phoenix_client)

No retrieval documents found.


In [105]:
# Show both exported frames, one after the other, to check that traces were captured.
print(queries_df, retrieved_documents_df, sep="\n")


None
Empty DataFrame
Columns: [reference, document_score, context.trace_id, input]
Index: []


In [13]:
# Optional debugging aid, left disabled: uncomment to pull the spans dataframe
# from Phoenix and display it.
# trace_df = px.Client().get_spans_dataframe()
# trace_df

In [89]:
# Score the traced run with an LLM judge and attach the results to the Phoenix spans.

# The Q&A export is None when no matching spans were recorded. Stop with a clear
# message rather than letting run_evals fail on a missing dataframe.
if queries_df is None or queries_df.empty:
    raise ValueError(
        "No Q&A spans found in Phoenix; run the traced queries and re-export "
        "queries_df before evaluating."
    )

eval_model = OpenAIModel(
    model="gpt-4-turbo-preview",
)
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# run_evals returns one dataframe per evaluator, in the order the evaluators are given.
hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
)
evaluations = [
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
]

# Relevance is scored per retrieved document; skip it when nothing was retrieved
# so the Q&A evaluations can still be logged.
if retrieved_documents_df is not None and not retrieved_documents_df.empty:
    relevance_eval_df = run_evals(
        dataframe=retrieved_documents_df,
        evaluators=[relevance_evaluator],
        provide_explanation=True,
    )[0]
    evaluations.append(
        DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df)
    )
else:
    relevance_eval_df = None
    print("No retrieved documents to evaluate; skipping the Relevance eval.")

px.Client().log_evaluations(*evaluations)

run_evals |██████████| 12/12 (100.0%) | ⏳ 00:11<00:00 |  1.09it/s
run_evals |██████████| 24/24 (100.0%) | ⏳ 00:11<00:00 |  2.10it/s


In [24]:
# Point the reader at the Phoenix UI for the session launched earlier in the notebook.
print(f"🚀 Open the Phoenix UI if you haven't already: {session.url}")

🚀 Open the Phoenix UI if you haven't already: http://localhost:6006/
