In [None]:
import os, re, json

from phoenix.evals import (
    llm_classify,
    LiteLLMModel,
    llm_generate,
    RelevanceEvaluator,
    run_evals
)

import litellm
# Verbose LiteLLM request/response logging: every model call below dumps its payload into the
# cell output.
# NOTE(review): consider switching this off before sharing the notebook — the logs are large
# and echo the (masked) request headers.
litellm._turn_on_debug()

# Local Phoenix collector that receives the traces.
os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "http://localhost:6006"
# Do not clobber a real key that is already exported in the kernel's environment; "api" is only
# a placeholder fallback and must never be replaced here by a committed secret.
os.environ.setdefault("HUGGINGFACE_API_KEY", "api")

import phoenix as px
import pandas as pd

from phoenix.trace.dsl import SpanQuery
from phoenix.trace import SpanEvaluations, using_project

In [9]:
# Show full cell contents when DataFrames are displayed (no column-width truncation).
pd.options.display.max_colwidth = None

def normalize_newline(s: str) -> str:
    """Turn every literal backslash-n pair into a real newline; non-strings pass through untouched."""
    if not isinstance(s, str):
        return s
    return "\n".join(s.split("\\n"))

def split_ref_items(s: str):
    """
    Split a numbered reference string such as:
    [1] Intel focuses...
    [2] AMD Ryzen offers...
    [4] Laptop battery life...
    into a list of (id, text) tuples, where id is the digit string found between the brackets.
    Non-string input yields an empty list.
    """
    if isinstance(s, str):
        text = normalize_newline(s)
        # An item runs from its "[n]" marker up to the next marker that starts a line, or to the end.
        return re.findall(r"\[(\d+)\]\s*(.*?)(?=(?:\n\[\d+\])|$)", text, flags=re.S)
    return []

def explode_refs(df: pd.DataFrame, ref_col: str = "ref") -> pd.DataFrame:
    """
    Expand the numbered references stored in ``df[ref_col]`` into one row per reference.

    Each cell is parsed with ``split_ref_items`` and every (id, text) pair becomes its own row.
    Rows whose cell yields no reference (non-string value, or no "[n]" marker) are dropped
    instead of crashing the expansion; an empty input returns an empty result.

    Args:
        df: DataFrame holding the raw reference strings; it is not modified.
        ref_col: Name of the column that holds the reference strings.

    Returns:
        A DataFrame with a fresh 0..n-1 index and the single column ``ref_text``.
    """
    parsed = df[ref_col].apply(split_ref_items)
    out = df.assign(ref_items=parsed).explode("ref_items", ignore_index=True)
    # An empty item list explodes to NaN; such rows carry no (id, text) pair to unpack.
    out = out.loc[out["ref_items"].notna()].reset_index(drop=True)
    pairs = out["ref_items"].tolist()
    out = out.assign(
        ref_id=[int(ref_id) for ref_id, _ in pairs],
        ref_text=[ref_text for _, ref_text in pairs],
    )
    return out[["ref_text"]]

def output_parser(response: str, index: int):
    """
    Decode a raw model response as JSON.

    Args:
        response: Raw text returned by the model.
        index: Position of the row being parsed; unused here.

    Returns:
        The decoded JSON value, or ``{"__error__": <message>}`` when the response is not valid
        JSON or is not text at all (e.g. ``None``).
    """
    try:
        return json.loads(response)
    except (json.JSONDecodeError, TypeError) as e:
        # json.loads raises TypeError for None/non-text input; report it like any other parse
        # failure so one bad response cannot abort the whole run.
        return {"__error__": str(e)}

In [2]:
import os
# Point Phoenix at the collector endpoint (commonly http://localhost:6006).
os.environ.update({"PHOENIX_COLLECTOR_ENDPOINT": "http://localhost:6006"})

In [2]:
import phoenix as px
# Initialize a Phoenix client.
client = px.Client()
# Fetch the "Retrieval" dataset pinned to a specific version id.
# You can omit version_id to get the latest version.
# NOTE(review): `dataset` is not referenced by any later cell visible here — confirm it is still needed.
dataset = client.get_dataset(name="Retrieval", version_id="RGF0YXNldFZlcnNpb246Mg==")

In [10]:
# CHAIN spans of the "hugging-face" project, with their input and output values.
query = SpanQuery().where("span_kind == 'CHAIN'", ).select("trace_id", input="input.value", output="output.value")
df = px.Client().query_spans(query, project_name="hugging-face")

# TOOL spans: the `prompt.context.preview` attribute is selected as the `ref` column
# (presumably the numbered "[n] ..." context that explode_refs expects — verify against the tracer).
reference = SpanQuery().where("span_kind == 'TOOL'").select("trace_id", ref="prompt.context.preview")
spans_with_docs_df = px.Client().query_spans(reference, project_name="hugging-face")
print(len(spans_with_docs_df))

# One row per numbered reference found in the `ref` column.
document_chunks_df = explode_refs(spans_with_docs_df)
print("Exploded format:\n", document_chunks_df)

2
Exploded format:
                                                                                                                             ref_text
0        "AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat
1  nerations. "\n    "The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b
2  nd pre-built PCs. "\n    "Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun
3  ntel’s in both price and raw performance. "\n    "Looking forward, both AMD and Intel are betting on AI, high-performance computi
4  hem attractive for content creators and gamers alike. "\n    "Intel has been working on improving its efficiency cores and hybrid
5  t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. "\n    "Both co


In [13]:
# Prompt template for question generation. `{ref_text}` is its only placeholder; it matches the
# `ref_text` column of the chunk DataFrame that is passed to llm_generate together with this template.
# The model is asked for a JSON object with the keys question_1, question_2 and question_3.
# NOTE(review): the stray `"` after "context information provided." looks unintentional — confirm
# before changing it, since editing the prompt alters what the model receives.
generate_questions_template = """\
Context information is below.

---------------------
{ref_text}
---------------------

Given the context information and not prior knowledge.
generate only questions based on the below query.

You are a Teacher/ Professor. Your task is to setup \
3 questions for an upcoming \
quiz/examination. The questions should be diverse in nature \
across the document. Restrict the questions to the \
context information provided."

Output the questions in JSON format with the keys question_1, question_2, question_3.
"""

In [36]:
def normalize_newline(s: str) -> str:
    """Replace each literal backslash-n pair with a real newline; non-string values are returned as-is."""
    if isinstance(s, str):
        return s.replace("\\n", "\n")
    return s

def split_ref_items(s: str):
    """
    Parse a reference string of the form:
    [1] Intel focuses...
    [2] AMD Ryzen offers...
    [4] Laptop battery life...
    and return it as a list [(id, text), ...]; ids are the digit strings inside the brackets.
    Anything that is not a string produces an empty list.
    """
    if not isinstance(s, str):
        return []
    # Each item starts at "[n]" and ends right before the next line-leading "[m]" or at the end.
    item_re = re.compile(r"\[(\d+)\]\s*(.*?)(?=(?:\n\[\d+\])|$)", flags=re.S)
    normalized = normalize_newline(s)
    return item_re.findall(normalized)

def explode_refs(df: pd.DataFrame, ref_col: str = "ref") -> pd.DataFrame:
    """
    Expand the numbered references stored in ``df[ref_col]`` into one row per reference,
    keeping the trace id each reference came from.

    Each cell is parsed with ``split_ref_items`` and every (id, text) pair becomes its own row.
    Rows whose cell yields no reference (non-string value, or no "[n]" marker) are dropped
    instead of crashing the expansion; an empty input returns an empty result.

    Args:
        df: DataFrame with the raw reference strings and a ``context.trace_id`` column;
            it is not modified.
        ref_col: Name of the column that holds the reference strings.

    Returns:
        A DataFrame with a fresh 0..n-1 index and the columns ``context.trace_id`` and ``ref_text``.

    Raises:
        KeyError: If ``df`` has no ``ref_col`` or no ``context.trace_id`` column.
    """
    parsed = df[ref_col].apply(split_ref_items)
    out = df.assign(ref_items=parsed).explode("ref_items", ignore_index=True)
    # An empty item list explodes to NaN; such rows carry no (id, text) pair to unpack.
    out = out.loc[out["ref_items"].notna()].reset_index(drop=True)
    pairs = out["ref_items"].tolist()
    out = out.assign(
        ref_id=[int(ref_id) for ref_id, _ in pairs],
        ref_text=[ref_text for _, ref_text in pairs],
    )
    return out[["context.trace_id", "ref_text"]]

def output_parser(response: str, index: int):
    """
    Decode a raw model response as JSON.

    Args:
        response: Raw text returned by the model.
        index: Position of the row being parsed; unused here.

    Returns:
        The decoded JSON value, or ``{"__error__": <message>}`` when the response is not valid
        JSON or is not text at all (e.g. ``None``).
    """
    try:
        return json.loads(response)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers None/non-text responses, which json.loads rejects before parsing;
        # they are reported like any other parse failure instead of aborting the run.
        return {"__error__": str(e)}

In [18]:
def robust_output_parser(response: str, index: int):
    """
    Parse a model response into the three expected question fields, tolerating formatting noise.

    Handled in order: empty responses, Markdown code fences, a JSON object surrounded by
    extra text, and finally plain text.

    Args:
        response: Raw text returned by the model (may be None or empty).
        index: Position of the row being parsed; the raw text of the first three rows is printed.

    Returns:
        A dict with the keys question_1, question_2 and question_3 (missing ones are None).
        An empty response additionally carries "__error__": "empty". When no JSON object can
        be decoded, the whole cleaned text is returned as question_1.
    """
    s = (response or "").strip()
    # Echo the first few raw responses so the model's output format can be inspected.
    if index < 3:
        print(f"\nRAW[{index}]:\n{repr(s)}\n")

    if not s:
        return {"__error__": "empty", "question_1": None, "question_2": None, "question_3": None}

    # 1) Strip Markdown code fences such as ```json ... ```
    if s.startswith("```"):
        s = re.sub(r"^```[a-zA-Z]*\s*", "", s)
        s = re.sub(r"\s*```$", "", s, flags=re.S)

    # 2) Decode the first JSON object. raw_decode stops at the end of that object, so trailing
    #    text (even text that contains braces) no longer spoils the parse.
    start = s.find("{")
    if start != -1:
        try:
            obj, _ = json.JSONDecoder().raw_decode(s, start)
        except json.JSONDecodeError:
            # Deliberate best effort: malformed JSON falls through to the plain-text fallback.
            obj = None
        if isinstance(obj, dict):
            # Always return the same three keys so the resulting columns are stable.
            return {
                "question_1": obj.get("question_1"),
                "question_2": obj.get("question_2"),
                "question_3": obj.get("question_3"),
            }

    # 3) Fallback: the model answered in plain text; keep it as question_1.
    return {"question_1": s, "question_2": None, "question_3": None}

In [19]:
# LLM wrapper used for question generation; the same `model` object is reused by the
# relevance evaluation further down.
model = LiteLLMModel(
    model="huggingface/together/Qwen/Qwen2.5-7B-Instruct",
    temperature=0.0,
)

# Fill the template for every chunk row and parse each reply into question_1..question_3.
# NOTE(review): the captured output recommends calling nest_asyncio.apply() first so the
# requests can be submitted asynchronously inside a notebook.
questions_df = llm_generate(
    dataframe=document_chunks_df,
    template=generate_questions_template,
    model=model,
    output_parser=robust_output_parser,
    concurrency=20,
)

🐌!! If running inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.
llm_generate |          | 0/6 (0.0%) | ⏳ 00:00<? | ?it/s[92m10:10:35 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:10:35 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:10:35 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'content': 'Context information is below.\n\n---------------------\n"AMD\'s Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat\n---------------------\n\nGiven the context information and not prior knowledge.\ngenerate only questions based on the below query.\n\nYou are a Teacher/ Professor. Your task is to setup 3 questions for an upcoming quiz/examination. The questions should be diverse in nature across


RAW[0]:
'```json\n{\n  "question_1": "What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?",\n  "question_2": "How does the multi-core performance of AMD\'s Ryzen processors benefit content creation tasks?",\n  "question_3": "Based on the context, which type of users are likely to find AMD\'s Ryzen processors particularly appealing?"\n}\n```'



`max_retries` is not supported. It will be ignored.
[92m10:10:42 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': 'Context information is below.\n\n---------------------\nnerations. "\n    "The launch of AMD\'s EPYC server processors disrupted the data center market, offering higher core counts and b\n---------------------\n\nGiven the context information and not prior knowledge.\ngenerate only questions based on the below query.\n\nYou are a Teacher/ Professor. Your task is to setup 3 questions for an upcoming quiz/examination. The questions should be diverse in nature across the document. Restrict the questions to the context information provided."\n\nOutput the questions in JSON format with the keys question_1, question_2, question_3.


RAW[1]:
'```json\n{\n  "question_1": "What was the impact of AMD\'s EPYC server processors on the data center market according to the given context?",\n  "question_2": "Based on the information provided, what specific feature of AMD\'s EPYC processors is mentioned as a key factor in disrupting the market?",\n  "question_3": "Summarize the main point of the given context in one sentence."\n}\n```'



`max_retries` is not supported. It will be ignored.
[92m10:10:44 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': 'Context information is below.\n\n---------------------\nnd pre-built PCs. "\n    "Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun\n---------------------\n\nGiven the context information and not prior knowledge.\ngenerate only questions based on the below query.\n\nYou are a Teacher/ Professor. Your task is to setup 3 questions for an upcoming quiz/examination. The questions should be diverse in nature across the document. Restrict the questions to the context information provided."\n\nOutput the questions in JSON format with the keys question_1, question_2, question_3.\


RAW[2]:
'```json\n{\n  "question_1": "What are the two companies mentioned in the context that are expanding into GPU technologies?",\n  "question_2": "Which company is focusing on Radeon graphics according to the context?",\n  "question_3": "What is the focus of Intel\'s expansion into GPU technologies based on the given information?"\n}\n```'



`max_retries` is not supported. It will be ignored.
[92m10:10:45 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': 'Context information is below.\n\n---------------------\nntel’s in both price and raw performance. "\n    "Looking forward, both AMD and Intel are betting on AI, high-performance computi\n---------------------\n\nGiven the context information and not prior knowledge.\ngenerate only questions based on the below query.\n\nYou are a Teacher/ Professor. Your task is to setup 3 questions for an upcoming quiz/examination. The questions should be diverse in nature across the document. Restrict the questions to the context information provided."\n\nOutput the questions in JSON format with the keys question_1, question_2, question_3.\

In [20]:
# Preview the first rows of generated questions.
questions_df.head()

Unnamed: 0,question_1,question_2,question_3
0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,How does the multi-core performance of AMD's Ryzen processors benefit content creation tasks?,"Based on the context, which type of users are likely to find AMD's Ryzen processors particularly appealing?"
1,What was the impact of AMD's EPYC server processors on the data center market according to the given context?,"Based on the information provided, what specific feature of AMD's EPYC processors is mentioned as a key factor in disrupting the market?",Summarize the main point of the given context in one sentence.
2,What are the two companies mentioned in the context that are expanding into GPU technologies?,Which company is focusing on Radeon graphics according to the context?,What is the focus of Intel's expansion into GPU technologies based on the given information?
3,"According to the context, what are both AMD and Intel focusing on in the future?",How does the context describe the current state of price and raw performance between AMD and Intel?,Summarize the main points mentioned about AMD and Intel's future strategies in the given context.
4,What type of users is Intel's recent focus on improving efficiency cores and hybrid technology particularly attractive to?,What specific areas of technology is Intel focusing on to enhance the performance of its processors?,"Based on the context, what can be inferred about Intel's strategy in the current market for content creators and gamers?"


In [22]:
# Construct a dataframe of the questions and the document chunks.
# Only the real question columns are melted: without value_vars, melt() would also turn every
# other non-id column (for example the parser's "__error__" marker) into bogus "question" rows.
question_columns = [
    col for col in ("question_1", "question_2", "question_3") if col in questions_df.columns
]
questions_with_document_chunk_df = pd.concat([questions_df, document_chunks_df], axis=1)
questions_with_document_chunk_df = questions_with_document_chunk_df.melt(
    id_vars=["ref_text"], value_vars=question_columns, value_name="question"
).drop("variable", axis=1)
# If the generation step was interrupted or a response could not be parsed, some questions are
# missing; drop those rows.
questions_with_document_chunk_df = questions_with_document_chunk_df[
    questions_with_document_chunk_df["question"].notnull()
]

In [23]:
# Preview the first (chunk, question) pairs.
questions_with_document_chunk_df.head(10)

Unnamed: 0,ref_text,question
0,"""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",What feature of AMD's Ryzen processors has contributed to their popularity among content creators?
1,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b",What was the impact of AMD's EPYC server processors on the data center market according to the given context?
2,"nd pre-built PCs. ""\n ""Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun",What are the two companies mentioned in the context that are expanding into GPU technologies?
3,"ntel’s in both price and raw performance. ""\n ""Looking forward, both AMD and Intel are betting on AI, high-performance computi","According to the context, what are both AMD and Intel focusing on in the future?"
4,"hem attractive for content creators and gamers alike. ""\n ""Intel has been working on improving its efficiency cores and hybrid",What type of users is Intel's recent focus on improving efficiency cores and hybrid technology particularly attractive to?
5,"t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. ""\n ""Both co","According to the context, what strategy does Intel use to maintain its presence in the laptop and pre-built PC markets?"
6,"""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",How does the multi-core performance of AMD's Ryzen processors benefit content creation tasks?
7,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b","Based on the information provided, what specific feature of AMD's EPYC processors is mentioned as a key factor in disrupting the market?"
8,"nd pre-built PCs. ""\n ""Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun",Which company is focusing on Radeon graphics according to the context?
9,"ntel’s in both price and raw performance. ""\n ""Looking forward, both AMD and Intel are betting on AI, high-performance computi",How does the context describe the current state of price and raw performance between AMD and Intel?


In [26]:
import os
import pandas as pd
from tqdm import tqdm
from opentelemetry.trace import Status, StatusCode
from Phoenix.trace.tracing import tracer

from LiteLLM.lite import LiteLLMClient
from LiteLLM.Response import ResponseInput
from tools.rag import build_prompt

# --------- CONFIG ---------
# Source DataFrame: the questions are read from its "question" column.
SOURCE_DF = questions_with_document_chunk_df   # <- already built in an earlier cell
QUESTION_COL = "question"
TOP_K = 3          # build_prompt(query, top_k=TOP_K)
# NOTE(review): MODEL and TEMP are not referenced by main() in this cell, and the logged
# requests show a different model and temperature=0.7 — confirm where they are meant to be applied.
MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")  # change if needed
TEMP = float(os.getenv("LLM_TEMP", "0.2"))
MAX_NUM = None     # cap on the number of questions to run (None = run all)
# --------------------------

def main():
    """
    Run every generated question through the RAG pipeline and collect the answers.

    Questions come from SOURCE_DF[QUESTION_COL]; missing and blank entries are skipped and the
    list is optionally capped by MAX_NUM. For each question a prompt is built with build_prompt,
    sent to the LLM client, and the call is recorded in a "Thought" span marked as CHAIN.
    A failing question is recorded together with its error and does not stop the batch.

    Returns:
        DataFrame with the columns index, question, prompt, answer, usage and error, one row
        per question. The columns are present even when there was nothing to run.
    """
    # Prepare the list of questions: drop missing values, trim whitespace, skip blanks.
    queries = (
        SOURCE_DF[QUESTION_COL]
        .dropna()
        .map(lambda s: str(s).strip())
        .loc[lambda s: s.ne("")]
        .tolist()
    )
    if MAX_NUM is not None:
        queries = queries[:MAX_NUM]

    print(f"Running {len(queries)} questions…")

    # LLM client (how the API key/endpoint are configured depends on your environment).
    client = LiteLLMClient()

    # Fixed schema: with zero questions the frame would otherwise have no columns at all and
    # the sample print below would raise KeyError.
    result_columns = ["index", "question", "prompt", "answer", "usage", "error"]
    rows = []  # collected results, one dict per question

    for i, query in enumerate(tqdm(queries, desc="RAG batch", unit="q")):
        with tracer.start_as_current_span("Thought") as span:
            span.set_attribute("openinference.span.kind", "CHAIN")
            span.set_attribute("input.value", query)

            try:
                # Build the prompt via RAG (top-k context chunks for this question).
                prompt = build_prompt(query, top_k=TOP_K)

                # Call the model, one question at a time.
                msg = ResponseInput(prompt)
                resp = client.complete([msg])
                answer = resp.transform()
                usage = None
                try:
                    usage = resp.usage()
                except Exception:
                    # Usage statistics are optional; their absence must not fail the question.
                    pass

                # Record the outcome on the trace (the answer is truncated to 400 characters).
                span.set_attribute("output.value", (answer or "")[:400])
                span.set_status(Status(StatusCode.OK))

                rows.append({
                    "index": i,
                    "question": query,
                    "prompt": prompt,
                    "answer": answer,
                    "usage": usage,
                    "error": None,
                })

            except Exception as e:
                # Record the failure but keep the batch running.
                span.record_exception(e)
                span.set_status(Status(StatusCode.ERROR, str(e)))
                rows.append({
                    "index": i,
                    "question": query,
                    "prompt": None,
                    "answer": None,
                    "usage": None,
                    "error": str(e),
                })

    # Turn the collected rows into the result DataFrame.
    results_df = pd.DataFrame(rows, columns=result_columns)
    pd.set_option("display.max_colwidth", None)
    print("\n=== SAMPLE RESULTS ===")
    print(results_df.head(10)[["question", "answer", "error"]])

    # (optional) save to a file
    # results_df.to_csv("rag_batch_results.csv", index=False)
    return results_df

# Run the batch when this code is executed as the main module.
# NOTE(review): the returned DataFrame is bound to `_` and is not used by the later cells shown here.
if __name__ == "__main__":
    _ = main()


Running 18 questions…


RAG batch:   0%|          | 0/18 [00:00<?, ?q/s]You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
[92m10:23:54 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:23:54 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:23:54 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] \n    "AMD\'s Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat\n[2] hem attractive for content creators and gamers alike. "\n    "Intel has been working on improving its efficiency cores and hybrid\n[3]  its efficiency cores and hybrid archite


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:24:48 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:48 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:24:48 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. "\n    "Both co\n[2] ufacturing leadership and reduce dependency on external foundries. "\n    "AMD relies on TSMC’s advanced process nodes to fabrica\n[3]  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n\nQuestion: The context mentions relationships with OEMs. What does OEM stand for in this context?\n'}], temperature=0.7)[0m
[92m10:24:48 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:4


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:24:51 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:51 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:24:51 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] \n    "AMD\'s Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat\n[2] hem attractive for content creators and gamers alike. "\n    "Intel has been working on improving its efficiency cores and hybrid\n[3] s without owning its own fabs. "\n    "While AMD gained significant momentum in the desktop market, Intel still maintains strong \n\nQuestion: Based on the context, which type of users are likely to find AMD\'s Ryzen processors particularly appealing?\n'}], temperature=0.7)[0m
[92m10:24:51 - LiteLLM:DEBUG[0m: utils.


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:24:55 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:55 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:24:55 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] s without owning its own fabs. "\n    "While AMD gained significant momentum in the desktop market, Intel still maintains strong \n[2] n Radeon graphics and Intel launching its ARC series to compete directly with NVIDIA. "\n    "Intel has faced challenges with del\n[3] ffering higher core counts and better performance per watt compared to Intel Xeon chips. "\n    "Intel recently invested billions\n\nQuestion: Summarize the main point of the given context in one sentence.\n'}], temperature=0.7)[0m
[92m10:24:55 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:55 - LiteLLM:DEBUG[0m: l


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:24:59 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:24:59 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:24:59 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] nd pre-built PCs. "\n    "Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun\n[2] ntel’s in both price and raw performance. "\n    "Looking forward, both AMD and Intel are betting on AI, high-performance computi\n[3] n Radeon graphics and Intel launching its ARC series to compete directly with NVIDIA. "\n    "Intel has faced challenges with del\n\nQuestion: What is the focus of Intel\'s expansion into GPU technologies based on the given information?\n'}], temperature=0.7)[0m
[92m10:24:59 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:25:02 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:25:02 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:25:02 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] ntel’s in both price and raw performance. "\n    "Looking forward, both AMD and Intel are betting on AI, high-performance computi\n[2] nd pre-built PCs. "\n    "Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun\n[3] s without owning its own fabs. "\n    "While AMD gained significant momentum in the desktop market, Intel still maintains strong \n\nQuestion: Summarize the main points mentioned about AMD and Intel\'s future strategies in the given context.\n'}], temperature=0.7)[0m
[92m10:25:02 - LiteLLM:DEBUG[0m: utils.py:349 - 




[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:25:07 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:25:07 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:25:07 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] hem attractive for content creators and gamers alike. "\n    "Intel has been working on improving its efficiency cores and hybrid\n[2] s without owning its own fabs. "\n    "While AMD gained significant momentum in the desktop market, Intel still maintains strong \n[3] ntel’s in both price and raw performance. "\n    "Looking forward, both AMD and Intel are betting on AI, high-performance computi\n\nQuestion: Based on the context, what can be inferred about Intel\'s strategy in the current market for content creators and gamers?\n'}], temperature=0.7)[0m
[92m10:25:07 - LiteLLM:DEBUG


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



[92m10:25:10 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10:25:10 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m10:25:10 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'role': 'user', 'content': 'You are a careful assistant. Use the context below to support your answer.\n\nContext (numbered chunks):\n[1] t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. "\n    "Both co\n[2] s without owning its own fabs. "\n    "While AMD gained significant momentum in the desktop market, Intel still maintains strong \n[3] n Radeon graphics and Intel launching its ARC series to compete directly with NVIDIA. "\n    "Intel has faced challenges with del\n\nQuestion: Based on the given information, what does Intel ensure through its relationships with OEMs?\n'}], temperature=0.7)[0m
[92m10:25:10 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m10


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


=== SAMPLE RESULTS ===
                                                                                                                                   question  \
0                                        What feature of AMD's Ryzen processors has contributed to their popularity among content creators?   
1                             What was the impact of AMD's EPYC server processors on the data center market according to the given context?   
2                                             What are the two companies mentioned in the context that are expanding into GPU technologies?   
3                                                          According to the context, what are both AMD and Intel focusing on in the future?   
4                What type of 




In [66]:
# Fresh Phoenix client for the retrieval evaluation part.
client = px.Client()

# RETRIEVER spans with their ids, the query text and the list of retrieved documents.
query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER'")
    .select("context.span_id", "context.trace_id", "input.value", "retrieval.documents")
)

df = client.query_spans(query, project_name="hugging-face")
# Move the index into the columns; a column that comes out named "index" is renamed to
# "context.span_id".
df = df.reset_index().rename(columns={"index": "context.span_id"})
print(df.columns)
print(df.head())

Index(['context.span_id', 'context.trace_id', 'input.value',
       'retrieval.documents'],
      dtype='object')
    context.span_id                  context.trace_id  \
0  0224631cead9f25d  500b3bdc9e9c658bee05bdea991a793a   
1  6fe4de290b8d7478  582261bea40ac7541550f4369e7342b1   
2  6b384c0a008fcdc1  2bb550be5926f49d23cef2b2443893d0   
3  4abdb3f2ef462568  3969182ba42aa8040638bd5352d2339c   
4  5551e37d1a9fdeef  58481aa2e6a562825c0a9c419ac0582d   

                                                                                                     input.value  \
0                                                                           I want to know information about AMD   
1                                                                            I want some information about Intel   
2             What feature of AMD's Ryzen processors has contributed to their popularity among content creators?   
3  What was the impact of AMD's EPYC server processors on the data center marke

In [67]:
# One row per retrieved document instead of one list per span; the index is renumbered 0..n-1.
df_exploded = df.explode("retrieval.documents", ignore_index=True)

print(df_exploded.columns)

Index(['context.span_id', 'context.trace_id', 'input.value',
       'retrieval.documents'],
      dtype='object')


In [68]:
# Flatten the nested document dicts into columns.
# A span without retrieved documents explodes to NaN, which json_normalize cannot process,
# so such rows are skipped before flattening.
has_document = df_exploded["retrieval.documents"].notna()
docs_source = df_exploded.loc[has_document].reset_index(drop=True)
docs = pd.json_normalize(docs_source["retrieval.documents"].tolist())

# Re-attach the span/trace ids and the input to the flattened document columns
# (both frames share the same 0..n-1 index).
retrieved_documents_df = pd.concat([docs_source.drop(columns=["retrieval.documents"]), docs], axis=1)

print(retrieved_documents_df.columns)

Index(['context.span_id', 'context.trace_id', 'input.value',
       'document.content', 'document.id', 'document.score'],
      dtype='object')


In [69]:
# Rename to "reference" (document text) and "input" (query text) — presumably the column names
# the relevance evaluator reads. rename() skips keys that are absent, so a frame without
# "input.value" is left unchanged for that entry.
retrieved_documents_df = retrieved_documents_df.rename(
    {"document.content": "reference", "input.value": "input"},
    axis="columns",
)

In [70]:
from phoenix.evals import RelevanceEvaluator, run_evals

# Judge, row by row, whether each "reference" text is relevant to its "input",
# asking the evaluator for an explanation alongside every verdict.
relevance_evaluator = RelevanceEvaluator(model)

# Only one evaluator is passed, so only the first result frame is kept.
# NOTE(review): the recorded run of this cell hit provider quota errors, and
# the combined table shown later has rows with empty label/score/explanation;
# check for missing results before relying on them.
retrieved_documents_relevance_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    concurrency=20,
    provide_explanation=True,
)[0]

🐌!! If running inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.
run_evals |          | 0/60 (0.0%) | ⏳ 00:00<? | ?it/s[92m11:06:50 - LiteLLM:DEBUG[0m: utils.py:349 - 

[92m11:06:50 - LiteLLM:DEBUG[0m: utils.py:349 - [92mRequest to litellm:[0m
[92m11:06:50 - LiteLLM:DEBUG[0m: utils.py:349 - [92mlitellm.completion(model='huggingface/together/Qwen/Qwen2.5-7B-Instruct', messages=[{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: I want to know information about AMD\n    ************\n    [Reference text]: \n    "AMD\'s Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat\n    ************\n    [END D


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 1: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:09 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 2: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:10 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 3: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:10 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 4: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:11 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 5: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:12 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 6: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:12 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 7: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:13 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 8: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:13 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 9: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:14 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Exception in worker on attempt 10: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}
Retrying...


`max_retries` is not supported. It will be ignored.
[92m11:07:15 - LiteLLM:DEBUG[0m: litellm_logging.py:929 - [92m

POST Request Sent from LiteLLM:
curl -X POST \
https://router.huggingface.co/together/v1/chat/completions \
-H 'content-type: application/json' -H 'Authorization: Be****SP' \
-d '{'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'messages': [{'content': '\nYou are comparing a reference text to a question and trying to determine if the reference text\ncontains information relevant to answering the question. Here is the data:\n    [BEGIN DATA]\n    ************\n    [Question]: What feature of AMD\'s Ryzen processors has contributed to their popularity among content creators?\n    ************\n    [Reference text]:  its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. "\n    "The launch of \n    ************\n    [END DATA]\nCompare the Question above to the Reference text. You must determine whether the Reference text\ncon


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m

Retries exhausted after 11 attempts: litellm.APIError: HuggingfaceException - {"error":"You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits."}


In [75]:
# Preview the first five evaluation results (label, score, explanation).
retrieved_documents_relevance_df.head(n=5)

Unnamed: 0,label,score,explanation
0,relevant,1.0,EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's Ryzen processors and provides specific information about their performance. This information is directly related to AMD and can help answer the question about the company.\nLABEL: relevant
1,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's EPYC server processors and their impact on the data center market. This information is directly related to AMD and provides details about its products and market presence. Therefore, the reference text contains relevant information to answer the question.\nLABEL: relevant"
2,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD specifically, noting its focus on Radeon graphics. This information is directly relevant to answering the question about AMD.\nLABEL: relevant"
3,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel in the context of its pricing and performance, and also notes Intel's future focus on AI and high-performance computing. While the text does not provide extensive details, it does mention Intel and its areas of focus, which can be relevant to answering the question.\nLABEL: relevant"
4,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel and discusses its work on improving its efficiency cores and hybrid features, which are relevant to Intel's technological advancements. This information could help answer the question by providing details about Intel's current focus and capabilities.\nLABEL: relevant"


In [80]:
# Place the evaluation columns, prefixed with "eval_", next to the document
# rows. The two frames are matched on their row index.
documents_with_relevance_df = pd.concat(
    [
        retrieved_documents_df,
        retrieved_documents_relevance_df.rename(columns=lambda name: f"eval_{name}"),
    ],
    axis="columns",
)
documents_with_relevance_df

Unnamed: 0,context.span_id,context.trace_id,input,reference,document.id,document.score,eval_label,eval_score,eval_explanation
0,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"\n ""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",6e13a8d5-0deb-4982-8f6a-601164f66a73,0.025658,relevant,1.0,EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's Ryzen processors and provides specific information about their performance. This information is directly related to AMD and can help answer the question about the company.\nLABEL: relevant
1,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b",825177e1-6c77-4de7-bb02-c332e8dbaf54,0.00709,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's EPYC server processors and their impact on the data center market. This information is directly related to AMD and provides details about its products and market presence. Therefore, the reference text contains relevant information to answer the question.\nLABEL: relevant"
2,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"nd pre-built PCs. ""\n ""Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun",a23b9fae-a936-451e-b8d5-573ab30f08dd,0.004895,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD specifically, noting its focus on Radeon graphics. This information is directly relevant to answering the question about AMD.\nLABEL: relevant"
3,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"ntel’s in both price and raw performance. ""\n ""Looking forward, both AMD and Intel are betting on AI, high-performance computi",def89f0c-d7d3-4c57-964a-4689cfda6ad3,0.023789,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel in the context of its pricing and performance, and also notes Intel's future focus on AI and high-performance computing. While the text does not provide extensive details, it does mention Intel and its areas of focus, which can be relevant to answering the question.\nLABEL: relevant"
4,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"hem attractive for content creators and gamers alike. ""\n ""Intel has been working on improving its efficiency cores and hybrid",36084d3b-9775-4e1b-87a6-c542507e0074,0.023743,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel and discusses its work on improving its efficiency cores and hybrid features, which are relevant to Intel's technological advancements. This information could help answer the question by providing details about Intel's current focus and capabilities.\nLABEL: relevant"
5,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. ""\n ""Both co",dc8a928d-6cc7-4393-a0c5-3754e4cb8cb9,0.010969,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel's relationships with OEMs and its presence in laptops and pre-built PCs. While the information is specific to Intel, it does not directly provide broad information about the company such as its history, products, or financials. However, it does offer context about Intel's market position, which could be relevant to understanding the company's role in the industry. Given that the question is broad and seeks information about Intel, the provided text, though specific, can contribute to answering the question by providing insight into Intel's market presence.\n\nLABEL: ""relevant"""
6,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"\n ""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",6e13a8d5-0deb-4982-8f6a-601164f66a73,0.995588,relevant,1.0,"EXPLANATION: The question asks about a specific feature of AMD's Ryzen processors that has contributed to their popularity among content creators. The reference text mentions that these processors have strong multi-core performance, which is a feature that could contribute to their popularity. While the reference text does not explicitly state that this feature contributes to popularity among content creators, it does provide a relevant feature that could be the answer. Therefore, the reference text contains information that can help answer the question.\n\nLABEL: ""relevant"""
7,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"hem attractive for content creators and gamers alike. ""\n ""Intel has been working on improving its efficiency cores and hybrid",36084d3b-9775-4e1b-87a6-c542507e0074,0.018183,unrelated,0.0,"EXPLANATION: The question asks about a specific feature of AMD's Ryzen processors that has contributed to their popularity among content creators. The reference text does not mention AMD, Ryzen processors, or any specific features that might contribute to their popularity. It only makes a general statement about processors being attractive for content creators and gamers, without providing any details. Therefore, the reference text does not contain information that can help answer the question.\n\nLABEL: ""unrelated"""
8,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. ""\n ""The launch of",a0149ea1-fb03-4f53-be3a-891702d05314,0.000928,,,
9,4abdb3f2ef462568,3969182ba42aa8040638bd5352d2339c,What was the impact of AMD's EPYC server processors on the data center market according to the given context?,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b",825177e1-6c77-4de7-bb02-c332e8dbaf54,0.977319,,,


In [86]:
# Strip the "eval_" prefix again so the result columns are plainly named
# label / score / explanation.
# NOTE(review): presumably these are the names the evaluation-logging step
# expects -- confirm.
documents_with_relevance_df = documents_with_relevance_df.rename(
    {"eval_label": "label", "eval_score": "score", "eval_explanation": "explanation"},
    axis="columns",
)

print(documents_with_relevance_df.head(5))

    context.span_id                  context.trace_id  \
0  0224631cead9f25d  500b3bdc9e9c658bee05bdea991a793a   
1  0224631cead9f25d  500b3bdc9e9c658bee05bdea991a793a   
2  0224631cead9f25d  500b3bdc9e9c658bee05bdea991a793a   
3  6fe4de290b8d7478  582261bea40ac7541550f4369e7342b1   
4  6fe4de290b8d7478  582261bea40ac7541550f4369e7342b1   

                                  input  \
0  I want to know information about AMD   
1  I want to know information about AMD   
2  I want to know information about AMD   
3   I want some information about Intel   
4   I want some information about Intel   

                                                                                                                           reference  \
0  \n    "AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat   
1  nerations. "\n    "The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts

In [89]:
# Display the full combined table: one row per retrieved document together
# with its label / score / explanation columns.
documents_with_relevance_df

Unnamed: 0,context.span_id,context.trace_id,input,reference,document.id,document.score,label,score,explanation
0,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"\n ""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",6e13a8d5-0deb-4982-8f6a-601164f66a73,0.025658,relevant,1.0,EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's Ryzen processors and provides specific information about their performance. This information is directly related to AMD and can help answer the question about the company.\nLABEL: relevant
1,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b",825177e1-6c77-4de7-bb02-c332e8dbaf54,0.00709,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD's EPYC server processors and their impact on the data center market. This information is directly related to AMD and provides details about its products and market presence. Therefore, the reference text contains relevant information to answer the question.\nLABEL: relevant"
2,0224631cead9f25d,500b3bdc9e9c658bee05bdea991a793a,I want to know information about AMD,"nd pre-built PCs. ""\n ""Both companies are expanding into GPU technologies, with AMD focusing on Radeon graphics and Intel laun",a23b9fae-a936-451e-b8d5-573ab30f08dd,0.004895,relevant,1.0,"EXPLANATION: The question asks for information about AMD. The reference text mentions AMD specifically, noting its focus on Radeon graphics. This information is directly relevant to answering the question about AMD.\nLABEL: relevant"
3,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"ntel’s in both price and raw performance. ""\n ""Looking forward, both AMD and Intel are betting on AI, high-performance computi",def89f0c-d7d3-4c57-964a-4689cfda6ad3,0.023789,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel in the context of its pricing and performance, and also notes Intel's future focus on AI and high-performance computing. While the text does not provide extensive details, it does mention Intel and its areas of focus, which can be relevant to answering the question.\nLABEL: relevant"
4,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"hem attractive for content creators and gamers alike. ""\n ""Intel has been working on improving its efficiency cores and hybrid",36084d3b-9775-4e1b-87a6-c542507e0074,0.023743,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel and discusses its work on improving its efficiency cores and hybrid features, which are relevant to Intel's technological advancements. This information could help answer the question by providing details about Intel's current focus and capabilities.\nLABEL: relevant"
5,6fe4de290b8d7478,582261bea40ac7541550f4369e7342b1,I want some information about Intel,"t, Intel still maintains strong relationships with OEMs, ensuring a steady presence in laptops and pre-built PCs. ""\n ""Both co",dc8a928d-6cc7-4393-a0c5-3754e4cb8cb9,0.010969,relevant,1.0,"EXPLANATION: The question asks for information about Intel. The reference text mentions Intel's relationships with OEMs and its presence in laptops and pre-built PCs. While the information is specific to Intel, it does not directly provide broad information about the company such as its history, products, or financials. However, it does offer context about Intel's market position, which could be relevant to understanding the company's role in the industry. Given that the question is broad and seeks information about Intel, the provided text, though specific, can contribute to answering the question by providing insight into Intel's market presence.\n\nLABEL: ""relevant"""
6,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"\n ""AMD's Ryzen processors have been praised for their strong multi-core performance, making them attractive for content creat",6e13a8d5-0deb-4982-8f6a-601164f66a73,0.995588,relevant,1.0,"EXPLANATION: The question asks about a specific feature of AMD's Ryzen processors that has contributed to their popularity among content creators. The reference text mentions that these processors have strong multi-core performance, which is a feature that could contribute to their popularity. While the reference text does not explicitly state that this feature contributes to popularity among content creators, it does provide a relevant feature that could be the answer. Therefore, the reference text contains information that can help answer the question.\n\nLABEL: ""relevant"""
7,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"hem attractive for content creators and gamers alike. ""\n ""Intel has been working on improving its efficiency cores and hybrid",36084d3b-9775-4e1b-87a6-c542507e0074,0.018183,unrelated,0.0,"EXPLANATION: The question asks about a specific feature of AMD's Ryzen processors that has contributed to their popularity among content creators. The reference text does not mention AMD, Ryzen processors, or any specific features that might contribute to their popularity. It only makes a general statement about processors being attractive for content creators and gamers, without providing any details. Therefore, the reference text does not contain information that can help answer the question.\n\nLABEL: ""unrelated"""
8,6b384c0a008fcdc1,2bb550be5926f49d23cef2b2443893d0,What feature of AMD's Ryzen processors has contributed to their popularity among content creators?,"its efficiency cores and hybrid architecture, especially with the Alder Lake and Raptor Lake generations. ""\n ""The launch of",a0149ea1-fb03-4f53-be3a-891702d05314,0.000928,,,
9,4abdb3f2ef462568,3969182ba42aa8040638bd5352d2339c,What was the impact of AMD's EPYC server processors on the data center market according to the given context?,"nerations. ""\n ""The launch of AMD's EPYC server processors disrupted the data center market, offering higher core counts and b",825177e1-6c77-4de7-bb02-c332e8dbaf54,0.977319,,,


In [None]:
from phoenix.trace import DocumentEvaluations

# documents_with_relevance_df holds one row per retrieved document, so several
# rows share the same "context.span_id". Logging it as span-level evaluations
# would make those rows collide on the span id; log document-level evaluations
# keyed by (span id, document position) instead.
document_relevance_df = (
    documents_with_relevance_df
    .assign(
        # 0-based position of each document within its span. This relies on the
        # rows still being in the order produced when the documents were exploded.
        document_position=lambda frame: frame.groupby("context.span_id").cumcount()
    )
    # Rows whose evaluation call failed have no label, score or explanation;
    # there is nothing meaningful to log for them.
    .dropna(subset=["label", "score", "explanation"], how="all")
    .set_index(["context.span_id", "document_position"])
    # Keep only the result columns so "document.score" etc. are not sent along.
    [["label", "score", "explanation"]]
)

px.Client().log_evaluations(
    DocumentEvaluations(eval_name="RAG", dataframe=document_relevance_df),
)

Keyword argument `project_name` is no longer necessary and is ignored.
