In [3]:
!pip install datasets langchain chromadb openai deepeval langchain-community

Collecting langchain-community
  Obtaining dependency information for langchain-community from https://files.pythonhosted.org/packages/c8/bc/f8c7dae8321d37ed39ac9d7896617c4203248240a4835b136e3724b3bb62/langchain_community-0.3.27-py3-none-any.whl.metadata
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Obtaining dependency information for dataclasses-json<0.7,>=0.5.7 from https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl.metadata
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Obtaining dependency information for httpx-sse<1.0.0,>=0.4.0 from https://files.pythonhosted.org/packages/25/0a/6269e3473b09aed2dab8aa1a600c70f31f00ae1349bee30658f7e358a159/httpx_sse-0.4.1-py3-none-any.whl.metadata
  Downloading httpx_ss


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from datasets import load_dataset
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import json

In [7]:
ds = load_dataset("virattt/financial-qa-10K")
dataset = ds["train"]

Generating train split: 100%|██████████| 7000/7000 [00:00<00:00, 40975.78 examples/s]


In [21]:
ds

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'context', 'ticker', 'filing'],
        num_rows: 7000
    })
})

In [12]:
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
docs = []
for row in dataset:
    context = row['context']
    docs.extend(splitter.create_documents([context]))

In [16]:
import os
OPENAI_API_KEY="sk-C6d4lxoZnn2bJBx2rHkOxHjCF6PF_cDb9k8BdWseD6T3BlbkFJm9dOsFS-w59fFqk4qNbkWmzhM4jETehAXdNn6jho0A"
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [17]:
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embeddings)

In [18]:
retriever = db.as_retriever(search_kwargs={"k": 1})
llm = ChatOpenAI(model='gpt-4o')

  llm = ChatOpenAI(model='gpt-4o')


In [42]:
results = []

# Use proper dataset iteration - select first 20 rows
sample_dataset = dataset.select(range(20))

for row in sample_dataset:
    query = row['question']
    ground_truth = row['answer']

    retrieved_docs = retriever.get_relevant_documents(query)
    retrieved_context = "\n\n".join([doc.page_content for doc in retrieved_docs])

    prompt = f"""Answer the following question based only on the context below.

Context:
{retrieved_context}

Question:
{query}
"""

    answer = llm.predict(prompt)

    results.append({
        "query": query,
        "context":context,
        "retrieved_context": retrieved_context,
        "generated_answer": answer,
        "ground_truth": ground_truth
    })

print(f"Processed {len(results)} questions successfully!")

Processed 20 questions successfully!


In [43]:
print(results[:5])  # Display first 5 results for verification

[{'query': 'What area did NVIDIA initially focus on before expanding to other computationally intensive fields?', 'context': 'As of December 31, 2023, SGD 3.69 billion (approximately $2.79 billion at exchange rates in effect on December 31, 2023) remains available to be drawn under the Singapore Delayed Draw Term Facility once the construction cost estimate and construction schedule for the MBS Expansion Project are delivered to lenders.', 'retrieved_context': 'NVIDIA as the leader in computer graphics.', 'generated_answer': 'NVIDIA initially focused on computer graphics.', 'ground_truth': 'NVIDIA initially focused on PC graphics.'}, {'query': 'What are some of the recent applications of GPU-powered deep learning as mentioned by NVIDIA?', 'context': 'As of December 31, 2023, SGD 3.69 billion (approximately $2.79 billion at exchange rates in effect on December 31, 2023) remains available to be drawn under the Singapore Delayed Draw Term Facility once the construction cost estimate and c

In [48]:

from deepeval.metrics import (
    AnswerRelevancyMetric, HallucinationMetric, BiasMetric,
    FaithfulnessMetric,
    ContextualRelevancyMetric,ContextualRecallMetric,ContextualPrecisionMetric
)


In [49]:
from deepeval.test_case import LLMTestCase

# Prepare test cases for RAGMetric
test_cases = [
    LLMTestCase(
        input=entry['query'],
        actual_output=entry['generated_answer'],
        context=[entry['context']],
        retrieval_context=[entry['retrieved_context']],
        expected_output=entry['ground_truth']
    )
    for entry in results
]

print(f"Prepared {len(test_cases)} test cases for evaluation.")
print(test_cases[:5])  # Display first 5 test cases for verification

# Initialize RAGMetric
metrics = [
    AnswerRelevancyMetric(),
    HallucinationMetric(),
    BiasMetric(),
    ContextualRelevancyMetric(),
    ContextualRecallMetric(),
    ContextualPrecisionMetric(),
    FaithfulnessMetric(),
]

 

# # Run evaluations

metric_results = {}
for metric in metrics:
    scores = []
    for test_case in test_cases:
        metric.measure(test_case)
        scores.append(metric.score)
    average_score = sum(scores) / len(scores)
    metric_results[metric.__class__.__name__] = average_score
# # Evaluate RAG metric
# rag_scores = rag_metric.evaluate(test_cases)

# # Print the average RAG score
# avg_rag_score = sum(rag_scores) / len(rag_scores)
# print(f"Average RAG Metric Score: {avg_rag_score}")

Prepared 20 test cases for evaluation.
[LLMTestCase(input='What area did NVIDIA initially focus on before expanding to other computationally intensive fields?', actual_output='NVIDIA initially focused on computer graphics.', expected_output='NVIDIA initially focused on PC graphics.', context=['As of December 31, 2023, SGD 3.69 billion (approximately $2.79 billion at exchange rates in effect on December 31, 2023) remains available to be drawn under the Singapore Delayed Draw Term Facility once the construction cost estimate and construction schedule for the MBS Expansion Project are delivered to lenders.'], retrieval_context=['NVIDIA as the leader in computer graphics.'], additional_metadata=None, tools_called=None, comments=None, expected_tools=None, token_cost=None, completion_time=None, name=None, tags=None), LLMTestCase(input='What are some of the recent applications of GPU-powered deep learning as mentioned by NVIDIA?', actual_output='Some of the most recent applications of GPU-pow

In [50]:
print(metric_results)

{'AnswerRelevancyMetric': 0.8761904761904763, 'HallucinationMetric': 1.0, 'BiasMetric': 0.0, 'ContextualRelevancyMetric': 0.725, 'ContextualRecallMetric': 0.7416666666666667, 'ContextualPrecisionMetric': 0.75, 'FaithfulnessMetric': 1.0}


In [59]:
import requests


BASE_URL = "https://qa-backend.cognitiveview.com"
AUTH_TOKEN ="Bearer eyJhbGciOiJSUzI1NiIsImNhdCI6ImNsX0I3ZDRQRDIyMkFBQSIsImtpZCI6Imluc18yckY5Qll3RDh6WHBnMGI1T0t1bnlUeFA4d0UiLCJ0eXAiOiJKV1QifQ.eyJhenAiOiJodHRwczovL3FhLWZyb250ZW5kLmNvZ25pdGl2ZXZpZXcuY29tIiwiZXhwIjoxNzUxNDk5ODI1LCJpYXQiOjE3NTE0Mzk4MjUsImlzcyI6Imh0dHBzOi8vc3VwZXJiLW9jdG9wdXMtOTIuY2xlcmsuYWNjb3VudHMuZGV2IiwianRpIjoiYTk4OWJiMDdmOTRlNmIzYzlmOTUiLCJuYmYiOjE3NTE0Mzk2MjUsInN1YiI6InVzZXJfMnhkS1AyZGRDVFlXQ1YyQzJWTjZTTmZYRlRUIiwidXNlcl9pbmZvIjp7ImZpcnN0X25hbWUiOiJBc2h1dG9zaCAiLCJpZCI6InVzZXJfMnhkS1AyZGRDVFlXQ1YyQzJWTjZTTmZYRlRUIiwiaW5mbyI6eyJjbGllbnRfaWQiOiJDNDczNDIxIiwidGVuYW50X2lkIjoiVDE4MTc1MSIsInVzZXJfaWQiOiJVLTcwODcyMSJ9LCJsYXN0X25hbWUiOiJQYWRoaSIsIm1ldGFfZGF0YSI6eyJjbGllbnRfaWQiOiJDNDczNDIxIiwidGVuYW50X2lkIjoiVDE4MTc1MSIsInVzZXJfaWQiOiJVLTcwODcyMSJ9fX0.HAu6Qm-BBId5xLHv_IfbWO2pG_6wmGHry8IWd6ru0yo1sLKJSMoLc1ODZUDSJgyydh1zIEc44lBRrxUhG_-e8ApdWmBPYSnHXkxXnHV3VDARjHekYyiCMqIFdOk07esFcbDM4MQvNSZ-nlMm3639Lrlow7ZwRJHjVxeb6wZ5JUNgRwU-XWGnJDoeolk-afqB1ryn0P9OaqigyjMZK-IJN30Vz_IbW3vIDicarzL9P6w4yWa4XhIYyXW1zEcJfHdT-_OHyyJQC2HSGlO7yMIvayoxNfhb-518VBH8TLvg_3rnOLQD7oIjRW1hB3p-fd41i2BHWSJ_2ZFIFlPINMW6iA"  # Replace with your actual token
url = f"{BASE_URL}/cv/v1/metrics"

headers = {
    "Authorization": AUTH_TOKEN,
    "Content-Type": "application/json",
    "X-User-Id": "C473421_T181751",  
}

payload = {
  "metric_metadata": {
    "application_name": "chat-application",
    "version": "1.0.0",
    "resource_name": "chat-completion",
    "resource_id": "R-756",
    "provider": "deepeval",
    "use_case": "transportation"
  },
  "metric_data": {
    "resource_id": "res_123456",
    "resource_name": "chat-completion",
    "deepeval": metric_results,
  } 
}

response = requests.post(url, headers=headers, json=payload)

# Output the response
print(f"Status Code: {response.status_code}")
print("Response JSON:", response.json())

Status Code: 201
Response JSON: {'message': 'Metrics ingested, and evaluation completed.', 'report_id': 'gW3eQpV63sRTrQey9uJPNp'}


In [64]:
import requests

def fetch_report_result(report_id, auth_token, user_id):
    """
    Fetches the result of a report from the CognitiveView API.

    Args:
        report_id (str): The ID of the report to fetch.
        auth_token (str): The authorization token for the API.
        user_id (str): The user ID for the API.

    Returns:
        dict: The JSON response from the API if successful, else None.
    """
    base_url = "https://qa-backend.cognitiveview.com"
    endpoint = f"/cv/v1/metrics/{report_id}"
    url = base_url + endpoint

    headers = {
        "Authorization": auth_token,
        "Content-Type": "application/json",
        "X-User-Id": user_id,
    }

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch report. Status code: {response.status_code}")
        return None

# Example usage:
# AUTH_TOKEN = "Bearer eyJhbGciOiJSUzI1NiIsImNhdCI6ImNsX0I3ZDRQRDIyMkFBQSIsImtpZCI6Imluc18yckY5Qll3RDh6WHBnMGI1T0t1bnlUeFA4d0UiLCJ0eXAiOiJKV1QifQ.eyJhenAiOiJodHRwczovL3FhLWZyb250ZW5kLmNvZ25pdGl2ZXZpZXcuY29tIiwiZXhwIjoxNzUxNDk5ODI1LCJpYXQiOjE3NTE0Mzk4MjUsImlzcyI6Imh0dHBzOi8vc3VwZXJiLW9jdG9wdXMtOTIuY2xlcmsuYWNjb3VudHMuZGV2IiwianRpIjoiYTk4OWJiMDdmOTRlNmIzYzlmOTUiLCJuYmYiOjE3NTE0Mzk2MjUsInN1YiI6InVzZXJfMnhkS1AyZGRDVFlXQ1YyQzJWTjZTTmZYRlRUIiwidXNlcl9pbmZvIjp7ImZpcnN0X25hbWUiOiJBc2h1dG9zaCAiLCJpZCI6InVzZXJfMnhkS1AyZGRDVFlXQ1YyQzJWTjZTTmZYRlRUIiwiaW5mbyI6eyJjbGllbnRfaWQiOiJDNDczNDIxIiwidGVuYW50X2lkIjoiVDE4MTc1MSIsInVzZXJfaWQiOiJVLTcwODcyMSJ9LCJsYXN0X25hbWUiOiJQYWRoaSIsIm1ldGFfZGF0YSI6eyJjbGllbnRfaWQiOiJDNDczNDIxIiwidGVuYW50X2lkIjoiVDE4MTc1MSIsInVzZXJfaWQiOiJVLTcwODcyMSJ9fX0.HAu6Qm-BBId5xLHv_IfbWO2pG_6wmGHry8IWd6ru0yo1sLKJSMoLc1ODZUDSJgyydh1zIEc44lBRrxUhG_-e8ApdWmBPYSnHXkxXnHV3VDARjHekYyiCMqIFdOk07esFcbDM4MQvNSZ-nlMm3639Lrlow7ZwRJHjVxeb6wZ5JUNgRwU-XWGnJDoeolk-afqB1ryn0P9OaqigyjMZK-IJN30Vz_IbW3vIDicarzL9P6w4yWa4XhIYyXW1zEcJfHdT-_OHyyJQC2HSGlO7yMIvayoxNfhb-518VBH8TLvg_3rnOLQD7oIjRW1hB3p-fd41i2BHWSJ_2ZFIFlPINMW6iA"  # Replace with your actual token
report_id = "gW3eQpV63sRTrQey9uJPNp"  # Replace with the actual report ID you want to fetch
user_id = "C473421_T181751"  # Replace with your actual user ID
report = fetch_report_result("report_id", AUTH_TOKEN, "user_id")


Failed to fetch report. Status code: 500


In [22]:
# Check the first few rows to understand the structure
print("Dataset type:", type(dataset))
print("First row type:", type(dataset[0]))
print("First row:", dataset[0])
print("\nColumn names:", dataset.column_names)
print("Dataset features:", dataset.features)

Dataset type: <class 'datasets.arrow_dataset.Dataset'>
First row type: <class 'dict'>
First row: {'question': 'What area did NVIDIA initially focus on before expanding to other computationally intensive fields?', 'answer': 'NVIDIA initially focused on PC graphics.', 'context': 'Since our original focus on PC graphics, we have expanded to several other large and important computationally intensive fields.', 'ticker': 'NVDA', 'filing': '2023_10K'}

Column names: ['question', 'answer', 'context', 'ticker', 'filing']
Dataset features: {'question': Value('string'), 'answer': Value('string'), 'context': Value('string'), 'ticker': Value('string'), 'filing': Value('string')}
