In [1]:
import os
import openai
import io
import pandas as pd
import llama_index
from pypdf import PdfReader, PdfWriter

In [2]:
import os

# Set your OpenAI API key here
os.environ['OPENAI_API_KEY'] = 'sk'


## Read context document

In [3]:
# !curl https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_Chapter03.pdf --output IPCC_AR6_WGII_Chapter03.pdf

In [4]:
def select_pdf_pages(file, pages):
    """Given a pdf file object and list of pages, return a new pdf (as a bytes-object) with only those pages"""
    pdf_reader = PdfReader(file)
    pdf_writer = PdfWriter()
    for page in pages:
        pdf_writer.add_page(pdf_reader.pages[page])

    buf = io.BytesIO()
    pdf_writer.write(buf)
    buf.seek(0)
    return buf

new_pdf = select_pdf_pages(
    open("IPCC_AR6_WGII_Chapter03.pdf", "rb"), [i for i in range(0, 30)]
)
with open("IPCC_AR6_WGII_Chapter03_subset.pdf", "wb") as f:
    f.write(new_pdf.getvalue())

In [5]:
# Initialize models for LlamaIndex
#from llama_index.llms import OpenAI
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
embed_model = OpenAIEmbedding()

### Load data, split into chunks

In [6]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=["IPCC_AR6_WGII_Chapter03_subset.pdf"]
).load_data()

# print(len(documents))

In [7]:
from llama_index.core import ServiceContext, VectorStoreIndex
from llama_index.core.node_parser import SimpleFileNodeParser

## Index-building functions

In [8]:
from llama_index.core import ServiceContext, VectorStoreIndex
from llama_index.core.node_parser import SimpleFileNodeParser

def build_basic_index(documents, llm, embed_model=OpenAIEmbedding()):
    basic_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    basic_index = VectorStoreIndex.from_documents(
        documents=documents, service_context=basic_context
    )
    return basic_index

def build_sentence_window_index(documents, llm, embed_model=OpenAIEmbedding()):
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,
        window_metadata_key="window",
        original_text_metadata_key="original-text"
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser
    )
    sentence_index = VectorStoreIndex.from_documents(
        documents=documents, service_context=sentence_context
    )
    return sentence_index

In [12]:
#from trulens_eval.feedback import Groundedness, GroundTruthAgreement
from trulens_eval.feedback import GroundTruthAgreement
from trulens_eval import Tru
from trulens_eval import OpenAI, Feedback, TruLlama
import numpy as np

In [15]:
import trulens_eval.feedback as feedback
import os

# Check the installed location of the trulens_eval package
trulens_eval_location = os.path.dirname(feedback.__file__)
print(trulens_eval_location)

# List the contents of the feedback module
feedback_module_path = os.path.join(trulens_eval_location)
print(os.listdir(feedback_module_path))


/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback
['embeddings.py', 'provider', '__pycache__', 'groundtruth.py', '__init__.py', 'prompts.py', 'v2', 'feedback.py']


In [18]:
# Display the contents of groundtruth.py
with open('/usr/local/lib/python3.10/dist-packages/trulens_eval/feedback/groundtruth.py', 'r') as file:
    print(file.read())

import logging
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Union

import numpy as np
import pydantic

from trulens_eval.feedback.provider import Provider
from trulens_eval.utils.generated import re_0_10_rating
from trulens_eval.utils.imports import OptionalImports
from trulens_eval.utils.imports import REQUIREMENT_BERT_SCORE
from trulens_eval.utils.imports import REQUIREMENT_EVALUATE
from trulens_eval.utils.imports import REQUIREMENT_OPENAI
from trulens_eval.utils.pyschema import FunctionOrMethod
from trulens_eval.utils.pyschema import WithClassInfo
from trulens_eval.utils.serial import SerialModel

with OptionalImports(messages=REQUIREMENT_OPENAI):
    from trulens_eval.feedback.provider.openai import OpenAI

with OptionalImports(messages=REQUIREMENT_BERT_SCORE):
    from bert_score import BERTScorer

with OptionalImports(messages=REQUIREMENT_EVALUATE):
    import evaluate

logger = logging.getLogger(__name__)


# TODEP
class GroundTruthAgreement(WithClassInfo,

### Configure TruLens Evaluation framework

In [19]:
from trulens_eval.feedback import GroundTruthAgreement
from trulens_eval import Tru
from trulens_eval import OpenAI, Feedback, TruLlama
import numpy as np

# Initialize the main entry-point to TruLens, Tru
tru = Tru()

openai_provider = OpenAI(model_engine="gpt-3.5-turbo-1106")

qa_df = pd.read_csv("ipcc_test_questions.csv")
qa_set = [{"query": item["Question"], "response": item["Answer"]} for index, item in qa_df.iterrows()]

# Initialize metrics to collect

# Answer relevance
f_qa_relevance = Feedback(
    openai_provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()

# Context relevance
f_qs_relevance = Feedback(
    openai_provider.relevance_with_cot_reasons, name="Context Relevance"
).on_input().on(TruLlama.select_source_nodes().node.text).aggregate(np.mean)




# Ground truth agreement
f_groundtruth = Feedback(
    GroundTruthAgreement(qa_set).agreement_measure, name="Answer Correctness"
).on_input_output()


metrics = [f_qa_relevance, f_qs_relevance, f_groundtruth]

def get_trulens_recorder(query_engine, app_id):
    tru_recorder = TruLlama(
        query_engine,
        feedbacks=metrics,
        app_id=app_id
    )
    return tru_recorder



🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Answer Correctness, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Correctness, input response will be set to __record__.main_output or `Select.RecordOutput` .


## Run Evals for Basic Index

In [20]:
basic_query_index = build_basic_index(documents=documents, llm=llm, embed_model=embed_model)
basic_query_engine = basic_query_index.as_query_engine()
basic_recorder = get_trulens_recorder(basic_query_engine, app_id="Basic Query Engine")


  basic_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)


In [21]:
with basic_recorder as recording:
    for q in qa_set:
        basic_query_engine.query(q['query'])

In [22]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
# records.head(5)
tru.run_dashboard()

Starting dashboard ...
npx: installed 22 in 5.158s

Go to this url and submit the ip given here. your url is: https://sad-moons-stay.loca.lt

  Submit this IP Address: 35.231.119.84



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [31]:
records.columns

Index(['app_id', 'app_json', 'type', 'record_id', 'input', 'output', 'tags',
       'record_json', 'cost_json', 'perf_json', 'ts', 'Answer Correctness',
       'Answer Relevance', 'Context Relevance', 'Answer Correctness_calls',
       'Answer Relevance_calls', 'Context Relevance_calls', 'latency',
       'total_tokens', 'total_cost'],
      dtype='object')

In [32]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Correctness,Answer Relevance,Context Relevance,Answer Correctness_calls,Answer Relevance_calls,Context Relevance_calls,latency,total_tokens,total_cost
0,Basic Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_825c390de87f0ab76750747a913ec96e,"""What are the primary impacts of climate chang...","""The primary impacts of climate change on ocea...",-,"{""record_id"": ""record_hash_825c390de87f0ab7675...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-30T18:37:40.936211"", ""...",2024-05-30T18:37:45.375402,1.0,1.0,0.6,[{'args': {'prompt': 'What are the primary imp...,[{'args': {'prompt': 'What are the primary imp...,[{'args': {'prompt': 'What are the primary imp...,4,1532,0.002334
1,Basic Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_9c25378c7bb6625bcb6608da02a10d55,"""How do marine heatwaves affect ocean ecosyste...","""Marine heatwaves can have severe consequences...",-,"{""record_id"": ""record_hash_9c25378c7bb6625bcb6...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-30T18:37:46.151395"", ""...",2024-05-30T18:37:50.440472,0.9,1.0,0.6,[{'args': {'prompt': 'How do marine heatwaves ...,[{'args': {'prompt': 'How do marine heatwaves ...,[{'args': {'prompt': 'How do marine heatwaves ...,4,2186,0.003323
2,Basic Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_d0da89f7013b46e57c057f4b521152fd,"""What role does the ocean play in global clima...","""The ocean plays a crucial role in global clim...",-,"{""record_id"": ""record_hash_d0da89f7013b46e57c0...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-30T18:37:51.111251"", ""...",2024-05-30T18:37:54.440343,1.0,1.0,0.5,[{'args': {'prompt': 'What role does the ocean...,[{'args': {'prompt': 'What role does the ocean...,[{'args': {'prompt': 'What role does the ocean...,3,2155,0.003247
3,Basic Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_3497b1dc852c86e5a24a96f86134672b,"""How is climate change impacting marine biodiv...","""Climate change is impacting marine biodiversi...",-,"{""record_id"": ""record_hash_3497b1dc852c86e5a24...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-30T18:37:55.110530"", ""...",2024-05-30T18:37:59.542107,0.9,1.0,0.9,[{'args': {'prompt': 'How is climate change im...,[{'args': {'prompt': 'How is climate change im...,[{'args': {'prompt': 'How is climate change im...,4,2229,0.003393
4,Basic Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_7b4ff048bb9c3b0119842e0610901640,"""What are the major non-climate drivers affect...","""The major non-climate drivers affecting ocean...",-,"{""record_id"": ""record_hash_7b4ff048bb9c3b01198...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-30T18:38:00.152828"", ""...",2024-05-30T18:38:03.739552,1.0,0.9,0.45,[{'args': {'prompt': 'What are the major non-c...,[{'args': {'prompt': 'What are the major non-c...,[{'args': {'prompt': 'What are the major non-c...,3,2107,0.003151


In [36]:
columns_to_include = [
    'input', 'output', 'Answer Correctness', 'Answer Relevance',
    'Context Relevance', 'Answer Correctness_calls',
    'Answer Relevance_calls', 'Context Relevance_calls'
]

available_columns = [col for col in columns_to_include if col in records.columns]

filtered_records = records[available_columns]
filtered_records

Unnamed: 0,input,output,Answer Correctness,Answer Relevance,Context Relevance,Answer Correctness_calls,Answer Relevance_calls,Context Relevance_calls
0,"""What are the primary impacts of climate chang...","""The primary impacts of climate change on ocea...",1.0,1.0,0.6,[{'args': {'prompt': 'What are the primary imp...,[{'args': {'prompt': 'What are the primary imp...,[{'args': {'prompt': 'What are the primary imp...
1,"""How do marine heatwaves affect ocean ecosyste...","""Marine heatwaves can have severe consequences...",0.9,1.0,0.6,[{'args': {'prompt': 'How do marine heatwaves ...,[{'args': {'prompt': 'How do marine heatwaves ...,[{'args': {'prompt': 'How do marine heatwaves ...
2,"""What role does the ocean play in global clima...","""The ocean plays a crucial role in global clim...",1.0,1.0,0.5,[{'args': {'prompt': 'What role does the ocean...,[{'args': {'prompt': 'What role does the ocean...,[{'args': {'prompt': 'What role does the ocean...
3,"""How is climate change impacting marine biodiv...","""Climate change is impacting marine biodiversi...",0.9,1.0,0.9,[{'args': {'prompt': 'How is climate change im...,[{'args': {'prompt': 'How is climate change im...,[{'args': {'prompt': 'How is climate change im...
4,"""What are the major non-climate drivers affect...","""The major non-climate drivers affecting ocean...",1.0,0.9,0.45,[{'args': {'prompt': 'What are the major non-c...,[{'args': {'prompt': 'What are the major non-c...,[{'args': {'prompt': 'What are the major non-c...
5,"""How does climate change influence the distrib...","""Climate change influences the distribution of...",1.0,0.9,0.8,[{'args': {'prompt': 'How does climate change ...,[{'args': {'prompt': 'How does climate change ...,[{'args': {'prompt': 'How does climate change ...
6,"""What are the effects of ocean acidification o...","""Ocean acidification can lead to decreased gro...",0.9,1.0,0.5,[{'args': {'prompt': 'What are the effects of ...,[{'args': {'prompt': 'What are the effects of ...,[{'args': {'prompt': 'What are the effects of ...
7,"""How does climate change affect coastal commun...","""Climate change impacts coastal communities an...",0.9,1.0,0.5,[{'args': {'prompt': 'How does climate change ...,[{'args': {'prompt': 'How does climate change ...,[{'args': {'prompt': 'How does climate change ...
8,"""What are some adaptation strategies for manag...","""Adaptation strategies for managing climate ch...",0.8,1.0,0.9,[{'args': {'prompt': 'What are some adaptation...,[{'args': {'prompt': 'What are some adaptation...,[{'args': {'prompt': 'What are some adaptation...
9,"""What is the significance of the paleorecord i...","""The paleorecord is significant in understandi...",1.0,1.0,0.9,[{'args': {'prompt': 'What is the significance...,[{'args': {'prompt': 'What is the significance...,[{'args': {'prompt': 'What is the significance...


In [27]:
print(feedback)

['Context Relevance', 'Answer Relevance', 'Answer Correctness']


## Running the Evals for Sentence Window Index

In [30]:
from llama_index.core.postprocessor import SimilarityPostprocessor
sentence_window_index = build_sentence_window_index(documents=documents, llm=llm, embed_model=embed_model)

postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

sentence_query_engine = sentence_window_index.as_query_engine(
    node_postprocessors=[postproc]
)

sentence_recorder = get_trulens_recorder(
    sentence_query_engine, app_id="Sentence Window Query Engine"
)

In [None]:
with sentence_recorder as recording:
    for q in qa_set:
        sentence_query_engine.query(q['query'])

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x1bb4c527610 is calling an instrumented method <function BaseQueryEngine.query at 0x000001BB1B3D6660>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x1bb22254610) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x1bb4c527610 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x000001BB1BEF7740>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x1bb22254610) using this function.
A new object of type <class 'llama_index.indices.vector_store.retrievers.retriever.VectorIndexRetriever'> at 0x1bb4c527190 is calling an instrumented method <function BaseRetriever.retrieve at 0x000001BB1E1319E0>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on ot