In [26]:
import os
from dotenv import load_dotenv


# load_dotenv() comes from python-dotenv; it is expected to copy entries from
# a local .env file into the process environment before the keys are read.
load_dotenv()

# Either key is None when the corresponding variable is not set.
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_KEY")

In [27]:
### Importing from TruLens

from trulens_eval import Tru

# Shared TruLens handle; later cells call tru.get_records_and_feedback() and
# tru.get_leaderboard() on it.
tru = Tru()

In [28]:
### Simple LLM application

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read the source PDF. The result of load_data() is kept in `documents` and is
# merged into a single Document in the next cell.
pdf_reader = SimpleDirectoryReader(
    input_files=['./eBook-How-to-Build-a-Career-in-AI.pdf']
)
documents = pdf_reader.load_data()


In [29]:
from llama_index.core import Document

# Stitch the text of every loaded piece together, separated by a blank line,
# and wrap the result in one Document.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [32]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# LLM used to synthesize answers.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# HuggingFace embedding model; its files are downloaded the first time this
# cell runs (see the progress bars in the cell output).
embedding_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Register both as the global defaults used when building and querying indexes.
Settings.llm = llm
Settings.embed_model = embedding_model

# Build the index through from_documents() so the merged Document is split
# into nodes before embedding. Passing the Document straight to the
# VectorStoreIndex constructor treats it as one pre-built node, so the whole
# book would be embedded and retrieved as a single chunk.
index = VectorStoreIndex.from_documents([document])

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [33]:
# Build the vector index from the single merged document.
index = VectorStoreIndex.from_documents([document])

# Query engine over that index, created with default arguments; this is the
# object the TruLens recorder wraps later in the notebook.
query_engine = index.as_query_engine()

: 

In [31]:
### Sanity check: run one query end to end
sanity_question = "What did the author do in their youth?"
response = query_engine.query(sanity_question)
print(response)  # print() renders the response through str()

The author has been privileged to see thousands of students, as well as engineers in companies large and small, navigate careers in AI.


In [9]:
### Initializing feedback functions

import numpy as np

# Provider class backing the LLM-based feedback functions. It is aliased to
# fOpenAI so it is not confused with the llama_index OpenAI LLM class imported
# earlier in the notebook. (An alias is a bare name: no call parentheses.)
from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI

openai = fOpenAI()


# Selector for the retrieved context. Where the context lives inside a record
# is app-specific, so it is derived from the query engine itself.
from trulens_eval.app import App

context = App.select_context(query_engine)


from trulens_eval import Feedback
from trulens_eval.feedback import Groundedness

# Reuse the provider created above rather than constructing a second one.
grounded = Groundedness(groundedness_provider=openai)

# Groundedness of the answer against the retrieved context. The context
# chunks are collected into a single argument and per-statement scores are
# combined by the Groundedness aggregator.
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between the question and each context chunk,
# averaged across chunks.
f_qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


In [10]:
### Instrumenting the app for logging in TruLens


from trulens_eval import TruLlama

# Wrap the query engine in a recorder registered under app_id "App1", with the
# groundedness, context-relevance and answer-relevance feedbacks attached.
tru_query_engine_recorder = TruLlama(
    query_engine,
    app_id="App1",
    feedbacks=[f_groundedness, f_qs_relevance, f_qa_relevance]
)

In [11]:
# Load the evaluation questions, one per line. Blank lines are skipped so an
# empty string is never sent to the query engine, and the encoding is pinned
# so the file reads the same on every platform.
with open("eval_questions.txt", encoding="utf-8") as f:
    questions = [line.strip() for line in f if line.strip()]


In [12]:
# Display the loaded evaluation questions.
questions

['What are the keys to building a career in AI?',
 'How can teamwork contribute to success in AI?',
 'What is the importance of networking in AI?',
 'What are some good habits to develop for a successful career?',
 'How can altruism be beneficial in building a career?',
 'What is imposter syndrome and how does it relate to AI?',
 'Who are some accomplished individuals who have experienced imposter syndrome?',
 'What is the first step to becoming good at AI?',
 'What are some common challenges in AI?',
 'Is it normal to find parts of AI challenging?']

In [13]:
from tqdm import tqdm

# Run every evaluation question inside the recorder context; `recorder` is
# read afterwards through recorder.records.
with tru_query_engine_recorder as recorder:
    for question in tqdm(questions):
        # `response` is overwritten on each iteration, so only the answer to
        # the last question is still available once the loop finishes.
        response = query_engine.query(question)

100%|██████████| 10/10 [00:26<00:00,  2.68s/it]


In [14]:
# Answer to the last question asked in the evaluation loop.
print(response)

It is normal to find parts of AI challenging.


In [15]:
# Records captured by the recorder context.
rec = recorder.records

# NOTE(review): this renders the full repr of every record, which is very
# long; consider displaying a single record or a few fields instead.
display(rec)

[Record(record_id='record_hash_8b46a05396b5529054dda2c6c40fd68a', app_id='App1', cost=Cost(n_requests=2, n_successful_requests=2, n_classes=0, n_tokens=984, n_stream_chunks=0, n_prompt_tokens=975, n_completion_tokens=9, cost=0.001464), perf=Perf(start_time=datetime.datetime(2024, 3, 11, 9, 13, 57, 92659), end_time=datetime.datetime(2024, 3, 11, 9, 13, 58, 544027)), ts=datetime.datetime(2024, 3, 11, 9, 13, 58, 546051), tags='-', meta=None, main_input='What are the keys to building a career in AI?', main_output='Job search, personal discipline, altruism.', main_error=None, calls=[RecordAppCall(stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine, id=139876085465152, init_bindings=None), name='query')), RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=llama_index.core.query_engine.retriever_query_engine.RetrieverQueryEngine, id=139876085465152, init_bindings=None), name='retrieve')), R

In [16]:
# recs = recording.records # use .records if multiple

In [17]:
# Fetch the stored records and the feedback results for app_id "App1".
# NOTE(review): the rows shown in the output carry earlier timestamps and
# questions not asked in this session, so results from previous runs appear
# to persist; confirm whether the database should be reset first.
records, feedback = tru.get_records_and_feedback(app_ids=["App1"])

# Preview the first few rows.
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,qs_relevance,groundedness_measure_with_cot_reasons,relevance_calls,qs_relevance_calls,groundedness_measure_with_cot_reasons_calls,latency,total_tokens,total_cost
0,App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_1a0f573839fd2911f50b30a169eb48ba,"""What did author do when he was young?""","""The author, when he was young, worked on writ...",-,"{""record_id"": ""record_hash_1a0f573839fd2911f50...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-08T16:26:38.865500"", ""...",2024-03-08T16:26:41.601896,1.0,0.5,1.0,[{'args': {'prompt': 'What did author do when ...,[{'args': {'question': 'What did author do whe...,[{'args': {'source': ['![](https://s.turbifycd...,2,2155,0.003252
1,App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_0eca43e15d4b10b7ed55f28a707e56a2,"""What did author do when he was young?""","""When the author was young, they worked on wri...",-,"{""record_id"": ""record_hash_0eca43e15d4b10b7ed5...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-08T16:27:57.234683"", ""...",2024-03-08T16:27:59.593466,1.0,0.5,1.0,[{'args': {'prompt': 'What did author do when ...,[{'args': {'question': 'What did author do whe...,[{'args': {'source': ['![](https://s.turbifycd...,2,2171,0.003283
2,App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_2d908d0cf41936b8fd4692664019d260,"""What did author do when he was young?""","""The author, when he was young, worked on writ...",-,"{""record_id"": ""record_hash_2d908d0cf41936b8fd4...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-08T16:34:13.961058"", ""...",2024-03-08T16:34:16.199118,1.0,0.5,1.0,[{'args': {'prompt': 'What did author do when ...,[{'args': {'question': 'What did author do whe...,[{'args': {'source': ['![](https://s.turbifycd...,2,2155,0.003252
3,App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_4907bf81ea1fef087961f882ae612bc4,"""What did author do when he was young?""","""When the author was young, they worked on wri...",-,"{""record_id"": ""record_hash_4907bf81ea1fef08796...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-08T16:36:52.575770"", ""...",2024-03-08T16:36:55.238265,1.0,0.5,1.0,[{'args': {'prompt': 'What did author do when ...,[{'args': {'question': 'What did author do whe...,[{'args': {'source': ['![](https://s.turbifycd...,2,2171,0.003283
4,App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_29778fb4631c49b9f722b33b5072d528,"""What are the keys to building a career in AI?""","""The keys to building a career in AI involve u...",-,"{""record_id"": ""record_hash_29778fb4631c49b9f72...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-03-08T16:38:49.583678"", ""...",2024-03-08T16:38:52.669274,1.0,0.2,0.82,[{'args': {'prompt': 'What are the keys to bui...,[{'args': {'question': 'What are the keys to b...,[{'args': {'source': ['By which I mean the sor...,3,2178,0.003286


In [18]:
# Per-app summary of feedback scores, latency and total cost.
tru.get_leaderboard()

Unnamed: 0_level_0,Groundedness,qs_relevance,groundedness_measure_with_cot_reasons,relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Direct Query Engine,0.507292,,,,10.294118,0.018749
App1,,0.429839,0.659849,0.922581,4.241935,0.002066


### Sentence-Window Retrieval

In [20]:
### Utilities
from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core import Settings

def build_sentence_window_index(
        document, llm, embed_model="BAAI/bge-small-en-v1.5", save_dir="sentence_index"
):
    """Build a sentence-window vector index, or reload it from ``save_dir``.

    The document is parsed with a SentenceWindowNodeParser (window size 3):
    each node's surrounding window is stored under the ``window`` metadata key
    and its own sentence under ``original_text``.

    Side effect: overwrites the global ``Settings.llm``,
    ``Settings.embed_model`` and ``Settings.node_parser``.

    Args:
        document: The document to index.
        llm: LLM instance to register in ``Settings``.
        embed_model: An embedding model object, or a model name. A bare name
            such as ``"BAAI/bge-small-en-v1.5"`` is wrapped in
            ``HuggingFaceEmbedding``; ``"local:..."``, ``"default"`` and
            ``"clip"`` are passed through for ``Settings`` to resolve.
        save_dir: Directory the index is persisted to and reloaded from.

    Returns:
        The freshly built index when ``save_dir`` does not exist yet,
        otherwise the index loaded from ``save_dir``.
    """
    # Local import: this name is not among the notebook's existing imports.
    from llama_index.core import load_index_from_storage

    # Settings only resolves embedding objects or specially formatted strings,
    # so turn a plain model name into an embedding object first.
    passthrough_names = ("default", "clip")
    if (
        isinstance(embed_model, str)
        and embed_model not in passthrough_names
        and not embed_model.startswith("local:")
    ):
        from llama_index.embeddings.huggingface import HuggingFaceEmbedding

        embed_model = HuggingFaceEmbedding(model_name=embed_model)

    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,
        window_metadata_key='window',
        original_text_metadata_key="original_text"
    )

    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.node_parser = node_parser

    if not os.path.exists(save_dir):
        # First run: build the index and persist it so later runs can skip
        # the embedding work.
        sentence_index = VectorStoreIndex.from_documents(
            [document],
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        # A persisted index already exists: reload it instead of rebuilding.
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir)
        )

    return sentence_index
    

In [None]:
from llama_index.llms.openai import OpenAI

# Same chat model and temperature as the baseline query engine.
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo")

# Every argument is passed by keyword so the call reads without the signature.
sentence_index = build_sentence_window_index(
    document=document,
    llm=llm,
    embed_model="BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
)
