In [1]:
import openai
import os
from dotenv import load_dotenv
from trulens_eval import Tru

# Read variables from a local .env file into the process environment.
load_dotenv()

# Give the OpenAI client its key; the lookup yields None when the variable is unset.
openai.api_key = os.getenv('OPENAI_API_KEY')

In [2]:
from llama_index import SimpleDirectoryReader
from llama_index import Document

# Sample PDF; source: https://info.deeplearning.ai/how-to-build-a-career-in-ai-book
documents = SimpleDirectoryReader(
    input_files=["./document/eBook-How-to-Build-a-Career-in-AI.pdf"],
).load_data()

# Concatenate the text of every loaded item into one Document, with a blank
# line between consecutive items.
# NOTE(review): the later cells pass `documents` (the list) to the index
# builder and never use this merged `document` — confirm which one is intended.
document = Document(text="\n\n".join(doc.text for doc in documents))

# Window=1

In [4]:
from utils import get_trulens_recorder, run_evals, build_sentence_window_index, get_sentence_window_query_engine
from llama_index.llms import OpenAI

# Pre-written evaluation questions, one question per line of the text file.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open('generated_questions.text', 'r', encoding='utf-8') as file:
    # Strip surrounding whitespace (including the trailing newline) and skip
    # blank lines so that no empty question ends up in the evaluation run.
    eval_questions = [line.strip() for line in file if line.strip()]
        
# --- Sentence window size 1 ---
# Build the index from the loaded documents with sentence_window_size=1.
# NOTE(review): save_dir is presumably where the helper persists / reloads the
# index — confirm in utils.build_sentence_window_index.
sentence_index_1 = build_sentence_window_index(
    documents,
    sentence_window_size=1,
    llm=OpenAI(temperature=0.1, model="gpt-3.5-turbo"),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="./index/sentence_index_1",
)

# Query engine over that index; also used by the ad-hoc queries further down.
sentence_window_engine_1 = get_sentence_window_query_engine(sentence_index_1)

# TruLens recorder for this engine, registered under its own app_id.
tru_recorder_1 = get_trulens_recorder(
    sentence_window_engine_1, app_id='sentence window engine 1'
)

# Hand the questions, the recorder and the engine to the evaluation helper.
run_evals(eval_questions, tru_recorder_1, sentence_window_engine_1)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [5]:
# Start the TruLens dashboard; per the recorded output this launches a
# Streamlit process and prints the URL it is served at. The bare expression
# makes the returned process handle the cell's displayed result.
Tru().run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.0.4:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [6]:
# Ad-hoc sanity check against the window-size-1 engine; the bare expression
# displays the response text as the cell output.
sentence_window_engine_1.query('how to become AI expert').response

'To become an AI expert, one can start by developing machine learning systems in their spare time. This can eventually become part of their job, providing access to more equipment, compute time, labeling budget, or head count. Working on small-scale projects, either alone or with friends, is also beneficial. For example, re-implementing known algorithms, applying machine learning to hobbies, or building useful systems at work in spare time can help develop expertise. Additionally, encouraging and supporting others behind you in their AI journey can also contribute to becoming an AI expert.'

In [7]:
# Second ad-hoc query against the window-size-1 engine.
# NOTE(review): the query text contains a typo ("Recommnd"); it is left as-is
# because the recorded output below was produced with exactly this string.
sentence_window_engine_1.query('Recommnd me an AI project').response

'You could consider implementing a machine learning algorithm to predict power generation from intermittent sources. This project could involve using satellite imagery to map the locations of wind turbines more accurately, estimating the height and generation capacity of wind turbines using satellite imagery, or using weather data to better predict cloud cover and solar irradiance.'

# Window=3

In [8]:
# --- Sentence window size 3 ---
# Same pipeline as the window-size-1 cell, with sentence_window_size=3 and a
# separate save_dir / app_id so the two runs stay distinguishable.
sentence_index_3 = build_sentence_window_index(
    documents,
    sentence_window_size=3,
    llm=OpenAI(temperature=0.1, model="gpt-3.5-turbo"),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="./index/sentence_index_3",
)

# Query engine over the window-size-3 index.
sentence_window_engine_3 = get_sentence_window_query_engine(sentence_index_3)

# TruLens recorder for this engine, registered under its own app_id.
tru_recorder_3 = get_trulens_recorder(
    sentence_window_engine_3, app_id='sentence window engine 3'
)

# Hand the questions, the recorder and the engine to the evaluation helper.
run_evals(eval_questions, tru_recorder_3, sentence_window_engine_3)

# Dashboard call kept as the final expression; the recorded output shows it
# reporting that the dashboard is already running.
Tru().run_dashboard()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.4:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [9]:
# --- Sentence window size 5 ---
# Same pipeline as the earlier window cells, with sentence_window_size=5 and a
# separate save_dir / app_id so the runs stay distinguishable.
sentence_index_5 = build_sentence_window_index(
    documents,
    sentence_window_size=5,
    llm=OpenAI(temperature=0.1, model="gpt-3.5-turbo"),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="./index/sentence_index_5",
)

# Query engine over the window-size-5 index.
sentence_window_engine_5 = get_sentence_window_query_engine(sentence_index_5)

# TruLens recorder for this engine, registered under its own app_id.
tru_recorder_5 = get_trulens_recorder(
    sentence_window_engine_5, app_id='sentence window engine 5'
)

# Hand the questions, the recorder and the engine to the evaluation helper.
run_evals(eval_questions, tru_recorder_5, sentence_window_engine_5)

# Dashboard call kept as the final expression; the recorded output shows it
# reporting that the dashboard is already running.
Tru().run_dashboard()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x25423430940 is calling an instrumented method <function BaseQueryEngine.query at 0x00000253DCFC1AB0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2542329fc10) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x25423430940 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x00000253E29F8CA0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2542329fc10) using this function.
A new object of type <class 'llama_index.indices.vector_store.retrievers.retriever.VectorIndexRetriever'> at 0x25423432c80 is calling an instrumented method <function BaseRetriever.retrieve at 0x00000253DCFC0D30>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on ot

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.4:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>