# Advanced RAG Pipeline

In [1]:
from llama_index import ServiceContext,SimpleDirectoryReader,VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index import QueryBundle
from llama_index.llms import OpenAI
from llama_index.schema import TextNode, NodeWithScore
from llama_index.response.notebook_utils import display_response
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor,SentenceTransformerRerank
from copy import deepcopy

In [2]:
import utils
from utils import (build_automerging_index,
                   build_sentence_window_index,
                   get_prebuilt_trulens_recorder,
                   get_sentence_window_query_engine,
                   get_automerging_query_engine
                  )


import os
import openai
# Obtain the OpenAI key through the utils.py helper instead of hardcoding it in the notebook.
openai.api_key = utils.get_openai_api_key()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [3]:
# Read the eBook PDF; load_data() returns a list of llama_index Document objects.
pdf_reader = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
)
documents = pdf_reader.load_data()

In [4]:
# Sanity-check what the reader returned: container type, item count, then the first item.
print(type(documents), "\n")
print(len(documents), "\n")
first_document = documents[0]
print(type(first_document))
print(first_document)

<class 'list'> 

41 

<class 'llama_index.schema.Document'>
Doc ID: 076f88dd-75f8-4d1e-aa2c-c77e19ab37b4
Text: PAGE 1Founder, DeepLearning.AICollected Insights from Andrew Ng
How to  Build Your Career in AIA Simple Guide


## Basic RAG pipeline

In [5]:
from llama_index import Document

# Merge the text of every loaded document into one Document, with a blank line between pieces.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [6]:
# Chat model used by the query engines, with a low sampling temperature (0.1).
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Pair the LLM with the local BGE-small embedding model, then index the merged document.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
)
index = VectorStoreIndex.from_documents(
    [document],
    service_context=service_context,
)

In [7]:
# Baseline ("basic RAG") query engine built from the vector index with the library defaults.
query_engine = index.as_query_engine()

In [8]:
# Smoke-test the baseline pipeline with one question before evaluating it systematically.
response = query_engine.query("What are steps to take when finding projects to build your experience?")
print(response)

Develop a side hustle, ensure the project will help you grow technically, collaborate with good teammates, and consider if the project can serve as a stepping stone to larger projects.


### Evaluation setup using TruLens

In [9]:
# Load the evaluation questions from a plain-text file, one question per line.
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Strip the trailing newline and any surrounding whitespace.
        item = line.strip()
        # Skip blank lines so an empty string is never sent to a query engine.
        if not item:
            continue
        print(item)
        eval_questions.append(item)

What are the keys to building a career in AI?
How can teamwork contribute to success in AI?
What is the importance of networking in AI?
What are some good habits to develop for a successful career?
How can altruism be beneficial in building a career?
What is imposter syndrome and how does it relate to AI?
Who are some accomplished individuals who have experienced imposter syndrome?
What is the first step to becoming good at AI?
What are some common challenges in AI?
Is it normal to find parts of AI challenging?


In [10]:
# You can try your own question:
new_question = "What is the right AI job for me?"
# Only append when missing, so re-running this cell does not duplicate the
# question in the evaluation set.
if new_question not in eval_questions:
    eval_questions.append(new_question)

In [11]:
# Confirm the final evaluation set, including the question appended above.
print(eval_questions)

['What are the keys to building a career in AI?', 'How can teamwork contribute to success in AI?', 'What is the importance of networking in AI?', 'What are some good habits to develop for a successful career?', 'How can altruism be beneficial in building a career?', 'What is imposter syndrome and how does it relate to AI?', 'Who are some accomplished individuals who have experienced imposter syndrome?', 'What is the first step to becoming good at AI?', 'What are some common challenges in AI?', 'Is it normal to find parts of AI challenging?', 'What is the right AI job for me?']


In [12]:
from trulens_eval import Tru
tru = Tru()

# Reset the TruLens database so records from earlier runs do not mix into this evaluation.
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


For the classroom, we've written some of the code in helper functions inside a utils.py file.  
- You can view the utils.py file in the file directory by clicking on the "Jupyter" logo at the top of the notebook.
- In later lessons, you'll get to work directly with the code that's currently wrapped inside these helper functions, to give you more options to customize your RAG pipeline.

In [13]:
# Wrap the baseline engine in the pre-configured recorder from utils.py; app_id labels its records.
tru_recorder = get_prebuilt_trulens_recorder(query_engine,app_id="Direct Query Engine")

In [14]:
# Queries issued inside the recorder context are logged under the recorder's app_id.
with tru_recorder as recording:
    for question in eval_questions:
        # The answer is not used here; the call is made so that TruLens can record it.
        response = query_engine.query(question)

In [15]:
# `records` holds one row per logged query; `feedback` lists the feedback column names.
# An empty app_ids list is passed — presumably meaning "no app filter"; confirm in the TruLens docs.
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [16]:
# Preview the first few logged records (inputs, outputs, feedback scores, latency, cost).
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_0fee759f171bf9b181f978070f66c08d,"""What are the keys to building a career in AI?""","""Learning foundational technical skills, worki...",-,"{""record_id"": ""record_hash_0fee759f171bf9b181f...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-17T09:59:28.611161"", ""...",2024-09-17T09:59:29.753353,1.0,1.0,1.0,[{'args': {'prompt': 'What are the keys to bui...,[{'args': {'prompt': 'What are the keys to bui...,"[{'args': {'source': 'PAGE 1Founder, DeepLearn...",1,2066,0.003123
1,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_bbc295ff8a4e87c7722e49b80548be0d,"""How can teamwork contribute to success in AI?""","""Teamwork can contribute to success in AI by a...",-,"{""record_id"": ""record_hash_bbc295ff8a4e87c7722...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-17T09:59:29.880213"", ""...",2024-09-17T09:59:31.246351,1.0,0.5,0.933333,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'source': 'Hopefully the previous c...,1,1697,0.002582
2,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_3d1e25208555f683e17685ee9886f9f3,"""What is the importance of networking in AI?""","""Networking is crucial in AI as it helps indiv...",-,"{""record_id"": ""record_hash_3d1e25208555f683e17...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-17T09:59:31.378145"", ""...",2024-09-17T09:59:32.838038,1.0,0.4,,[{'args': {'prompt': 'What is the importance o...,[{'args': {'prompt': 'What is the importance o...,,1,1694,0.002576
3,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_e48d3a26ca17edcca33929879edf29b2,"""What are some good habits to develop for a su...","""Developing good habits in areas such as eatin...",-,"{""record_id"": ""record_hash_e48d3a26ca17edcca33...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-17T09:59:32.960165"", ""...",2024-09-17T09:59:34.350813,1.0,1.0,1.0,[{'args': {'prompt': 'What are some good habit...,[{'args': {'prompt': 'What are some good habit...,[{'args': {'source': 'Hopefully the previous c...,1,1631,0.002465
4,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_8c33bcf37d5e6e324873370eedd9a317,"""How can altruism be beneficial in building a ...","""Helping others during one's career journey ca...",-,"{""record_id"": ""record_hash_8c33bcf37d5e6e32487...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-17T09:59:34.463710"", ""...",2024-09-17T09:59:35.236780,1.0,,,[{'args': {'prompt': 'How can altruism be bene...,,,0,1610,0.002423


In [17]:
import pandas as pd

# Lift the column-width limit so full questions and answers are shown rather than truncated.
# Note that this is a global pandas option and stays in effect for later cells.
pd.set_option("display.max_colwidth", None)
# Show only the question, the answer, and one column per feedback score.
records[["input", "output", *feedback]]

Unnamed: 0,input,output,Groundedness,Answer Relevance,Context Relevance
0,"""What are the keys to building a career in AI?""","""Learning foundational technical skills, working on projects to deepen skills and create impact, and finding a job are the keys to building a career in AI. These steps are interconnected and build upon each other to help individuals navigate the evolving field of AI successfully.""",1.0,1.0,1.0
1,"""How can teamwork contribute to success in AI?""","""Teamwork can contribute to success in AI by allowing individuals to collaborate effectively, influence others, and be influenced by them. This collaboration helps in leveraging diverse perspectives and expertise, leading to more innovative solutions and better outcomes in tackling large projects. Interpersonal and communication skills play a crucial role in fostering teamwork, enabling effective collaboration and the sharing of ideas among team members.""",0.933333,1.0,0.5
2,"""What is the importance of networking in AI?""","""Networking is crucial in AI as it helps individuals build a strong professional network that can provide support, guidance, and opportunities. By connecting with others in the field, individuals can gain valuable insights, collaborate on projects, and stay updated on industry trends. Additionally, networking can lead to potential job opportunities and partnerships that can further one's career in AI.""",,1.0,0.4
3,"""What are some good habits to develop for a successful career?""","""Developing good habits in areas such as eating, exercise, sleep, personal relationships, work, learning, and self-care can help individuals move forward in their careers while staying healthy.""",1.0,1.0,1.0
4,"""How can altruism be beneficial in building a career?""","""Helping others during one's career journey can lead to better outcomes for oneself.""",,1.0,
5,"""What is imposter syndrome and how does it relate to AI?""","""Imposter syndrome is when someone, regardless of their success, doubts their abilities and feels like a fraud in a particular field or community. In the context of AI, newcomers to the field sometimes experience imposter syndrome, questioning their belonging in the AI community despite their achievements. It is highlighted that imposter syndrome is common among many individuals, including accomplished figures, and that it should not discourage anyone from growing in AI. The message conveyed is that experiencing challenges and doubts in AI is normal, and it is important to welcome and support everyone who wants to be part of the AI community.""",,1.0,
6,"""Who are some accomplished individuals who have experienced imposter syndrome?""","""Former Facebook COO Sheryl Sandberg, U.S. first lady Michelle Obama, actor Tom Hanks, and Atlassian co-CEO Mike Cannon-Brookes are some accomplished individuals who have experienced imposter syndrome.""",,,
7,"""What is the first step to becoming good at AI?""","""Learning foundational technical skills is the first step to becoming good at AI.""",,,
8,"""What are some common challenges in AI?""","""Some common challenges in AI include determining feasible and valuable solutions, setting appropriate milestones, securing necessary resources, and ensuring that projects are responsible, ethical, and beneficial to people. Additionally, overcoming imposter syndrome, staying motivated through technical challenges, and finding supportive mentors or peers are also common challenges faced in the field of AI.""",,,
9,"""Is it normal to find parts of AI challenging?""","""It is normal to find parts of AI challenging.""",,,


In [18]:
# Starts the TruLens dashboard as a background Streamlit process.
# It launches on http://localhost:8501/ when run locally; hosted environments print their own URL.
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at https://s172-30-151-242p38560.lab-aws-production.deeplearning.ai/ .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

## Advanced RAG

### Sentence Window retrieval

In [19]:
# Build the sentence-window index over the merged document with the utils.py helper.
# save_dir is presumably where the helper persists/reloads the index — see utils.py to confirm.
sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

In [20]:
# Query engine over the sentence-window index, using the helper's default settings.
sentence_window_engine = get_sentence_window_query_engine(sentence_index)

In [21]:
# Try one question against the sentence-window engine before running the full evaluation.
window_response = sentence_window_engine.query("how do I get started on a personal project in AI?")
print(window_response)

To get started on a personal project in AI, you can begin by identifying a project that aligns with your career goals and interests. It is important to choose a project that is responsible, ethical, and beneficial to people. Once you have selected a project, you can follow the steps outlined in the chapters provided, such as scoping the project, executing it with an eye toward career development, and building a portfolio that demonstrates skill progression. Additionally, consider seeking guidance from the resources available in the chapters mentioned to help you start your AI project effectively.


In [22]:
# NOTE: resetting here discards the "Direct Query Engine" records logged earlier,
# so the leaderboard below lists only the sentence-window app.
tru.reset_database()

# Recorder for the sentence-window engine; app_id labels its records in TruLens.
tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    sentence_window_engine,
    app_id = "Sentence Window Query Engine"
)

In [23]:
# Record one TruLens entry per question, echoing each question and its answer as it completes.
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = sentence_window_engine.query(question)
        print(question, str(response), sep="\n")

What are the keys to building a career in AI?
Learning foundational technical skills, working on projects, finding a job, and being part of a supportive community are the keys to building a career in AI.
How can teamwork contribute to success in AI?
Teamwork can contribute to success in AI by allowing individuals to leverage the diverse skills and perspectives of their colleagues. Working collaboratively enables team members to combine their expertise, share insights, and collectively tackle complex AI projects. Additionally, being surrounded by motivated and knowledgeable teammates can inspire individuals to work harder, learn continuously, and strive to create AI solutions that benefit society as a whole.
What is the importance of networking in AI?
Networking in AI is crucial as it can provide valuable insights, guidance, and opportunities for individuals looking to advance in the field. By connecting with professionals who have experience in AI, individuals can gain knowledge about 

In [24]:
# Per-app summary of feedback scores, latency and total cost for the apps currently in the database.
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sentence Window Query Engine,1.0,1.454545,0.000814


In [25]:
# Launches on http://localhost:8501/ when run locally.
# If the dashboard is already running, the call reports the existing URL instead.
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path: https://s172-30-151-242p38560.lab-aws-production.deeplearning.ai/


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

### Auto-merging retrieval

In [26]:
# Build the auto-merging index with the utils.py helper.
# Note: this passes the `documents` list from the reader, not the merged `document`
# that the sentence-window index was built from.
automerging_index = build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index"
)

In [27]:
# Query engine over the auto-merging index, using the helper's default settings.
automerging_query_engine = get_automerging_query_engine(automerging_index)

In [28]:
# Try one question against the auto-merging engine; the retriever logs each merge it performs.
auto_merging_response = automerging_query_engine.query("How do I build a portfolio of AI projects?")
print(auto_merging_response)

> Merging 1 nodes into parent node.
> Parent node id: e36679c2-7984-48b5-b647-4fc9e0886fb0.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

> Merging 1 nodes into parent node.
> Parent node id: 3893e08e-fe16-4de7-8180-ed56d5df34fa.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

Building a portfolio of AI projects involves showcasing a progression from simple to complex undertakings over time. It is important to be able to effectively communicate your thinking to demonstrate the value of your work and gain the trust of others. Identifying worthwhile ideas to work on is a crucial skill for an AI architect, and gaining experience through projects in various industries can help in building a diverse and impactful portfolio.


In [29]:
# NOTE: resetting here discards the records logged for the previous engine,
# so the leaderboard below lists only the auto-merging app.
tru.reset_database()

# Recorder for the auto-merging engine; app_id labels its records in TruLens.
tru_recorder_automerging = get_prebuilt_trulens_recorder(
    automerging_query_engine,
    app_id="Automerging Query Engine",
)

In [30]:
# Record one TruLens entry per question, echoing each question and its answer as it completes.
for question in eval_questions:
    with tru_recorder_automerging as recording:
        response = automerging_query_engine.query(question)
        print(question, response, sep="\n")

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function BaseQueryEngine.query at 0x7fc177d588b0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.
A new object of type <class 'llama_index.retrievers.auto_merging_retriever.AutoMergingRetriever'> at 0x7fc0902500a0 is calling an instrumented method <function BaseRetriever.retrieve at 0x7fc177d37be0>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on other object (0x7

> Merging 2 nodes into parent node.
> Parent node id: 2ddb7bc8-249b-459c-a029-ef53cc80c9eb.
> Parent node text: PAGE 3Table of 
ContentsIntroduction: Coding AI is the New Literacy.
Chapter 1: Three Steps to Ca...

> Merging 1 nodes into parent node.
> Parent node id: ac5b2f7e-2e6d-425e-8d59-0daa0d7951d1.
> Parent node text: PAGE 3Table of 
ContentsIntroduction: Coding AI is the New Literacy.
Chapter 1: Three Steps to Ca...



A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function CompactAndRefine.get_response at 0x7fc1773683a0>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.llm_predictor.base.LLMPredictor'> at 0x7fc120408940 is calling an instrumented method <function LLMPredictor.predict at 0x7fc1a065a320>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthes

What are the keys to building a career in AI?
Learning foundational technical skills, working on projects, finding a job, and being part of a community are essential keys to building a career in AI. Additionally, collaborating with others, influencing, and being influenced by others are critical for success in tackling large projects in AI.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


How can teamwork contribute to success in AI?
Teamwork can contribute to success in AI by enabling individuals to work more effectively on large projects. Collaborating with others allows for a diversity of perspectives and skills to be brought to the table, leading to more innovative solutions. Additionally, the ability to influence and be influenced by team members can help in overcoming challenges and achieving better outcomes in the field of AI.
> Merging 3 nodes into parent node.
> Parent node id: d4b2638a-1cb1-41ca-b73b-8948fcb21678.
> Parent node text: PAGE 35Keys to Building a Career in AI CHAPTER 10
The path to career success in AI is more comple...

> Merging 1 nodes into parent node.
> Parent node id: f8c82889-d106-4d11-ab24-2762614579b3.
> Parent node text: PAGE 35Keys to Building a Career in AI CHAPTER 10
The path to career success in AI is more comple...



A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is the importance of networking in AI?
Networking in AI is crucial as it helps individuals build a strong professional network within the industry. This network can provide support, guidance, and opportunities for career advancement. By connecting with others in the field, individuals can gain valuable insights, access resources, and establish relationships that can be beneficial in their career growth.
> Merging 2 nodes into parent node.
> Parent node id: bedb4fa6-d6ff-4ce5-a94d-198f2a5cf506.
> Parent node text: PAGE 36Keys to Building a Career in AI CHAPTER 10
Of all the steps in building a career, this 
on...

> Merging 2 nodes into parent node.
> Parent node id: 385e5cf0-3800-4978-908e-b5ea718c633a.
> Parent node text: PAGE 11
The Best Way to Build 
a New Habit
One of my favorite books is BJ Fogg’s, Tiny Habits: Th...

> Merging 1 nodes into parent node.
> Parent node id: 729d3ba8-b652-4118-ac79-13b2fde9564c.
> Parent node text: PAGE 36Keys to Building a Career in AI CHAPTER 1

A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What are some good habits to develop for a successful career?
Good habits to develop for a successful career include habits related to eating, exercise, sleep, personal relationships, work, learning, and self-care. These habits help individuals move forward in their careers while also maintaining their health and well-being.
> Merging 2 nodes into parent node.
> Parent node id: fc900884-aaf0-4b9e-85b1-4b1d3ea1f72d.
> Parent node text: PAGE 30Finding someone to interview isn’t always easy, but many people who are in senior position...

> Merging 1 nodes into parent node.
> Parent node id: 4b1d8678-ac31-44f6-9911-193ba01b72a5.
> Parent node text: PAGE 30Finding someone to interview isn’t always easy, but many people who are in senior position...



A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


How can altruism be beneficial in building a career?
Altruism can be beneficial in building a career by creating a positive impact on others, fostering strong relationships within professional networks, and potentially leading to opportunities for mentorship and guidance. By helping others and lifting them up along the way, individuals can build a reputation as a supportive and collaborative team member, which can enhance their own career prospects. Additionally, practicing altruism can result in a reciprocal exchange of support and assistance, where others may be more inclined to help and support those who have shown kindness and generosity.
> Merging 5 nodes into parent node.
> Parent node id: 7cf839e4-c54a-4713-bcb7-f8a0f128ea7b.
> Parent node text: PAGE 38Before we dive into the final chapter of this book, I’d like to address the serious matter...

> Merging 1 nodes into parent node.
> Parent node id: a7d0b179-8dd4-41a2-bf99-798081131406.
> Parent node text: PAGE 37Overcoming Impos

A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is imposter syndrome and how does it relate to AI?
Imposter syndrome is when individuals doubt their accomplishments and have a persistent fear of being exposed as a fraud, despite evidence of their competence. In the context of AI, newcomers to the field may experience imposter syndrome as they navigate the complexities and challenges of artificial intelligence. It is highlighted that imposter syndrome is common even among accomplished individuals in the AI community, and the message is to not let it discourage anyone from pursuing growth and success in AI.
> Merging 3 nodes into parent node.
> Parent node id: 7cf839e4-c54a-4713-bcb7-f8a0f128ea7b.
> Parent node text: PAGE 38Before we dive into the final chapter of this book, I’d like to address the serious matter...

> Merging 1 nodes into parent node.
> Parent node id: a7d0b179-8dd4-41a2-bf99-798081131406.
> Parent node text: PAGE 37Overcoming Imposter 
SyndromeCHAPTER 11

> Merging 3 nodes into parent node.
> Parent node id: e4

A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


Who are some accomplished individuals who have experienced imposter syndrome?
Sheryl Sandberg, Michelle Obama, Tom Hanks, and Mike Cannon-Brookes are some accomplished individuals who have experienced imposter syndrome.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is the first step to becoming good at AI?
The first step to becoming good at AI is to suck at it.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What are some common challenges in AI?
Some common challenges in AI include the highly iterative nature of AI projects, uncertainty in planning due to not knowing in advance how long it will take to achieve target accuracy, technical challenges faced by individuals working on AI projects, and the feeling of imposter syndrome that some may experience when entering the AI community.
> Merging 3 nodes into parent node.
> Parent node id: 7cf839e4-c54a-4713-bcb7-f8a0f128ea7b.
> Parent node text: PAGE 38Before we dive into the final chapter of this book, I’d like to address the serious matter...

> Merging 1 nodes into parent node.
> Parent node id: d181e7eb-8fd6-4ba9-aaf7-7d457458a8b3.
> Parent node text: PAGE 38Before we dive into the final chapter of this book, I’d like to address the serious matter...



A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0902cd180 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


Is it normal to find parts of AI challenging?
It is normal to find parts of AI challenging.
> Merging 1 nodes into parent node.
> Parent node id: 22926a5b-5528-4a8e-90bc-847fd2cee3e1.
> Parent node text: PAGE 31Finding the Right 
AI Job for YouCHAPTER 9
JOBS

> Merging 1 nodes into parent node.
> Parent node id: a49ee832-fe10-4408-a30d-87949c38bbcc.
> Parent node text: If you’re leaving 
a job, exit gracefully. Give your employer ample notice, give your full effort...

> Merging 1 nodes into parent node.
> Parent node id: 7d33d950-afa4-4913-838e-7d2dfefdafea.
> Parent node text: PAGE 28Using Informational 
Interviews to Find 
the Right JobCHAPTER 8
JOBS

> Merging 1 nodes into parent node.
> Parent node id: 1c6a88a0-fbe7-4584-a653-081bbae4f41a.
> Parent node text: PAGE 31Finding the Right 
AI Job for YouCHAPTER 9
JOBS

> Merging 1 nodes into parent node.
> Parent node id: 45c486d5-b894-4730-92c7-4977ed6be2d7.
> Parent node text: PAGE 28Using Informational 
Interviews to Find 
the Right

A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0902cd0f0 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.


What is the right AI job for me?
The right AI job for you would likely depend on whether you are looking to switch roles, industries, or both. If you are seeking your first job in AI, transitioning into a role or industry that aligns with your current experience may be a smoother transition than attempting to switch both roles and industries simultaneously.


In [31]:
# Per-app summary of feedback scores, latency and total cost for the apps currently in the database.
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Automerging Query Engine,1.0,2.363636,0.000873


In [32]:
# Launches on http://localhost:8501/ when run locally.
# If the dashboard is already running, the call reports the existing URL instead.
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path: https://s172-30-151-242p38560.lab-aws-production.deeplearning.ai/


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

## RAG Triad of metrics

In [33]:
# Setup for the RAG-triad section. This repeats the loading and sentence-window steps from
# earlier in the notebook and rebinds the same names.
# It still relies on `llm`, `Document` and the utils helpers defined in earlier cells.
documents = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
).load_data()

# Merge the text of all loaded documents into a single Document.
document = Document(text="\n\n".join([doc.text for doc in documents]))

sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

sentence_window_engine = get_sentence_window_query_engine(sentence_index)

# Single test query; the cell displays only the answer text of the response object.
output = sentence_window_engine.query(
    "How do you create your AI portfolio?")
output.response

'You create your AI portfolio by building a collection of projects that demonstrate a progression of skills in the field.'

In [34]:
# Reuses the `tru` session created earlier in this notebook. If this section is run on its own,
# create it first with `from trulens_eval import Tru; tru = Tru()`.
tru.reset_database()

### Feedback functions

In [35]:
import nest_asyncio

nest_asyncio.apply()

In [36]:
from trulens_eval import OpenAI as fOpenAI

provider = fOpenAI()

#### Answer Relevance

In [37]:
from trulens_eval import Feedback

f_qa_relevance = Feedback(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance"
).on_input_output()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


#### Context Relevance

In [38]:
from trulens_eval import TruLlama

context_selection = TruLlama.select_source_nodes().node.text

In [39]:
import numpy as np

f_qs_relevance = (
    Feedback(provider.qs_relevance,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


In [40]:
import numpy as np

f_qs_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


#### Groundedness

In [41]:
from trulens_eval.feedback import Groundedness

grounded = Groundedness(groundedness_provider=provider)

In [42]:
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons,
             name="Groundedness"
            )
    .on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


### Evaluation of the RAG application

In [43]:
# from trulens_eval import FeedbackMode

tru_recorder = TruLlama(
    sentence_window_engine,
    app_id="App_1",
    feedbacks=[
        f_qa_relevance,
        f_qs_relevance,
        f_groundedness
    ]
)

In [44]:
eval_questions

['What are the keys to building a career in AI?',
 'How can teamwork contribute to success in AI?',
 'What is the importance of networking in AI?',
 'What are some good habits to develop for a successful career?',
 'How can altruism be beneficial in building a career?',
 'What is imposter syndrome and how does it relate to AI?',
 'Who are some accomplished individuals who have experienced imposter syndrome?',
 'What is the first step to becoming good at AI?',
 'What are some common challenges in AI?',
 'Is it normal to find parts of AI challenging?',
 'What is the right AI job for me?']

In [45]:
eval_questions.append("How can I be successful in AI?")

In [46]:
eval_questions

['What are the keys to building a career in AI?',
 'How can teamwork contribute to success in AI?',
 'What is the importance of networking in AI?',
 'What are some good habits to develop for a successful career?',
 'How can altruism be beneficial in building a career?',
 'What is imposter syndrome and how does it relate to AI?',
 'Who are some accomplished individuals who have experienced imposter syndrome?',
 'What is the first step to becoming good at AI?',
 'What are some common challenges in AI?',
 'Is it normal to find parts of AI challenging?',
 'What is the right AI job for me?',
 'How can I be successful in AI?']

In [47]:
for question in eval_questions:
    with tru_recorder as recording:
        response = sentence_window_engine.query(question)
        print(question)
        print(str(response))

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function BaseQueryEngine.query at 0x7fc177d588b0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.
A new object of type <class 'llama_index.indices.vector_store.retrievers.retriever.VectorIndexRetriever'> at 0x7fc0ac2c29e0 is calling an instrumented method <function BaseRetriever.retrieve at 0x7fc177d37be0>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on other obj

What are the keys to building a career in AI?
Learning foundational technical skills, working on projects, finding a job, and being part of a supportive community are the keys to building a career in AI.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


How can teamwork contribute to success in AI?
Teammates play a crucial role in the success of AI projects. Working collaboratively with colleagues who are dedicated, continuously learning, and focused on building AI for the benefit of all can positively influence one's own work ethic and outcomes. The ability to work effectively in a team, leveraging each member's strengths and insights, can lead to improved project outcomes and personal growth as a leader in AI projects.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is the importance of networking in AI?
Networking in AI is crucial as it can provide valuable insights, guidance, and opportunities for individuals looking to advance in the field. By connecting with professionals who have experience in AI, individuals can gain knowledge about the industry, potential career paths, and current trends. Networking also allows individuals to build relationships with others in the field, which can lead to mentorship, collaboration on projects, and even job opportunities. Overall, networking in AI can help individuals stay informed, grow their skills, and expand their professional connections.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What are some good habits to develop for a successful career?
Developing good habits in areas such as eating, exercise, sleep, personal relationships, work, learning, and self-care can help individuals move forward in their careers while maintaining their health.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


How can altruism be beneficial in building a career?
Helping others and aiming to lift them during one's own career journey can lead to better outcomes for oneself.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is imposter syndrome and how does it relate to AI?
Imposter syndrome is a phenomenon where individuals doubt their accomplishments and have a persistent fear of being exposed as a fraud. In the context of AI, newcomers to the field may experience imposter syndrome, feeling like they do not truly belong in the AI community despite their success. This can be a significant challenge for individuals in AI, as it may hinder their growth and progress in the field.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


Who are some accomplished individuals who have experienced imposter syndrome?
Former Facebook COO Sheryl Sandberg, U.S. first lady Michelle Obama, actor Tom Hanks, and Atlassian co-CEO Mike Cannon-Brookes are some accomplished individuals who have experienced imposter syndrome.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is the first step to becoming good at AI?
The first step to becoming good at AI is learning foundational technical skills.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What are some common challenges in AI?
Common challenges in AI include the evolving nature of technologies, the need to stay updated with rapidly changing technology, the difficulty in estimating project timelines and returns on investment, the iterative nature of AI projects leading to uncertainties in project management, the necessity to collaborate with stakeholders lacking AI expertise, and the challenge of reading and understanding complex research papers.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


Is it normal to find parts of AI challenging?
It is normal to find parts of AI challenging.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.
A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x7fc0ac2a9b40 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x7fc17189dc60>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x7fc1204d3370) using this function.


What is the right AI job for me?
The right AI job for you can be found by following the steps outlined in Chapter 9: Finding the Right AI Job for You. By utilizing the information provided in this chapter, you can identify a job that aligns with your skills, interests, and career goals in the field of artificial intelligence.


A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x7fc0ac13f940 is calling an instrumented method <function Refine.get_response at 0x7fc176584160>. The path of this call may be incorrect.
Guessing path of new object is app._response_synthesizer based on other object (0x7fc1204d3ca0) using this function.


How can I be successful in AI?
To be successful in AI, it is important to understand the problem thoroughly before brainstorming potential solutions efficiently. Determining milestones and metrics for the project is crucial once its value has been established. Additionally, having a basic intuitive understanding of calculus can be beneficial, as well as continuously deepening technical knowledge in specific application or technology areas related to AI, such as natural language processing, computer vision, probabilistic graphical models, or scalable software systems.


In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1
App_1,1.5,0.000817


In [None]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path: https://s172-30-151-242p38560.lab-aws-production.deeplearning.ai/


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

## Sentence Window retrieval

### Window-sentence retrieval setup

In [None]:
from llama_index.node_parser import SentenceWindowNodeParser

# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [None]:
text = "hello. how are you? I am fine!  "

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [None]:
print([x.text for x in nodes])

['hello. ', 'how are you? ', 'I am fine!  ']


In [None]:
print(nodes[1].metadata["window"])

hello.  how are you?  I am fine!  


In [None]:
text = "hello. foo bar. cat dog. mouse"

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [None]:
print([x.text for x in nodes])

['hello. ', 'foo bar. ', 'cat dog. ', 'mouse']


In [None]:
print(nodes[0].metadata["window"])

hello.  foo bar.  cat dog. 


### Building the index

In [None]:
sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    # embed_model="local:BAAI/bge-large-en-v1.5"
    node_parser=node_parser,
)

In [60]:
sentence_index = VectorStoreIndex.from_documents(
    [document], service_context=sentence_context
)

In [61]:
sentence_index.storage_context.persist(persist_dir="./sentence_index")

In [62]:
# Optional: reuse a persisted index instead of rebuilding it.
# If the "./sentence_index" directory does not exist, build the index from
# `document` and persist it there; otherwise load the index from that directory.
if not os.path.exists("./sentence_index"):
    sentence_index = VectorStoreIndex.from_documents(
        [document], service_context=sentence_context
    )

    sentence_index.storage_context.persist(persist_dir="./sentence_index")
else:
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./sentence_index"),
        service_context=sentence_context
    )

### Building the postprocessor

In [63]:
postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)

In [64]:
scored_nodes = [NodeWithScore(node=x, score=1.0) for x in nodes]
nodes_old = [deepcopy(n) for n in nodes]

In [65]:
nodes_old[1].text

'foo bar. '

In [66]:
replaced_nodes = postproc.postprocess_nodes(scored_nodes)

In [67]:
print(replaced_nodes[1].text)

hello.  foo bar.  cat dog.  mouse


### Adding a reranker

In [68]:
# BAAI/bge-reranker-base
# link: https://huggingface.co/BAAI/bge-reranker-base
rerank = SentenceTransformerRerank(
    top_n=2, model="BAAI/bge-reranker-base"
)

In [69]:
query = QueryBundle("I want a dog.")

scored_nodes = [
    NodeWithScore(node=TextNode(text="This is a cat"), score=0.6),
    NodeWithScore(node=TextNode(text="This is a dog"), score=0.4),
]

In [70]:
reranked_nodes = rerank.postprocess_nodes(
    scored_nodes, query_bundle=query
)

In [71]:
print([(x.text, x.score) for x in reranked_nodes])

[('This is a dog', 0.91827416), ('This is a cat', 0.0014040814)]


#### Running the query engine

In [72]:
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6, node_postprocessors=[postproc, rerank]
)

In [73]:
window_response = sentence_window_engine.query(
    "What are the keys to building a career in AI?"
)

In [74]:
display_response(window_response)

**`Final Response:`** The keys to building a career in AI include learning foundational technical skills, working on projects, finding a job, and being part of a supportive community.

#### Putting it all Together

In [3]:
from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
).load_data()

In [4]:
from llama_index import Document

document = Document(text="\n\n".join([doc.text for doc in documents]))

In [5]:
import os

from llama_index import ServiceContext, VectorStoreIndex, StorageContext,load_index_from_storage
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor,SentenceTransformerRerank

def build_sentence_window_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Return a sentence-window vector index, loading it from disk when possible.

    A ``SentenceWindowNodeParser`` (window stored under the ``"window"``
    metadata key, original sentence under ``"original_text"``) and a
    ``ServiceContext`` wrapping ``llm`` and ``embed_model`` are always created.

    * If ``save_dir`` already exists, the index persisted there is loaded with
      that service context; ``documents`` is not read in this case.
    * Otherwise the index is built from ``documents`` and persisted to
      ``save_dir``.

    Args:
        documents: Documents passed to ``VectorStoreIndex.from_documents``.
        llm: LLM handed to the service context.
        embed_model: Embedding model specifier handed to the service context.
        sentence_window_size: ``window_size`` given to the node parser.
        save_dir: Directory used both as the existence check and as the
            persist/load location.

    Returns:
        The built or loaded index.
    """
    window_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=window_parser,
    )

    # Fast path: a persisted index is already available on disk.
    if os.path.exists(save_dir):
        persisted = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(persisted, service_context=service_context)

    # Slow path: build from the documents, then persist for later runs.
    built_index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    built_index.storage_context.persist(persist_dir=save_dir)
    return built_index


def get_sentence_window_query_engine(
    sentence_index, similarity_top_k=6, rerank_top_n=2
):
    """Create a query engine over ``sentence_index`` with two node postprocessors.

    The postprocessors are applied in this order:

    1. ``MetadataReplacementPostProcessor`` targeting the ``"window"``
       metadata key.
    2. ``SentenceTransformerRerank`` using ``"BAAI/bge-reranker-base"`` and
       keeping ``rerank_top_n`` nodes.

    Args:
        sentence_index: Index exposing ``as_query_engine``.
        similarity_top_k: Forwarded to ``as_query_engine``.
        rerank_top_n: ``top_n`` for the reranker.

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    node_postprocessors = [
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        SentenceTransformerRerank(
            top_n=rerank_top_n, model="BAAI/bge-reranker-base"
        ),
    ]
    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=node_postprocessors,
    )

In [6]:
from llama_index.llms import OpenAI

index = build_sentence_window_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./sentence_index",
)

In [7]:
query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)

#### TruLens Evaluation

In [9]:
# Load the evaluation questions, one per line. Surrounding whitespace
# (including the trailing newline) is stripped, and blank lines are skipped so
# that no empty question is ever sent to a query engine.
eval_questions = []
with open('generated_questions.text', 'r') as file:
    eval_questions.extend(
        stripped for stripped in (line.strip() for line in file) if stripped
    )

In [10]:
from trulens_eval import Tru

def run_evals(eval_questions, tru_recorder, query_engine):
    """Run every evaluation question through ``query_engine`` under ``tru_recorder``.

    Each question is issued inside its own ``with tru_recorder`` block, so the
    recorder's context is entered and exited once per question. The query
    responses are not collected; the function returns ``None``.
    """
    for eval_question in eval_questions:
        with tru_recorder:
            query_engine.query(eval_question)

In [11]:
Tru().reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


#### Sentence window size = 1

In [23]:
sentence_index_1 = build_sentence_window_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=1,
    save_dir="sentence_index_1",
)

In [24]:
sentence_window_engine_1 = get_sentence_window_query_engine(
    sentence_index_1
)

In [25]:
tru_recorder_1 = get_prebuilt_trulens_recorder(
    sentence_window_engine_1,
    app_id='sentence window engine 1'
)

TypeError: Object of type 'OpenAI' is not JSON serializable

In [15]:
run_evals(eval_questions, tru_recorder_1, sentence_window_engine_1)

In [16]:
Tru().run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at https://s172-30-151-242p38560.lab-aws-production.deeplearning.ai/ .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [17]:
# Reload the evaluation questions, one per line. Surrounding whitespace
# (including the trailing newline) is stripped, and blank lines are skipped so
# that no empty question is ever sent to a query engine.
eval_questions = []
with open('generated_questions.text', 'r') as file:
    eval_questions.extend(
        stripped for stripped in (line.strip() for line in file) if stripped
    )

#### Sentence window size = 3

In [19]:
sentence_index_3 = build_sentence_window_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index_3",
)

In [20]:
sentence_window_engine_3 = get_sentence_window_query_engine(
    sentence_index_3
)

In [21]:
tru_recorder_3 = get_prebuilt_trulens_recorder(
    sentence_window_engine_3,
    app_id='sentence window engine 3'
)

TypeError: Object of type 'OpenAI' is not JSON serializable

In [None]:
run_evals(eval_questions, tru_recorder_3, sentence_window_engine_3)

In [None]:
Tru().run_dashboard()

## Auto-merging retrieval

In [None]:
from llama_index.node_parser import HierarchicalNodeParser

# create the hierarchical node parser w/ default settings
node_parser = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048, 512, 128]
)

In [None]:
nodes = node_parser.get_nodes_from_documents([document])

In [None]:
from llama_index.node_parser import get_leaf_nodes

leaf_nodes = get_leaf_nodes(nodes)
print(leaf_nodes[30].text)

In [None]:
nodes_by_id = {node.node_id: node for node in nodes}

parent_node = nodes_by_id[leaf_nodes[30].parent_node.node_id]
print(parent_node.text)

### Building the index

In [None]:
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [None]:
from llama_index import ServiceContext

auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    node_parser=node_parser,
)

In [None]:
from llama_index import VectorStoreIndex, StorageContext

storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

automerging_index = VectorStoreIndex(
    leaf_nodes, storage_context=storage_context, service_context=auto_merging_context
)

automerging_index.storage_context.persist(persist_dir="./merging_index")

In [None]:
# Optional: reuse a persisted index instead of rebuilding it.
# If the "./merging_index" directory does not exist, build the index from
# `nodes` / `leaf_nodes` and persist it there; otherwise load the index from
# that directory.

import os
from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index import load_index_from_storage

if not os.path.exists("./merging_index"):
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
            leaf_nodes,
            storage_context=storage_context,
            service_context=auto_merging_context
        )

    automerging_index.storage_context.persist(persist_dir="./merging_index")
else:
    automerging_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./merging_index"),
        service_context=auto_merging_context
    )


### Defining the retriever and running the query engine

In [None]:
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.retrievers import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Base retriever: fetches the top-12 nodes from the index by similarity.
automerging_retriever = automerging_index.as_retriever(
    similarity_top_k=12
)

# Wrap the base retriever in an AutoMergingRetriever, giving it the index's
# storage context.
retriever = AutoMergingRetriever(
    automerging_retriever,
    automerging_index.storage_context,
    verbose=True
)

rerank = SentenceTransformerRerank(top_n=6, model="BAAI/bge-reranker-base")

# Build the engine from the auto-merging `retriever`. Passing the base
# `automerging_retriever` here would leave the AutoMergingRetriever unused.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[rerank]
)

In [None]:
auto_merging_response = auto_merging_engine.query(
    "What is the importance of networking in AI?"
)

In [None]:
from llama_index.response.notebook_utils import display_response

display_response(auto_merging_response)

## Putting it all Together

In [None]:
import os

from llama_index import (
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.node_parser import HierarchicalNodeParser
from llama_index.node_parser import get_leaf_nodes
from llama_index import StorageContext, load_index_from_storage
from llama_index.retrievers import AutoMergingRetriever
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.query_engine import RetrieverQueryEngine


def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build, or load from disk, the vector index used for auto-merging retrieval.

    * If ``save_dir`` already exists, the index persisted there is loaded and
      returned as-is. ``documents`` and ``chunk_sizes`` are **not** used in
      that case, so use a different ``save_dir`` (or delete the directory)
      when changing either of them.
    * Otherwise ``documents`` are split by a ``HierarchicalNodeParser`` with
      ``chunk_sizes``; every resulting node is added to the docstore, only the
      leaf nodes are passed to ``VectorStoreIndex``, and the result is
      persisted to ``save_dir``.

    Args:
        documents: Documents to parse when the index has to be built.
        llm: LLM handed to the service context.
        embed_model: Embedding model specifier handed to the service context.
        save_dir: Directory used both as the existence check and as the
            persist/load location.
        chunk_sizes: Chunk sizes for the hierarchical parser; ``None`` (or any
            falsy value) selects ``[2048, 512, 128]``.

    Returns:
        The built or loaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Fast path: load the persisted index without parsing the documents,
    # since freshly parsed nodes would be discarded here anyway.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # All nodes go into the docstore; only the leaves are passed to the index.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
):
    """Create a query engine that retrieves through an ``AutoMergingRetriever``.

    The index's own retriever (with ``similarity_top_k``) is wrapped in an
    ``AutoMergingRetriever`` that is given the index's storage context, and a
    ``SentenceTransformerRerank`` (``"BAAI/bge-reranker-base"``, keeping
    ``rerank_top_n`` nodes) is attached as the only node postprocessor.

    Args:
        automerging_index: Index exposing ``as_retriever`` and
            ``storage_context``.
        similarity_top_k: Forwarded to ``as_retriever``.
        rerank_top_n: ``top_n`` for the reranker.

    Returns:
        The engine produced by ``RetrieverQueryEngine.from_args``.
    """
    merging_retriever = AutoMergingRetriever(
        automerging_index.as_retriever(similarity_top_k=similarity_top_k),
        automerging_index.storage_context,
        verbose=True,
    )
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )
    return RetrieverQueryEngine.from_args(
        merging_retriever, node_postprocessors=[reranker]
    )

In [None]:
from llama_index.llms import OpenAI

index = build_automerging_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./merging_index",
)


In [None]:
query_engine = get_automerging_query_engine(index, similarity_top_k=6)

### TruLens Evaluation

In [None]:
from trulens_eval import Tru

Tru().reset_database()

#### Two layers

In [None]:
auto_merging_index_0 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index_0",
    chunk_sizes=[2048,512],
)

In [None]:
auto_merging_engine_0 = get_automerging_query_engine(
    auto_merging_index_0,
    similarity_top_k=12,
    rerank_top_n=6,
)

In [None]:
tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine_0,
    app_id ='app_0'
)

In [None]:
# Load the evaluation questions, one per line. Surrounding whitespace
# (including the trailing newline) is stripped, and blank lines are skipped so
# that no empty question is ever sent to a query engine.
eval_questions = []
with open('generated_questions.text', 'r') as file:
    eval_questions.extend(
        stripped for stripped in (line.strip() for line in file) if stripped
    )

In [None]:
def run_evals(eval_questions, tru_recorder, query_engine):
    """Run every evaluation question through ``query_engine`` under ``tru_recorder``.

    Each question is issued inside its own ``with tru_recorder`` block, so the
    recorder's context is entered and exited once per question. The query
    responses are not collected; the function returns ``None``.
    """
    for eval_question in eval_questions:
        with tru_recorder:
            query_engine.query(eval_question)

In [None]:
run_evals(eval_questions, tru_recorder, auto_merging_engine_0)

In [None]:
from trulens_eval import Tru

Tru().get_leaderboard(app_ids=[])

In [None]:
Tru().run_dashboard()

#### Three layers

In [None]:
auto_merging_index_1 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index_1",
    chunk_sizes=[2048,512,128],
)

In [None]:
auto_merging_engine_1 = get_automerging_query_engine(
    auto_merging_index_1,
    similarity_top_k=12,
    rerank_top_n=6,
)


In [None]:
tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine_1,
    app_id ='app_1'
)

In [None]:
run_evals(eval_questions, tru_recorder, auto_merging_engine_1)

In [None]:
from trulens_eval import Tru

Tru().get_leaderboard(app_ids=[])

In [None]:
Tru().run_dashboard()