In [1]:
import os
import sys

# Make modules and classes under ../src importable from this notebook.
module_path = os.path.abspath("../src")
if not (module_path in sys.path):
    # Append only once so re-running the cell does not grow sys.path.
    sys.path.append(module_path)

## Advanced RAG Pipeline

In [2]:
# import utils
import os
# openai.api_key = utils.get_openai_api_key()

In [3]:
from llama_index.core import SimpleDirectoryReader

# Read the e-book PDF; load_data() yields a list of Document objects.
pdf_reader = SimpleDirectoryReader(
    input_files=["../data/eBook-How-to-Build-a-Career-in-AI.pdf"]
)
documents = pdf_reader.load_data()

In [4]:
# Sanity-check the loader output: container type, number of entries,
# then the type and printed form of the first entry.
for summary in (type(documents), len(documents)):
    print(summary, "\n")
first_document = documents[0]
print(type(first_document))
print(first_document)

<class 'list'> 

41 

<class 'llama_index.core.schema.Document'>
Doc ID: 05b94117-e0a9-48b9-9d12-c6833e5fa1ad
Text: PAGE 1Founder, DeepLearning.AICollected Insights from Andrew Ng
How to  Build Your Career in AIA Simple Guide


### Basic RAG Pipeline

In [5]:
from llama_index.core import Document

# Concatenate the text of every loaded Document, separated by a blank line,
# and wrap the result in a single Document.
doc_texts = [doc.text for doc in documents]
document = Document(text="\n\n".join(doc_texts))

In [6]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

# Azure OpenAI chat model. The API key, endpoint, API version and deployment
# name are read from environment variables; os.getenv returns None for any
# variable that is not set.
azure_llm_kwargs = dict(
    model="gpt-4o",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_API_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_deployment=os.getenv("AZURE_OPENAI_API_LLM_DEPLOYMENT_NAME"),
    temperature=0.1,
)
llm = AzureOpenAI(**azure_llm_kwargs)

# Local alternative (Ollama), kept for reference:
# from llama_index.llms.ollama import Ollama
# llm = Ollama(model="llama3.1:latest", request_timeout=300.0)

# Embedding model, selected by its HuggingFace model name.
embeddings = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build the vector index over the single merged document using that
# embedding model.
index = VectorStoreIndex.from_documents(
    [document],
    embed_model=embeddings,
)

In [7]:
# Create a query engine over the vector index, answering with the LLM
# configured above.
query_engine = index.as_query_engine(llm=llm)

In [8]:
# Smoke-test the basic pipeline with one question and show the answer text.
sample_question = "What are steps to take when finding projects to build your experience?"
response = query_engine.query(sample_question)
print(response)

To build your experience through projects, consider these steps:

1. **Join Existing Projects**: Collaborate with others who have ongoing projects to gain experience and insights.

2. **Engage in Continuous Learning**: Read, take courses, and talk to domain experts to generate new ideas.

3. **Focus on Application Areas**: Explore unique applications of technology in areas that interest you or your organization.

4. **Develop a Side Hustle**: Work on fun projects outside your main job to stimulate creativity and build skills.

5. **Evaluate Project Potential**: Choose projects that challenge you technically and have the potential to be stepping stones to larger endeavors.

6. **Build a Portfolio**: Start with small projects and gradually take on more complex ones to demonstrate skill progression.

7. **Communicate Effectively**: Clearly explain your projects to gain support and resources for future initiatives.


### Evaluation setup using TruLens

In [9]:
# Load the evaluation questions: one question per line of the text file.
eval_questions = []
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open("../data/eval_questions.txt", "r", encoding="utf-8") as file:
    for line in file:
        # Strip surrounding whitespace, including the trailing newline.
        item = line.strip()
        if not item:
            # Skip blank lines so an empty question is never queued.
            continue
        print(item)
        eval_questions.append(item)

What are the keys to building a career in AI?
How can teamwork contribute to success in AI?
What is the importance of networking in AI?
What are some good habits to develop for a successful career?
How can altruism be beneficial in building a career?
What is imposter syndrome and how does it relate to AI?
Who are some accomplished individuals who have experienced imposter syndrome?
What is the first step to becoming good at AI?
What are some common challenges in AI?
Is it normal to find parts of AI challenging?


In [10]:
# Add one extra, hand-written question to the evaluation set.
new_question = "What is the right AI job for me?"
# Guard the append so re-running this cell does not add a duplicate entry.
if new_question not in eval_questions:
    eval_questions.append(new_question)

In [11]:
# Show the full evaluation set, including the question appended above.
print(eval_questions)

['What are the keys to building a career in AI?', 'How can teamwork contribute to success in AI?', 'What is the importance of networking in AI?', 'What are some good habits to develop for a successful career?', 'How can altruism be beneficial in building a career?', 'What is imposter syndrome and how does it relate to AI?', 'Who are some accomplished individuals who have experienced imposter syndrome?', 'What is the first step to becoming good at AI?', 'What are some common challenges in AI?', 'Is it normal to find parts of AI challenging?', 'What is the right AI job for me?']


In [12]:
from trulens_eval import Tru

# TruLens entry point; later cells use it to reset the database and to fetch
# records and the leaderboard. The cell output reports the default SQLite
# database URL and warns that secret keys may be written to that database.
tru = Tru()

# Reset the TruLens database before any recording starts.
# NOTE(review): presumably this discards previously stored records — confirm
# before running against a database whose history matters.
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [13]:
from utils import get_prebuilt_trulens_recorder

# Wrap the basic query engine in the project's prebuilt TruLens recorder.
# The cell output lists the feedback functions it wires up: Answer Relevance,
# Context Relevance and Groundedness.
tru_recorder = get_prebuilt_trulens_recorder(
    query_engine,
    app_id="Direct Query Engine",
)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [14]:
# Run every evaluation question through the basic query engine inside the
# recorder context so the calls are captured by TruLens.
with tru_recorder as recording:
    for question in eval_questions:
        # The answer is not displayed here; `response` only holds the last
        # question's answer once the loop finishes.
        response = query_engine.query(question)

In [15]:
# Fetch the recorded calls and their feedback from TruLens.
# NOTE(review): app_ids=[] presumably means "no filter, all apps" — confirm
# against the TruLens documentation.
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [16]:
# Preview the first few recorded rows (input, output, latency, tokens, cost).
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,latency,total_tokens,total_cost
0,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_0b062d630bf2c1644dcbf91b831ccfeb,"""What are the keys to building a career in AI?""","""To build a career in AI, focus on learning fo...",-,"{""record_id"": ""record_hash_0b062d630bf2c1644dc...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-20T12:21:05.113983"", ""...",2024-09-20T12:21:06.562362,1,2086,0.01114
1,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_369953d3471ff8167d0fe382d4381238,"""How can teamwork contribute to success in AI?""","""Teamwork can significantly contribute to succ...",-,"{""record_id"": ""record_hash_369953d3471ff8167d0...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-20T12:21:06.787745"", ""...",2024-09-20T12:21:08.331561,1,1689,0.009105
2,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_af6bd4649ea4c7cd990b449c692318d4,"""What is the importance of networking in AI?""","""Networking in AI is important because it help...",-,"{""record_id"": ""record_hash_af6bd4649ea4c7cd990...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-20T12:21:08.538854"", ""...",2024-09-20T12:21:09.810494,1,1674,0.00888
3,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_ebcb755d6ac347c90d7e84c7cd253d57,"""What are some good habits to develop for a su...","""Developing good habits in areas such as eatin...",-,"{""record_id"": ""record_hash_ebcb755d6ac347c90d7...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-20T12:21:10.032224"", ""...",2024-09-20T12:21:11.220913,1,1644,0.00867
4,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_dcd42927e4246980bb9d3e4f7e989935,"""How can altruism be beneficial in building a ...","""Altruism can be beneficial in building a care...",-,"{""record_id"": ""record_hash_dcd42927e4246980bb9...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-09-20T12:21:11.481890"", ""...",2024-09-20T12:21:12.854815,1,1660,0.00892


In [17]:
# launches on http://localhost:8501/
# tru.run_dashboard()

### Advanced RAG Pipeline


### 1. Sentence Window Retrieval

In [18]:
from llama_index.llms.azure_openai import AzureOpenAI

# Switch to the gpt-4o-mini deployment for the pipelines that follow.
# NOTE: this rebinds `llm`; the basic `query_engine` was created earlier with
# the previous value. The deployment name is hard-coded here, while key,
# endpoint and API version still come from environment variables.
gpt4o_mini_kwargs = dict(
    model="gpt-4o-mini",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_API_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_deployment="gpt-4o-mini",
    temperature=0.1,
)
llm = AzureOpenAI(**gpt4o_mini_kwargs)

# Alternatives, kept for reference:
# llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
# from llama_index.llms.ollama import Ollama
# llm = Ollama(model="llama3.1:latest", request_timeout=300.0)

In [19]:
from utils import build_sentence_window_index

# Build the sentence-window index over the merged document with the
# project helper from utils.
# NOTE(review): save_dir is presumably where the helper persists or reloads
# the index — confirm in utils.build_sentence_window_index. The directory name
# carries no trailing comma, matching the plain "merging_index" name used for
# the auto-merging index.
sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
)

In [20]:
from utils import get_sentence_window_query_engine

# Create the sentence-window query engine from the index via the project
# helper, using the current `llm`.
sentence_window_engine = get_sentence_window_query_engine(llm, sentence_index)

In [21]:
# Try the sentence-window engine on a single question and show the answer.
window_question = "how do I get started on a personal project in AI?"
window_response = sentence_window_engine.query(window_question)
print(window_response)

To get started on a personal project in AI, begin by selecting a topic that interests you. Start small; even simple projects can provide valuable learning experiences. For instance, you might train a neural network on a basic function to understand the fundamentals.

As you work on your project, focus on building a portfolio that showcases your skill progression from simpler to more complex tasks. This will be beneficial for future job opportunities. Communication is crucial, so clearly articulate the value of your project to potential collaborators or mentors, which can help you gain support and constructive feedback.

Additionally, consider collaborating with others to enhance your project and develop leadership skills, even if you’re not in a formal leadership role. This collaborative approach can also provide insights into practical challenges in AI.

Lastly, keep learning throughout the process, as the field is constantly evolving. Engaging with a community can also provide suppor

In [22]:
# Reset the TruLens database before evaluating the sentence-window engine.
# NOTE(review): the leaderboard output later in this notebook lists only
# "Sentence Window Query Engine", so the earlier "Direct Query Engine" records
# appear to be gone after this reset — confirm that a side-by-side comparison
# is not wanted.
tru.reset_database()

# Recorder for the sentence-window engine, registered under its own app_id.
tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    sentence_window_engine,
    app_id="Sentence Window Query Engine",
)

In [23]:
# Ask every evaluation question through the sentence-window engine, entering
# the recorder context once per question, and echo each question/answer pair.
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = sentence_window_engine.query(question)
        print(question, response, sep="\n")

What are the keys to building a career in AI?
The keys to building a career in AI include:

1. **Teamwork**: Collaborating effectively with others enhances project success and is crucial for career growth.
   
2. **Networking**: Building a community rather than just a personal network can provide support and opportunities.

3. **Job Search**: Finding a job is an important step, but it should be viewed as part of a larger career journey.

4. **Personal Discipline**: Developing good habits in various aspects of life, such as learning and self-care, contributes to long-term success.

5. **Altruism**: Helping others along your career path can lead to better outcomes for yourself as well.
How can teamwork contribute to success in AI?
Teamwork enhances success in AI by fostering collaboration, allowing individuals to share diverse perspectives and skills. Working in teams enables members to influence and be influenced by one another, which is crucial for tackling large projects effectively. 

In [24]:
# Display the TruLens leaderboard (per-app latency and total cost).
# NOTE(review): app_ids=[] presumably means "all apps" — confirm.
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1
Sentence Window Query Engine,1.363636,0.000338


In [25]:
# launches on http://localhost:8501/
# tru.run_dashboard()

### 2. Auto-Merging Retrieval

In [26]:
from llama_index.llms.ollama import Ollama

# LLM served through Ollama, used for the auto-merging pipeline.
# NOTE(review): request_timeout is presumably in seconds — confirm.
llm_ollama = Ollama(model="llama3.1:latest", request_timeout=300.0)

In [27]:
from utils import build_automerging_index

# Build the auto-merging index with the project helper from utils.
# Note that this passes `documents` (the list returned by the reader), not
# the single merged `document` used for the sentence-window index.
# NOTE(review): save_dir is presumably where the helper persists or reloads
# the index — confirm in utils.build_automerging_index.
automerging_index = build_automerging_index(
    documents,
    llm=llm_ollama,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
)

In [28]:
from utils import get_automerging_query_engine

# Create the auto-merging query engine from the index via the project helper,
# answering with the Ollama LLM.
automerging_query_engine = get_automerging_query_engine(
    llm_ollama,
    automerging_index,
)

In [29]:
# Try the auto-merging engine on a single question and show the answer.
portfolio_question = "How do I build a portfolio of AI projects?"
auto_merging_response = automerging_query_engine.query(portfolio_question)
print(auto_merging_response)

> Merging 1 nodes into parent node.
> Parent node id: 9af6dfc7-1551-4013-a761-321245a299bb.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

> Merging 1 nodes into parent node.
> Parent node id: 82b7eaea-510d-4e9c-865c-4378103ce6f8.
> Parent node text: PAGE 21Building a Portfolio of 
Projects that Shows 
Skill Progression CHAPTER 6
PROJECTS

Building a portfolio of projects that showcases progress over time will be a big help when looking for a job. To start, focus on simple undertakings and gradually move to more complex ones. This will demonstrate your ability to grow and take on new challenges. Additionally, being able to effectively communicate the value behind your work is crucial in showcasing your skills to potential employers.


In [30]:
# Reset the TruLens database before evaluating the auto-merging engine.
# NOTE(review): presumably this discards the sentence-window records stored
# by the earlier cells — confirm that is intended.
tru.reset_database()

# Recorder for the auto-merging engine, registered under its own app_id.
tru_recorder_automerging = get_prebuilt_trulens_recorder(
    automerging_query_engine, app_id="Automerging Query Engine"
)