RAG AGENT

ADD ALL PREREQUISITES

In [6]:
import os
from getpass import getpass

# Ask for the OpenAI key interactively only when the environment has no usable value.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter OPENAI_API_KEY: ")

CREATE A BASIC LLM AGENT AND ASK IT AN UP-TO-DATE QUESTION TO SHOW THAT ITS PARAMETRIC KNOWLEDGE IS LIMITED

In [7]:
from agents import Agent

# Baseline agent: no instructions and no tools are supplied.
agent = Agent(name="Agent", model="gpt-4o")

In [8]:
from agents import Runner

query = "SHOULD BE THE SAME QUESTION"

result = await Runner.run(
    starting_agent=agent,
    input=query,
)

print(result.final_output)

CREATE ANOTHER LLM AGENT WITH ADDITIONAL SOURCE DATA TO SHOW THAT LLMS CAN USE SUPPLIED DATA TO PRODUCE AN ANSWER

In [9]:
# Same model as the baseline, but the instructions now carry source data for the question.
# Note: this rebinds the name `agent`.
agent = Agent(
    model="gpt-4o",
    name="Agent",
    instructions="DATA ABOUT THE QUESTION YOU WANT TO ANSWER",
)

In [10]:
query = "SHOULD BE THE SAME QUESTION"

result = await Runner.run(
    starting_agent=agent,
    input=query,
)

print(result.final_output)

GET THE HUGGING FACE DATASET

In [11]:
from datasets import load_dataset

# Load only the "train" split of the pre-chunked arXiv papers dataset.
dataset = load_dataset("jamescalam/llama-2-arxiv-papers-chunked", split="train")

In [12]:
# Display the first record to see which fields each chunk carries.
dataset[0]

{'doi': '1102.0183',
 'chunk-id': '0',
 'chunk': 'High-Performance Neural Networks\nfor Visual Object Classi\x0ccation\nDan C. Cire\x18 san, Ueli Meier, Jonathan Masci,\nLuca M. Gambardella and J\x7f urgen Schmidhuber\nTechnical Report No. IDSIA-01-11\nJanuary 2011\nIDSIA / USI-SUPSI\nDalle Molle Institute for Arti\x0ccial Intelligence\nGalleria 2, 6928 Manno, Switzerland\nIDSIA is a joint institute of both University of Lugano (USI) and University of Applied Sciences of Southern Switzerland (SUPSI),\nand was founded in 1988 by the Dalle Molle Foundation which promoted quality of life.\nThis work was partially supported by the Swiss Commission for Technology and Innovation (CTI), Project n. 9688.1 IFF:\nIntelligent Fill in Form.arXiv:1102.0183v1  [cs.AI]  1 Feb 2011\nTechnical Report No. IDSIA-01-11 1\nHigh-Performance Neural Networks\nfor Visual Object Classi\x0ccation\nDan C. Cire\x18 san, Ueli Meier, Jonathan Masci,\nLuca M. Gambardella and J\x7f urgen Schmidhuber\nJanuary 2011\nAbs

CREATE A KNOWLEDGE BASE USING PINECONE

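[REDACTED: a live Pinecone API key was pasted here. Never store keys in the notebook. Revoke this key in the Pinecone console and supply a new one through the PINECONE_API_KEY environment variable or the prompt in the next cell.]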

In [15]:
# Ask for the Pinecone key interactively only when the environment has no usable value.
if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass("Enter PINECONE_API_KEY: ")

In [None]:
from pinecone import Pinecone, ServerlessSpec
    
index_name = "rag-example"

# Client authenticated with the key held in the PINECONE_API_KEY environment variable.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the index only when no index with this name is listed yet.
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=1536,  # matches the embedding length reported later in the notebook
        metric='cosine',
        spec=serverless_spec,
    )

# Handle used by the later cells to upsert and query vectors.
index = pc.Index(index_name)

In [31]:
# Display the index statistics to check its current vector count.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

MAKE BASIC EXAMPLE OF EMBEDDING

In [36]:
from openai import OpenAI

# No key is passed explicitly; presumably the client picks up the OPENAI_API_KEY
# environment variable set at the top of the notebook (confirm).
client = OpenAI()

In [39]:
# Two short sample strings used to try out the embedding call.
texts = [
    'this is the first chunk of text',
    'then this is the second chunk of text'
]

In [41]:
# Embed both sample strings in a single request.
res = client.embeddings.create(
    input=texts,
    model="text-embedding-3-small"
)

In [45]:
# Number of embeddings returned, and the length of the first embedding vector.
len(res.data), len(res.data[0].embedding)

(2, 1536)

[Embedding(embedding=[0.002454060362651944, 0.02212383970618248, -0.00104707817081362, 0.01900591515004635, -0.012598504312336445, -0.02345156855881214, 0.03210418298840523, -0.0055794441141188145, -0.004699264653027058, 0.0038675693795084953, 0.07936535775661469, -0.006978034973144531, -0.040756795555353165, -0.04454604536294937, 0.04636607691645622, 0.03983186185359955, -0.022452041506767273, 0.0016307566547766328, -0.05048352852463722, 0.023809606209397316, 0.0483054555952549, -0.0015841369749978185, -0.002968741813674569, 0.018707549199461937, -0.03287993371486664, -0.01325490977615118, 0.016022253781557083, 0.02002035826444626, 0.024883724749088287, -0.05066254734992981, -0.027404917404055595, -0.04582901671528816, 0.030463168397545815, -0.022795163094997406, 0.03908594697713852, -0.0015281933592632413, -0.0375344417989254, 0.018483774736523628, 0.01566421426832676, -0.03783280774950981, 0.0005818136851303279, -0.057763658463954926, 0.01400082465261221, 0.0035393666476011276, -0.0

EMBED HUGGING FACE FILES

In [51]:
from tqdm.auto import tqdm

# Convert the dataset to a pandas DataFrame so batches can be sliced by row position.
data = dataset.to_pandas()

batch_size = 100  # rows embedded and upserted per request

for start in tqdm(range(0, len(data), batch_size)):
    # iloc slicing stops at the end of the frame, so the last batch may be shorter.
    batch = data.iloc[start:start + batch_size]

    # Vector ids join the 'doi' and 'chunk-id' fields with a hyphen.
    ids = [
        f"{doi}-{chunk_id}"
        for doi, chunk_id in zip(batch['doi'], batch['chunk-id'])
    ]

    texts = batch['chunk'].tolist()

    # One embedding request per batch; results are read back in list order.
    embeds = client.embeddings.create(
        input=texts,
        model="text-embedding-3-small"
    )
    vectors = [record.embedding for record in embeds.data]

    # Store the chunk text with each vector so a query can return readable passages.
    metadata = [
        {'text': chunk, 'source': source, 'title': title}
        for chunk, source, title in zip(
            batch['chunk'], batch['source'], batch['title']
        )
    ]

    # Pass a concrete list rather than a one-shot zip iterator.
    index.upsert(vectors=list(zip(ids, vectors, metadata)))
    
        

100%|██████████| 49/49 [04:21<00:00,  5.33s/it]


In [52]:
# Display the index statistics again to see the vector count after the upsert loop.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 4838}},
 'total_vector_count': 4838,
 'vector_type': 'dense'}

MAKING RAG CHATBOT

In [53]:
# Embed the question with the same model that was used for the stored chunks.
query_response = client.embeddings.create(
    input=["what is so special about llama 2"],
    model="text-embedding-3-small",
)
query_embedding = query_response.data[0].embedding

# Fetch the three nearest chunks, including their stored metadata.
results = index.query(vector=query_embedding, top_k=3, include_metadata=True)

# Print the text of each retrieved chunk.
for match in results["matches"]:
    passage = match["metadata"]["text"]
    print(passage)

asChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyﬁne-tunedtoalignwithhuman
preferences, which greatly enhances their usability and safety. This step can require signiﬁcant costs in
computeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin
the community to advance AI alignment research.
In this work, we develop and release Llama 2, a family of pretrained and ﬁne-tuned LLMs, L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle and
L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,
L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc models generally perform better than existing open-source models. They also appear to
be on par with some of the closed-source models, at least on the human evaluations we performed (see
thisprogressionistheriseofLlama,recognizedforitsfocusoncomputationaleﬃciencyduringinference
(Touvron et al., 2023). A parallel discourse has 

MAKE RAG AS A TOOL FOR AGENT

In [None]:
from agents import function_tool

@function_tool
async def augment_prompt(query: str) -> str:
    """Retrieve knowledge-base passages for a query and wrap them in a prompt.

    Embeds the query, fetches the three closest matches from the Pinecone
    index, and returns their text together with the original query.

    Args:
        query: The question to look up in the knowledge base.

    Returns:
        A prompt containing the retrieved passages followed by the query.
    """
    import asyncio

    # `client` and `index` are created earlier in the notebook with the
    # synchronous `OpenAI()` and `pc.Index(...)` constructors. Their results
    # are not awaitable, so the blocking calls run in worker threads instead
    # of being awaited directly.

    # 1. Get the query embedding
    embeds_response = await asyncio.to_thread(
        client.embeddings.create,
        input=[query],
        model="text-embedding-3-small",
    )
    query_embedding = embeds_response.data[0].embedding

    # 2. Query Pinecone
    results = await asyncio.to_thread(
        index.query,
        vector=query_embedding,
        top_k=3,
        include_metadata=True,
    )

    # 3. Extract the passages
    source_knowledge = "\n".join(
        match["metadata"]["text"] for match in results["matches"]
    )

    # 4. Build the augmented prompt
    augmented_prompt = f"""using the context below, answer the query.

    contexts:
    {source_knowledge}

    query:
    {query}
    """

    return augmented_prompt

MAKE FINAL AGENT

In [61]:
# The instructions tell the model to call the retrieval tool before it answers.
rag_instructions = "You are a rag assistant, always use the augment_prompt tool to get more information before answering the question."

rag_agent = Agent(
    name="RAG Agent",
    instructions=rag_instructions,
    tools=[augment_prompt],
    model="gpt-4o",
)

In [62]:
query = "What is so special about llama 2?"

result = await Runner.run(
    starting_agent=agent,
    input=query,
)

In [63]:
# Print the final answer from the most recent run.
print(result.final_output)

LLaMA 2, developed by Meta, is an advanced language model that builds upon its predecessor, LLaMA, with several improvements. Here’s what makes it special:

1. **Open Access**: LLaMA 2 is open-weight, allowing researchers and developers to access and utilize the model for various applications, fostering innovation and collaboration.

2. **Improved Performance**: It offers enhanced performance in natural language processing tasks, achieving better results in areas such as text generation, comprehension, and translation compared to similar models.

3. **Scalability**: LLaMA 2 comes in different sizes, allowing users to choose a version that best fits their resource availability and task requirements.

4. **Fine-Tuning**: The model is built to be easily fine-tuned for specific tasks, making it adaptable for specialized applications or different industries.

5. **Research and Development Focus**: By releasing LLaMA 2 with open weights, Meta encourages further research and development, enab