#### BASIC RAG

In [1]:
import os
from dotenv import load_dotenv

# Load the variables defined in the local .env file into the process
# environment so the os.getenv(...) lookups in the following cells can read them.
load_dotenv()

True

In [2]:
# OpenAI credential taken from the environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Every setting below is read from the process environment and is None when
# the corresponding variable is not set.

# --- LangChain tracing / endpoint settings ---
langchain_tracing_v2 = os.environ.get('LANGCHAIN_TRACING_V2')
langchain_endpoint = os.environ.get('LANGCHAIN_ENDPOINT')
langchain_api_key = os.environ.get('LANGCHAIN_API_KEY')

# --- LLM provider ---
openai_api_key = os.environ.get('OPENAI_API_KEY')

# --- Pinecone vector database ---
pinecone_api_key = os.environ.get('PINECONE_API_KEY')
pinecone_api_host = os.environ.get('PINECONE_API_HOST')
index_name = os.environ.get('PINECONE_INDEX_NAME')

In [4]:
# Set USER_AGENT for this process before the LangChain loaders are imported.
# NOTE(review): presumably consumed by the web loader's HTTP requests — confirm.
os.environ['USER_AGENT'] = 'myagent'

Part 1: Setup

In [6]:
from pinecone import Pinecone

# Pinecone client built with the key read from PINECONE_API_KEY.
# Note: a later import cell rebinds the name `Pinecone` to
# langchain_community.vectorstores.Pinecone; `pc` keeps pointing at this client.
pc = Pinecone(api_key=pinecone_api_key)

In [7]:
from pprint import pprint
import bs4

In [8]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

In [9]:
# Restrict BeautifulSoup parsing to elements carrying these CSS classes
# (the post's title, header and body).
article_only = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

# Loader for the single blog post used as the knowledge source.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": article_only},
)

In [10]:
# Run the loader: fetches the configured URL and returns the parsed page as
# a list of Document objects.
docs = loader.load()

In [79]:
# docs

In [11]:
# Break the loaded page into overlapping chunks for embedding:
# chunk_size=1000 with chunk_overlap=200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [12]:
# Preview the first three chunks.
splits[:3]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

In [11]:
# Number of chunks produced by the splitter.
len(splits)

66

In [12]:
# Names of the indexes visible to this Pinecone client.
pc.list_indexes().names()

['ex-rags-idx0101']

In [None]:
# from pinecone import ServerlessSpec

# cloud = 'aws'
# region = 'us-east-1'

# spec = ServerlessSpec(cloud=cloud, region=region)

# # check if index already exists (it shouldn't if this is first time)
# if index_name not in pc.list_indexes().names():
#     # if does not exist, create index
#     pc.create_index(
#         index_name,
#         dimension=1536,  # dimensionality of text-embedding-ada-002
#         metric='cosine',
#         spec=spec
#     )
# # connect to index
# index = pc.Index(index_name)

In [13]:
# Connect to the existing Pinecone index named by PINECONE_INDEX_NAME.
# The only other visible assignment of `index` sits in a commented-out cell,
# so without this line the cell raises NameError after
# Restart Kernel -> Run All.
index = pc.Index(index_name)

# view index stats
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 66}},
 'total_vector_count': 66}

In [14]:
from langchain_openai import OpenAIEmbeddings
# Embedding model used for both indexing and querying.
# NOTE(review): the index reports dimension 1536; the vectors produced by this
# model must have the same size — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [15]:
from langchain_pinecone import PineconeVectorStore
# LangChain vector-store wrapper around the Pinecone index, using `embeddings`
# as its embedding function. Requires `index` to be defined beforehand.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [16]:
from uuid import uuid4
# One freshly generated random id per chunk, in the same order as `splits`.
uuids = [str(uuid4()) for _chunk in splits]
uuids[:3]

['768741ea-ee27-4f64-8e50-c592287bc99a',
 'e61c1b4f-c3e9-4bde-ad16-f4c238fb621d',
 'fe66e18c-8c1f-4c83-87bd-10bf0179b7d9']

In [None]:
# Embed every chunk and store it in the index under its pre-generated id.
# NOTE(review): the ids come from uuid4(), so regenerating `uuids` and
# re-running this cell presumably stores a second copy of each chunk rather
# than overwriting — confirm before re-running.
vector_store.add_documents(documents=splits, ids=uuids)

In [76]:
# Expose the vector store through the retriever interface (default search
# settings) so it can be piped into the chain.
retriever = vector_store.as_retriever()

In [71]:
# Chat model that writes the final answer.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.1,
)

# RAG prompt template pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

In [72]:
# Post-processing
def format_docs(splits):
    """Join the ``page_content`` of each document into one string.

    Args:
        splits: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents concatenated in order, separated by a blank line;
        an empty string when ``splits`` is empty.
    """
    separator = "\n\n"
    contents = [doc.page_content for doc in splits]
    return separator.join(contents)

In [77]:
# Retrieval step: look up documents for the incoming question and flatten
# them into a single context string.
context_chain = retriever | format_docs

# Full pipeline: build the {context, question} inputs, fill the prompt,
# call the LLM, and parse the reply to a plain string.
rag_chain = (
    {"context": context_chain, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [78]:
# Send one question through the chain and pretty-print the answer.
question = "How does LangChain use vector stores for efficient data retrieval?"
answer = rag_chain.invoke(question)
pprint(answer)

('LangChain uses vector stores for efficient data retrieval by saving the '
 'embedding representation of information into a vector store database that '
 'supports fast maximum inner-product search (MIPS). This allows for fast '
 'retrieval of approximately top k nearest neighbors by using approximate '
 'nearest neighbors (ANN) algorithms, trading off a little accuracy for a '
 'significant speedup. The use of vector stores and ANN algorithms optimizes '
 'the retrieval speed for LangChain.')


In [20]:
# Remove a single entry from the vector store: the one stored under the last
# id in `uuids`. Only meaningful if `uuids` is the same list that was passed
# to add_documents.
vector_store.delete(ids=[uuids[-1]])