### RAG using Pinecone

In [2]:
# import libraries
import openai
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

In [3]:
# load all env variables
from dotenv import load_dotenv
import os
load_dotenv()


True

In [5]:
# Reading the PDFs in a directory
def read_doc(directory):
    file_loader = PyPDFDirectoryLoader(directory)
    documents = file_loader.load()
    return documents

In [6]:
kb_path = '/Users/aditya/Desktop/Citadel/GenAI/Projects/Rag KBs'
docs = read_doc(kb_path)

In [7]:
len(docs)

15

In [8]:
# split the pdfs to chunks - due to restrictions in token size
def chunk_data (docs, chunk_size = 800, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = chunk_size, chunk_overlap = chunk_overlap)
    docs = text_splitter.split_documents(docs)
    return docs

In [10]:
documents = chunk_data(docs=docs)

In [11]:
len(documents)

62

In [12]:
# Initilaise embedding technique
embeddings = OpenAIEmbeddings(api_key = os.environ["OPENAI_API_KEY"])
embeddings

  embeddings = OpenAIEmbeddings(api_key = os.environ["OPENAI_API_KEY"])


OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x10c145ca0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x10c2c3ad0>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

In [13]:
# Demonstration of how embeddings work
vectors = embeddings.embed_query("Aditya is an AI Engineer")

In [15]:
len(vectors)

1536

In [29]:
# initilaize Pinecone
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])


In [None]:
# Create an index
index_name = "langchainvectordb"

pc.create_index(
    name=index_name,
    dimension=1536, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [34]:
# connect to index
index = pc.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [None]:
# initialize the vector store
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [37]:
# add document chunks to vector store
vector_store.add_documents(documents)


['1e6eca20-9c5c-4ba4-92a8-c82c4d2c7e77',
 'd8e1e4dc-99c1-4191-96d1-39600e301784',
 '80c710c5-f873-4283-8c5e-672e0a4c83c4',
 '54a94255-2ef1-4e52-b442-bd29f83dcf40',
 '47fd9635-58cb-4c6a-ab0d-283eb419470c',
 '4ceb06bc-b0d8-47c0-8290-a905d3b027c8',
 '9ec3881a-0077-4a81-9b4c-a51281afecd1',
 '8cbc88a5-9b85-4b1f-80f5-69aed3b7896b',
 '686e9643-a3fa-45f9-b8c6-12be54e0bf7b',
 'b7608a98-0f85-42f1-8537-b78e59f0ebcc',
 'd87afa35-7d44-43d9-9e5d-75e7e21f94c2',
 'a0f3d5cd-2234-407f-b3a8-14b3cc927a11',
 '51dd9f80-5c25-4432-838a-c8e9f973ea2d',
 '6a282430-ba41-43a2-a1a7-34abe1b41a46',
 '674ebbfa-8c56-4c73-ae3b-fa9649659f59',
 'dc3aa596-071b-44c2-aa33-959ceec498ff',
 '04a53ebd-a0cf-47dd-8a60-bcc54c44eb83',
 'de3f26e5-6ca7-4655-8c80-8d4955420dfa',
 '389b0dbd-4913-407b-bdb8-d1ea5a40c5b0',
 '1173c9b5-08ba-407e-84b6-cec9c051a254',
 'ec40423c-fa38-45f0-9ff1-e77f2c5345b4',
 '07f86581-bd62-4ea5-950b-0c333ed1afc1',
 '98191f29-d854-4446-b4c9-642b35f7372e',
 'd49b0878-500a-4a80-b659-c522965cf920',
 '9fb8d9b2-8692-

In [43]:
# Create retriever
retriever = vector_store.as_retriever(
    search_kwargs={
        "k": 3,  # Number of documents to retrieve
    }
)


In [None]:
# set-up your LM
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# Create a RAG chain
prompt = ChatPromptTemplate.from_template("""
Answer the question based only on the following context:
{context}

Question: {question}
""")

llm = ChatOpenAI(model="gpt-3.5-turbo", api_key = os.environ["OPENAI_API_KEY"])

# Create the RAG chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

# Invoke the chain
response = rag_chain.invoke("What is self attention?")
print(response)


content='Self-attention, sometimes called intra-attention, is an attention mechanism relating different positions of a single sequence in order to compute a representation of the sequence.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 1185, 'total_tokens': 1217, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-7ee21152-c9fc-414e-b685-c27a4fc6545a-0' usage_metadata={'input_tokens': 1185, 'output_tokens': 32, 'total_tokens': 1217, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
