## 🧾 Introduction
This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) based Question-Answering (QA) system for a business using OpenAI's LLM and Pinecone vector database. The pipeline ingests business documents, indexes them using vector embeddings, and responds to user questions with context-aware answers.

SECTION 1: Setup Environment (Install Required Packages)


In [None]:
!pip install -q openai langchain pinecone python-dotenv unstructured pdfminer.six faiss-cpu sentence-transformers

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/587.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━[0m [32m460.8/587.6 kB[0m [31m13.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.6/587.6 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/240.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25h

SECTION 2: Import Libraries


In [None]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Downloading langchain_community-0.3.27-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx_sse-0.4.1-py3-none-any.whl (8.1 kB)
Downloading pydantic_settings-2.10.1-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: httpx-sse, pydantic-settings, langchain-community
Successfully installed httpx-sse-0.4.1 langchain-community-0.3.27 pydantic-settings-2.10.1


In [None]:
import os
from getpass import getpass

import openai
import pinecone
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain_community.document_loaders import UnstructuredFileLoader



SECTION 3: API Key Setup


In [None]:
def _read_secret(env_var: str) -> str:
    """Return the secret stored in ``env_var``, prompting for it if it is unset.

    The value is taken from the environment when present; otherwise the user is
    asked for it through a hidden ``getpass`` prompt, so the key is never written
    into the notebook source or echoed into its saved output.

    Raises:
        ValueError: if no value is available from either source.
    """
    value = os.environ.get(env_var, "").strip()
    if not value:
        value = getpass(f"Enter {env_var}: ").strip()
    if not value:
        raise ValueError(f"{env_var} is required but was not provided.")
    return value


# Never hardcode credentials in the notebook: read them from the environment or prompt.
OPENAI_API_KEY = _read_secret("OPENAI_API_KEY")
PINECONE_API_KEY = _read_secret("PINECONE_API_KEY")

# NOTE(review): PINECONE_ENVIRONMENT is not referenced by any other visible cell;
# kept so existing references elsewhere keep working.
PINECONE_ENVIRONMENT = "gcp-starter"
PINECONE_INDEX_NAME = "business-index"

# Export the keys so libraries that look them up from the environment can find them.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
openai.api_key = OPENAI_API_KEY

SECTION 4: Load Docs + Preprocess (Split into Chunks)

In [None]:
from google.colab import files

# Chunking parameters for the splitter (sizes are measured in characters).
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

# Ask the user for the business document (DOCX/PDF); the result is a dict keyed by file name.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded: stop here with a clear message instead of the
    # bare IndexError that indexing an empty key list would raise.
    raise ValueError("No file was uploaded. Re-run this cell and choose a DOCX/PDF file.")

file_name = next(iter(uploaded))  # name of the first uploaded file

# UnstructuredFileLoader is already imported in the imports cell.
doc_loader = UnstructuredFileLoader(file_name)
raw_docs = doc_loader.load()

# Split the loaded text into overlapping chunks that will be embedded individually.
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
documents = splitter.split_documents(raw_docs)

print(f"Loaded {len(documents)} chunks")


Saving CS 3.pdf to CS 3 (4).pdf
Loaded 176 chunks


SECTION 5: Embed Documents & Upload to Pinecone

In [None]:
from pinecone import Pinecone as PineconeClient, ServerlessSpec
from langchain.vectorstores import Pinecone as PineconeVectorStore

EMBEDDING_DIMENSION = 1536  # dimensionality of OpenAI's Ada-002 embeddings
TEXT_KEY = "text"           # metadata field that stores each chunk's raw text

# from_existing_index() below is given only the index name, so the LangChain wrapper
# has to open its own Pinecone connection; it looks the API key up from the
# PINECONE_API_KEY environment variable, which must therefore be set here.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

# Initialize the Pinecone client and the embedding model
pc = PineconeClient(api_key=PINECONE_API_KEY)
embeddings = OpenAIEmbeddings()

# Create the index on first run; later runs reuse the existing one
if PINECONE_INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=EMBEDDING_DIMENSION,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'  # Replace with your desired region
        )
    )

# Handle used for the raw upserts
pinecone_index = pc.Index(PINECONE_INDEX_NAME)

# Embed every chunk, then pair each vector with an ID and its source text.
# IDs are positional ("doc_0", "doc_1", ...), so re-running with a different document
# overwrites vectors by position and leaves any higher-numbered old vectors in place.
vectors = embeddings.embed_documents([doc.page_content for doc in documents])
upsert_data = [
    (f"doc_{i}", vector, {TEXT_KEY: doc.page_content})
    for i, (doc, vector) in enumerate(zip(documents, vectors))
]

# Upsert in fixed-size batches to keep each request small
batch_size = 100
for start in range(0, len(upsert_data), batch_size):
    pinecone_index.upsert(vectors=upsert_data[start:start + batch_size])

print(f"Upserted {len(upsert_data)} vectors to Pinecone.")

# Wrap the populated index for retrieval; text_key must match the metadata field
# written above so retrieved matches are turned back into document text.
vectorstore = PineconeVectorStore.from_existing_index(
    index_name=PINECONE_INDEX_NAME,
    embedding=embeddings,
    text_key=TEXT_KEY,
)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

SECTION 6: Query Answering using RAG (Retriever + LLM)

In [None]:
# Chat model that writes the final answer from the retrieved context.
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Retrieval-augmented QA chain: the vector store supplies the context chunks and
# the chain also returns them, so the answer's sources can be shown to the user.
qa_chain = RetrievalQA.from_chain_type(
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
    llm=llm,
)

  llm = ChatOpenAI(model="gpt-3.5-turbo")


NameError: name 'vectorstore' is not defined

SECTION 7: Test Output (Ask a Question)

In [None]:
# Sample question to run through the RAG chain.
query = "What services does our business offer?"
result = qa_chain.invoke(query)

# The chain's output holds the generated answer plus the chunks it was grounded on.
print("\nAnswer:\n", result["result"])

print("\nSource Document Snippets:\n")
for doc in result["source_documents"]:
    # Show only the first 300 characters of each supporting chunk.
    snippet = doc.page_content[:300]
    print(snippet, "...\n")


In [None]:
!pip install "unstructured[pdf]"

Collecting onnx>=1.17.0 (from unstructured[pdf])
  Downloading onnx-1.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting onnxruntime>=1.19.0 (from unstructured[pdf])
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting pdf2image (from unstructured[pdf])
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting pikepdf (from unstructured[pdf])
  Downloading pikepdf-9.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.1 kB)
Collecting pi-heif (from unstructured[pdf])
  Downloading pi_heif-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)
Collecting google-cloud-vision (from unstructured[pdf])
  Downloading google_cloud_vision-3.10.2-py3-none-any.whl.metadata (9.6 kB)
Collecting effdet (from unstructured[pdf])
  Downloading effdet-0.4.1-py3-none-any.whl.metadata (33 kB)
Collecting unstructured-inference>