# 📘 Task 1: RAG QA Bot for Business
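Develop a Retrieval-Augmented Generation (RAG) QA bot that answers questions about an uploaded business document, using OpenAI for embeddings and answer generation, Pinecone as the vector store, and LangChain to connect them. The notebook is written for Google Colab (it uses the Colab file-upload widget).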


## 🔧 SECTION 1: Setup Environment

In [None]:
!pip install -q openai langchain pinecone-client python-dotenv unstructured pdfminer.six faiss-cpu sentence-transformers

## 📦 SECTION 2: Import Libraries

In [None]:
import os
import openai
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import UnstructuredFileLoader
from google.colab import files

## 🔐 SECTION 3: API Key Setup

In [None]:
# Secrets are taken from the environment when present and otherwise requested
# through a hidden prompt, so real keys are never stored in the notebook source.
from getpass import getpass

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")

# Fail here with a clear message rather than later inside an API call.
if not OPENAI_API_KEY or not PINECONE_API_KEY:
    raise ValueError("Both an OpenAI API key and a Pinecone API key are required.")

# Non-secret Pinecone settings; set the matching environment variable to override.
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", "gcp-starter")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "business-index")

# Publish the OpenAI key both as an environment variable and on the `openai`
# module; the embedding and chat objects created later are not given a key
# explicitly.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = OPENAI_API_KEY

## 📄 SECTION 4: Upload & Load Document

In [None]:
# Ask for a document through the Colab upload widget. The result is keyed by
# uploaded file name.
uploaded = files.upload()
if not uploaded:
    # An empty result (for example, the upload was cancelled) would otherwise
    # surface as an unexplained IndexError.
    raise ValueError("No file was uploaded. Re-run this cell and choose a document.")

# Only the first uploaded file is processed; any additional files are ignored.
file_name = next(iter(uploaded))

# Parse the file into LangChain documents.
doc_loader = UnstructuredFileLoader(file_name)
raw_docs = doc_loader.load()

# Split into chunks of at most 500 characters with a 50-character overlap
# between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = splitter.split_documents(raw_docs)
print(f"Loaded {len(documents)} chunks")

## 🧠 SECTION 5: Embed Documents & Upload to Pinecone

In [None]:
# Connect the Pinecone client with the credentials defined in the API key cell.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)
embeddings = OpenAIEmbeddings()

# Create the index on first run; nothing else in this notebook creates it.
if PINECONE_INDEX_NAME not in pinecone.list_indexes():
    # Measure the vector size from the embedding model itself so the index
    # dimension matches whichever model `OpenAIEmbeddings` is using.
    embedding_dim = len(embeddings.embed_query("dimension probe"))
    pinecone.create_index(PINECONE_INDEX_NAME, dimension=embedding_dim, metric="cosine")

# Embed the chunks and store them in the index; the resulting vector store
# backs the retriever used by the QA chain.
vectorstore = Pinecone.from_documents(documents, embeddings, index_name=PINECONE_INDEX_NAME)

## 🔍 SECTION 6: Query Answering using RAG

In [None]:
# Chat model that writes the final answer.
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Retriever over the vector store built in the previous section.
retriever = vectorstore.as_retriever()

# Retrieval-augmented QA chain; `return_source_documents=True` makes the chain
# output include the retrieved documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

## 🧪 SECTION 7: Test the QA Bot

In [None]:
# Run one sample question through the chain.
query = "What services does our business offer?"
result = qa_chain.invoke(query)

# The chain output carries the answer text and the documents it drew on.
answer = result["result"]
sources = result["source_documents"]

print("\nAnswer:\n", answer)
print("\nSource Document Snippets:\n")
for source in sources:
    # Show only the first 300 characters of each retrieved chunk.
    snippet = source.page_content[:300]
    print(snippet, "...\n")

## ✅ Task Completed

In [None]:
# Final confirmation that every cell above has been executed.
print("RAG QA Bot setup complete! Test with more queries.")