In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment so that later os.getenv lookups can see them.
load_dotenv()

In [None]:
import os
# Read the API key from the environment; the value is None when the
# API_KEY variable is not set.
API_KEY = os.environ.get("API_KEY")

In [None]:
import PyPDF2
def pdf_to_txt(pdf_path, txt_path):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Each page that yields non-empty text is written to ``txt_path`` followed
    by a newline; pages with no extractable text are skipped.

    Args:
        pdf_path: Path of the PDF file to read.
        txt_path: Path of the text file to create or overwrite.

    Returns:
        None. The outcome is reported with ``print``: a success message, or
        ``Error: <exception>`` on failure. Exceptions are not propagated, so
        a failing conversion does not abort the notebook run.
    """
    try:
        with open(pdf_path, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Extract everything while the PDF is still open and *before*
            # touching txt_path: if a page fails to parse, an existing output
            # file is left intact instead of being truncated or half-written.
            page_texts = [page.extract_text() for page in reader.pages]
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.writelines(text + "\n" for text in page_texts if text)
        print(f"Text successfully extracted to {txt_path}")
    except Exception as e:
        # Best-effort by design: report the problem and keep the kernel going.
        print(f"Error: {e}")
# Convert the source PDF to plain text. "Your File Name" is a placeholder:
# replace it with the path of the PDF you want to query.
pdf_to_txt(pdf_path="Your File Name", txt_path="output.txt")

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the text that was extracted from the PDF.
loader = TextLoader("output.txt", encoding="utf-8")
documents = loader.load()

# Split the text into 500-character chunks with a 50-character overlap so
# that context spanning a chunk boundary is not lost at retrieval time.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

# An empty output.txt (for example a scanned, image-only PDF) produces no
# chunks, and a FAISS index cannot be built from zero vectors. Fail here with
# an actionable message rather than an opaque error from inside the library.
if not chunks:
    raise ValueError(
        "No text chunks were produced from output.txt; "
        "check that the PDF contains extractable text."
    )

# Use Hugging Face Sentence Transformers for embedding
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed every chunk and store the vectors in an in-memory FAISS index
vector_db = FAISS.from_documents(chunks, embedding_function)

# Persist the index to the "faiss_index" directory so it can be reloaded
# later without re-embedding the document.
vector_db.save_local("faiss_index")

In [None]:
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from IPython.display import display,Markdown
# Initialize the LLM through OpenRouter's OpenAI-compatible endpoint.
llm = ChatOpenAI(
    openai_api_base="https://openrouter.ai/api/v1",
    # Pass the key read from the environment (the API_KEY variable), not the
    # literal string "API_KEY", which is not a valid credential.
    openai_api_key=API_KEY,
    model_name="deepseek/deepseek-chat:free",
)

# Must be the same embedding model that was used to build the index,
# otherwise query vectors and stored vectors are not comparable.
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Reload the FAISS index from disk so this cell can run without re-embedding.
# SECURITY: allow_dangerous_deserialization=True permits unpickling the stored
# index metadata, which can execute arbitrary code. Only load an index you
# created yourself; never point this at an untrusted "faiss_index" directory.
vector_db = FAISS.load_local("faiss_index", embedding_function, allow_dangerous_deserialization=True)

# Use the retriever from FAISS
retriever = vector_db.as_retriever()

# Create a RAG-based RetrievalQA chain
qa_chain = RetrievalQA.from_llm(llm=llm, retriever=retriever)

# Ask a question (interactive: the cell blocks until the user types a prompt)
query = input("Enter a prompt:")
response = qa_chain.run(query)

# Render the answer as Markdown in the notebook output
display(Markdown(response))