# Retriever and Chain with LangChain

In [4]:
import os
import warnings

from dotenv import load_dotenv

# Load variables from the local .env file so the API key is never written
# into the notebook itself.
load_dotenv("./.env")

# os.getenv returns None when the variable is unset. That None would then be
# passed straight to the Google embedding and chat clients further down, so
# flag the missing key here, where the cause is obvious.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    warnings.warn(
        "GEMINI_API_KEY is not set; add it to ./.env or export it before "
        "running the embedding and chat-model cells."
    )

In [1]:
# PDF reader: load Book.pdf into a list of Document objects, `docs`.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Book.pdf")
docs = loader.load()

# Preview only the first couple of entries; displaying the full list would put
# the entire book into the cell output. `docs` itself is left untouched.
docs[:2]

[Document(page_content='', metadata={'source': 'Book.pdf', 'page': 0}),
 Document(page_content="Table\tof\tContents\n\t\n1.\t\nTitle\tPage\n2.\t\nFastlane\tResources\n3.\t\nTestimonials\n4.\t\nAcknowledgements\n5.\t\nTable\tof\tContents\n6.\t\nPreface\n7.\t\nIntroduction\n8.\t\nPART\t1:\tWealth\tin\ta\tWheelchair…“Get\tRich\tSlow”\tis\tGet\tRich\tOld\n9.\t\nCHAPTER\t1\t--\tThe\tGreat\tDeception\n10.\t\nCHAPTER\t2\t--\tHow\tI\tScrewed\t“Get\tRich\tSlow”\n11.\t\nPART\t2:\tWealth\tis\tNot\ta\tRoad,\tBut\ta\tRoad\tTrip\n12.\t\nCHAPTER\t3\t--\tThe\tRoad\tTrip\tto\tWealth\n13.\t\nCHAPTER\t4\t--\tThe\tRoadmaps\tto\tWealth\n14.\t\nPART\t3:\tThe\tRoad\tMost\tTraveled:\tThe\tSidewalk\n15.\t\nCHAPTER\t5\t--\tThe\tSidewalk\tRoadmap\n16.\t\nCHAPTER\t6\t--\tHas\tYour\tWealth\tBeen\tToxified?\n17.\t\nCHAPTER\t7\t--\tMisuse\tMoney\tand\tMoney\tWill\tMisuse\tYou\n18.\t\nCHAPTER\t8\t--\tLucky\tBastards\tPlay\tthe\tGame!\n19.\t\nCHAPTER\t9\t--\tWealth\tDemands\tAccountability\n20.\t\nPART\t4:\tMediocrity

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded pages into smaller overlapping chunks for embedding.
# NOTE: the variable name `text_spilitter` (sic) is kept as-is because it is a
# notebook-level name.
text_spilitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
document = text_spilitter.split_documents(docs)

# Preview the first five chunks only.
document[:5]

[Document(page_content="Table\tof\tContents\n\t\n1.\t\nTitle\tPage\n2.\t\nFastlane\tResources\n3.\t\nTestimonials\n4.\t\nAcknowledgements\n5.\t\nTable\tof\tContents\n6.\t\nPreface\n7.\t\nIntroduction\n8.\t\nPART\t1:\tWealth\tin\ta\tWheelchair…“Get\tRich\tSlow”\tis\tGet\tRich\tOld\n9.\t\nCHAPTER\t1\t--\tThe\tGreat\tDeception\n10.\t\nCHAPTER\t2\t--\tHow\tI\tScrewed\t“Get\tRich\tSlow”\n11.\t\nPART\t2:\tWealth\tis\tNot\ta\tRoad,\tBut\ta\tRoad\tTrip\n12.\t\nCHAPTER\t3\t--\tThe\tRoad\tTrip\tto\tWealth\n13.\t\nCHAPTER\t4\t--\tThe\tRoadmaps\tto\tWealth\n14.\t\nPART\t3:\tThe\tRoad\tMost\tTraveled:\tThe\tSidewalk\n15.\t\nCHAPTER\t5\t--\tThe\tSidewalk\tRoadmap\n16.\t\nCHAPTER\t6\t--\tHas\tYour\tWealth\tBeen\tToxified?\n17.\t\nCHAPTER\t7\t--\tMisuse\tMoney\tand\tMoney\tWill\tMisuse\tYou\n18.\t\nCHAPTER\t8\t--\tLucky\tBastards\tPlay\tthe\tGame!\n19.\t\nCHAPTER\t9\t--\tWealth\tDemands\tAccountability\n20.\t\nPART\t4:\tMediocrity:\tThe\tSlowlane\tRoadmap\n21.\t\nCHAPTER\t10\t--\tThe\tLie\tYou've\tBee

In [17]:
## Vector Embedding and FAISS Vector Store
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client, authenticated with the key read from the environment.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GEMINI_API_KEY,
)

# Only the first 20 chunks are embedded and indexed, so the retriever built
# from `db` can only ever return content from those chunks.
db = FAISS.from_documents(
    document[:20],
    gemini_embeddings,
)

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate the final answers; sampling settings are fixed
# here (temperature 0.7, top_p 0.85).
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.7,
    top_p=0.85,
    google_api_key=GEMINI_API_KEY,
)

In [11]:
## Design ChatPrompt Template
# The template has two placeholders, {context} and {input}.
#   - {input} matches the "input" key passed to `retriever_chain.invoke`.
#   - {context} is presumably filled with the retrieved documents by the
#     stuff-documents chain -- TODO confirm against the LangChain docs.
# The leading whitespace on each line is part of the string literal, so it is
# part of the prompt text as well.
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
                                            Answer the following question based only on the provided context. Think step by step before providing a detailed answer. I will tip you $1000 if the user finds the answer helpful.
                                            <context> {context}</context>
                                            Question: {input}
                                        """)


In [12]:
## Chain Introduction
## Create Stuff Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the chat model and the prompt template into a single chain object.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [18]:
# Wrap the FAISS store in a retriever; no arguments are passed, so the store's
# default retriever settings apply.
retriever = db.as_retriever()
retriever  # display the retriever's repr

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001FF6D473390>)

In [19]:
from langchain.chains import create_retrieval_chain

# Join the retriever and the document chain into one chain, which is invoked
# with an {"input": ...} dict.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [24]:
# Run the full retrieval chain on a sample query. The result is a dict that
# includes the original 'input' and the retrieved 'context' documents.
response = retriever_chain.invoke(
    {"input": "No part of this book may be reproduced in any form"}
)

# Show the whole response dict.
response

{'input': 'No part of this book may be reproduced in any form',
 'context': [Document(page_content='by\tMJ\tDeMarco\nCopyright\t2011\tMJ\tDeMarco,\tAll\trights\treserved.\nNo\tpart\tof\tthis\tbook\tmay\tbe\treproduced\tin\tany\tform\tor\tby\tany\telectronic\tor\nmechanical\tmeans,\tincluding\tinformation\tstorage\tand\tretrieval\tsystems,\twithout\npermission\tin\twriting\tfrom\tthe\tpublisher.\tThe\tonly\texception\tis\tby\ta\treviewer,\nwho\tmay\tquote\tshort\texcerpts\tin\ta\tpublished\treview.\nPublished\tby\tViperion\tPublishing\tCorporation\nPO\tBox\t93124,\tPhoenix,\tAZ\t85070\nhttp://www.viperionpublishing.com\nISBN\tEBOOK:\t978-0-9843581-1-3\nISBN\tPAPERBACK:\t978-0-9843581-0-6\nLibrary\tof\tCongress\tControl\tNumber:\t2010934089\nCover\tdesign\tby\tMJ\tDeMarco\nInterior\tdesign\tby\tFiona\tRaven\nPrinted\tin\tthe\tUSA\nThe\tinformation\tpresented\therein\trepresents\tthe\tview\tof\tthe\tauthor\tas\tof\tthe\tdate\nof\tpublication.\tThis\tbook\tis\tpresented\tfor\tinformational

In [28]:
# Show just the model's generated answer from the response dict.
response["answer"]

'**Step 1: Identify the relevant information in the context.**\n\nThe provided context is the copyright page of a book titled "The Millionaire Fastlane" by MJ DeMarco.\n\n**Step 2: Locate the specific information requested.**\n\nThe question asks about the copyright notice for the book.\n\n**Step 3: Extract the answer from the context.**\n\nThe copyright notice states: "No part of this book may be reproduced in any form or by any electronic or mechanical means, including information storage and retrieval systems, without permission in writing from the publisher."\n\n**Answer:**\n\nNo part of this book may be reproduced in any form without permission in writing from the publisher.'