In [11]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
import google.generativeai as genai
import getpass
import os
import markdown

os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google Key: ")
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [24]:
loader = DirectoryLoader('C:/Users/fjdj0/Desktop/Coding/learning/langchain/material/', glob='**/*.pdf', show_progress=True, use_multithreading=True, silent_errors=True)

In [25]:
documents = loader.load()

100%|██████████| 5/5 [00:21<00:00,  4.25s/it]


In [38]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

In [39]:
texts = text_splitter.split_documents(documents)

In [40]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [41]:
docsearch = Chroma.from_documents(texts, embeddings).as_retriever()

In [42]:
from langchain.prompts import ChatPromptTemplate
template = """Answer the question as based only on the following contect: {context}
Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)

In [43]:
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.1, convert_system_message_to_human=True)

In [44]:
query = "Can some some numpy code and why we need to use numpy in simple terms please"
docsearch.get_relevant_documents(query)

[Document(page_content='I will devote a chapter to IPython and how to get the most out of its features. I strongly recommend using it while working through this book.\n\nEssential Python Libraries\n\n| 5\n\nwww.it-ebooks.info', metadata={'source': 'C:\\Users\\fjdj0\\Desktop\\Coding\\learning\\langchain\\material\\Python for Data Analysis Why Python.pdf'}),
 Document(page_content='I will devote a chapter to IPython and how to get the most out of its features. I strongly recommend using it while working through this book.\n\nEssential Python Libraries\n\n| 5\n\nwww.it-ebooks.info', metadata={'source': 'C:\\Users\\fjdj0\\Desktop\\Coding\\learning\\langchain\\material\\Python for Data Analysis Why Python.pdf'}),
 Document(page_content='I will devote a chapter to IPython and how to get the most out of its features. I strongly recommend using it while working through this book.\n\nEssential Python Libraries\n\n| 5\n\nwww.it-ebooks.info', metadata={'source': 'C:\\Users\\fjdj0\\Desktop\\Coding

In [49]:
from langchain.schema.runnable import RunnableMap
chain = RunnableMap({
    "context": lambda x: docsearch.get_relevant_documents(x["question"]),
    "question": lambda x: x["question"]
}) | prompt | model 

In [50]:
response = chain.invoke({"question": query})
print(response)

content='The provided context does not contain any information about numpy code or why we need to use numpy. Therefore, I cannot answer this question.'
