## Retriever And Chain With Langchain


In [None]:
import os
from dotenv import load_dotenv

# python-dotenv: load variables from a .env file (when one is found) into the
# process environment so the os.getenv lookups below can see them.
load_dotenv()

# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
# Configure LangSmith tracing.
# os.environ only accepts str values, so assigning os.getenv(...) directly raises
# TypeError whenever LANGCHAIN_API_KEY is missing. Look the key up first and only
# enable tracing when it is actually available.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if langchain_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
else:
    # Without a key, traces cannot be uploaded; keep the notebook runnable and
    # make the degraded mode visible instead of crashing in the first cell.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

In [None]:
# from langchain_community.document_loaders import PyPDFLoader

# loader = PyPDFLoader("Jenkins-Readme.pdf")
# docs = loader.load()
# docs

In [None]:
from git import Repo
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language


In [None]:
# Local checkout of the Java project that will be indexed.
repo_path = "sqadrivers"

# Fail fast when the checkout is missing. Otherwise `repo` is never bound, the
# next cell raises NameError, and the loader silently scans a non-existent path.
if not os.path.isdir(repo_path):
    raise FileNotFoundError(
        f"Repository directory {repo_path!r} was not found. Clone the project "
        "into it first, e.g. Repo.clone_from(<remote-url>, repo_path), then "
        "re-run this cell."
    )

# The checkout exists: open it and pull the newest data from `origin`.
repo = Repo(repo_path)
repo.remotes.origin.pull()

In [None]:
# Display the Repo object bound in the previous cell (sanity check).
repo

In [None]:
# Load: walk the checkout and parse every ".java" file with the Java-aware
# LanguageParser, then report how many documents were produced.
java_parser = LanguageParser(language=Language.JAVA)
loader = GenericLoader.from_filesystem(
    repo_path, glob="**/*", suffixes=[".java"], parser=java_parser
)
documents = loader.load()
len(documents)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents with a splitter configured for Java:
# chunk_size=2000 with chunk_overlap=200 between neighbouring chunks.
java_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.JAVA,
    chunk_size=2000,
    chunk_overlap=200,
)
texts = java_splitter.split_documents(documents)

# Number of chunks produced.
len(texts)

In [None]:
# `texts` already holds the chunks produced by java_splitter in the previous cell;
# running split_documents over them a second time is a redundant pass over
# already-split data. Rebind `documents` to those chunks, since the vector-store
# cell below indexes `documents[:30]`.
documents = texts

# Preview a few chunks instead of dumping the entire list into the cell output.
documents[:3]

In [None]:
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# db = FAISS.from_documents(documents[:30], OpenAIEmbeddings())
# Embed the first 30 documents with the Ollama "starcoder2:3b" model and build
# an in-memory FAISS index from them.
code_embeddings = OllamaEmbeddings(model="starcoder2:3b")
db = FAISS.from_documents(documents[:30], code_embeddings)

In [None]:
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Reuse a persisted index when one exists; otherwise persist the index built in
# the previous cell. Loading unconditionally fails on a fresh run, because nothing
# ever writes "faiss_index" to disk.
FAISS_INDEX_DIR = "faiss_index"

if os.path.isdir(FAISS_INDEX_DIR):
    # NOTE(security): allow_dangerous_deserialization=True lets FAISS unpickle the
    # stored docstore. Only load an index directory you created yourself; never
    # point this at files from an untrusted source.
    db = FAISS.load_local(
        FAISS_INDEX_DIR,
        OllamaEmbeddings(model="starcoder2:3b"),
        allow_dangerous_deserialization=True,
    )
else:
    # First run: `db` is the index built above; save it for later sessions.
    db.save_local(FAISS_INDEX_DIR)
db

In [None]:
# Smoke-test the vector store: search for a keyword and show the text of the
# first hit.
query = "iosdrivers"
result = db.similarity_search(query)
best_match = result[0]
best_match.page_content

In [10]:
from langchain_community.llms import Ollama

## Load the Ollama LLM (model "starcoder2:3b")
llm = Ollama(model="starcoder2:3b")
llm

Ollama(model='starcoder2:3b')

In [11]:
## Design ChatPrompt Template
# The template text exposes two placeholders: {context} and {input}.
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}"""
)

In [12]:
## Chain Introduction
## Create Stuff Document Chain
# Builds `document_chain` from the `llm` and `prompt` objects defined in the
# cells above.

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm, prompt)

In [13]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Wrap the FAISS vector store `db` in a retriever and display it.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002997EE4F0D0>)

In [14]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""

from langchain.chains import create_retrieval_chain

# Combine the retriever with the document chain into one retrieval chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [15]:
# Run the retrieval chain once; the question is passed under the "input" key.
response = retrieval_chain.invoke(
    {"input": "Write a function to compare two excel documents"}
)

In [16]:
# Show the value stored under the "answer" key of the chain response.
response["answer"]

'. The function takes the name of two excel file as input and returns True if both files are same. Return False otherwise. \n/TestProject/src/com/simplifyqa/Utility/InitializeDependence.java\npackage com.simplifyqa.Utility;\r\n\r\nimport java.io.BufferedReader;\r\nimport java.io.File;\r\nimport java.io.FileNotFoundException;\r\nimport java.io.FileReader;\r\nimport java.lang.reflect.Field;\r\nimport java.net.URL;\r\nimport java.nio.file.Paths;\r\nimport java.text.DateFormat;\r\nimport java.text.SimpleDateFormat;\r\nimport java.util.Date;\r\n\r\npublic class InitializeDependence {\r\n\tpublic static String currentDirectory = Paths.get("").toAbsolutePath().normalize().toString();\r\n\r\n\tpublic static String param_description_value = null;\r\n\r\n\tpublic static String param_description_name = null;\r\n\t\r\n\tpublic static String brName = "Desktop";\r\n\r\n\r\n\tpublic static String configFile = "Configuration.properties";\r\n\tpublic static String default_browser = "chrome";\r\n\tpubli