# LangChain

In [None]:
!pip install -q -U ragatouille
!pip install -q langchain
!pip install -q langchain-openai
!pip install -q langchain-core
!pip install -q langchain-community
!pip install -q pypdf

In [None]:
from ragatouille import RAGPretrainedModel

# Name of the pretrained checkpoint handed to from_pretrained().
COLBERT_CHECKPOINT = "colbert-ir/colbertv2.0"

# Load the model once; later cells reuse `RAG` for indexing, searching and
# for building the LangChain retriever.
RAG = RAGPretrainedModel.from_pretrained(COLBERT_CHECKPOINT)

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the paper from the current working directory.
loader = PyPDFLoader("Orca_paper.pdf")

# Use load() rather than load_and_split(): the latter additionally runs
# LangChain's default text splitter, whose chunk overlap would end up
# duplicated once the pieces are concatenated back into a single string.
# Chunking is left to RAG.index(split_documents=True), so one Document per
# PDF page is all that is needed here.
pages = loader.load()



In [None]:
# Show how many Document objects the loader returned.
len(pages)

In [None]:
# Merge the text of every loaded Document into one string.
# str.join builds the result in a single pass (repeated += may degrade to
# quadratic copying), and the newline separator keeps the last word of one
# Document from being fused with the first word of the next.
full_document = "\n".join(page.page_content for page in pages)

In [None]:
# Print the whole concatenated text (the output can be very long).
print(full_document)

In [None]:
# Sanity check: the concatenated text is a plain str.
type(full_document)

In [None]:
# Indexing settings, gathered in one place so they are easy to adjust.
index_options = {
    "index_name": "orca_paper",
    "max_document_length": 512,
    "split_documents": True,
}

# Build the index; the whole paper is passed as a single-item collection.
# The call stays the cell's final expression so the notebook still displays
# whatever RAG.index() returns.
RAG.index(collection=[full_document], **index_options)

### Do Retrieval

In [None]:
# Query the index directly through the model, asking for k=3 results.
results = RAG.search(query="What is instruction tuning?", k=3)


In [None]:
# Display the raw search results.
results

### Use as LangChain Retriever

In [None]:
# Expose the model as a LangChain retriever, configured with k=3.
retriever = RAG.as_langchain_retriever(k=3)

In [None]:
# Call the retriever on its own and display what it returns for the question.
retriever.invoke("What is instruction tuning?")

### Create a Chain

In [None]:
import os

try:
    # In Google Colab the key is read from the notebook secret named "openai".
    from google.colab import userdata
except ImportError:
    # Not running in Colab: keep a key that is already exported, otherwise
    # prompt for it so the secret is never hard-coded in the notebook.
    if not os.environ.get("OPENAI_API_KEY"):
        from getpass import getpass

        os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
else:
    os.environ["OPENAI_API_KEY"] = userdata.get('openai')

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Prompt text with two placeholders: {context} and {input} (the question).
QA_TEMPLATE = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
prompt = ChatPromptTemplate.from_template(QA_TEMPLATE)

# Chat model with the library's default settings.
llm = ChatOpenAI()

# Chain that feeds a set of documents plus the question to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full pipeline: the retriever supplies documents to the document chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [None]:
# Run the whole chain once and display the raw output.
# NOTE(review): the result is not stored here; the next cell issues the same
# call again to capture it, so the question is sent twice.
retrieval_chain.invoke({"input": "What is instruction tuning?"})

In [None]:
# Keep the chain output in `response` so individual fields can be read later.
response = retrieval_chain.invoke({"input": "What is instruction tuning?"})

In [None]:
# Show only the "answer" entry of the chain output.
response["answer"]

# LlamaIndex

In [None]:
!pip install -q llama-index
!pip install -q llama-hub
!pip install -q llama-index-core
!pip install -q llama-index-llms-openai

In [None]:
# NOTE(review): VectorStoreIndex is imported but not used in the cells shown
# here — confirm whether it is still needed.
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Read the same PDF used in the LangChain section, this time through
# LlamaIndex's reader.
reader = SimpleDirectoryReader(input_files=["Orca_paper.pdf"])
docs = reader.load_data()

In [None]:
# docs

In [None]:
from llama_index.core.llama_pack import download_llama_pack

# download and install dependencies
# The call returns the pack class, which is instantiated in a later cell.
# "./ragatouille_pack" is presumably the local download directory — confirm.
RAGatouilleRetrieverPack = download_llama_pack(
    "RAGatouilleRetrieverPack", "./ragatouille_pack"
)

In [None]:
from llama_index.llms.openai import OpenAI

In [None]:
# create the pack
# create the pack
# NOTE(review): index_name="orca_paper" is the same name given to RAG.index()
# in the LangChain section; presumably the pack builds its own index under
# that name — confirm it does not clobber the earlier one.
ragatouille_pack = RAGatouilleRetrieverPack(
    docs,  # List[Document]
    llm=OpenAI(model="gpt-3.5-turbo"),
    index_name="orca_paper",
    top_k=5,
)

In [None]:
# Ask the pack the same question used in the LangChain section.
# Note that this rebinds `response`, replacing the LangChain chain output.
response = ragatouille_pack.run("What is instruction tuning? ")


In [None]:
# Display the response object as returned by the pack.
response

In [None]:
# Print the response's string form.
print(response)