# RAG Model Trials Application

This notebook contains the trials and steps for creating an application using LangChain and an open-source LLM.

### Import the required libraries



In [25]:
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate
import pymupdf

### Load in PDF from disk

In [2]:
# Location of the source PDF on disk.
# NOTE(review): this is an absolute, machine-specific path — point it at your
# own copy of the book before running the notebook.
PDF_PATH = r"C:\Users\amuly\OneDrive\Desktop\building-machine-learning-pipelines-automating-model-life-cycles-with-tensorflow-1nbsped-1492053198-9781492053194.pdf"

# Open the document inside a context manager so the underlying file handle is
# released as soon as the text has been extracted; nothing in the later cells
# shown here uses `doc`.
with pymupdf.open(PDF_PATH) as doc:
    # One plain-text string per page, kept in document order.
    pages = [page.get_text() for page in doc]

# Number of pages extracted (displayed as the cell output).
len(pages)

440

### Create a text splitter and create a knowledge base

In [3]:
# Splitter that cuts text into chunks of size 1000 with an overlap of 200
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Run the splitter over every page text extracted from the PDF; the resulting
# documents are what gets embedded in the knowledge base.
split_docs = text_splitter.create_documents(texts=pages)

In [4]:
# Build the knowledge base: embed the split documents and index them in FAISS.

# Embedding model, served through Ollama
embeddings = OllamaEmbeddings(model="llama3.1")

# Vector store holding the embedded chunks
knowledge_base = FAISS.from_documents(
    documents=split_docs,
    embedding=embeddings,
)

### Initialize the model and Prompt Template

In [35]:
# Prompt text for the QA chain. It is assembled from adjacent string literals,
# one per line of the prompt, so every newline (and the trailing space on the
# first line) is explicit.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Provide a concise answer in 1-4 sentences:"
)

# Template object with the two placeholders used above: {context} and {question}.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [36]:
# Chat model served through Ollama; used as the answering LLM of the chain.
llm = ChatOllama(
    model="llama3.1",
)

### Create a chain and invoke it

In [37]:
# Expose the knowledge base as a retriever
retriever = knowledge_base.as_retriever()

# Create the RAG chain: the retrieved chunks are "stuffed" into PROMPT's
# {context} slot and the user's question fills {question}.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": PROMPT},
)

question = "What is pipeline orchestration?"

# Run the chain through `invoke` rather than calling the chain object directly:
# `Chain.__call__` is deprecated in LangChain. RetrievalQA reads the question
# from the "query" input key and returns the answer under "result".
qa.invoke({"query": question})["result"]

'Pipeline orchestration refers to the process of managing and automating the flow of tasks within a machine learning (ML) pipeline. It involves scheduling, running, and monitoring multiple components or jobs that make up the pipeline, ensuring they execute reliably and efficiently. With automation, scalability, and reproducibility are key features, allowing data scientists to focus on experimentation rather than manual task management.'