In [2]:
from langchain import  hub
from langchain_community.llms import Ollama
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [4]:
# Location of the source PDF to index (a relative path, so it is resolved
# against the directory the notebook kernel is running in).
PDF_PATH = 'data/fundamentals-of-foodnutrition-and-diet-therapy.pdf'

In [5]:
# Read the PDF at PDF_PATH into LangChain documents.
# NOTE(review): presumably one Document per PDF page, each tagged with
# 'source' and 'page' metadata -- confirm against the PyPDFLoader docs.
loader = PyPDFLoader(PDF_PATH)
data = loader.load()

In [13]:
# Chunking parameters, named so they can be tuned in one place.
CHUNK_SIZE = 1000    # upper bound on the length of each chunk
CHUNK_OVERLAP = 0    # amount of text shared between consecutive chunks

# Break the loaded documents into smaller chunks so that each one can be
# embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(data)

# Show only the chunk count and a short preview; echoing the whole list
# floods the notebook output with the full text of the book.
print(f"{len(splits)} chunks")
splits[:3]

[Document(page_content='This page\nintentionally left\nblank', metadata={'source': 'data/fundamentals-of-foodnutrition-and-diet-therapy.pdf', 'page': 2}),
 Document(page_content='Copyright © 2007, 1982, New Age International (P) Ltd., Publishers\nPublished by New Age International (P) Ltd., Publishers\nAll rights reserved.\nNo part of this ebook may be reproduced in any form, by photostat, microfilm,\nxerography, or any other means, or incorporated into any information retrievalsystem, electronic or mechanical, without the written permission of the publisher.All inquiries should be emailed to  rights@newagepublishers.com\nPUBLISHING  FOR ONE WORLD\nNEW AGE INTERNATIONAL (P) LIMITED, PUBLISHERS4835/24, Ansari Road, Daryaganj, New Delhi - 110002Visit us at  www.newagepublishers.comISBN (13) : 978-81-224-2972-5', metadata={'source': 'data/fundamentals-of-foodnutrition-and-diet-therapy.pdf', 'page': 4}),
 Document(page_content='Foreword\nFOODS AND NUTRITION  are essential for maintaining g

In [12]:
# Give every chunk a deterministic id (source, page, position in `splits`).
# Without explicit ids each run of this cell stores the chunks under fresh
# random ids, so re-running it in a live kernel can leave duplicate entries
# in the store and duplicate passages in the retrieved context.
# NOTE(review): this relies on Chroma overwriting entries whose id already
# exists -- confirm for the installed langchain_chroma version. After changing
# the chunking parameters, restart the kernel so stale chunks are dropped.
chunk_ids = [
    f"{doc.metadata.get('source')}:{doc.metadata.get('page')}:{i}"
    for i, doc in enumerate(splits)
]

# Embed the chunks with GPT4All embeddings and index them in Chroma.
vector_store = Chroma.from_documents(
    documents=splits,
    embedding=GPT4AllEmbeddings(),
    ids=chunk_ids,
)

# Expose the store through the retriever interface used by the RAG chain.
retriever = vector_store.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GPT4AllEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x126f62df0>)

In [8]:
# Pull the "rlm/rag-prompt" prompt template from LangChain Hub.
# NOTE(review): presumably requires network access on each run -- confirm.
prompt = hub.pull("rlm/rag-prompt")

In [9]:

# LLM: the 'llama3' model served through Ollama (not an OpenAI model).
# NOTE(review): presumably needs a running Ollama server with the model
# already pulled -- confirm the setup steps.
llm = Ollama(model='llama3')


In [11]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: iterable of documents exposing a ``page_content`` attribute.

    Returns:
        The documents' text, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG chain: the incoming question is sent to the retriever, and the retrieved
# chunks are flattened to plain text before filling the prompt's "context"
# slot. Passing the raw Document list would put its Python repr (metadata
# included) into the prompt. The question itself is forwarded unchanged.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:
# Run the RAG chain on a sample query and print the generated answer.
question = 'nutrition for cancer'
answer = rag_chain.invoke(question)
print(answer)

Nutrition plays a crucial role in cancer patients, as it can meet their increased energy demands due to the hypermetabolic state caused by cancer. A diet plan tailored to the patient's needs and preferences is essential, taking into account factors such as appetite, absorption, and specific dietary requirements (e.g., low-residue or soft diets for certain individuals).
