In [None]:
!pip install streamlit

In [None]:
!pip install -qqq streamlit --progress-bar off
!npm install -qqq localtunnel --progress-bar off

In [None]:
!pip3 install -qqq langchain --progress-bar off
!pip3 install -qqq llama-cpp-python --progress-bar off
!pip3 install -qqq sentence_transformers --progress-bar off
!pip3 install -qqq faiss-gpu --progress-bar off

!huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.1-GGUF mistral-7b-instruct-v0.1.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

In [None]:
!pip install PyPDF2
!pip install pypdf

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Paths of the five recipe PDFs (assumed to already exist under /content).
pdf_files = [
    "/content/Chicken-Recipes-obooko-fd0004.pdf",
    "/content/Cookie-Jar-obooko-fd0018.pdf",
    "/content/InfantFood-obooko-fd0006.pdf",
    "/content/KidsRecipes-obooko-fd0002.pdf",
    "/content/homemade-jam-recipes-obooko.pdf"
]

# Read every PDF into one flat list of page-level documents.
# load() is used instead of load_and_split(): the latter already runs a default
# text splitter, so following it with our own splitter chunked the text twice.
all_pages = []
for file_path in pdf_files:
    all_pages.extend(PyPDFLoader(file_path).load())

# Chunk the pages with the same settings rag_app.py uses (1000 / 200), so this
# preview reflects what the app will actually index.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(all_pages)

# Quick sanity check of how much text was loaded.
print(f"{len(all_pages)} pages -> {len(docs)} chunks")


In [None]:
%%writefile rag_app.py

from langchain.llms import LlamaCpp
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
import streamlit as st

from langchain.text_splitter import RecursiveCharacterTextSplitter



from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# List of file paths for the five PDFs
pdf_files = [
    "/content/Chicken-Recipes-obooko-fd0004.pdf",
    "/content/Cookie-Jar-obooko-fd0018.pdf",
    "/content/InfantFood-obooko-fd0006.pdf",
    "/content/KidsRecipes-obooko-fd0002.pdf",
    "/content/homemade-jam-recipes-obooko.pdf"
]


@st.cache_resource
def load_recipe_chunks():
    """Load every PDF in ``pdf_files`` and split the pages into overlapping chunks.

    Streamlit re-executes this script on every user interaction; caching the
    result means the PDFs are parsed once per server process instead of once
    per chat message.

    Returns:
        list: the chunked LangChain documents (chunk_size=1000, chunk_overlap=200).
    """
    pages = []
    for file_path in pdf_files:
        # load() rather than load_and_split(): the latter applies a default
        # splitter first, which would chunk the text twice.
        pages.extend(PyPDFLoader(file_path).load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(pages)


# Chunked recipe text used to build the vector index.
docs = load_recipe_chunks()



# llm
@st.cache_resource
def load_llm():
    """Load the local Mistral-7B-Instruct GGUF model through llama.cpp.

    Cached because Streamlit re-runs the whole script on every interaction and
    reloading the model weights for each chat message is prohibitively slow.

    max_tokens is kept well below n_ctx so that the prompt (instructions,
    retrieved chunks, and question) still fits in the context window; the
    previous 2000-token budget inside a 2048-token window left almost no room
    for it.
    """
    return LlamaCpp(model_path="/content/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
                    max_tokens=1024,
                    temperature=0.7,
                    top_p=0.7,
                    n_gpu_layers=-1,
                    n_ctx=4096)


llm = load_llm()

# embeddings
embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
embeddings_folder = "/content/"


@st.cache_resource
def load_embeddings():
    """Create the sentence-transformers embedding model, cached under ``embeddings_folder``."""
    return HuggingFaceEmbeddings(model_name=embedding_model,
                                 cache_folder=embeddings_folder)


@st.cache_resource
def build_vector_db():
    """Embed the module-level ``docs`` chunks and index them in a FAISS store.

    Cached so the index is built once per server process rather than on every
    Streamlit rerun (i.e. on every chat message).
    """
    return FAISS.from_documents(docs, load_embeddings())


embeddings = load_embeddings()

# vector database
vector_db = build_vector_db()

# Conversation memory.
# Streamlit re-executes this script on every interaction, so the memory object
# is kept in st.session_state; creating it at module level would start every
# turn with an empty history. `output_key='answer'` tells the memory which of
# the chain's outputs to record.
# The former `chat_history=[]` and `max_history_size=1000` arguments were
# dropped: they are not ConversationBufferMemory fields.
if "memory" not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(memory_key='chat_history',
                                                       return_messages=True,
                                                       output_key='answer')
memory = st.session_state.memory

# prompt

# Prompt template for the answer-generation step. The chain fills {context}
# with the retrieved PDF chunks and {question} with the user's question.
#
# Mistral-Instruct expects the instruction wrapped in one balanced
# [INST] ... [/INST] pair, with the model's answer following the closing tag.
# The earlier version had unbalanced [INST] tags, a literal <s>/</s> pair, and
# ended inside an open instruction, so the model was never cued to answer.
template = """[INST]
You are a polite and professional question-answering AI assistant. You must provide a helpful response to the user.

In your response, PLEASE ALWAYS:
  (0) Be a detail-oriented reader: read the question and context and understand both before answering
  (1) Start your answer with a friendly tone, and reiterate the question so the user is sure you understood it
  (2) Provide a concise and to-the-point answer. Avoid lengthy explanations unless necessary.
  (3) If you can't find the answer, respond with an explanation, starting with: "I couldn't find the answer in the information I have access to".
  (4) Ensure your answer answers the question, is helpful, professional, and formatted to be easily readable.
  (5) If the user asks about your opinion or feelings, reply: "Please note that I am not a human. I provide responses based on the information available to me and do not have personal opinions or feelings."

Answer the following question using the context provided.
The question is surrounded by the tags <q> </q>.
The context is surrounded by the tags <c> </c>.
<q>
{question}
</q>
<c>
{context}
</c>
[/INST]"""




# Wrap the raw template so the chain can substitute {context} and {question}.
prompt = PromptTemplate(input_variables=["context", "question"],
                        template=template)

# Conversational retrieval chain: the retriever returns the 3 nearest chunks
# from the vector store, `memory` supplies the chat history, and `prompt` is
# the template used when combining the retrieved documents into an answer.
recipe_retriever = vector_db.as_retriever(search_kwargs={"k": 3})
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=recipe_retriever,
    memory=memory,
    return_source_documents=False,
    combine_docs_chain_kwargs={"prompt": prompt},
)





##### streamlit #####

st.title("Welcome Food Lovers")

# Transcript shown in the UI. It lives in session_state so it survives the
# script re-run Streamlit performs on every interaction.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the transcript so earlier turns stay visible after a rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input.
# The input is bound to `user_question` rather than `prompt`, so it no longer
# overwrites the module-level PromptTemplate of that name.
if user_question := st.chat_input("What do you wanna eat today! #ConversationalRetrievalChain"):

    # Show the user's message and record it in the transcript.
    st.chat_message("user").markdown(user_question)
    st.session_state.messages.append({"role": "user", "content": user_question})

    # Keep the spinner visible for the whole duration of the model call.
    with st.spinner("Going down the rabbithole for answers..."):
        # invoke() replaces the deprecated direct call `chain(...)`; the chain
        # returns a dict whose "answer" entry holds the generated text.
        result = chain.invoke({"question": user_question})
        response = result["answer"]

    # Show the assistant's reply and record it in the transcript.
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})

In [None]:
# Launch sequence (all on one shell line):
#   1. start the Streamlit app in the background, redirecting its output to /content/logs.txt
#   2. start a localtunnel for port 8501 in the background so the app gets a public URL
#   3. print this machine's public IPv4 address via icanhazip
# NOTE(review): the localtunnel landing page presumably asks for that IP as the
# tunnel password; confirm before sharing the URL.
!streamlit run rag_app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.82.101.238
[K[?25hnpx: installed 22 in 3.268s
your url is: https://cool-dots-speak.loca.lt
