# DO NOT RUN THIS COPY; an exact version is running locally

# We first import all the dependencies

In [2]:
from gemma_instruct import llm as gemma    # This is a new model by Google, I am running this locally
from openai_chatgpt import chatgpt         # This is a chatGPT-3.5 by OpenAI, I am running this using API calls
import os
from generateVectorDB import get_collection, process_pdf  # These functions help me store pdf texts as vectors
from retriever import get_documents_by_query              # This function helps me retrieve the information from the database
import streamlit as st

Overwriting app.py


# Database path setup and connection

In [None]:
# Directory where uploaded PDFs are written before they are processed.
upload_dir = str("./DB/")

# Collection handle shared by the upload step and the retrieval step below.
# NOTE(review): presumably this opens/creates the vector store -- confirm in generateVectorDB.
collection = get_collection()

# We will create a sidebar for file upload

In [None]:
# Sidebar form: upload one PDF, save it under `upload_dir`, then index it.
sidebar = st.sidebar
with sidebar.form(key='file-handler', clear_on_submit=True):
    # Single-file PDF uploader; nothing is processed until the form is submitted.
    uploaded_file = st.file_uploader(label="Upload your doc", type="pdf",
                                     accept_multiple_files=False, key="pdf_upload")
    submitted = st.form_submit_button("Upload")
    if uploaded_file and submitted:
        # The file name comes from the client, so keep only its final path
        # component; this stops names like "../../x.pdf" escaping upload_dir.
        safe_name = os.path.basename(uploaded_file.name)
        # Make sure the target directory exists; open(..., "wb") does not create it.
        os.makedirs(upload_dir, exist_ok=True)
        # os.path.join avoids the doubled separator that os.sep.join produced
        # when upload_dir already ends with "/".
        file_name = os.path.join(upload_dir, safe_name)
        with open(file_name, "wb") as f:
            f.write(uploaded_file.getbuffer())
        # Once the file is saved, hand it to process_pdf together with the collection.
        # Per the original author's notes, the PDF pages are read as images and
        # Tesseract OCR is run on each page.
        process_pdf(file_name, collection)

# Some setup to display the messages properly

In [None]:
# The chat history is kept in session state; create the list the first time through.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay every stored message in its own chat bubble, in the order it was added.
for entry in st.session_state["messages"]:
    role = entry["role"]
    with st.chat_message(role):
        st.markdown(entry["message"])

# Let the user choose between the local model and sending the data to OpenAI's servers

In [None]:
# Radio selector for the backend; the returned value is the selected option string,
# which the chat handling code compares against ":zany_face: LocalGPT".
bot_choice = st.radio(
    label="Which bot would you like to use?",
    options=[":zany_face: LocalGPT", ":sunglasses: ChatGPT"],
    captions=["gemma", "chatGPT"],
)

# Take input and process it according to the choices

In [None]:
# Input from the user comes from the chat input function
if query := st.chat_input("Ask a question!"):
    # Show the user's message and record it in the chat history.
    with st.chat_message("user"):
        st.markdown(query)
        st.session_state.messages.append({"role": "user",
                                          "message": query})

    # Both bots are called the same way (one prompt string in, response out),
    # so pick the callable once and run a single shared pipeline instead of
    # keeping two copy-pasted branches in sync.
    llm = gemma if bot_choice == ":zany_face: LocalGPT" else chatgpt

    # Process the query by sending it to the selected bot
    with st.chat_message("assistant"):
        with st.spinner("Bot thinking ..."):
            # Step 1: have the bot rewrite the question with more context so the
            # retrieval step has a better search query to work with.
            alternate_search_query = llm(f"Rewrite the following question encase in backticks, "
                                         f"adding more context, for retrieving information.\n`{query}`")

            # Step 2: retrieve documents for the rewritten query and join them
            # into one context string, separated by a divider line.
            context_docs = '===========\n'.join(get_documents_by_query(alternate_search_query, collection))
            # Debug output: goes to the process's stdout, not to the page.
            print(context_docs)

            # Step 3: ask the same bot to answer the ORIGINAL question using
            # the retrieved pages as context.
            bot_response = llm(f"Given the following pages, answer the question to the best of your "
                               f"ability.\n\nPages:\n{context_docs}\n\nQuestion: {query}")

        # Show the answer and record it in the chat history.
        st.markdown(bot_response)
        st.session_state.messages.append({"role": "assistant",
                                          "message": bot_response})