In [27]:
import hashlib
import os
import tempfile

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [28]:
# Read the OpenAI API key from the environment; show a UI error when it is
# missing or empty so the user knows what to configure.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key in (None, ""):
    st.error("Please set the OPENAI_API_KEY environment variable.")

In [29]:
# Save the uploaded file to disk, load it as text, then split the document into chunks.

def load_and_split_text_file(file):
    """Write an uploaded file to a temporary path, load it, and split it into chunks.

    Args:
        file: Uploaded file object exposing ``getbuffer()`` (the raw bytes of
            the upload).

    Returns:
        list[str]: The text chunks produced by ``CharacterTextSplitter``
        (newline separator, chunk_size=200, chunk_overlap=20) for every
        document returned by ``TextLoader``.
    """
    # Use a unique temporary file instead of a fixed "uploaded_file.txt" in the
    # working directory: a shared path lets concurrent sessions overwrite each
    # other's upload and leaves the file behind after processing.
    # delete=False so the loader can reopen the path (required on Windows);
    # the file is removed explicitly below.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".txt", delete=False) as tmp:
        tmp.write(file.getbuffer())
        tmp_path = tmp.name

    try:
        documents = TextLoader(tmp_path).load()
    finally:
        # Always clean up, even when loading fails.
        os.remove(tmp_path)

    splitter = CharacterTextSplitter(separator="\n", chunk_size=200, chunk_overlap=20)
    return [
        chunk
        for doc in documents
        for chunk in splitter.split_text(doc.page_content)
    ]

In [30]:
def create_embeddings(chunks):
    """Embed every text chunk with the OpenAI embeddings model.

    Args:
        chunks: List of text chunks to embed.

    Returns:
        tuple: ``(vectors, chunks)`` where ``vectors`` is the result of
        ``embed_documents(chunks)`` and ``chunks`` is the input, unchanged.
    """
    vectors = OpenAIEmbeddings(openai_api_key=api_key).embed_documents(chunks)
    return vectors, chunks

In [31]:
def create_faiss_store(embeddings, chunks):
    """Build a FAISS vector store from precomputed embeddings and their texts.

    Args:
        embeddings: List of embedding vectors, one per chunk.
        chunks: List of chunk texts, in the same order as ``embeddings``.

    Returns:
        The FAISS vector store created by ``FAISS.from_embeddings``.

    Raises:
        ValueError: If ``embeddings`` and ``chunks`` differ in length.
    """
    if len(embeddings) != len(chunks):
        # zip() would silently drop the unmatched tail; fail loudly instead.
        raise ValueError(
            f"Got {len(embeddings)} embeddings for {len(chunks)} chunks; "
            "they must have the same length."
        )

    # FAISS.from_embeddings takes (text, vector) pairs as its first argument
    # and an Embeddings object as its second; the store uses that object to
    # embed queries at retrieval time.
    embedding_model = OpenAIEmbeddings(openai_api_key=api_key)
    text_embedding_pairs = list(zip(chunks, embeddings))
    return FAISS.from_embeddings(text_embedding_pairs, embedding_model)

In [32]:
def get_retriever(faiss_index):
    """Return the retriever produced by ``faiss_index.as_retriever()``.

    Args:
        faiss_index: Vector store exposing an ``as_retriever()`` method.

    Returns:
        The retriever object, created with the store's default settings.
    """
    retriever = faiss_index.as_retriever()
    return retriever

In [33]:
def main():
    """Run the Streamlit app: upload a text file, index it, and answer questions.

    Flow:
      1. Ask the user to upload a ``.txt`` file.
      2. Split the file into chunks, embed them, build a FAISS store, and wrap
         its retriever in a ConversationalRetrievalChain (GPT-4, temperature 0,
         with a ConversationBufferMemory).
      3. Answer the user's question with the chain. When the answer is empty
         or contains "I don't know", ask the chat model directly instead.

    The chat model and chain are cached in ``st.session_state`` keyed by a
    SHA-256 digest of the uploaded bytes. Streamlit re-executes the script on
    every widget interaction; without the cache each question would re-embed
    the whole document and start from an empty conversation memory.
    """
    st.title("Upload a Text File and Query It")

    if not api_key:
        # The missing-key error is already displayed by the module-level
        # check; stop here so no OpenAI call is attempted without a key.
        return

    uploaded_file = st.file_uploader("Choose a text file", type=["txt"])
    if uploaded_file is None:
        st.info("Please upload a text file.")
        return

    session_key = "document_qa"
    file_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    session = st.session_state.get(session_key)

    # (Re)build the index and chain only for a new or changed upload.
    if session is None or session["digest"] != file_digest:
        chunks = load_and_split_text_file(uploaded_file)
        if not chunks:
            st.error("No chunks were loaded.")
            return

        embeddings, chunk_texts = create_embeddings(chunks)
        faiss_store = create_faiss_store(embeddings, chunk_texts)
        retriever = get_retriever(faiss_store)

        # Setting up the QA chain
        llm = ChatOpenAI(model_name="gpt-4", temperature=0, openai_api_key=api_key)
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

        session = {
            "digest": file_digest,
            "num_chunks": len(chunks),
            "llm": llm,
            "qa_chain": qa_chain,
        }
        st.session_state[session_key] = session

    st.success("File successfully uploaded and processed!")
    st.write("Number of chunks loaded:", session["num_chunks"])
    st.write("Embeddings and FAISS store created successfully!")

    # Query input box
    user_query = st.text_input("Ask a question about the document:")
    if not user_query:
        return

    # ConversationalRetrievalChain takes its input under "question" and returns
    # its output under "answer" ("query"/"result" belong to RetrievalQA).
    result = session["qa_chain"].invoke({"question": user_query})
    response = result["answer"]

    # Check if the response is relevant
    if not response or "I don't know" in response:
        st.write("The document did not contain relevant information. Querying GPT model directly.")
        # Fallback to the chat model directly. invoke() returns a message
        # object whose text is in .content, not a raw OpenAI response dict.
        gpt_response = session["llm"].invoke(user_query).content
        st.write("GPT Response:", gpt_response)
    else:
        st.write("Response:", response)

# Run the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()