In [12]:
! pip install streamlit
! pip install python-dotenv
! pip install langchain
! pip install -U langchain-community
! pip install pypdf
! pip install InstructorEmbedding
! pip install sentence-transformers==2.2.2
! pip install faiss-gpu
! pip install faiss-cpu
! pip install PyPDF2
! pip install ctransformers



In [13]:
import os
from google.colab import drive

# Show where the kernel starts before anything is mounted or changed.
print(f"Working Directory: {os.getcwd()}")

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Project folder on Drive; files written by later cells (e.g. via %%writefile)
# are created relative to this directory once it becomes the working directory.
folder_path = "/content/drive/MyDrive/Chatbot-LangChain-RAG"
os.chdir(folder_path)

# Confirm the switch.
print(f"Updated Working Directory: {os.getcwd()}")

Working Directory: /content/drive/MyDrive/Chatbot-LangChain-RAG
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Updated Working Directory: /content/drive/MyDrive/Chatbot-LangChain-RAG


In [19]:
%%writefile app.py

import html
import os

import streamlit                      as st

from dotenv                           import load_dotenv
from PyPDF2                           import PdfReader
from langchain_text_splitters         import CharacterTextSplitter
from langchain_community.embeddings   import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.memory                 import ConversationBufferMemory
from langchain.chains                 import ConversationalRetrievalChain
from langchain_community.llms         import HuggingFaceHub
from langchain_community.llms         import CTransformers
from htmlTemplates                    import css, bot_template, user_template

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.
            ``None`` or an empty iterable yields an empty string.

    Returns:
        A single string with the text of all pages, in document and page
        order, joined without any separator.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Fall back to "" in case a page yields no text (None or empty),
            # so that one such page cannot abort the whole extraction.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string inside the loop.
    return "".join(page_texts)


def get_text_chunks(text):
    """Split ``text`` into chunks with ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, ``chunk_size=1000``
    and ``chunk_overlap=200``; lengths are measured with ``len``.

    Args:
        text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)


def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Args:
        text_chunks: The texts to embed and store.

    Returns:
        The object produced by ``FAISS.from_texts``.

    NOTE(review): the model below is a plain sentence-transformers checkpoint
    loaded through ``HuggingFaceInstructEmbeddings``; confirm this pairing is
    intended (the original comment names ``hkunlp/instructor-base`` as the
    alternative model, and ``OpenAIEmbeddings()`` as the alternative backend).
    """
    embedder = HuggingFaceInstructEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)


def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    The chain combines three parts:
      * a ``CTransformers`` LLM loading ``llama-2-7b-chat.ggmlv3.q4_0.bin``
        (a relative path, so it is resolved against the current working
        directory) with ``max_new_tokens=128`` and ``temperature=0.01``;
      * ``vectorstore.as_retriever()`` as the retriever;
      * a ``ConversationBufferMemory`` stored under ``chat_history`` that
        returns message objects.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    # Alternatives left by the original author:
    #   llm = ChatOpenAI()
    #   llm = HuggingFaceHub(repo_id="google/flan-t5-xxl",
    #                        model_kwargs={"temperature": 0.5, "max_length": 512})
    llm = CTransformers(
        model="llama-2-7b-chat.ggmlv3.q4_0.bin",
        model_type="llama",
        config={'max_new_tokens': 128, 'temperature': 0.01},
    )

    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )


def handle_userinput(user_question):
    """Send ``user_question`` to the conversation chain and render the chat.

    If no conversation chain has been stored in ``st.session_state`` yet
    (it is still ``None``), a warning is shown and nothing else happens.
    Otherwise the chain's returned ``chat_history`` is saved in the session
    state and every message is rendered: even positions with
    ``user_template``, odd positions with ``bot_template``.

    Args:
        user_question: The text the user typed.
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # Calling None would raise TypeError; tell the user what to do instead.
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The result is rendered as raw HTML, so escape the message text to
        # keep user- or model-supplied markup from being injected into the page.
        safe_content = html.escape(str(message.content))
        st.write(template.replace("{{MSG}}", safe_content),
                 unsafe_allow_html=True)



# ---- Streamlit page script: runs top to bottom on every rerun ----

# Load variables from a .env file, if present, into the environment.
load_dotenv()
st.set_page_config(page_title="Chat with PDFs!",
                    page_icon=":speech_balloon:")
st.write(css, unsafe_allow_html=True)

# Initialise the per-session slots once so later code can read them safely.
if "conversation" not in st.session_state:
    st.session_state.conversation = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = None

st.header("Chat with PDFs! :speech_balloon:")
user_question = st.text_input("Ask a question about your uploaded documents:")
if user_question:
    with st.spinner("Processing"):
        handle_userinput(user_question)

with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader(
        "Upload your PDFs here and click on 'Enter'", accept_multiple_files=True)
    if st.button("Enter"):
        if not pdf_docs:
            # Nothing uploaded: building an index from zero texts would fail.
            st.warning("Please upload at least one PDF before clicking 'Enter'.")
        else:
            with st.spinner("Processing"):
                # get pdf
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)

                if not text_chunks:
                    # No text could be extracted, so there is nothing to embed
                    # or index.
                    st.error("No extractable text was found in the uploaded PDFs.")
                else:
                    # create embeddings and vector store
                    vectorstore = get_vectorstore(text_chunks)

                    # create conversation chain
                    st.session_state.conversation = get_conversation_chain(
                        vectorstore)



Overwriting app.py


In [15]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -);
# the response is the external IPv4 address of the machine running this notebook.
! wget -q -O - ipv4.icanhazip.com

35.221.40.81


In [18]:
! streamlit run appbehi.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.221.40.81:8501[0m
[0m
[K[?25hnpx: installed 22 in 2.342s
your url is: https://floppy-hairs-drum.loca.lt
[34m  Stopping...[0m
^C
