In [12]:
import streamlit as st
from streamlit_chat import message
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from sentence_transformers import SentenceTransformer
import torch

In [13]:
def load_documents(directory='TUG', glob_pattern="*.pdf"):
    """Load the PDF files found under ``directory``.

    Args:
        directory: Folder handed to ``DirectoryLoader``. Defaults to
            ``'TUG'`` so existing zero-argument calls behave as before.
        glob_pattern: Glob used by ``DirectoryLoader`` to pick files.
            Defaults to ``"*.pdf"``.

    Returns:
        The result of ``DirectoryLoader.load()``, with every matched file
        read through ``PyPDFLoader``.
    """
    loader = DirectoryLoader(directory, glob=glob_pattern, loader_cls=PyPDFLoader)
    return loader.load()

In [14]:
def split_text_into_chunks(documents, chunk_size=500, chunk_overlap=50):
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents to split, passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value this notebook previously hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults
            to 50, likewise the previous hard-coded value.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

In [15]:
def create_embeddings():
    """Build the embedding model wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for the
        ``sentence-transformers/all-mpnet-base-v2`` model with its device
        set to ``"cpu"``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"},
    )

In [16]:
def create_vector_store(text_chunks, embeddings):
    """Build a FAISS vector store from document chunks.

    Args:
        text_chunks: Chunks to index, forwarded to ``FAISS.from_documents``.
        embeddings: Embedding object forwarded alongside the chunks.

    Returns:
        The store returned by ``FAISS.from_documents``.
    """
    return FAISS.from_documents(text_chunks, embeddings)

In [17]:
def create_llms_model():
    """Instantiate the local LLM through ``CTransformers``.

    Returns:
        A ``CTransformers`` object pointing at the GGUF file under
        ``LLM_model/`` and configured with the generation settings below.
    """
    model_path = 'LLM_model/mistral-7b-instruct-v0.1.Q4_K_M.gguf'
    # NOTE(review): no context length is configured here; confirm that
    # max_new_tokens=8192 actually fits the model's context window.
    generation_config = {'max_new_tokens': 8192, 'temperature': 1}
    return CTransformers(model=model_path, config=generation_config)

In [18]:
def initialize_app():
    """Render the page title and the CSS that styles it.

    Calls ``st.title`` with the app name, then injects a ``<style>`` tag
    through ``st.markdown`` that colours ``h1`` elements green and centres
    them. Returns ``None``.
    """
    # Call st.title; assigning to it would replace Streamlit's function with
    # a plain string and no title would ever be rendered.
    st.title('BWM_CHATBOT')
    st.markdown('<style>h1{color: green; text-align: center;}</style>', unsafe_allow_html=True)

In [19]:
# Seed the Streamlit session with default values. Keys that are already
# present are left untouched, so existing values are never overwritten.
for _key, _default in (
    ('history', []),
    ('generated', ['Hello I am here to guide you']),
    ('past', ['Welcome Back']),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# Drop the loop variables so the cell leaves no extra globals behind.
del _key, _default


In [20]:
# Load the source PDFs via load_documents() defined earlier in the notebook.
documents = load_documents()

In [21]:
# Split the loaded documents into chunks for embedding.
text_chunks = split_text_into_chunks(documents)
# Spot-check one chunk; index 5 raises IndexError if fewer than six chunks exist.
text_chunks[5]

Document(page_content='3.1  Berth Module access . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  8\n3.1.1  Berth Module main view . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  8\n3.2  Terminal Selection . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .  16', metadata={'source': 'TUG/Berth Window Management - Terminal User Guide.pdf', 'page': 2})

In [22]:
# Build the HuggingFace embedding wrapper via create_embeddings().
embeddings = create_embeddings()

In [23]:
# Index the chunks in a FAISS vector store using the embeddings above.
vector_store = create_vector_store(text_chunks, embeddings)

In [24]:
# Display the store's repr as a quick check that it was created.
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x173df9ff0>

In [25]:
# Instantiate the CTransformers LLM from the local GGUF model file.
llm = create_llms_model()

In [26]:
# Conversation memory that the chain reads and writes under the
# 'chat_history' key, with return_messages switched on.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

In [27]:
# Assemble the retrieval chain: the local LLM, a 'stuff' combine step, a
# retriever over the FAISS store configured with k=2, and the shared
# conversation memory.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type='stuff',
    retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
    memory=memory,
)

In [28]:
# Running transcript of (question, answer) pairs, oldest first.
history = []


def conversation_chat(query):
    """Send ``query`` to the retrieval chain and record the exchange.

    Args:
        query: The user's question as a string.

    Returns:
        The value stored under ``"answer"`` in the chain's result.

    Side effects:
        Appends ``(query, answer)`` to the module-level ``history`` list.
    """
    # NOTE(review): the module-level chain is also built with a
    # ConversationBufferMemory keyed 'chat_history'; confirm whether the chain
    # uses that memory or the list passed here.
    # invoke() replaces the deprecated direct call on the chain object.
    result = chain.invoke({"question": query, "chat_history": history})
    answer = result["answer"]
    # Store an ordered (question, answer) tuple: a set would lose the ordering
    # and merge the two entries whenever the strings are identical.
    history.append((query, answer))
    return answer

In [29]:
# Smoke-test the chain with a single hard-coded question and print the reply.
user_input = "What is a Scenario?"
output = conversation_chat(user_input)
print("Bot: ", output)

  warn_deprecated(
