<a href="https://colab.research.google.com/github/gastan81/generative-ai/blob/main/0_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chatbot

## 1. Settings

Installation

In [1]:
%%bash
pip install -qqq -U faiss-cpu
#!pip install -qqq -U langchain
pip install -qqq -U langchain-community
pip install -qqq -U langchain-huggingface
pip install -qqq -U pypdf
pip install -qqq -U streamlit

In [2]:
%%bash
pip install -qqq -U jupyter
pip install -qqq -U ipywidgets

Libraries

In [3]:
import os
#from google.colab import userdata
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

Colab: token

In [None]:
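# Colab only: uncomment this line (and the `google.colab` import in the
# Libraries cell) to copy the value stored under 'HF_TOKEN' in Colab userdata
# into the environment.
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get('HF_TOKEN')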

Local: token

In [None]:
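# Local runs: HUGGINGFACEHUB_API_TOKEN is presumably already set in the
# environment. The lookup below is kept for reference only; `token` is not
# referenced by any other cell shown in this notebook.
# token = os.getenv('HUGGINGFACEHUB_API_TOKEN')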

Model

In [17]:
# Candidate models and the notes recorded while trying them:
#   'microsoft/Phi-3.5-mini-instruct'       alternative (no issue noted)
#   'Qwen/Qwen2.5-7B-Instruct'              empty outputs
#   'mistralai/Mistral-Nemo-Instruct-2407'  long time no response
#   'microsoft/phi-4-gguf'                  ReadTimeout: (ReadTimeoutError("HTTPSConnectionPool(host='api-inference.huggingface.co', port=443): Read timed out. (read timeout=120)")
#   'microsoft/phi-4'                       The model microsoft/phi-4 is too large to be loaded automatically (29GB > 10GB).
hf_model = 'mistralai/Mistral-7B-Instruct-v0.3'

# Text-generation client for the selected Hugging Face repository.
llm = HuggingFaceEndpoint(
    repo_id=hf_model,
)

In [5]:
# Embedding model used to turn text into vectors for the FAISS index.
# Alternative that was tried: 'Qwen/Qwen2.5-7B-Instruct'
embedding_model = 'sentence-transformers/all-MiniLM-l6-v2'

# Folder passed to the embedding wrapper as its model cache location.
embeddings_folder = "data/cache/"

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model,
    cache_folder=embeddings_folder,
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embedding

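Create the vector index from the loaded document (one-off step: the cells below are left commented out because the saved index is loaded further down)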

In [5]:
# # The document
# # file_id = '19dUK3V5K8cvu1E5uPzdx_G6uPXMT1zFw' # Google drive ID: CIA world factbook 2018-2019
# # file_id = '1uROXFibYrryFmJmd-oD3edD4o4bg_9iu' # Google drive ID: CIA world factbook 2021-2022
# # file_id = '19gAxv0fhFpu_dLrd0jlNNCbOOIAMTtpt' # Google drive ID: CIA world factbook 2023-2024
# # file = 'https://drive.google.com/uc?export=download&id=' + file_id
# file = 'data/The CIA World Factbook 2023-2024.pdf'

In [6]:
# # Read pages
# loader = PyPDFLoader(file)
# pages = []
# async for page in loader.alazy_load():
#     pages.append(page)

In [7]:
# # Split text
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
# docs = text_splitter.split_documents(pages)

In [None]:
# # Review the loaded document
# print(f"{docs[10].metadata}\n")
# print(docs[10].page_content)

In [8]:
# # Create vector
# vector_db = FAISS.from_documents(docs, embeddings)

In [9]:
# # Save vector
# vector_db.save_local("data/CIA_2023_2024_faiss_index")

Load the vector index from the saved FAISS index

In [6]:
# Load the previously saved FAISS index from disk.
# NOTE: allow_dangerous_deserialization is needed because the index is stored
# with pickle. Pickle files can be modified to deliver a malicious payload that
# executes arbitrary code, so only load index files you created yourself.
vector_db = FAISS.load_local(
    "data/CIA_2023_2024_faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

Retriever

In [7]:
# Expose the vector store as a retriever; "k" is the number of chunks requested per query.
retriever = vector_db.as_retriever(
    search_kwargs=dict(k=2),
)

Chat setup

In [18]:
# --- Answering prompt --------------------------------------------------------
# System instructions for the final answer. Only {context} is interpolated here:
# the conversation history and the new question are supplied as separate chat
# messages below, so they must not be repeated inside the system text
# (previously both were sent twice, the history as a raw Python list repr).
template = """You are a nice chatbot having a conversation with a human.
Answer the question based only on the following context and previous conversation.
Keep your answers very short and succinct. Give me only 1 question-answer, no more.

Context to answer question:
{context}
"""
# Unused prompt wording kept for reference:
# ", informative, and clear, so that the counterpart can learn from you."

prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# --- Query-rephrasing prompt -------------------------------------------------
# The history-aware retriever asks the LLM to turn a follow-up question into a
# standalone search query BEFORE any documents are retrieved. It therefore needs
# its own prompt without {context}; reusing the answering prompt made the LLM
# produce an answer that was then used as the search query.
rephrase_template = """Given the previous conversation and the new human question, \
rewrite the question as a standalone question that can be understood without the conversation.
Do not answer the question; return only the rewritten question."""

rephrase_prompt = ChatPromptTemplate.from_messages([
    ("system", rephrase_template),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever that rewrites the question with the chat history when history exists.
doc_retriever = create_history_aware_retriever(
    llm, retriever, rephrase_prompt
)

# Chain that stuffs the retrieved documents into {context} and asks the LLM to answer.
doc_chain = create_stuff_documents_chain(llm, prompt)

Chatbot

In [None]:
# Full RAG pipeline: retrieve documents for the question, then answer from them.
chain = create_retrieval_chain(
    doc_retriever, doc_chain
)

# Running transcript of the conversation as role/content dictionaries; it is
# passed back to the chain on every turn as "chat_history".
history = []

# Start the conversation loop
while True:
    try:
        # Strip surrounding whitespace so that e.g. "end " still ends the chat
        user_input = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the kernel was interrupted: leave without a traceback
        print("Ending the conversation. Goodbye!")
        break

    # Check for exit condition
    if user_input.lower() == 'end':
        print("Ending the conversation. Goodbye!")
        break

    # Do not send an empty question to the model
    if not user_input:
        continue

    # Get the response from the conversation chain.
    # The retrieval chain replaces "context" with the retrieved documents before
    # answering; the value supplied here only has to exist so that a prompt
    # declaring {context} can still be formatted during the rephrasing step.
    response = chain.invoke({"input": user_input, "chat_history": history, "context": retriever})

    # Record both sides of the turn for the next invocation
    history.extend([
        {"role": "human", "content": response["input"]},
        {"role": "assistant", "content": response["answer"]},
    ])

    # Print the chatbot's response
    print(response["answer"])

Streamlit

In [22]:
%%writefile data/CIA_2023_2024_streamlit_app.py

from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import streamlit as st

# llm
hf_model = 'mistralai/Mistral-7B-Instruct-v0.3'
# hf_model = 'microsoft/Phi-3.5-mini-instruct'
llm = HuggingFaceEndpoint(repo_id=hf_model)

# embeddings
embedding_model = 'sentence-transformers/all-MiniLM-l6-v2'
embeddings_folder = 'data/cache/'

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model
    , cache_folder=embeddings_folder)

# load Vector Database
# allow_dangerous_deserialization is needed. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine
vector_db = FAISS.load_local('data/CIA_2023_2024_faiss_index', embeddings, allow_dangerous_deserialization=True)

# retriever
retriever = vector_db.as_retriever(search_kwargs={"k": 2})

# prompt
template = """You are a nice chatbot having a conversation with a human.
Answer the question based only on the following context and previous conversation.
Keep your answers short, succinct. Give only one question-answer.

Previous conversation:
{chat_history}

Context to answer question:
{context}

New human question: {input}
Response:"""
# , informative, and clear, so that the couterpart can learn from you.

prompt = ChatPromptTemplate.from_messages([
    ('system', template),
    MessagesPlaceholder(variable_name='chat_history'),
    ('human', '{input}'),
])

# bot with memory
@st.cache_resource
def init_bot():
    doc_retriever = create_history_aware_retriever(llm, retriever, prompt)
    doc_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(doc_retriever, doc_chain)

rag_bot = init_bot()


##### streamlit #####

st.title('CIA World Factbook 2023-2024')

# Initialise chat history
# Chat history saves the previous messages to be displayed
if 'messages' not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.markdown(message['content'])

# React to user input
if prompt := st.chat_input('Want to know CIA secrets?'):

    # Display user message in chat message container
    st.chat_message('human').markdown(prompt)

    # Add user message to chat history
    st.session_state.messages.append({'role': 'human', 'content': prompt})

    # Begin spinner before answering question so it's there for the duration
    with st.spinner('Asking CIA...'):

        # send question to chain to get answer
        answer = rag_bot.invoke({'input': prompt, 'chat_history': st.session_state.messages, 'context': retriever})

        # extract answer from dictionary returned by chain
        response = answer['answer']

        # Display chatbot response in chat message container
        with st.chat_message('assistant'):
            st.markdown(response)

        # Add assistant response to chat history
        st.session_state.messages.append({'role': 'assistant', 'content':  response})

Overwriting data/CIA_2023_2024_streamlit_app.py
