# Title
[]()

In [6]:
import os

# documents
from langchain.document_loaders import DirectoryLoader
# from langchain.document_loaders import TextLoader

from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_retriever_tool

# Creating the Agent
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.chat_models import ChatOpenAI

# Create memory 
from langchain.memory import ConversationBufferMemory

# Create the chain
from langchain.chains import (
    StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
)
from langchain.prompts import PromptTemplate


# Initialize Dictionaries

In [2]:
tool_dict = dict()
embeddings_dict = dict()
db_dict = dict()
retriever_dict = dict()
vector_dict = dict()
description_dict = dict()

In [12]:
answer_dict=dict()
conversation_dict = dict()

# iteration 1

In [3]:

def create_documents(directory='../data/', glob='**/*.txt', show_progress=True):
    loader = DirectoryLoader(directory, glob=glob, show_progress=show_progress)
    documents = loader.load()
    print(f'Number of files: {len(documents)}')
    return documents

documents = create_documents()
documents

  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:06<00:00,  3.14s/it]

Number of files: 2





[Document(page_content='City of Vancouver\n\nhttps://vancouver.ca/home\n\n\n\nproperty\n\n\n\ndevelopment/apartments\n\n\n\ncondos\n\n\n\nand\n\n\n\ntownhomes.aspx\n\nApartments, condos, and townhomes\n\nWe do not provide waste collection services to most multi-unit buildings.\n\nHowever, we do provide information and resources to building owners and managers to manage waste at apartments, condos, and townhomes.\n\nWaste Management Canada collects your recycling on behalf of Recycle BC External website, opens in new tab Phone Waste Management Canada at 604-282-7961 for questions and issues about your recycling service.', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='City of Vancouver\n\nhttps://vancouver.ca/home\n\n\n\nproperty\n\n\n\ndevelopment/single\n\n\n\nfamily\n\n\n\nhomes\n\n\n\nand\n\n\n\nduplexes.aspx\n\nSingle family homes and duplexes We collect residential garbage, food scraps, and yard waste for single family homes an

In [13]:
# https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.CharacterTextSplitter.html?highlight=charactertextsplitter#langchain.text_splitter.CharacterTextSplitter.split_documents
text_splitter = CharacterTextSplitter(chunk_size=30, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
texts

Created a chunk of size 33, which is longer than the specified 30
Created a chunk of size 73, which is longer than the specified 30
Created a chunk of size 134, which is longer than the specified 30
Created a chunk of size 130, which is longer than the specified 30
Created a chunk of size 67, which is longer than the specified 30
Created a chunk of size 234, which is longer than the specified 30


[Document(page_content='City of Vancouver', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='https://vancouver.ca/home', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='property', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='development/apartments\n\ncondos', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='and\n\ntownhomes.aspx', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='Apartments, condos, and townhomes', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='We do not provide waste collection services to most multi-unit buildings.', metadata={'source': '..\\data\\Vancouver apartments condos townhomes.txt'}),
 Document(page_content='However, we do provide information and 

In [15]:
print(type(texts[0]))
print(texts[0].page_content)

<class 'langchain.schema.document.Document'>
City of Vancouver


In [4]:
def create_retriever(documents, site_key, vector_dict=vector_dict, text_splitter=None):
    """
    Parameters:
        - text_splitter (optional): a text splitter object. If None, the documents are not split. 
    """
    embeddings_dict[site_key] = OpenAIEmbeddings()
    if text_splitter is None: # object type is the same (class 'langchain.schema.document.Document') whether or not the documents are split
        texts = documents
    else:
        texts = text_splitter.split_documents(documents)

    vector_dict[site_key] = FAISS.from_documents(texts, embeddings_dict[site_key])
    retriever_dict[site_key] = vector_dict[site_key].as_retriever()
    return retriever_dict

retriever_dict = create_retriever(documents, 'CoV')
retriever_dict


{'CoV': VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x00000226CA1DC5D0>, search_type='similarity', search_kwargs={})}

In [18]:
type(retriever_dict['CoV'])

langchain.vectorstores.base.VectorStoreRetriever

In [24]:
retriever_dict.items()

dict_items([('CoV', VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x0000029FE0CCAF50>, search_type='similarity', search_kwargs={}))])

In [32]:
retriever_dict['CoV']

VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x0000029FE0CCAF50>, search_type='similarity', search_kwargs={})

## Create tools list

In [5]:

def create_tools_list(retriever_dict, description_dict):
    """
    https://api.python.langchain.com/en/latest/agents/langchain.agents.agent_toolkits.conversational_retrieval.tool.create_retriever_tool.html?highlight=create_retriever_tool#langchain.agents.agent_toolkits.conversational_retrieval.tool.create_retriever_tool
    """
    tools_list = []
    for site_key, retriever in retriever_dict.items():
        tool_name = f'search_{site_key}'
        tool = create_retriever_tool(retriever_dict[site_key], tool_name, description_dict[site_key])
        tools_list.append(tool)
    return tools_list

description_dict['CoV'] = 'Searches and returns documents regarding waste and recycling in the City of Vancouver.'

tools_list = create_tools_list(retriever_dict, description_dict)
tools_list



[Tool(name='search_CoV', description='Searches and returns documents regarding waste and recycling in the City of Vancouver.', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, func=<bound method BaseRetriever.get_relevant_documents of VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x00000226CA1DC5D0>, search_type='similarity', search_kwargs={})>, coroutine=<bound method BaseRetriever.aget_relevant_documents of VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x00000226CA1DC5D0>, search_type='similarity', search_kwargs={})>)]

## Create chatbot
https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.openai.ChatOpenAI.html?highlight=chatopenai#langchain.chat_models.openai.ChatOpenAI

Need to customize prompt template

In [14]:


def create_chatbot(tools_list=tools_list, verbose=True):

    llm = ChatOpenAI(
        temperature = 0,
        openai_organization=os.environ['openai_organization'],
        openai_api_key=os.environ['openai_api_key'],
        )

    agent_executor = create_conversational_retrieval_agent(llm, tools_list, verbose)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    template = (
        "Combine the chat history and follow up question into "
        "a standalone question. Chat History: {chat_history}"
        "Follow up question: {question}"
    )
    # prompt = PromptTemplate.from_template(template)
    # question_generator_chain = LLMChain(llm=llm, prompt=prompt)
    # chain = ConversationalRetrievalChain(question_generator=question_generator_chain)
    # qa = chain.from_llm(
    #     llm, vector_dict[site_key].as_retriever(), memory=memory
    #     )
    # result = qa({"question": user_input})
    chain = ConversationalRetrievalChain()
    template = """You are a helpful assistant who provides concise answers to residents in Metro Vancouver, Canada.
    To make your answer more concise, you ask follow up questions if needed so you can provide the most relevant answer.
    Given the following conversation and a follow up question, rephrase the follow up question 
    to be a standalone question, in its original language.\n\n
    Chat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:
    """
    prompt = PromptTemplate(
        input_variables=['chat_history', 'question'], 
        output_parser=None, partial_variables={}, 
        template=template, template_format='f-string', validate_template=True)
    chat = chain.from_llm(
        llm, vector_dict[site_key].as_retriever(), memory=memory,
        condense_question_prompt=prompt
        )

    return chat

def chat_with_chatbot(user_input, chat, verbose=True):
    result = qa({"question": user_input})
    
    return result

conversation_id = 1
input_id = 1

query = "Where do I recycle coffee cups in Vancouver?"
conversation_dict[conversation_id] = create_chatbot()
answer_dict[input_id] = chat_with_chatbot(query, chat_dict[conversation_id])

ValidationError: 3 validation errors for ConversationalRetrievalChain
combine_docs_chain
  field required (type=value_error.missing)
question_generator
  field required (type=value_error.missing)
retriever
  field required (type=value_error.missing)

### iteration 2

In [17]:


def create_chatbot(tools_list=tools_list, verbose=True):

    llm = ChatOpenAI(
        temperature = 0,
        openai_organization=os.environ['openai_organization'],
        openai_api_key=os.environ['openai_api_key'],
        )

    agent_executor = create_conversational_retrieval_agent(llm, tools_list, verbose)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    template = (
        "Combine the chat history and follow up question into "
        "a standalone question. Chat History: {chat_history}"
        "Follow up question: {question}"
    )
    template = """You are a helpful assistant who provides concise answers to residents in Metro Vancouver, Canada.
    To make your answer more concise, you ask follow up questions if needed so you can provide the most relevant answer.
    Where relevant, you retrieve the relevant information from your documents to answer the resident's question.
    Here is your chat history with the resident: \n\n{chat_history}\n\n
    Respond to the resident's query, which are delimited by triple backticks: ```{question}```
    """
    prompt = PromptTemplate(
        input_variables=['chat_history', 'question'], 
        output_parser=None, partial_variables={}, 
        template=template, template_format='f-string', validate_template=True)
    chat = ConversationalRetrievalChain.from_llm(
        llm, vector_dict[site_key].as_retriever(), memory=memory,
        condense_question_prompt=prompt
        )

    return chat

def chat_with_chatbot(user_input, chat, verbose=True):
    result = qa({"question": user_input})
    
    return result

conversation_id = 1
input_id = 1

query = "Where do I recycle coffee cups in Vancouver?"
conversation_dict[conversation_id] = create_chatbot()
answer_dict[input_id] = chat_with_chatbot(query, chat_dict[conversation_id])

NameError: name 'site_key' is not defined

### iteration 3

In [21]:


def create_chatbot(tools_list=tools_list, vector_store=vector_dict['CoV'], verbose=True):

    llm = ChatOpenAI(
        temperature = 0,
        openai_organization=os.environ['openai_organization'],
        openai_api_key=os.environ['openai_api_key'],
        )

    agent_executor = create_conversational_retrieval_agent(llm, tools_list, verbose)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    template = (
        "Combine the chat history and follow up question into "
        "a standalone question. Chat History: {chat_history}"
        "Follow up question: {question}"
    )
    template = """You are a helpful assistant who provides concise answers to residents in Metro Vancouver, Canada.
    To make your answer more concise, you ask follow up questions if needed so you can provide the most relevant answer.
    Where relevant, you retrieve the relevant information from your documents to answer the resident's question.
    Here is your chat history with the resident: \n\n{chat_history}\n\n
    Respond to the resident's query, which are delimited by triple backticks: ```{question}```
    """
    prompt = PromptTemplate(
        input_variables=['chat_history', 'question'], 
        output_parser=None, partial_variables={}, 
        template=template, template_format='f-string', validate_template=True)
    chat = ConversationalRetrievalChain.from_llm(
        llm, vector_store.as_retriever(), memory=memory,
        condense_question_prompt=prompt
        )

    return chat

def chat_with_chatbot(user_input, chat, verbose=True):
    result = chat({"question": user_input})
    
    return result

conversation_id = 1
input_id = 1

query = "Where do I recycle coffee cups in Vancouver?"
conversation_dict[conversation_id] = create_chatbot()
answer_dict[input_id] = chat_with_chatbot(query, conversation_dict[conversation_id])

In [22]:
answer_dict[input_id]

{'question': 'Where do I recycle coffee cups in Vancouver?',
 'chat_history': [HumanMessage(content='Where do I recycle coffee cups in Vancouver?', additional_kwargs={}, example=False),
  AIMessage(content='For single-family homes and duplexes in Vancouver, you can recycle coffee cups in your blue bin provided by GFL Environmental. You can contact GFL Environmental at 604-282-7966 or email srrecycle@gflenv.com for any questions or issues regarding your recycling service, including coffee cup recycling.\n\nFor apartments, condos, and townhomes, waste collection services are not provided by the City of Vancouver. Waste Management Canada collects recycling on behalf of Recycle BC for these types of buildings. You can contact Waste Management Canada at 604-282-7961 for questions and issues about recycling, including coffee cup recycling.', additional_kwargs={}, example=False)],
 'answer': 'For single-family homes and duplexes in Vancouver, you can recycle coffee cups in your blue bin provi

In [26]:
print(len(answer_dict[input_id]['answer']))
print(answer_dict[input_id]['answer'])

636
For single-family homes and duplexes in Vancouver, you can recycle coffee cups in your blue bin provided by GFL Environmental. You can contact GFL Environmental at 604-282-7966 or email srrecycle@gflenv.com for any questions or issues regarding your recycling service, including coffee cup recycling.

For apartments, condos, and townhomes, waste collection services are not provided by the City of Vancouver. Waste Management Canada collects recycling on behalf of Recycle BC for these types of buildings. You can contact Waste Management Canada at 604-282-7961 for questions and issues about recycling, including coffee cup recycling.


# *End of Page*